From bffccfcde777c45aa31a2491b99bf041f46a44e2 Mon Sep 17 00:00:00 2001
From: Digital Studium
Date: Sun, 25 Feb 2024 18:24:22 +0300
Subject: [PATCH] Initial commit

---
 backup_etcd.sh                 |  16 ++++
 convert_submodule_to_folder.sh |  21 ++++++
 create_systemd_service.py      |  76 ++++++++++++++++++++
 create_systemd_timer.py        |  57 +++++++++++++++
 ds_up_down.sh                  |  18 +++++
 kube_ha.sh                     | 153 ++++++++++++++++++++++++++++++++++++++++
 prepare.sh                     |  44 ++++++++++++
 7 files changed, 385 insertions(+)
 create mode 100755 backup_etcd.sh
 create mode 100755 convert_submodule_to_folder.sh
 create mode 100755 create_systemd_service.py
 create mode 100755 create_systemd_timer.py
 create mode 100755 ds_up_down.sh
 create mode 100755 kube_ha.sh
 create mode 100755 prepare.sh

diff --git a/backup_etcd.sh b/backup_etcd.sh
new file mode 100755
index 0000000..968d4a7
--- /dev/null
+++ b/backup_etcd.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+set -e
+# Report a failed backup to VictoriaMetrics (through vmagent) if any command fails.
+trap "echo 'sending fail backup status to vmagent...' && curl -k -d 'etcd_backup,hostname=ml-cbt-01 status=0.0' -X POST https:///write" ERR
+# Work inside the backup directory.
+cd /share/kubernetes/backups/etcd/
+timestamp=$(date +"%Y-%m-%d-%H-%M-%S")
+# Take the etcd snapshot.
+ETCDCTL_API=3 /usr/bin/etcdctl --cacert /etc/kubernetes/pki/etcd/ca.crt --cert /etc/kubernetes/pki/etcd/server.crt --key /etc/kubernetes/pki/etcd/server.key --endpoints=https://127.0.0.1:2379 snapshot save "$timestamp.db"
+# Verify that the snapshot is readable.
+ETCDCTL_API=3 etcdctl --write-out=table snapshot status "$timestamp.db"
+# Keep only the 7 most recent backups and delete the rest. xargs -r skips rm when
+# nothing is listed; a bare rm without operands fails and would trip the ERR trap.
+ls -t | awk 'NR>7' | xargs -r rm --
+# Report success; this yields the metric etcd_backup_status with value 1.
+echo 'sending success backup status to vmagent...' && curl -k -d 'etcd_backup,hostname=ml-cbt-01 status=1' -X POST https:///write
diff --git a/convert_submodule_to_folder.sh b/convert_submodule_to_folder.sh
new file mode 100755
index 0000000..78b1395
--- /dev/null
+++ b/convert_submodule_to_folder.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Replace a git submodule with a regular folder, merging in the submodule's history.
+# $1 - path/to/module/
+# $2 - number-of-submodule
+# $3 - submodule merging branch
+# $4 - repo name
+git remote rm submodule_origin
+git rm "$1"
+git commit -m "Remove $4 submodule"
+git remote add submodule_origin "ssh:///$4.git"
+git fetch submodule_origin
+git lfs fetch submodule_origin --all
+git branch "merge-branch-$2" "submodule_origin/$3"
+git checkout "merge-branch-$2"
+git lfs fetch submodule_origin --all
+mkdir -p "$1"
+git ls-tree -z --name-only HEAD | xargs -0 -I {} git mv {} "$1"
+git commit -m "Moved files to $1"
+git checkout feature/merge-submodules
+git merge --allow-unrelated-histories "merge-branch-$2"
+git push --set-upstream origin feature/merge-submodules
diff --git a/create_systemd_service.py b/create_systemd_service.py
new file mode 100755
index 0000000..6862104
--- /dev/null
+++ b/create_systemd_service.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+"""Create, enable and start a oneshot systemd service that runs a script."""
+import os
+import subprocess
+import sys
+
+help_text = f"""
+Usage: {os.path.basename(__file__)} $1 $2 $3
+
+required:
+    $1 - name of service
+    $2 - absolute path to script
+
+optional:
+    $3 - description of service
+"""
+number_of_required_arguments = 2
+number_of_optional_arguments = 1
+
+if len(sys.argv) == 1:
+    print(help_text)
+    sys.exit()
+elif sys.argv[1] in ["help", "-h"]:
+    print(help_text)
+    sys.exit()
+elif len(sys.argv) - 1 < number_of_required_arguments:
+    print("You provided not enough arguments")
+    print(help_text)
+    sys.exit(1)
+elif len(sys.argv) > number_of_required_arguments + number_of_optional_arguments + 1:
+    print("You provided extra arguments")
+    print(help_text)
+    sys.exit(1)
+
+name_of_service = sys.argv[1]
+path_to_script = sys.argv[2]
+description = sys.argv[3] if sys.argv[3:4] else ""  # empty if no description
+
+if not os.path.isabs(path_to_script):
+    print("Path to script should be absolute!")
+    print(help_text)
+    sys.exit(1)
+elif not os.path.isfile(path_to_script):
+    print("Path to script should exist and must be file!")
+    print(help_text)
+    sys.exit(1)
+
+
+service_file = f"""
+[Unit]
+Description={description}
+After=network-online.target
+
+[Service]
+Type=oneshot
+ExecStart={path_to_script}
+StandardOutput=journal
+StandardError=journal
+
+[Install]
+WantedBy=multi-user.target
+"""
+
+try:
+    with open(f"/lib/systemd/system/{name_of_service}.service", "w") as f:
+        f.write(service_file)
+    # Argument lists (no shell) keep a path or name containing spaces or shell
+    # metacharacters from being re-interpreted as shell syntax.
+    subprocess.run(["chmod", "+x", path_to_script], check=True)
+    # check=True turns a failing systemctl into an error instead of ignoring it.
+    subprocess.run(["systemctl", "enable", "--now", name_of_service], check=True)
+except (OSError, subprocess.CalledProcessError) as error:
+    print(f"Something went wrong... ({error})")
+    sys.exit(1)
+
+print("Success!!!")
diff --git a/create_systemd_timer.py b/create_systemd_timer.py
new file mode 100755
index 0000000..96dca1a
--- /dev/null
+++ b/create_systemd_timer.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+"""Create, enable and start a systemd timer that triggers a service unit."""
+import os
+import subprocess
+import sys
+
+help_text = f"""
+Usage: {os.path.basename(__file__)} $1 $2
+
+required:
+    $1 - name of service
+    $2 - calendar (OnCalendar= expression, see systemd.time(7))
+"""
+
+number_of_required_arguments = 2
+number_of_optional_arguments = 0
+
+if len(sys.argv) == 1:
+    print(help_text)
+    sys.exit()
+elif sys.argv[1] in ["help", "-h"]:
+    print(help_text)
+    sys.exit()  # stop here; otherwise execution continues and reads sys.argv[2]
+elif len(sys.argv) - 1 < number_of_required_arguments:
+    print("You provided not enough arguments")
+    print(help_text)
+    sys.exit(1)
+elif len(sys.argv) > number_of_required_arguments + number_of_optional_arguments + 1:
+    print("You provided extra arguments")
+    print(help_text)
+    sys.exit(1)
+
+name_of_service = sys.argv[1]
+calendar = sys.argv[2]
+
+timer_file = f"""
+[Unit]
+Description={name_of_service} timer
+
+[Timer]
+Unit={name_of_service}.service
+OnCalendar={calendar}
+
+[Install]
+WantedBy=timers.target
+"""
+
+try:
+    with open(f"/lib/systemd/system/{name_of_service}.timer", "w") as f:
+        f.write(timer_file)
+    # No shell, and check=True so a failing systemctl is reported as an error.
+    subprocess.run(["systemctl", "enable", "--now", f"{name_of_service}.timer"], check=True)
+except (OSError, subprocess.CalledProcessError) as error:
+    print(f"Something went wrong... ({error})")
+    sys.exit(1)
+
+print("Success!!!")
diff --git a/ds_up_down.sh b/ds_up_down.sh
new file mode 100755
index 0000000..56a8346
--- /dev/null
+++ b/ds_up_down.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Switch a DaemonSet "down" by adding a nodeSelector on the label
+# non-existing=true, or back "up" by removing that nodeSelector entry.
+# $1 - "up" or "down"
+# $2 - namespace
+# $3 - daemonset name
+case "$1" in
+    down)
+        KUBECONFIG=/etc/kubernetes/admin.conf kubectl -n "$2" patch daemonset "$3" -p '{"spec": {"template": {"spec": {"nodeSelector": {"non-existing": "true"}}}}}'
+        ;;
+    up)
+        KUBECONFIG=/etc/kubernetes/admin.conf kubectl -n "$2" patch daemonset "$3" --type json -p='[{"op": "remove", "path": "/spec/template/spec/nodeSelector/non-existing"}]'
+        ;;
+    *)
+        echo "Usage: $0 up|down NAMESPACE DAEMONSET" >&2
+        exit 1
+        ;;
+esac
diff --git a/kube_ha.sh b/kube_ha.sh
new file mode 100755
index 0000000..97f0502
--- /dev/null
+++ b/kube_ha.sh
@@ -0,0 +1,153 @@
+#!/bin/bash
+# Add Docker's official GPG key:
+sudo apt-get update
+sudo apt-get install -y ca-certificates curl nfs-common # nfs-common needed for nfs-client storage class
+sudo install -m 0755 -d /etc/apt/keyrings
+sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
+sudo chmod a+r /etc/apt/keyrings/docker.asc
+
+# Add the repository to Apt sources:
+echo \
+  "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
+  $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
+  sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
+sudo apt-get update
+
+# install docker/containerd
+sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
+
+# install kubelet kubeadm kubectl
+curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.29/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
+echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.29/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list
+sudo apt update
+sudo apt install -y kubelet kubeadm kubectl
+sudo apt-mark hold kubelet kubeadm kubectl
+
+# install nvidia-container-toolkit and make it default runtime
+
+curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
+  && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
+    sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
+    sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
+
+sudo apt update && sudo apt install nvidia-container-toolkit -y
+# sudo tee is used because a plain "> file" redirection is performed by the
+# calling shell, not by sudo.
+# NOTE(review): containerd's docs spell the runc option "SystemdCgroup"; confirm
+# that the lowercase "systemdCgroup" below is honoured.
+sudo tee /etc/containerd/config.toml > /dev/null << EOF
+version = 2
+
+[plugins]
+
+  [plugins."io.containerd.grpc.v1.cri"]
+
+    [plugins."io.containerd.grpc.v1.cri".containerd]
+      default_runtime_name = "nvidia"
+
+      [plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
+
+        [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
+          privileged_without_host_devices = false
+          runtime_engine = ""
+          runtime_root = ""
+          runtime_type = "io.containerd.runc.v2"
+
+          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
+            BinaryName = "/usr/bin/nvidia-container-runtime"
+            systemdCgroup = true
+EOF
+sudo service containerd restart
+
+sudo nvidia-ctk runtime configure --runtime=docker
+sudo systemctl restart docker
+
+# install nginx for load balancing
+sudo apt install nginx-light libnginx-mod-stream -y
+
+sudo tee /etc/nginx/nginx.conf > /dev/null << EOF
+error_log stderr notice;
+load_module /lib/nginx/modules/ngx_stream_module.so;
+
+worker_processes auto;
+worker_rlimit_nofile 130048;
+worker_shutdown_timeout 10s;
+
+events {
+  multi_accept on;
+  use epoll;
+  worker_connections 16384;
+}
+
+stream {
+  upstream kube_apiserver {
+    least_conn;
+    server 127.0.0.1:6443;
+    server 10.239.10.222:6443;
+    server 10.239.10.223:6443;
+  }
+
+  server {
+    listen 127.0.0.1:8080;
+    proxy_pass kube_apiserver;
+    proxy_timeout 10m;
+    proxy_connect_timeout 1s;
+  }
+}
+
+http {
+  aio threads;
+  aio_write on;
+  tcp_nopush on;
+  tcp_nodelay on;
+
+  keepalive_timeout 5m;
+  keepalive_requests 100;
+  reset_timedout_connection on;
+  server_tokens off;
+  autoindex off;
+
+  server {
+    listen 8081;
+    location /healthz {
+      access_log off;
+      return 200;
+    }
+    location /stub_status {
+      stub_status on;
+      access_log off;
+    }
+  }
+}
+EOF
+
+sudo service nginx restart
+
+# kubeadm init prints the join commands for this cluster. Supply their values
+# through KUBEADM_TOKEN, KUBEADM_CA_CERT_HASH and KUBEADM_CERTIFICATE_KEY instead
+# of committing credentials to the repository.
+# NOTE(review): init and the two joins look like per-node alternatives (first
+# master / other masters / workers), not sequential steps - confirm.
+sudo kubeadm init --upload-certs --control-plane-endpoint=127.0.0.1:8080
+
+sudo kubeadm join 127.0.0.1:8080 --token "$KUBEADM_TOKEN" --discovery-token-ca-cert-hash "$KUBEADM_CA_CERT_HASH" --control-plane --certificate-key "$KUBEADM_CERTIFICATE_KEY" # master
+
+sudo kubeadm join 127.0.0.1:8080 --token "$KUBEADM_TOKEN" --discovery-token-ca-cert-hash "$KUBEADM_CA_CERT_HASH" # worker
+
+# install cilium
+CILIUM_CLI_VERSION=$(curl -s https://raw.githubusercontent.com/cilium/cilium-cli/main/stable.txt)
+CLI_ARCH=amd64
+if [ "$(uname -m)" = "aarch64" ]; then CLI_ARCH=arm64; fi
+curl -L --fail --remote-name-all https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}/cilium-linux-${CLI_ARCH}.tar.gz{,.sha256sum}
+sha256sum --check cilium-linux-${CLI_ARCH}.tar.gz.sha256sum
+sudo tar xzvfC cilium-linux-${CLI_ARCH}.tar.gz /usr/local/bin
+rm cilium-linux-${CLI_ARCH}.tar.gz{,.sha256sum}
+
+cilium install --version 1.15.1
+
+# untaint masters: the trailing "-" removes the control-plane NoSchedule taint
+kubectl taint nodes ml-cbt-02 ml-cbt-03 node-role.kubernetes.io/control-plane:NoSchedule-
+
+# install tools
+curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash # helm
+helm plugin install https://github.com/databus23/helm-diff # helm-diff needed for helmfile
diff --git a/prepare.sh b/prepare.sh
new file mode 100755
index 0000000..02fb618
--- /dev/null
+++ b/prepare.sh
@@ -0,0 +1,44 @@
+nala install -y sssd-ldap sssd-tools ldap-utils
+mkdir /etc/ldap/ca/
+#vim /etc/ldap/ca/ninv.crt
+#vim /etc/ldap/ldap.conf
+#vim /etc/hosts
+#vim /etc/sssd/sssd.conf
+chmod 600 /etc/sssd/sssd.conf
+pam-auth-update --enable mkhomedir
+service sssd restart
+nala install -y apt-transport-https ca-certificates curl gpg
+curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.29/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
+echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.29/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list
+nala update
+nala install -y kubelet kubeadm kubectl
+apt-mark hold kubelet kubeadm kubectl
+nala install -y containerd
+swapoff -a
+#vim /etc/fstab
+cat <