initial commit

Константин Шуткин 2024-02-22 20:08:28 +03:00
commit 75517e5eb5
6 changed files with 241 additions and 0 deletions

backup_etcd.sh Executable file (9 lines added)

@@ -0,0 +1,9 @@
#!/bin/bash
set -e
trap "echo 'sending fail backup status to vmagent...' && curl -k -d 'etcd_backup,hostname=ml-cbt-01 status=0.0' -X POST https://vmagent.at-kube.mosmetro.ru/write" ERR # отсылаем алёрт в VictoriaMetrics, если одна из команд была неуспешной
cd /share/kubernetes/backups/etcd/ # переходим в папку с бэкапами
timestamp=$(date +"%Y-%m-%d-%H-%M-%S")
ETCDCTL_API=3 /usr/bin/etcdctl --cacert /etc/kubernetes/pki/etcd/ca.crt --cert /etc/kubernetes/pki/etcd/server.crt --key /etc/kubernetes/pki/etcd/server.key --endpoints=https://127.0.0.1:2379 snapshot save $timestamp.db # бэкапим etcd
ETCDCTL_API=3 etcdctl --write-out=table snapshot status $timestamp.db # проверяем, что с бэкапом всё ок
rm `ls -t | awk 'NR>7'` # оставляем только 7 последних бэкапов, остальные удаляем
echo 'sending success backup status to vmagent...' && curl -k -d 'etcd_backup,hostname=ml-cbt-01 status=1' -X POST https://vmagent.at-kube.mosmetro.ru/write # отправляем информацию об успешном бэкапе
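
A hypothetical way to schedule this backup (the install path and cron schedule below are illustrative assumptions, not part of this commit):

sudo install -m 0755 backup_etcd.sh /usr/local/bin/backup_etcd.sh # assumed install location
echo '30 2 * * * root /usr/local/bin/backup_etcd.sh' | sudo tee /etc/cron.d/etcd-backup # run nightly at 02:30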

convert_submodule_to_folder.sh Executable file (19 lines added)

@@ -0,0 +1,19 @@
# $1 - path/to/module/ (path of the submodule inside the repository)
# $2 - submodule number (used to name the temporary merge branch)
# $3 - branch of the submodule to merge
# $4 - name of the submodule's repository
git remote rm submodule_origin
git rm $1
git commit -m "Remove $4 submodule"
git remote add submodule_origin ssh://git@workbench.mosmetro.ru:2286/autonomous-tram/$4.git
git fetch submodule_origin
git lfs fetch submodule_origin --all
git branch merge-branch-$2 submodule_origin/$3
git checkout merge-branch-$2
git lfs fetch submodule_origin --all
mkdir -p $1
git ls-tree -z --name-only HEAD | xargs -0 -I {} git mv {} $1
git commit -m "Moved files to $1"
git checkout feature/merge-submodules
git merge --allow-unrelated-histories merge-branch-$2
git push --set-upstream origin feature/merge-submodules
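
A hypothetical invocation from the superproject root, assuming a submodule checked out at libs/sensor-fusion/ from a repository named sensor-fusion whose develop branch should be merged (all example values; the script also expects the feature/merge-submodules branch to already exist):

bash convert_submodule_to_folder.sh libs/sensor-fusion/ 1 develop sensor-fusion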

create_systemd_service.sh Executable file (18 lines added)

@@ -0,0 +1,18 @@
# $1 - name of service
# $2 - description of service
# $3 - path to script
cat << EOF > /lib/systemd/system/$1.service
[Unit]
Description=$2
After=network-online.target
[Service]
ExecStart=$3
Type=oneshot
StandardOutput=journal
[Install]
WantedBy=multi-user.target
EOF
systemctl enable --now $1
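
A hypothetical invocation (the service name, description, and script path are examples only; the oneshot type above assumes the target script runs to completion rather than staying in the foreground):

sudo bash create_systemd_service.sh etcd-backup "Backup etcd and report status to vmagent" /usr/local/bin/backup_etcd.sh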

ds_up_down.sh Executable file (7 lines added)

@@ -0,0 +1,7 @@
#!/bin/bash
if [ $1 = "down" ]; then
KUBECONFIG=/etc/kubernetes/admin.conf kubectl -n $2 patch daemonset $3 -p '{"spec": {"template": {"spec": {"nodeSelector": {"non-existing": "true"}}}}}'
elif [ $1 = "up" ]; then
KUBECONFIG=/etc/kubernetes/admin.conf kubectl -n $2 patch daemonset $3 --type json -p='[{"op": "remove", "path": "/spec/template/spec/nodeSelector/non-existing"}]'
fi
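
Hypothetical usage (the namespace and daemonset name are examples, not from the commit):

sudo bash ds_up_down.sh down kube-system node-exporter # take the daemonset's pods down without deleting it
sudo bash ds_up_down.sh up kube-system node-exporter # bring them back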

kube_ha.sh Executable file (144 lines added)

@@ -0,0 +1,144 @@
# Add Docker's official GPG key:
sudo apt-get update
sudo apt-get install ca-certificates curl nfs-common # nfs-common needed for nfs-client storage class
sudo install -m 0755 -d /etc/apt/keyrings
sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
sudo chmod a+r /etc/apt/keyrings/docker.asc
# Add the repository to Apt sources:
echo \
"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \
$(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \
sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get update
# install docker/containerd
sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
# install kubelet kubeadm kubectl
curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.29/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.29/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list
sudo apt update
sudo apt install -y kubelet kubeadm kubectl
sudo apt-mark hold kubelet kubeadm kubectl
# install nvidia-container-toolkit and make it default runtime
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
sudo apt update && sudo apt install nvidia-container-toolkit -y
cat << EOF | sudo tee /etc/containerd/config.toml > /dev/null
version = 2
[plugins]
[plugins."io.containerd.grpc.v1.cri"]
[plugins."io.containerd.grpc.v1.cri".containerd]
default_runtime_name = "nvidia"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
BinaryName = "/usr/bin/nvidia-container-runtime"
SystemdCgroup = true
EOF
sudo service containerd restart
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker
# install nginx for load balancing
sudo apt install nginx-light libnginx-mod-stream -y
cat << EOF | sudo tee /etc/nginx/nginx.conf > /dev/null
error_log stderr notice;
load_module /lib/nginx/modules/ngx_stream_module.so;
worker_processes auto;
worker_rlimit_nofile 130048;
worker_shutdown_timeout 10s;
events {
multi_accept on;
use epoll;
worker_connections 16384;
}
stream {
upstream kube_apiserver {
least_conn;
server 127.0.0.1:6443;
server 10.239.10.222:6443;
server 10.239.10.223:6443;
}
server {
listen 127.0.0.1:8080;
proxy_pass kube_apiserver;
proxy_timeout 10m;
proxy_connect_timeout 1s;
}
}
http {
aio threads;
aio_write on;
tcp_nopush on;
tcp_nodelay on;
keepalive_timeout 5m;
keepalive_requests 100;
reset_timedout_connection on;
server_tokens off;
autoindex off;
server {
listen 8081;
location /healthz {
access_log off;
return 200;
}
location /stub_status {
stub_status on;
access_log off;
}
}
}
EOF
sudo service nginx restart
kubeadm init --upload-certs --control-plane-endpoint=127.0.0.1:8080
kubeadm join 127.0.0.1:8080 --token e01u52.c9uq77rkvl3qm86u --discovery-token-ca-cert-hash sha256:a7fc076bdcd7391e8bc7577b54ecc492d319298b5699293f8390042b57866700 --control-plane --certificate-key 2931030dff3041c185715298ab833895c6f36028a97b2f139857641bbe7f66b5 # master
kubeadm join 127.0.0.1:8080 --token e01u52.c9uq77rkvl3qm86u --discovery-token-ca-cert-hash sha256:a7fc076bdcd7391e8bc7577b54ecc492d319298b5699293f8390042b57866700 # worker
# install cilium
CILIUM_CLI_VERSION=$(curl -s https://raw.githubusercontent.com/cilium/cilium-cli/main/stable.txt)
CLI_ARCH=amd64
if [ "$(uname -m)" = "aarch64" ]; then CLI_ARCH=arm64; fi
curl -L --fail --remote-name-all https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}/cilium-linux-${CLI_ARCH}.tar.gz{,.sha256sum}
sha256sum --check cilium-linux-${CLI_ARCH}.tar.gz.sha256sum
sudo tar xzvfC cilium-linux-${CLI_ARCH}.tar.gz /usr/local/bin
rm cilium-linux-${CLI_ARCH}.tar.gz{,.sha256sum}
cilium install --version 1.15.1
# remove the NoSchedule taint from the control-plane nodes so they can also run workloads
kubectl taint nodes ml-cbt-02 ml-cbt-03 node-role.kubernetes.io/control-plane:NoSchedule-
# install tools
curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash # helm
helm plugin install https://github.com/databus23/helm-diff # helm-diff needed for helmfile
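
A quick post-install sanity check (illustrative commands, not part of the commit):

cilium status --wait # wait until the CNI reports healthy
kubectl get nodes -o wide # every control-plane and worker node should become Ready
curl -s http://127.0.0.1:8081/healthz # health endpoint of the local nginx load balancer configured above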

prepare.sh Executable file (44 lines added)

@@ -0,0 +1,44 @@
nala install -y sssd-ldap sssd-tools ldap-utils
mkdir /etc/ldap/ca/
#vim /etc/ldap/ca/ninv.crt
#vim /etc/ldap/ldap.conf
#vim /etc/hosts
#vim /etc/sssd/sssd.conf
chmod 600 /etc/sssd/sssd.conf
pam-auth-update --enable mkhomedir
service sssd restart
nala install -y apt-transport-https ca-certificates curl gpg
curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.29/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.29/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list
nala update
nala install -y kubelet kubeadm kubectl
apt-mark hold kubelet kubeadm kubectl
nala install -y containerd
swapoff -a # kubelet requires swap to be disabled
#vim /etc/fstab # manual step: remove or comment out the swap entry so it stays disabled after reboot
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF
modprobe overlay
modprobe br_netfilter
# sysctl params required by setup, params persist across reboots
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
EOF
# Apply sysctl params without reboot
sudo sysctl --system
kubeadm join 10.239.10.221:6443 --token z51k9o.144c6ntyob9ut43y --discovery-token-ca-cert-hash sha256:baaa860fb0cf4007b31979e0e21fdc45ec12ad2857aba3a82b63ec26044da597
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
nala update && nala install nvidia-driver-535 nvidia-cuda-toolkit nvidia-container-toolkit -y
nvidia-ctk runtime configure --runtime=containerd
service containerd restart
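
A hypothetical check that the worker is ready after joining (commands are illustrative; the GPU driver may need a reboot before it loads):

systemctl is-active kubelet containerd # both services should report "active"
nvidia-smi # the 535 driver is loaded and the GPU is visible
grep -A3 'nvidia' /etc/containerd/config.toml # nvidia-ctk added the nvidia runtime to containerd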