# useful_scripts/kube_ha.sh
#
# 145 lines
# 4.9 KiB
# Bash
# Executable File

# --- Docker Engine + containerd ----------------------------------------------
# Installs prerequisites, registers Docker's apt repository and installs the
# Docker/containerd packages. Every install uses -y so the script never stops
# at an apt confirmation prompt.
sudo apt-get update
# nfs-common is needed for the nfs-client storage class.
sudo apt-get install -y ca-certificates curl nfs-common

# Add Docker's official GPG key:
sudo install -m 0755 -d /etc/apt/keyrings
sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg \
  -o /etc/apt/keyrings/docker.asc
sudo chmod a+r /etc/apt/keyrings/docker.asc

# Add the repository to Apt sources. Architecture and release codename are
# resolved first so the repo line itself stays readable.
docker_arch="$(dpkg --print-architecture)"
docker_codename="$(. /etc/os-release && echo "$VERSION_CODENAME")"
printf '%s\n' \
  "deb [arch=${docker_arch} signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu ${docker_codename} stable" \
  | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get update

# install docker/containerd
sudo apt-get install -y \
  docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
# --- kubelet / kubeadm / kubectl (pkgs.k8s.io v1.29 stream) -------------------
# Registers the Kubernetes apt repository, installs the three packages and
# pins them so a routine `apt upgrade` cannot move the cluster version.
#
# --yes lets gpg overwrite an existing keyring instead of prompting, so the
# script can be re-run on a node that was already provisioned.
curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.29/deb/Release.key \
  | sudo gpg --yes --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.29/deb/ /' \
  | sudo tee /etc/apt/sources.list.d/kubernetes.list
# apt-get rather than apt: apt's CLI is not meant for scripted use.
sudo apt-get update
sudo apt-get install -y kubelet kubeadm kubectl
# Changing package holds requires root, like the installs above.
sudo apt-mark hold kubelet kubeadm kubectl
# --- NVIDIA container toolkit as the default container runtime ----------------
# Registers NVIDIA's apt repository, installs nvidia-container-toolkit, then
# points both containerd and Docker at the NVIDIA runtime.
#
# --yes lets gpg overwrite an existing keyring instead of prompting on re-runs.
# The repo list is only fetched if the key import succeeded (&&).
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
  | sudo gpg --yes --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
  && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
  | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
  | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit

# Replace containerd's config so the CRI plugin uses the "nvidia" runtime
# (runc v2 shim driven by /usr/bin/nvidia-container-runtime) by default.
# The file is written through `sudo tee` because a plain `> /etc/...`
# redirection is opened by the calling shell, not by sudo, and fails for a
# non-root user. The delimiter is quoted so the payload is never expanded.
# NOTE(review): containerd's documentation spells the cgroup option
# "SystemdCgroup"; the lowercase-first spelling below is kept unchanged --
# confirm that the containerd version in use accepts it.
sudo tee /etc/containerd/config.toml > /dev/null << 'EOF'
version = 2
[plugins]
[plugins."io.containerd.grpc.v1.cri"]
[plugins."io.containerd.grpc.v1.cri".containerd]
default_runtime_name = "nvidia"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
BinaryName = "/usr/bin/nvidia-container-runtime"
systemdCgroup = true
EOF
sudo service containerd restart

# Let nvidia-ctk configure Docker for the NVIDIA runtime, then reload Docker.
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker
# --- Local nginx load balancer for the Kubernetes API -------------------------
# Every node runs its own nginx. The stream block listens on 127.0.0.1:8080
# and spreads connections (least_conn) over the three API servers on port
# 6443. The http block serves /healthz and /stub_status on port 8081.
sudo apt-get install -y nginx-light libnginx-mod-stream

# Written through `sudo tee`: a plain `> /etc/nginx/nginx.conf` redirection is
# opened by the calling shell rather than by sudo and fails for a non-root
# user. The delimiter is quoted so nothing in the payload is ever expanded.
sudo tee /etc/nginx/nginx.conf > /dev/null << 'EOF'
error_log stderr notice;
load_module /lib/nginx/modules/ngx_stream_module.so;
worker_processes auto;
worker_rlimit_nofile 130048;
worker_shutdown_timeout 10s;
events {
multi_accept on;
use epoll;
worker_connections 16384;
}
stream {
upstream kube_apiserver {
least_conn;
server 127.0.0.1:6443;
server 10.239.10.222:6443;
server 10.239.10.223:6443;
}
server {
listen 127.0.0.1:8080;
proxy_pass kube_apiserver;
proxy_timeout 10m;
proxy_connect_timeout 1s;
}
}
http {
aio threads;
aio_write on;
tcp_nopush on;
tcp_nodelay on;
keepalive_timeout 5m;
keepalive_requests 100;
reset_timedout_connection on;
server_tokens off;
autoindex off;
server {
listen 8081;
location /healthz {
access_log off;
return 200;
}
location /stub_status {
stub_status on;
access_log off;
}
}
}
EOF

# Validate first: restarting with a broken config would leave nginx stopped.
sudo nginx -t && sudo service nginx restart
# --- Bootstrap or join the cluster --------------------------------------------
# Only one kubeadm action applies to a given node, so it is selected with
# NODE_ROLE instead of running init and both joins back to back:
#   init    (default) first control-plane node; creates the cluster
#   master  additional control-plane node
#   worker  worker node
#
# Join credentials are printed by `kubeadm init` and are short-lived, so they
# are taken from the environment rather than stored in this file:
#   KUBEADM_TOKEN         bootstrap token
#   KUBEADM_CA_CERT_HASH  CA cert hash, including the "sha256:" prefix
#   KUBEADM_CERT_KEY      certificate key (master only)
#
# All traffic goes through the local nginx listener on 127.0.0.1:8080.
# kubeadm needs root, hence sudo. After `init`, kubectl/cilium need a
# kubeconfig; kubeadm prints the commands to set one up.
# NOTE(review): the cilium, taint and helm steps later in this file look
# cluster-wide and presumably only need to run on the init node -- confirm.
kubeadm_endpoint=127.0.0.1:8080
case "${NODE_ROLE:-init}" in
  init)
    sudo kubeadm init --upload-certs --control-plane-endpoint="${kubeadm_endpoint}"
    ;;
  master)
    sudo kubeadm join "${kubeadm_endpoint}" \
      --token "${KUBEADM_TOKEN:?KUBEADM_TOKEN must be set to join}" \
      --discovery-token-ca-cert-hash "${KUBEADM_CA_CERT_HASH:?KUBEADM_CA_CERT_HASH must be set to join}" \
      --control-plane \
      --certificate-key "${KUBEADM_CERT_KEY:?KUBEADM_CERT_KEY must be set to join as master}"
    ;;
  worker)
    sudo kubeadm join "${kubeadm_endpoint}" \
      --token "${KUBEADM_TOKEN:?KUBEADM_TOKEN must be set to join}" \
      --discovery-token-ca-cert-hash "${KUBEADM_CA_CERT_HASH:?KUBEADM_CA_CERT_HASH must be set to join}"
    ;;
  *)
    printf 'unknown NODE_ROLE "%s" (expected init, master or worker)\n' "${NODE_ROLE}" >&2
    exit 2
    ;;
esac
# --- Cilium CLI + Cilium CNI --------------------------------------------------
# Downloads the current stable cilium-cli release for this CPU architecture,
# verifies it against the published SHA-256 file, installs it into
# /usr/local/bin and then deploys Cilium 1.15.1 into the cluster.
# Any download or checksum failure aborts before anything is installed.
CILIUM_CLI_VERSION=$(curl -fsSL https://raw.githubusercontent.com/cilium/cilium-cli/main/stable.txt)
if [[ -z "${CILIUM_CLI_VERSION}" ]]; then
  echo "could not determine the stable cilium-cli version" >&2
  exit 1
fi

CLI_ARCH=amd64
if [ "$(uname -m)" = "aarch64" ]; then CLI_ARCH=arm64; fi
cilium_tarball="cilium-linux-${CLI_ARCH}.tar.gz"

# The brace expansion fetches both the tarball and its .sha256sum companion.
if ! curl -L --fail --remote-name-all \
  "https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}/${cilium_tarball}"{,.sha256sum}; then
  echo "failed to download ${cilium_tarball}" >&2
  exit 1
fi

# Never unpack into /usr/local/bin unless the checksum matches.
if ! sha256sum --check "${cilium_tarball}.sha256sum"; then
  echo "checksum verification failed for ${cilium_tarball}" >&2
  rm -f -- "${cilium_tarball}" "${cilium_tarball}.sha256sum"
  exit 1
fi

sudo tar xzvfC "${cilium_tarball}" /usr/local/bin
rm -- "${cilium_tarball}" "${cilium_tarball}.sha256sum"
cilium install --version 1.15.1
# Remove the control-plane NoSchedule taint from ml-cbt-02 and ml-cbt-03 so
# regular workloads can be scheduled on them (the trailing "-" on the taint
# spec means "delete this taint").
kubectl taint nodes ml-cbt-02 ml-cbt-03 node-role.kubernetes.io/control-plane:NoSchedule-
# install tools
# helm: download the installer to a temp file and run it only if the download
# succeeded, so a truncated transfer or an HTTP error page is never fed to
# bash.
helm_installer="$(mktemp)" || exit 1
if ! curl -fsSL -o "${helm_installer}" \
  https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3; then
  echo "failed to download the helm installer" >&2
  rm -f -- "${helm_installer}"
  exit 1
fi
bash "${helm_installer}"
rm -f -- "${helm_installer}"

# helm-diff is needed for helmfile.
helm plugin install https://github.com/databus23/helm-diff