# Source listing header: Bash script, 145 lines, 4.9 KiB.
# Add Docker's official GPG key.
sudo apt-get update
# -y keeps the install non-interactive so an unattended run does not stop at
# the confirmation prompt. nfs-common is needed for the nfs-client storage class.
sudo apt-get install -y ca-certificates curl nfs-common
sudo install -m 0755 -d /etc/apt/keyrings
sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc
sudo chmod a+r /etc/apt/keyrings/docker.asc

# Add the Docker repository to Apt sources. The entry is built from the dpkg
# architecture and the release codename read from /etc/os-release.
docker_repo_arch="$(dpkg --print-architecture)"
docker_repo_codename="$(. /etc/os-release && echo "$VERSION_CODENAME")"
printf 'deb [arch=%s signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu %s stable\n' \
  "$docker_repo_arch" "$docker_repo_codename" \
  | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get update

# Install Docker Engine and containerd.
# -y keeps the install non-interactive so an unattended run does not hang on
# the confirmation prompt.
sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin

# Install kubelet, kubeadm and kubectl from the Kubernetes v1.29 apt repository.
curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.29/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.29/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list
# apt-get (not apt) is used for consistency with the rest of this script.
sudo apt-get update
sudo apt-get install -y kubelet kubeadm kubectl
# Hold the packages so routine upgrades do not move them. apt-mark modifies
# package state and therefore needs root like the other apt commands here.
sudo apt-mark hold kubelet kubeadm kubectl

# Install nvidia-container-toolkit and make it the default runtime.

# Register the NVIDIA signing key, then add the repository list with a
# signed-by clause pointing at that key. -f makes curl fail on HTTP errors
# instead of writing an error page into the apt sources file.
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
  && curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
  | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
  | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list

sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit

# Write the containerd config through `sudo tee`: a plain `> file` redirect is
# opened by the calling (unprivileged) shell and cannot write under /etc.
# The quoted 'EOF' delimiter keeps the TOML literal (no shell expansion).
sudo tee /etc/containerd/config.toml > /dev/null << 'EOF'
version = 2

[plugins]

[plugins."io.containerd.grpc.v1.cri"]

[plugins."io.containerd.grpc.v1.cri".containerd]
default_runtime_name = "nvidia"

[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]

[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"

[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
BinaryName = "/usr/bin/nvidia-container-runtime"
systemdCgroup = true
EOF
sudo service containerd restart

# Let nvidia-ctk configure the Docker runtime as well, then restart Docker.
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker
# Install nginx for load balancing: the stream block below listens on
# 127.0.0.1:8080 and forwards to the three kube-apiserver backends on :6443.
sudo apt-get install -y nginx-light libnginx-mod-stream

# Write the config through `sudo tee`: a plain `> file` redirect is opened by
# the calling (unprivileged) shell and cannot write under /etc.
# The quoted 'EOF' delimiter keeps the config literal (no shell expansion).
sudo tee /etc/nginx/nginx.conf > /dev/null << 'EOF'
error_log stderr notice;
load_module /lib/nginx/modules/ngx_stream_module.so;

worker_processes auto;
worker_rlimit_nofile 130048;
worker_shutdown_timeout 10s;

events {
multi_accept on;
use epoll;
worker_connections 16384;
}

stream {
upstream kube_apiserver {
least_conn;
server 127.0.0.1:6443;
server 10.239.10.222:6443;
server 10.239.10.223:6443;
}

server {
listen 127.0.0.1:8080;
proxy_pass kube_apiserver;
proxy_timeout 10m;
proxy_connect_timeout 1s;
}
}

http {
aio threads;
aio_write on;
tcp_nopush on;
tcp_nodelay on;

keepalive_timeout 5m;
keepalive_requests 100;
reset_timedout_connection on;
server_tokens off;
autoindex off;

server {
listen 8081;
location /healthz {
access_log off;
return 200;
}
location /stub_status {
stub_status on;
access_log off;
}
}
}
EOF

sudo service nginx restart
# Bootstrap or join the cluster through the local load-balancer endpoint.
#
# Only one kubeadm action applies to a given host, so the action is selected
# with NODE_ROLE instead of running init and both joins back to back:
#   init   (default) - initialise the first control-plane node
#   master           - join as an additional control-plane node
#   worker           - join as a worker node
#
# The join parameters default to the values that were hard-coded here and can
# be overridden from the environment.
# NOTE(review): the bootstrap token and certificate key are credentials;
# consider rotating them and passing fresh values via the environment instead
# of keeping them in this file.
NODE_ROLE="${NODE_ROLE:-init}"
KUBEADM_ENDPOINT="${KUBEADM_ENDPOINT:-127.0.0.1:8080}"
KUBEADM_TOKEN="${KUBEADM_TOKEN:-e01u52.c9uq77rkvl3qm86u}"
KUBEADM_CA_CERT_HASH="${KUBEADM_CA_CERT_HASH:-sha256:a7fc076bdcd7391e8bc7577b54ecc492d319298b5699293f8390042b57866700}"
KUBEADM_CERTIFICATE_KEY="${KUBEADM_CERTIFICATE_KEY:-2931030dff3041c185715298ab833895c6f36028a97b2f139857641bbe7f66b5}"

# kubeadm is run through sudo like the other privileged commands in this script.
case "$NODE_ROLE" in
  init)
    sudo kubeadm init --upload-certs --control-plane-endpoint="$KUBEADM_ENDPOINT"
    ;;
  master)
    sudo kubeadm join "$KUBEADM_ENDPOINT" \
      --token "$KUBEADM_TOKEN" \
      --discovery-token-ca-cert-hash "$KUBEADM_CA_CERT_HASH" \
      --control-plane \
      --certificate-key "$KUBEADM_CERTIFICATE_KEY"
    ;;
  worker)
    sudo kubeadm join "$KUBEADM_ENDPOINT" \
      --token "$KUBEADM_TOKEN" \
      --discovery-token-ca-cert-hash "$KUBEADM_CA_CERT_HASH"
    ;;
  *)
    printf 'unknown NODE_ROLE: %s (expected init, master or worker)\n' "$NODE_ROLE" >&2
    exit 2
    ;;
esac

# NOTE(review): the kubectl/cilium commands that follow need a working
# kubeconfig for the new cluster - confirm it is set up before continuing.

# Install the cilium CLI (latest stable release), then install Cilium 1.15.1.
# --fail: an HTTP error must not end up as the "version" string.
CILIUM_CLI_VERSION="$(curl -fsSL https://raw.githubusercontent.com/cilium/cilium-cli/main/stable.txt)"
CLI_ARCH=amd64
if [ "$(uname -m)" = "aarch64" ]; then CLI_ARCH=arm64; fi
cilium_archive="cilium-linux-${CLI_ARCH}.tar.gz"
cilium_release_url="https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}"

# Download the archive together with its published checksum file.
curl -L --fail --remote-name-all \
  "${cilium_release_url}/${cilium_archive}" \
  "${cilium_release_url}/${cilium_archive}.sha256sum"

# Never unpack into /usr/local/bin unless the checksum verifies.
if ! sha256sum --check "${cilium_archive}.sha256sum"; then
  printf 'cilium-cli checksum verification failed; aborting\n' >&2
  rm -f -- "${cilium_archive}" "${cilium_archive}.sha256sum"
  exit 1
fi
sudo tar xzvfC "${cilium_archive}" /usr/local/bin
rm -- "${cilium_archive}" "${cilium_archive}.sha256sum"

cilium install --version 1.15.1

# Remove the control-plane NoSchedule taint from ml-cbt-02 and ml-cbt-03.
# The trailing "-" on the taint spec tells kubectl to delete the taint rather
# than add it, so this un-taints the masters.
kubectl taint nodes ml-cbt-02 ml-cbt-03 node-role.kubernetes.io/control-plane:NoSchedule-

# Install tools.
# -f makes curl fail on an HTTP error instead of piping the error body into
# bash; -sS hides the progress meter but still reports errors.
curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash # helm
helm plugin install https://github.com/databus23/helm-diff # helm-diff needed for helmfile