|> [!attention]

部署节点建议使用 k8s 集群外的节点

主机名 IP 配置信息 系统 内核
console 192.168.10.100 CPU: 4C 内存: 8GB 磁盘: 500GB AlmaLinux 9.6 6.1.147
test-master-101 192.168.10.101 CPU: 4C 内存: 8GB 磁盘: 500GB AlmaLinux 9.6 6.1.147
test-worker-102 192.168.10.102 CPU: 16 C 内存: 64 GB 磁盘: 500GB AlmaLinux 9.6 6.1.147
test-worker-103 192.168.10.103 CPU: 16 C 内存: 64 GB 磁盘: 500 GB AlmaLinux 9.6 6.1.147
test-worker-104 192.168.10.104 CPU: 16 C 内存: 64 GB 磁盘: 500 GB AlmaLinux 9.6 6.1.147

1 准备环境

1.1 节点

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# cat <<EOF > /etc/sysctl.d/k8s.conf
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-iptables = 1
fs.may_detach_mounts = 1
vm.overcommit_memory=1
vm.panic_on_oom=0
fs.inotify.max_user_watches=89100
fs.file-max=52706963
fs.nr_open=52706963
net.netfilter.nf_conntrack_max=2310720

net.ipv4.tcp_keepalive_time = 600
net.ipv4.tcp_keepalive_probes = 3
net.ipv4.tcp_keepalive_intvl = 15
net.ipv4.tcp_max_tw_buckets = 36000
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_max_orphans = 327680
net.ipv4.tcp_orphan_retries = 3
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_max_syn_backlog = 16384
# net.ipv4.ip_conntrack_max 在新内核中已移除, 使用上文的 net.netfilter.nf_conntrack_max 即可
net.ipv4.tcp_timestamps = 0
net.core.somaxconn = 16384

net.ipv6.conf.all.disable_ipv6 = 0
net.ipv6.conf.default.disable_ipv6 = 0
net.ipv6.conf.lo.disable_ipv6 = 0
net.ipv6.conf.all.forwarding = 1
EOF

# sysctl --system 生效
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
mkdir -p /data/server/kubespray
cd /data/server/kubespray
# 这里用2.28.0版本
wget https://github.com/kubernetes-sigs/kubespray/archive/refs/tags/v2.28.0.tar.gz
tar -zxvf v2.28.0.tar.gz
# 解压后得到 kubespray-2.28.0 目录; docker-compose.yaml 放在当前目录(/data/server/kubespray), 与下面的相对路径挂载对应

cat docker-compose.yaml
services:
kubespray:
image: quay.io/kubespray/kubespray:v2.28.0
command: tail -f /dev/null
restart: always
container_name: kubespray
volumes:
- ./kubespray-2.28.0:/kubespray
- /home/kubespray-cluster:/kubespray/inventory # 单独把配置文件拆出来更好管理
- /root/.ssh:/root/.ssh

for image in $(cat images.list); do skopeo copy --insecure-policy --src-tls-verify=false --dest-tls-verify=false docker://${image} docker://tst-registry.bluethink.local/library/${image#*/}; done

1.2 获取离线资源

1
2
3
4
5
6
7
8
9
10
11
12
13
14
docker-compose up -d
docker exec -it kubespray bash
cd contrib/offline && bash generate_list.sh
mv temp /kubespray
exit
cd kubespray-2.28.0/temp
# 修改files.list文件,加上files.m.daocloud.io前缀
sed -i "s#https://#https://files.m.daocloud.io/#g" files.list

# 修改images.list文件,修改成daocloud的镜像加速配置
sed -i "s@quay.io@quay.m.daocloud.io@g" images.list
sed -i "s@docker.io@docker.m.daocloud.io@g" images.list
sed -i "s@registry.k8s.io@k8s.m.daocloud.io@g" images.list
sed -i "s@ghcr.io@ghcr.m.daocloud.io@g" images.list

1.2.1 二进制包

Info

wget -x 会根据远程文件的 URL 路径,在本地创建完整的目录结构。

这里使用 dufs 当做 webdav 服务, 数据目录存放在 /data/server/dufs/data

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# 执行以下命令将依赖的静态文件全部下载到 dufs 数据目录的 kubespray 子目录下
# -nH 去掉 URL 中的主机名目录层级(files.m.daocloud.io), 保证本地路径与 offline.yml 中 files_repo 的路径一致
mkdir -p /data/server/dufs/data
wget -x -nH -P /data/server/dufs/data/kubespray -i temp/files.list

cd /data/server/dufs
cat docker-compose.yaml
services:
dufs:
image: sigoden/dufs
container_name: dufs
network_mode: host
expose:
- "5000"
command: ["/data", "--allow-upload", "--allow-search"]
volumes:
- ./data:/data
restart: always
environment:
TZ: 'Asia/Shanghai'

docker-compose up -d

# 浏览器访问测试
http://$ip:5000/kubespray/

1.2.2 容器镜像

Info

${image#*/},Shell 参数扩展语法,移除镜像名称中第一个/及其之前的所有字符

1
2
cd /data/server/kubespray/kubespray-2.28.0/temp
for image in $(cat images.list); do skopeo copy --insecure-policy --src-tls-verify=false --dest-tls-verify=false docker://${image} docker://tst-registry.bluethink.local/library/${image#*/}; done

2 配置 Kubespray

2.1 节点配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
cd /data/server/kubespray/kubespray-2.28.0
cp -rf inventory/sample/ /home/kubespray-cluster/test_cluster/
cd /home/kubespray-cluster/test_cluster/
cat inventory.ini
[kube_control_plane]
test-master-101 ansible_host=192.168.10.101 ip=192.168.10.101 etcd_member_name=etcd1

[etcd:children]
kube_control_plane

[kube_node]
test-worker-102 ansible_host=192.168.10.102 ip=192.168.10.102
test-worker-103 ansible_host=192.168.10.103 ip=192.168.10.103
test-worker-104 ansible_host=192.168.10.104 ip=192.168.10.104

# 修改ntp服务器地址
cat group_vars/all/all.yml
ntp_enabled: true
ntp_manage_config: false
ntp_servers:
- "ntp1.aliyun.com"
- "ntp2.aliyun.com"
- "ntp3.aliyun.com"
- "s1c.time.edu.cn"
- "ntp.ntsc.ac.cn"
- "cn.ntp.org.cn"
# 开启日志
unsafe_show_logs: true

2.2 k8s 配置

此文件修改以下内容

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
cat group_vars/k8s_cluster/k8s-cluster.yml
kube_network_plugin: calico # Kubernetes 网络插件
kube_service_addresses: 10.233.0.0/18 # Service 的 ClusterIP 地址范围
kube_pods_subnet: 10.233.64.0/18 # Pod 的 IP 地址范围
kube_network_node_prefix: 24 # 分配给每个节点的 Pod IP 子网前缀长度
kube_apiserver_port: 6443  # (https) # API Server 端口
kube_proxy_mode: ipvs # Kube-proxy 代理模式
cluster_name: tst-cluster.local # 集群名称/DNS 域
dns_mode: coredns # DNS 模式
container_manager: containerd # 容器运行时
k8s_image_pull_policy: IfNotPresent # 镜像拉取策略
event_ttl_duration: "1h0m0s" # 事件保留时长
auto_renew_certificates: true # 自动续订证书
kube_proxy_nodeport_addresses: ["primary"]
# 离线下载相关配置
download_run_once: false
download_localhost: false
download_force_cache: false

2.3 Containerd 配置

Attention

当前部署脚本使用的 containerd 2.x, 导致以前的私有库配置不生效

参考: Issue #12296

需要每个节点手动添加 /root/.docker/config.json,pull镜像使用的nerdctl, 提前 login

手动批量登录

ansible -i k8s host-all -m command -a "nerdctl login --insecure-registry hjq-registry.bluethink.cn:80 -u ccops -p 1123"

手动批量替换

ansible -i k8s redis -m command -a "sed -i 's/registry.bluethink.cn:80/registry.bluethink.cn/g' /root/.docker/config.json"

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20

vim ../../roles/container-engine/containerd/templates/config.toml.j2
[plugins."io.containerd.cri.v1.images".registry]
config_path = "{{ containerd_cfg_dir }}/certs.d"
# 此段下面添加
{% for registry in containerd_registry_auth if registry['registry'] is defined %}
{% if (registry['username'] is defined and registry['password'] is defined) or registry['auth'] is defined %}
[plugins."io.containerd.cri.v1.images".registry.configs."{{ registry['registry'] }}".auth]
{% if registry['username'] is defined and registry['password'] is defined %}
password = "{{ registry['password'] }}"
username = "{{ registry['username'] }}"
{% else %}
auth = "{{ registry['auth'] }}"
{% endif %}
{% endif %}
{% endfor %}

# 默认从https改为http
vim ../../roles/container-engine/containerd/templates/hosts.toml.j2
server = "{{ item.server | default("http://" + item.prefix) }}"

2.3.1 配置私有库

1
2
3
4
5
6
7
8
9
10
11
12
13
cat group_vars/all/containerd.yml
---
containerd_storage_dir: "/data/containerd"
containerd_state_dir: "/run/containerd"
containerd_oom_score: 0

containerd_grpc_max_recv_message_size: 16777216
containerd_grpc_max_send_message_size: 16777216

containerd_debug_level: "info"


containerd_max_container_log_line_size: 16384

2.3.2 Calico 配置

1
2
3
4
5
6
7
8
cat group_vars/k8s_cluster/k8s-net-calico.yml
calico_cni_name: k8s-pod-network
calico_pool_blocksize: 26
global_as_num: "64512"
calico_network_backend: bird
calico_ipip_mode: 'CrossSubnet'
calico_vxlan_mode: 'Never'
calico_ip_auto_method: "interface=ens192"

2.4 配置离线安装

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
cat group_vars/all/offline.yml
---
registry_host: "tst-registry.bluethink.local/library" #镜像仓库地址
files_repo: "http://192.168.10.100:5000/kubespray" #dufs地址

containerd_registries_mirrors:
- prefix: "{{ registry_host }}"
mirrors:
- host: "{{ registry_host }}"
capabilities: ["pull", "resolve"]
skip_verify: true
containerd_registry_auth:
- registry: tst-registry.bluethink.local
username: bluethink
password: 1qaz@WSX

kube_image_repo: "{{ registry_host }}"
gcr_image_repo: "{{ registry_host }}"
github_image_repo: "{{ registry_host }}"
docker_image_repo: "{{ registry_host }}"
quay_image_repo: "{{ registry_host }}"

kubeadm_download_url: "{{ files_repo }}/dl.k8s.io/release/v{{ kube_version }}/bin/linux/{{ image_arch }}/kubeadm"
kubectl_download_url: "{{ files_repo }}/dl.k8s.io/release/v{{ kube_version }}/bin/linux/{{ image_arch }}/kubectl"
kubelet_download_url: "{{ files_repo }}/dl.k8s.io/release/v{{ kube_version }}/bin/linux/{{ image_arch }}/kubelet"

cni_download_url: "{{ files_repo }}/github.com/containernetworking/plugins/releases/download/v{{ cni_version }}/cni-plugins-linux-{{ image_arch }}-v{{ cni_version }}.tgz"

crictl_download_url: "{{ files_repo }}/github.com/kubernetes-sigs/cri-tools/releases/download/v{{ crictl_version }}/crictl-v{{ crictl_version }}-{{ ansible_system | lower }}-{{ image_arch }}.tar.gz"

etcd_download_url: "{{ files_repo }}/github.com/etcd-io/etcd/releases/download/v{{ etcd_version }}/etcd-v{{ etcd_version }}-linux-{{ image_arch }}.tar.gz"

calicoctl_download_url: "{{ files_repo }}/github.com/projectcalico/calico/releases/download/v{{ calico_ctl_version }}/calicoctl-linux-{{ image_arch }}"
calico_crds_download_url: "{{ files_repo }}/github.com/projectcalico/calico/archive/v{{ calico_version }}.tar.gz"

ciliumcli_download_url: "{{ files_repo }}/github.com/cilium/cilium-cli/releases/download/v{{ cilium_cli_version }}/cilium-linux-{{ image_arch }}.tar.gz"

helm_download_url: "{{ files_repo }}/get.helm.sh/helm-v{{ helm_version }}-linux-{{ image_arch }}.tar.gz"

crun_download_url: "{{ files_repo }}/github.com/containers/crun/releases/download/{{ crun_version }}/crun-{{ crun_version }}-linux-{{ image_arch }}"

kata_containers_download_url: "{{ files_repo }}/github.com/kata-containers/kata-containers/releases/download/{{ kata_containers_version }}/kata-static-{{ kata_containers_version }}-{{ image_arch }}.tar.xz"

cri_dockerd_download_url: "{{ files_repo }}/github.com/Mirantis/cri-dockerd/releases/download/v{{ cri_dockerd_version }}/cri-dockerd-{{ cri_dockerd_version }}.{{ image_arch }}.tgz"

runc_download_url: "{{ files_repo }}/github.com/opencontainers/runc/releases/download/v{{ runc_version }}/runc.{{ image_arch }}"

crio_download_url: "{{ files_repo }}/storage.googleapis.com/cri-o/artifacts/cri-o.{{ image_arch }}.v{{ crio_version }}.tar.gz"
skopeo_download_url: "{{ files_repo }}/github.com/lework/skopeo-binary/releases/download/v{{ skopeo_version }}/skopeo-linux-{{ image_arch }}"

containerd_download_url: "{{ files_repo }}/github.com/containerd/containerd/releases/download/v{{ containerd_version }}/containerd-{{ containerd_version }}-linux-{{ image_arch }}.tar.gz"
nerdctl_download_url: "{{ files_repo }}/github.com/containerd/nerdctl/releases/download/v{{ nerdctl_version }}/nerdctl-{{ nerdctl_version }}-{{ ansible_system | lower }}-{{ image_arch }}.tar.gz"

gvisor_runsc_download_url: "{{ files_repo }}/storage.googleapis.com/gvisor/releases/release/{{ gvisor_version }}/{{ ansible_architecture }}/runsc"
gvisor_containerd_shim_runsc_download_url: "{{ files_repo }}/storage.googleapis.com/gvisor/releases/release/{{ gvisor_version }}/{{ ansible_architecture }}/containerd-shim-runsc-v1"

3 部署集群

1
2
3
4
5
# ansible-playbook -i inventory/test_cluster/inventory.ini reset.yml -b # 重置k8s集群
# 单独安装某个组件
# ansible-playbook -i inventory/mycluster/hosts.yaml --tags=containerd --user=root -b -v cluster.yml
# ansible-playbook -i inventory/mycluster/hosts.yaml --tags=download --user=root -b -v cluster.yml
ansible-playbook -i inventory/test_cluster/inventory.ini --user=root -b -v cluster.yml # 重新部署