kube-flannel pod keeps restarting

kube-flannel always restarting

Posted by BlueFat on Thursday, November 23, 2023
wget https://raw.githubusercontent.com/flannel-io/flannel/master/Documentation/kube-flannel.yml
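The manifest is assumed here to be applied unmodified (if the cluster was not initialized with --pod-network-cidr=10.244.0.0/16, adjust the Network field in net-conf.json first):

kubectl apply -f kube-flannel.yml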

After installing the flannel network plugin, the pods keep restarting: the STATUS cycles through Running, Completed, and CrashLoopBackOff, and the RESTARTS count keeps climbing.

root@k8s-master01:~# kubectl get pod -A
NAMESPACE      NAME                    READY   STATUS             RESTARTS       AGE
kube-flannel   kube-flannel-ds-fxb4w   0/1     CrashLoopBackOff   5 (107s ago)   12m
kube-flannel   kube-flannel-ds-gjdpt   0/1     CrashLoopBackOff   5 (107s ago)   12m
kube-flannel   kube-flannel-ds-srx82   0/1     CrashLoopBackOff   5 (109s ago)   12m
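A quick extra check (not in the original post) is to describe one of the pods and look at Last State, Exit Code, and Events for a hint as to why the container keeps dying:

kubectl describe pod kube-flannel-ds-fxb4w -n kube-flannel | grep -A 5 'Last State'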

Pod logs

root@k8s-master01:~# kubectl logs kube-flannel-ds-gjdpt  -nkube-flannel
Defaulted container "kube-flannel" out of: kube-flannel, install-cni-plugin (init), install-cni (init)
I1230 09:40:47.185872       1 main.go:209] CLI flags config: {etcdEndpoints:http://127.0.0.1:4001,http://127.0.0.1:2379 etcdPrefix:/coreos.com/network etcdKeyfile: etcdCertfile: etcdCAFile: etcdUsername: etcdPassword: version:false kubeSubnetMgr:true kubeApiUrl: kubeAnnotationPrefix:flannel.alpha.coreos.com kubeConfigFile: iface:[] ifaceRegex:[] ipMasq:true ifaceCanReach: subnetFile:/run/flannel/subnet.env publicIP: publicIPv6: subnetLeaseRenewMargin:60 healthzIP:0.0.0.0 healthzPort:0 iptablesResyncSeconds:5 iptablesForwardRules:true netConfPath:/etc/kube-flannel/net-conf.json setNodeNetworkUnavailable:true}
W1230 09:40:47.186231       1 client_config.go:617] Neither --kubeconfig nor --master was specified.  Using the inClusterConfig.  This might not work.
I1230 09:40:47.215784       1 kube.go:137] Waiting 10m0s for node controller to sync
I1230 09:40:47.216228       1 kube.go:458] Starting kube subnet manager
I1230 09:40:47.222392       1 kube.go:479] Creating the node lease for IPv4. This is the n.Spec.PodCIDRs: [10.244.0.0/24]
I1230 09:40:47.222743       1 kube.go:479] Creating the node lease for IPv4. This is the n.Spec.PodCIDRs: [10.244.1.0/24]
I1230 09:40:47.222876       1 kube.go:479] Creating the node lease for IPv4. This is the n.Spec.PodCIDRs: [10.244.2.0/24]
I1230 09:40:48.216674       1 kube.go:144] Node controller sync successful
I1230 09:40:48.217037       1 main.go:229] Created subnet manager: Kubernetes Subnet Manager - k8s-master02
I1230 09:40:48.217177       1 main.go:232] Installing signal handlers
I1230 09:40:48.217846       1 main.go:540] Found network config - Backend type: vxlan
I1230 09:40:48.218027       1 match.go:206] Determining IP address of default interface
I1230 09:40:48.219693       1 match.go:259] Using interface with name ens160 and address 192.168.77.82
I1230 09:40:48.219945       1 match.go:281] Defaulting external address to interface address (192.168.77.82)
I1230 09:40:48.220296       1 vxlan.go:141] VXLAN config: VNI=1 Port=0 GBP=false Learning=false DirectRouting=false
I1230 09:40:48.221742       1 main.go:354] Setting up masking rules
I1230 09:40:48.253803       1 main.go:405] Changing default FORWARD chain policy to ACCEPT
I1230 09:40:48.256084       1 iptables.go:290] generated 7 rules
I1230 09:40:48.271480       1 iptables.go:290] generated 3 rules
I1230 09:40:48.271919       1 main.go:433] Wrote subnet file to /run/flannel/subnet.env
I1230 09:40:48.272299       1 main.go:437] Running backend.
I1230 09:40:48.272992       1 vxlan_network.go:65] watching for new subnet leases
I1230 09:40:48.273410       1 subnet.go:159] Batch elem [0] is { lease.Event{Type:0, Lease:lease.Lease{EnableIPv4:true, EnableIPv6:false, Subnet:ip.IP4Net{IP:0xaf40000, PrefixLen:0x18}, IPv6Subnet:ip.IP6Net{IP:(*ip.IP6)(nil), PrefixLen:0x0}, Attrs:lease.LeaseAttrs{PublicIP:0xc0a84d51, PublicIPv6:(*ip.IP6)(nil), BackendType:"vxlan", BackendData:json.RawMessage{0x7b, 0x22, 0x56, 0x4e, 0x49, 0x22, 0x3a, 0x31, 0x2c, 0x22, 0x56, 0x74, 0x65, 0x70, 0x4d, 0x41, 0x43, 0x22, 0x3a, 0x22, 0x64, 0x61, 0x3a, 0x30, 0x30, 0x3a, 0x34, 0x34, 0x3a, 0x61, 0x37, 0x3a, 0x31, 0x65, 0x3a, 0x31, 0x33, 0x22, 0x7d}, BackendV6Data:json.RawMessage(nil)}, Expiration:time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC), Asof:0}} }
I1230 09:40:48.274306       1 subnet.go:159] Batch elem [0] is { lease.Event{Type:0, Lease:lease.Lease{EnableIPv4:true, EnableIPv6:false, Subnet:ip.IP4Net{IP:0xaf40200, PrefixLen:0x18}, IPv6Subnet:ip.IP6Net{IP:(*ip.IP6)(nil), PrefixLen:0x0}, Attrs:lease.LeaseAttrs{PublicIP:0xc0a84d53, PublicIPv6:(*ip.IP6)(nil), BackendType:"vxlan", BackendData:json.RawMessage{0x7b, 0x22, 0x56, 0x4e, 0x49, 0x22, 0x3a, 0x31, 0x2c, 0x22, 0x56, 0x74, 0x65, 0x70, 0x4d, 0x41, 0x43, 0x22, 0x3a, 0x22, 0x63, 0x61, 0x3a, 0x63, 0x39, 0x3a, 0x37, 0x35, 0x3a, 0x64, 0x34, 0x3a, 0x63, 0x38, 0x3a, 0x32, 0x36, 0x22, 0x7d}, BackendV6Data:json.RawMessage(nil)}, Expiration:time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC), Asof:0}} }
I1230 09:40:48.286993       1 main.go:458] Waiting for all goroutines to exit
I1230 09:40:48.320728       1 iptables.go:283] bootstrap done
I1230 09:40:48.331969       1 iptables.go:283] bootstrap done
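Note that the log ends right after an apparently healthy startup ("Running backend", "bootstrap done") with no error at all, which suggests the container is being killed from outside rather than flannel crashing on its own. To read the log of the previous, already-restarted instance, add --previous (pod name taken from the listing above):

kubectl logs kube-flannel-ds-gjdpt -n kube-flannel --previous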

kubelet logs

root@k8s-master01:~# journalctl -u kubelet -f
Dec 30 22:30:36 k8s-master01 kubelet[135631]: I1230 22:30:36.727338  135631 scope.go:115] "RemoveContainer" containerID="7553448f20a9a6a69467432209422415a28f04efb90e228d35e077037d499c9e"
Dec 30 22:30:37 k8s-master01 kubelet[135631]: I1230 22:30:37.977688  135631 kubelet_node_status.go:493] "Fast updating node status as it just became ready"
Dec 30 22:30:38 k8s-master01 kubelet[135631]: I1230 22:30:38.729795  135631 scope.go:115] "RemoveContainer" containerID="f35970dda58772797b3a1d1382130baab0657f95e86b45606f4cd65dc43a3998"
Dec 30 22:31:52 k8s-master01 kubelet[135631]: I1230 22:31:52.926661  135631 pod_container_deletor.go:80] "Container not found in pod's containers" containerID="c84fab8c0cd0bfa2c327810beeb3344614f194a4622d224aa2b6185985207fb9"
Dec 30 22:31:52 k8s-master01 kubelet[135631]: I1230 22:31:52.926738  135631 scope.go:115] "RemoveContainer" containerID="f35970dda58772797b3a1d1382130baab0657f95e86b45606f4cd65dc43a3998"
Dec 30 22:31:52 k8s-master01 kubelet[135631]: I1230 22:31:52.929270  135631 scope.go:115] "RemoveContainer" containerID="6f178626d890de16ddaa51edb225c6d826b260d9f566c9d4d1d7c31b70f91171"
Dec 30 22:31:52 k8s-master01 kubelet[135631]: I1230 22:31:52.937718  135631 scope.go:115] "RemoveContainer" containerID="9655c21a7f8d84a79dc4c59ed5167218222ee5b51a968c9abe3cffd6e4a371f5"
Dec 30 22:31:54 k8s-master01 kubelet[135631]: I1230 22:31:54.940260  135631 scope.go:115] "RemoveContainer" containerID="0b15b894cba338e61876014d8e763efe335ebb75b5e69a212e95079950266a86"
Dec 30 22:31:54 k8s-master01 kubelet[135631]: E1230 22:31:54.940799  135631 pod_workers.go:1294] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"kube-flannel\" with CrashLoopBackOff: \"back-off 10s restarting failed container=kube-flannel pod=kube-flannel-ds-vp9db_kube-flannel(f65ce16f-6888-44f6-b51d-b6060767956c)\"" pod="kube-flannel/kube-flannel-ds-vp9db" podUID=f65ce16f-6888-44f6-b51d-b6060767956c
Dec 30 22:32:07 k8s-master01 kubelet[135631]: I1230 22:32:07.652951  135631 scope.go:115] "RemoveContainer" containerID="0b15b894cba338e61876014d8e763efe335ebb75b5e69a212e95079950266a86"
Dec 30 22:33:38 k8s-master01 kubelet[135631]: I1230 22:33:38.189415  135631 pod_container_deletor.go:80] "Container not found in pod's containers" containerID="ada7b5d5380721f953f97533b11791ca06851d21909f34a1e0fdc4c5e915a18a"
Dec 30 22:33:38 k8s-master01 kubelet[135631]: I1230 22:33:38.189456  135631 scope.go:115] "RemoveContainer" containerID="0b15b894cba338e61876014d8e763efe335ebb75b5e69a212e95079950266a86"
Dec 30 22:33:38 k8s-master01 kubelet[135631]: I1230 22:33:38.198987  135631 scope.go:115] "RemoveContainer" containerID="0e721cbb3a4231d59cbc8cbba4ff0527752fdc009d9b5bb9504a929b02ab37d2"
Dec 30 22:33:38 k8s-master01 kubelet[135631]: I1230 22:33:38.210216  135631 scope.go:115] "RemoveContainer" containerID="0cfbf3e28cab8a6398918dfebe2ab04b26591f86a3240dcc133ec6bcf1bb9489"
Dec 30 22:33:40 k8s-master01 kubelet[135631]: I1230 22:33:40.201672  135631 scope.go:115] "RemoveContainer" containerID="65a42886cc991087f592b87cf11e61f11634478692b35b80d9e335a60c41d01f"
Dec 30 22:33:40 k8s-master01 kubelet[135631]: E1230 22:33:40.203341  135631 pod_workers.go:1294] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"kube-flannel\" with CrashLoopBackOff: \"back-off 20s restarting failed container=kube-flannel pod=kube-flannel-ds-vp9db_kube-flannel(f65ce16f-6888-44f6-b51d-b6060767956c)\"" pod="kube-flannel/kube-flannel-ds-vp9db" podUID=f65ce16f-6888-44f6-b51d-b6060767956c
Dec 30 22:33:54 k8s-master01 kubelet[135631]: I1230 22:33:54.652847  135631 scope.go:115] "RemoveContainer" containerID="65a42886cc991087f592b87cf11e61f11634478692b35b80d9e335a60c41d01f"
Dec 30 22:33:54 k8s-master01 kubelet[135631]: E1230 22:33:54.653341  135631 pod_workers.go:1294] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"kube-flannel\" with CrashLoopBackOff: \"back-off 20s restarting failed container=kube-flannel pod=kube-flannel-ds-vp9db_kube-flannel(f65ce16f-6888-44f6-b51d-b6060767956c)\"" pod="kube-flannel/kube-flannel-ds-vp9db" podUID=f65ce16f-6888-44f6-b51d-b6060767956c
Dec 30 22:34:07 k8s-master01 kubelet[135631]: I1230 22:34:07.652281  135631 scope.go:115] "RemoveContainer" containerID="65a42886cc991087f592b87cf11e61f11634478692b35b80d9e335a60c41d01f"

Solution

Both logs point to the container being restarted by the runtime rather than crashing: flannel starts cleanly, yet the kubelet keeps reporting CrashLoopBackOff. The root cause is a cgroup driver mismatch: the kubelet uses the systemd cgroup driver (the kubeadm default since v1.22), while containerd ships with SystemdCgroup = false (the cgroupfs driver), so containers are repeatedly killed. Switching containerd to the systemd driver fixes it:

# Generate the default config first if /etc/containerd/config.toml does not exist:
#containerd config default | tee /etc/containerd/config.toml

sed -i 's/SystemdCgroup = false/SystemdCgroup = true/g' /etc/containerd/config.toml  
systemctl restart containerd kubelet
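To confirm both sides now agree on the systemd driver (paths assume a kubeadm install; the kubelet config location may differ on other setups):

grep SystemdCgroup /etc/containerd/config.toml
grep cgroupDriver /var/lib/kubelet/config.yaml

The first should print SystemdCgroup = true and the second cgroupDriver: systemd.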

Back to normal:

root@k8s-master01:~# kubectl get pod -A
NAMESPACE      NAME                    READY   STATUS    RESTARTS   AGE
default        nginx-f6dc544c7-rwbrr   1/1     Running   0          14m
kube-flannel   kube-flannel-ds-2flbt   1/1     Running   0          31m
kube-flannel   kube-flannel-ds-d4cpv   1/1     Running   0          31m
kube-flannel   kube-flannel-ds-mccsd   1/1     Running   0          31m

Reference: https://stackoverflow.com/questions/76390577/kubernetes-frequently-unable-to-communicate-with-kublet-api-connection-refused