0

I installed rke2-server and rke2-agent on the control-plane machine and the worker-node machines respectively (machine-0 is the control plane; machine-1 and machine-2 are worker nodes). These machines are physical machines and I'm trying to install on them manually, following https://github.com/clemenko/rke_install_blog. The installation completed without any errors. But when I look at `kubectl get nodes`, the nodes (even the control-plane node) are in the NotReady state. I see that many of the canal and coredns pods are stuck in the Pending state. How do I fix that and get them out of Pending? I did free about 20GB of memory on the control node, but the pods' state did not change. (Is there a way to restart the pods?)

NAME              STATUS     ROLES                       AGE     VERSION           INTERNAL-IP    EXTERNAL-IP   OS-IMAGE             KERNEL-VERSION           CONTAINER-RUNTIME
machine-0       NotReady   control-plane,etcd,master   4h8m    v1.26.12+rke2r1   192.169.0.10   <none>        Ubuntu 22.04.3 LTS   5.15.0-91-generic        containerd://1.7.11-k3s2
machine-1       NotReady   <none>                      3h56m   v1.26.12+rke2r1   192.169.0.11   <none>        Ubuntu 22.04.3 LTS   5.15.45-051545-generic   containerd://1.7.11-k3s2
machine-2       NotReady   <none>                      3h53m   v1.26.12+rke2r1   192.169.1.3    <none>        Ubuntu 22.04.3 LTS   6.2.0-25-generic         containerd://1.7.11-k3s2
root@machine-0:/home/normal#

kubectl get all --all-namespaces

root@machine-0:/home/normal# kubectl get all --all-namespaces
NAMESPACE       NAME                                                      READY   STATUS    RESTARTS       AGE
cattle-system   pod/rancher-64db9f48c-mc7vg                               0/1     Pending   0              12m
cert-manager    pod/cert-manager-55cf8685cb-5zmn9                         0/1     Pending   0              26m
cert-manager    pod/cert-manager-cainjector-fbd548cb8-9lzqt               0/1     Pending   0              26m
cert-manager    pod/cert-manager-startupapicheck-7cjzs                    0/1     Pending   0              26m
cert-manager    pod/cert-manager-webhook-655b4d58fb-rcws6                 0/1     Pending   0              26m
kube-system     pod/cloud-controller-manager-machine-0                  1/1     Running   3 (170m ago)   4h10m
kube-system     pod/etcd-machine-0                                      1/1     Running   1              4h9m
kube-system     pod/helm-install-rke2-canal-c45mv                         0/1     Pending   0              4h10m
kube-system     pod/helm-install-rke2-canal-kmhh6                         0/1     Evicted   0              4h10m
kube-system     pod/helm-install-rke2-coredns-n8g9t                       0/1     Pending   0              4h10m
kube-system     pod/helm-install-rke2-coredns-tcdmd                       0/1     Evicted   0              4h10m
kube-system     pod/helm-install-rke2-coredns-wrnh5                       0/1     Evicted   0              4h10m
kube-system     pod/helm-install-rke2-metrics-server-ms788                0/1     Pending   0              4h10m
kube-system     pod/helm-install-rke2-snapshot-controller-crd-rqjkj       0/1     Pending   0              4h10m
kube-system     pod/helm-install-rke2-snapshot-controller-j78sj           0/1     Pending   0              4h10m
kube-system     pod/helm-install-rke2-snapshot-validation-webhook-t5t24   0/1     Pending   0              4h10m
kube-system     pod/kube-apiserver-machine-0                            1/1     Running   1              4h10m
kube-system     pod/kube-controller-manager-machine-0                   1/1     Running   2 (170m ago)   4h10m
kube-system     pod/kube-proxy-machine-0                                1/1     Running   0              169m
kube-system     pod/kube-proxy-machine-1                                1/1     Running   0              3h58m
kube-system     pod/kube-proxy-machine-2                            1/1     Running   0              3h55m
kube-system     pod/kube-scheduler-machine-0                            1/1     Running   1 (170m ago)   4h10m

NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE cattle-system service/rancher ClusterIP 10.43.138.79 <none> 80/TCP,443/TCP 12m cert-manager service/cert-manager ClusterIP 10.43.219.64 <none> 9402/TCP 26m cert-manager service/cert-manager-webhook ClusterIP 10.43.15.146 <none> 443/TCP 26m default service/kubernetes ClusterIP 10.43.0.1 <none> 443/TCP 4h10m

NAMESPACE NAME DESIRED CURRENT READY UP-TO-DATE AVAILABLE NODE SELECTOR AGE kube-system daemonset.apps/kube-vip-ds 1 0 0 0 0 <none> 4h10m

NAMESPACE NAME READY UP-TO-DATE AVAILABLE AGE cattle-system deployment.apps/rancher 0/1 1 0 12m cert-manager deployment.apps/cert-manager 0/1 1 0 26m cert-manager deployment.apps/cert-manager-cainjector 0/1 1 0 26m cert-manager deployment.apps/cert-manager-webhook 0/1 1 0 26m

NAMESPACE NAME DESIRED CURRENT READY AGE cattle-system replicaset.apps/rancher-64db9f48c 1 1 0 12m cert-manager replicaset.apps/cert-manager-55cf8685cb 1 1 0 26m cert-manager replicaset.apps/cert-manager-cainjector-fbd548cb8 1 1 0 26m cert-manager replicaset.apps/cert-manager-webhook-655b4d58fb 1 1 0 26m

NAMESPACE NAME COMPLETIONS DURATION AGE cert-manager job.batch/cert-manager-startupapicheck 0/1 26m 26m kube-system job.batch/helm-install-rke2-canal 0/1 4h10m 4h10m kube-system job.batch/helm-install-rke2-coredns 0/1 4h10m 4h10m kube-system job.batch/helm-install-rke2-metrics-server 0/1 4h10m 4h10m kube-system job.batch/helm-install-rke2-snapshot-controller 0/1 4h10m 4h10m kube-system job.batch/helm-install-rke2-snapshot-controller-crd 0/1 4h10m 4h10m kube-system job.batch/helm-install-rke2-snapshot-validation-webhook 0/1 4h10m 4h10m

Kubectl event logs

root@machine-0:/home/normal# kubectl get events --all-namespaces
NAMESPACE       LAST SEEN   TYPE      REASON                OBJECT                                                    MESSAGE
cattle-system   4m27s       Warning   FailedScheduling      pod/rancher-64db9f48c-mc7vg                               0/3 nodes are available: 1 node(s) had untolerated taint {node.kubernetes.io/disk-pressure: }, 2 node(s) had untolerated taint {node.kubernetes.io/not-ready: }. preemption: 0/3 nodes are available: 3 Preemption is not helpful for scheduling..
cattle-system   24m         Normal    SuccessfulCreate      replicaset/rancher-64db9f48c                              Created pod: rancher-64db9f48c-mc7vg
cattle-system   24m         Normal    ScalingReplicaSet     deployment/rancher                                        Scaled up replica set rancher-64db9f48c to 1
cert-manager    3m57s       Warning   FailedScheduling      pod/cert-manager-55cf8685cb-5zmn9                         0/3 nodes are available: 1 node(s) had untolerated taint {node.kubernetes.io/disk-pressure: }, 2 node(s) had untolerated taint {node.kubernetes.io/not-ready: }. preemption: 0/3 nodes are available: 3 Preemption is not helpful for scheduling..
cert-manager    39m         Normal    SuccessfulCreate      replicaset/cert-manager-55cf8685cb                        Created pod: cert-manager-55cf8685cb-5zmn9
cert-manager    3m57s       Warning   FailedScheduling      pod/cert-manager-cainjector-fbd548cb8-9lzqt               0/3 nodes are available: 1 node(s) had untolerated taint {node.kubernetes.io/disk-pressure: }, 2 node(s) had untolerated taint {node.kubernetes.io/not-ready: }. preemption: 0/3 nodes are available: 3 Preemption is not helpful for scheduling..
cert-manager    39m         Normal    SuccessfulCreate      replicaset/cert-manager-cainjector-fbd548cb8              Created pod: cert-manager-cainjector-fbd548cb8-9lzqt
cert-manager    39m         Normal    ScalingReplicaSet     deployment/cert-manager-cainjector                        Scaled up replica set cert-manager-cainjector-fbd548cb8 to 1
cert-manager    3m57s       Warning   FailedScheduling      pod/cert-manager-startupapicheck-7cjzs                    0/3 nodes are available: 1 node(s) had untolerated taint {node.kubernetes.io/disk-pressure: }, 2 node(s) had untolerated taint {node.kubernetes.io/not-ready: }. preemption: 0/3 nodes are available: 3 Preemption is not helpful for scheduling..
cert-manager    39m         Normal    SuccessfulCreate      job/cert-manager-startupapicheck                          Created pod: cert-manager-startupapicheck-7cjzs
cert-manager    3m57s       Warning   FailedScheduling      pod/cert-manager-webhook-655b4d58fb-rcws6                 0/3 nodes are available: 1 node(s) had untolerated taint {node.kubernetes.io/disk-pressure: }, 2 node(s) had untolerated taint {node.kubernetes.io/not-ready: }. preemption: 0/3 nodes are available: 3 Preemption is not helpful for scheduling..
cert-manager    39m         Normal    SuccessfulCreate      replicaset/cert-manager-webhook-655b4d58fb                Created pod: cert-manager-webhook-655b4d58fb-rcws6
cert-manager    39m         Normal    ScalingReplicaSet     deployment/cert-manager-webhook                           Scaled up replica set cert-manager-webhook-655b4d58fb to 1
cert-manager    39m         Normal    ScalingReplicaSet     deployment/cert-manager                                   Scaled up replica set cert-manager-55cf8685cb to 1
default         2m53s       Warning   FreeDiskSpaceFailed   node/machine-0                                          (combined from similar events): Failed to garbage collect required amount of images. Attempted to free 165305339904 bytes, but only found 0 bytes eligible to free.
kube-system     117s        Warning   FailedScheduling      pod/helm-install-rke2-canal-c45mv                         0/3 nodes are available: 1 node(s) had untolerated taint {node.kubernetes.io/disk-pressure: }, 2 node(s) didn't match Pod's node affinity/selector. preemption: 0/3 nodes are available: 3 Preemption is not helpful for scheduling..
kube-system     117s        Warning   FailedScheduling      pod/helm-install-rke2-coredns-n8g9t                       0/3 nodes are available: 1 node(s) had untolerated taint {node.kubernetes.io/disk-pressure: }, 2 node(s) didn't match Pod's node affinity/selector. preemption: 0/3 nodes are available: 3 Preemption is not helpful for scheduling..
kube-system     2m27s       Warning   FailedScheduling      pod/helm-install-rke2-metrics-server-ms788                0/3 nodes are available: 1 node(s) had untolerated taint {node.kubernetes.io/disk-pressure: }, 2 node(s) had untolerated taint {node.kubernetes.io/not-ready: }. preemption: 0/3 nodes are available: 3 Preemption is not helpful for scheduling..
kube-system     117s        Warning   FailedScheduling      pod/helm-install-rke2-snapshot-controller-crd-rqjkj       0/3 nodes are available: 1 node(s) had untolerated taint {node.kubernetes.io/disk-pressure: }, 2 node(s) had untolerated taint {node.kubernetes.io/not-ready: }. preemption: 0/3 nodes are available: 3 Preemption is not helpful for scheduling..
kube-system     117s        Warning   FailedScheduling      pod/helm-install-rke2-snapshot-controller-j78sj           0/3 nodes are available: 1 node(s) had untolerated taint {node.kubernetes.io/disk-pressure: }, 2 node(s) had untolerated taint {node.kubernetes.io/not-ready: }. preemption: 0/3 nodes are available: 3 Preemption is not helpful for scheduling..
kube-system     117s        Warning   FailedScheduling      pod/helm-install-rke2-snapshot-validation-webhook-t5t24   0/3 nodes are available: 1 node(s) had untolerated taint {node.kubernetes.io/disk-pressure: }, 2 node(s) had untolerated taint {node.kubernetes.io/not-ready: }. preemption: 0/3 nodes are available: 3 Preemption is not helpful for scheduling..
kube-system     10m         Warning   FailedCreate          daemonset/kube-vip-ds                                     Error creating: pods "kube-vip-ds-" is forbidden: error looking up service account kube-system/kube-vip: serviceaccount "kube-vip" not found

Control plane node description

root@machine-0:/home/normal# kubectl describe node machine-0
Name:               machine-0
Roles:              control-plane,etcd,master
Labels:             beta.kubernetes.io/arch=amd64
                    beta.kubernetes.io/instance-type=rke2
                    beta.kubernetes.io/os=linux
                    kubernetes.io/arch=amd64
                    kubernetes.io/hostname=machine-0
                    kubernetes.io/os=linux
                    node-role.kubernetes.io/control-plane=true
                    node-role.kubernetes.io/etcd=true
                    node-role.kubernetes.io/master=true
                    node.kubernetes.io/instance-type=rke2
Annotations:        alpha.kubernetes.io/provided-node-ip: 192.169.0.10
                    etcd.rke2.cattle.io/local-snapshots-timestamp: 2024-01-24T19:07:31Z
                    etcd.rke2.cattle.io/node-address: 192.169.0.10
                    etcd.rke2.cattle.io/node-name: machine-0-0a9550ae
                    node.alpha.kubernetes.io/ttl: 0
                    rke2.io/encryption-config-hash: start-4e1ae4c38a662bdf4101d2ff62004a662978e39159b4bee9cd399cc50fc7c74b
                    rke2.io/hostname: machine-0
                    rke2.io/internal-ip: 192.169.0.10
                    rke2.io/node-args:
                      ["server","--tls-san","192.169.3.50","--tls-san","192.169.0.11","--write-kubeconfig-mode","420","--disable","rke2-ingress-nginx"]
                    rke2.io/node-config-hash: WJDPFSWIXMXMLY4CUXWVURZC7VSCEYFGRCD2ZU326HX7AW2YXJTA====
                    rke2.io/node-env: {}
                    volumes.kubernetes.io/controller-managed-attach-detach: true
CreationTimestamp:  Wed, 24 Jan 2024 17:46:10 +0000
Taints:             node.kubernetes.io/disk-pressure:NoSchedule
                    node.kubernetes.io/not-ready:NoSchedule
Unschedulable:      false
Lease:
  HolderIdentity:  machine-0
  AcquireTime:     <unset>
  RenewTime:       Wed, 24 Jan 2024 22:10:45 +0000
Conditions:
  Type             Status  LastHeartbeatTime                 LastTransitionTime                Reason                       Message
  ----             ------  -----------------                 ------------------                ------                       -------
  MemoryPressure   False   Wed, 24 Jan 2024 22:10:11 +0000   Wed, 24 Jan 2024 18:51:02 +0000   KubeletHasSufficientMemory   kubelet has sufficient memory available
  DiskPressure     True    Wed, 24 Jan 2024 22:10:11 +0000   Wed, 24 Jan 2024 19:06:28 +0000   KubeletHasDiskPressure       kubelet has disk pressure
  PIDPressure      False   Wed, 24 Jan 2024 22:10:11 +0000   Wed, 24 Jan 2024 18:51:02 +0000   KubeletHasSufficientPID      kubelet has sufficient PID available
  Ready            False   Wed, 24 Jan 2024 22:10:11 +0000   Wed, 24 Jan 2024 18:51:02 +0000   KubeletNotReady              container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: cni plugin not initialized
  EtcdIsVoter      True    Wed, 24 Jan 2024 17:46:13 +0000   Wed, 24 Jan 2024 17:46:13 +0000   MemberNotLearner             Node is a voting member of the etcd cluster
Addresses:
  InternalIP:  192.169.0.10
  Hostname:    machine-0
Capacity:
  cpu:                96
  ephemeral-storage:  919162120Ki
  hugepages-1Gi:      0
  hugepages-2Mi:      0
  memory:             528186908Ki
  pods:               110
Allocatable:
  cpu:                96
  ephemeral-storage:  894160909635
  hugepages-1Gi:      0
  hugepages-2Mi:      0
  memory:             528186908Ki
  pods:               110
System Info:
  Machine ID:                 937916b85ff34816bd088d35e639c67f
  System UUID:                26360400-dbe5-11eb-8000-3ceceff3813a
  Boot ID:                    f16b8922-f6e9-4037-bd56-ce123feae027
  Kernel Version:             5.15.0-91-generic
  OS Image:                   Ubuntu 22.04.3 LTS
  Operating System:           linux
  Architecture:               amd64
  Container Runtime Version:  containerd://1.7.11-k3s2
  Kubelet Version:            v1.26.12+rke2r1
  Kube-Proxy Version:         v1.26.12+rke2r1
PodCIDR:                      10.42.0.0/24
PodCIDRs:                     10.42.0.0/24
ProviderID:                   rke2://machine-0
Non-terminated Pods:          (6 in total)
  Namespace                   Name                                    CPU Requests  CPU Limits  Memory Requests  Memory Limits  Age
  ---------                   ----                                    ------------  ----------  ---------------  -------------  ---
  kube-system                 cloud-controller-manager-machine-0    100m (0%)     0 (0%)      128Mi (0%)       0 (0%)         4h24m
  kube-system                 etcd-machine-0                        200m (0%)     0 (0%)      512Mi (0%)       0 (0%)         4h24m
  kube-system                 kube-apiserver-machine-0              250m (0%)     0 (0%)      1Gi (0%)         0 (0%)         4h24m
  kube-system                 kube-controller-manager-machine-0     200m (0%)     0 (0%)      256Mi (0%)       0 (0%)         4h24m
  kube-system                 kube-proxy-machine-0                  250m (0%)     0 (0%)      128Mi (0%)       0 (0%)         3h3m
  kube-system                 kube-scheduler-machine-0              100m (0%)     0 (0%)      128Mi (0%)       0 (0%)         4h24m
Allocated resources:
  (Total limits may be over 100 percent, i.e., overcommitted.)
  Resource           Requests     Limits
  --------           --------     ------
  cpu                1100m (1%)   0 (0%)
  memory             2176Mi (0%)  0 (0%)
  ephemeral-storage  0 (0%)       0 (0%)
  hugepages-1Gi      0 (0%)       0 (0%)
  hugepages-2Mi      0 (0%)       0 (0%)
Events:
  Type     Reason               Age                    From     Message
  ----     ------               ----                   ----     -------
  Warning  FreeDiskSpaceFailed  4m33s (x27 over 134m)  kubelet  (combined from similar events): Failed to garbage collect required amount of images. Attempted to free 165305339904 bytes, but only found 0 bytes eligible to free.

For some reason, the `df -h` output is filled with these overlay and shm mounts (note the root filesystem is at 96% usage, which matches the disk-pressure taint above):

Filesystem                         Size  Used Avail Use% Mounted on
tmpfs                               51G   25M   51G   1% /run
/dev/mapper/ubuntu--vg-ubuntu--lv  877G  805G   36G  96% /
tmpfs                              252G   24K  252G   1% /dev/shm
tmpfs                              5.0M     0  5.0M   0% /run/lock
/dev/sda2                          2.0G  306M  1.5G  17% /boot
/dev/sda1                          1.1G  6.1M  1.1G   1% /boot/efi
shm                                 64M     0   64M   0% /run/k3s/containerd/io.containerd.grpc.v1.cri/sandboxes/86d1e80c0ca12b1b307204e61507ff1b8a632192fe537000bc201724051fa132/shm
shm                                 64M     0   64M   0% /run/k3s/containerd/io.containerd.grpc.v1.cri/sandboxes/b2a52985151b1344962a653977184abff5a25d565217cc332fcda646fafd087b/shm
shm                                 64M     0   64M   0% /run/k3s/containerd/io.containerd.grpc.v1.cri/sandboxes/fa42998d60f6a0764f6f67c8f94316dc1b05549fa1df559afdf0899eb9e6721f/shm
overlay                            877G  805G   36G  96% /run/k3s/containerd/io.containerd.runtime.v2.task/k8s.io/86d1e80c0ca12b1b307204e61507ff1b8a632192fe537000bc201724051fa132/rootfs
overlay                            877G  805G   36G  96% /run/k3s/containerd/io.containerd.runtime.v2.task/k8s.io/fa42998d60f6a0764f6f67c8f94316dc1b05549fa1df559afdf0899eb9e6721f/rootfs
overlay                            877G  805G   36G  96% /run/k3s/containerd/io.containerd.runtime.v2.task/k8s.io/b2a52985151b1344962a653977184abff5a25d565217cc332fcda646fafd087b/rootfs
overlay                            877G  805G   36G  96% /run/k3s/containerd/io.containerd.runtime.v2.task/k8s.io/106276b255dc3e8c2b607a9cf8884f59da0b744268f6215bd72854e74a108057/rootfs
shm                                 64M     0   64M   0% /run/k3s/containerd/io.containerd.grpc.v1.cri/sandboxes/72f6e448e221991030687587ec2e1b0fc35cc1046e94071f8ae302a11eaf4e79/shm
overlay                            877G  805G   36G  96% /run/k3s/containerd/io.containerd.runtime.v2.task/k8s.io/72f6e448e221991030687587ec2e1b0fc35cc1046e94071f8ae302a11eaf4e79/rootfs
overlay                            877G  805G   36G  96% /run/k3s/containerd/io.containerd.runtime.v2.task/k8s.io/ae3f0a4e0d9680c30a0814db9055d13e374db44f601756e0a63684496622ab2a/rootfs
overlay                            877G  805G   36G  96% /run/k3s/containerd/io.containerd.runtime.v2.task/k8s.io/6ddfec478b12b5fbb045934d244bb652e290b2371532c6d5374a7a89f9a68879/rootfs
shm                                 64M     0   64M   0% /run/k3s/containerd/io.containerd.grpc.v1.cri/sandboxes/4c686224be121bfa63afa2b5b9d7d7ea6223ec1dd3d864ab8cd67ffb70c65d1c/shm
overlay                            877G  805G   36G  96% /run/k3s/containerd/io.containerd.runtime.v2.task/k8s.io/4c686224be121bfa63afa2b5b9d7d7ea6223ec1dd3d864ab8cd67ffb70c65d1c/rootfs
overlay                            877G  805G   36G  96% /run/k3s/containerd/io.containerd.runtime.v2.task/k8s.io/626435c560b90557e8c3488b5e746951d5a7a9a9336fdfff0f6f4d90dd8b8883/rootfs
overlay                            877G  805G   36G  96% /run/k3s/containerd/io.containerd.runtime.v2.task/k8s.io/d83aef92824ae10737a1994fcebb57aa5cca7e7d42cc9ca0fbd85431ac2c154f/rootfs
shm                                 64M     0   64M   0% /run/k3s/containerd/io.containerd.grpc.v1.cri/sandboxes/3cc4fedaa79032bf5fa18c9e939a73aecb352d6ed54e70167ffda8ae69a8854e/shm
overlay                            877G  805G   36G  96% /run/k3s/containerd/io.containerd.runtime.v2.task/k8s.io/3cc4fedaa79032bf5fa18c9e939a73aecb352d6ed54e70167ffda8ae69a8854e/rootfs
overlay                            877G  805G   36G  96% /run/k3s/containerd/io.containerd.runtime.v2.task/k8s.io/21221658a71644d3fda4c89a0564aba0c1c2ca57e59dd884478ef2d8c762244d/rootfs
tmpfs                               51G  4.0K   51G   1% /run/user/1001
overlay                            877G  805G   36G  96% /mnt/docker/overlay2/478bd2a43988031366edb415fb01b880c0cbadfa079ec17b5ac7ab9de3b93c07/merged
tmpfs                               51G  4.0K   51G   1% /run/user/1000
192.169.0.1:/mnt/storage/models    200T  3.9T  186T   3% /models
overlay                            877G  805G   36G  96% /mnt/docker/overlay2/b4ff783dad30e510f8dfafcdb50c2c6995182a3cf7815e2d92b19de40ec54505/merged
overlay                            877G  805G   36G  96% /mnt/docker/overlay2/82695fb56dcef0928078b78e06bcee26090fddffe381b7d7bf7d24328f4ec5b0/merged

0 Answers