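Hi, I’ve deployed K8ssandra, but I’ve run into the following situation: the Cassandra StatefulSet pods never become fully ready (they stay at 1/2) and the Stargate pod remains stuck in Init.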
[root@node1 ~]# kubectl get pods
NAME READY STATUS RESTARTS AGE
k8ssandra-cass-operator-766b945f65-ntb9s 1/1 Running 0 24m
k8ssandra-dc1-default-sts-0 1/2 Running 0 24m
k8ssandra-dc1-default-sts-1 1/2 Running 0 24m
k8ssandra-dc1-default-sts-2 1/2 Running 0 24m
k8ssandra-dc1-default-sts-3 1/2 Running 1 24m
k8ssandra-dc1-stargate-7d79856946-qjjl7 0/1 Init:0/1 0 24m
k8ssandra-grafana-dfdb5cc5c-4zq4n 2/2 Running 0 24m
k8ssandra-kube-prometheus-operator-7dcccdcc86-tv7qc 1/1 Running 0 24m
k8ssandra-reaper-operator-566cdc787-nz5mf 1/1 Running 0 24m
prometheus-k8ssandra-kube-prometheus-prometheus-0 2/2 Running 1 24m
Also, I’ve reinstalled K8ssandra many times; sometimes just one of the StatefulSet pods becomes fully ready as expected, for example:
[root@node1 ~]# kubectl get pods
NAME READY STATUS RESTARTS AGE
k8ssandra-cass-operator-766b945f65-ntb9s 1/1 Running 0 24m
k8ssandra-dc1-default-sts-0 1/2 Running 0 24m
k8ssandra-dc1-default-sts-1 1/2 Running 0 24m
k8ssandra-dc1-default-sts-2 2/2 Running 0 24m
k8ssandra-dc1-default-sts-3 1/2 Running 1 24m
k8ssandra-dc1-stargate-7d79856946-qjjl7 0/1 Init:0/1 0 24m
k8ssandra-grafana-dfdb5cc5c-4zq4n 2/2 Running 0 24m
k8ssandra-kube-prometheus-operator-7dcccdcc86-tv7qc 1/1 Running 0 24m
k8ssandra-reaper-operator-566cdc787-nz5mf 1/1 Running 0 24m
prometheus-k8ssandra-kube-prometheus-prometheus-0 2/2 Running 1 24m
These are the Helm values I used:
[root@node1 ~]# helm get values k8ssandra
USER-SUPPLIED VALUES:
cassandra:
allowMultipleNodesPerWorker: false
cassandraLibDirVolume:
size: 5Gi
storageClass: rook-ceph-block
datacenters:
- name: dc1
racks:
- name: default
size: 4
enabled: true
heap:
newGenSize: 24G
size: 24G
resources:
limits:
cpu: 3000m
memory: 24Gi
requests:
cpu: 3000m
memory: 24Gi
version: 3.11.10
kube-prometheus-stack:
grafana:
adminPassword: admin123
adminUser: admin
stargate:
cpuLimMillicores: 1000
cpuReqMillicores: 200
enabled: true
heapMB: 1024
replicas: 1
[root@node1 ~]# kubectl logs k8ssandra-dc1-default-sts-0 -c cassandra
INFO [nioEventLoopGroup-2-2] 2021-05-27 09:21:52,340 Cli.java:617 - address=/10.233.96.0:34922 url=/api/v0/probes/readiness status=500 Internal Server Error
INFO [nioEventLoopGroup-2-1] 2021-05-27 09:22:01,047 Cli.java:617 - address=/10.233.96.0:34954 url=/api/v0/probes/liveness status=200 OK
INFO [epollEventLoopGroup-170-1] 2021-05-27 09:22:02,337 Clock.java:47 - Using native clock for microsecond precision
WARN [epollEventLoopGroup-170-2] 2021-05-27 09:22:02,338 AbstractBootstrap.java:452 - Unknown channel option 'TCP_NODELAY' for channel '[id: 0xa37e9fbc]'
WARN [epollEventLoopGroup-170-2] 2021-05-27 09:22:02,339 Loggers.java:39 - [s165] Error connecting to Node(endPoint=/tmp/cassandra.sock, hostId=null, hashCode=762c7772), trying next node (FileNotFoundException: null)
[root@node1 ~]# kubectl get pvc
NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
server-data-k8ssandra-dc1-default-sts-0 Bound pvc-3e796c50-1dc0-4b10-a02c-94e83def42dd 5Gi RWO rook-ceph-block 32m
server-data-k8ssandra-dc1-default-sts-1 Bound pvc-27ecb64f-97f4-401b-944d-161650784be0 5Gi RWO rook-ceph-block 32m
server-data-k8ssandra-dc1-default-sts-2 Bound pvc-174c6237-c386-401e-8551-a1d39e266838 5Gi RWO rook-ceph-block 32m
server-data-k8ssandra-dc1-default-sts-3 Bound pvc-5d0fa6fd-e7c9-459c-91c9-8226d363536e 5Gi RWO rook-ceph-block 32m
[root@node1 ~]# kubectl describe pod k8ssandra-dc1-default-sts-0
Name: k8ssandra-dc1-default-sts-0
Namespace: k8ssandra
Priority: 0
Node: node7/172.16.11.183
Start Time: Thu, 27 May 2021 11:51:35 +0300
Labels: app.kubernetes.io/managed-by=cass-operator
cassandra.datastax.com/cluster=k8ssandra
cassandra.datastax.com/datacenter=dc1
cassandra.datastax.com/node-state=Ready-to-Start
cassandra.datastax.com/rack=default
controller-revision-hash=k8ssandra-dc1-default-sts-865d88bd4
statefulset.kubernetes.io/pod-name=k8ssandra-dc1-default-sts-0
Annotations: <none>
Status: Running
IP: 10.233.96.6
IPs:
IP: 10.233.96.6
Controlled By: StatefulSet/k8ssandra-dc1-default-sts
Init Containers:
base-config-init:
Container ID: docker://752e5e85c3cdde14d850998552809d3e98a85c2dfa647cb608034b6a180b1e83
Image: k8ssandra/cass-management-api:3.11.10-v0.1.25
Image ID: docker-pullable://k8ssandra/cass-management-api@sha256:ef5e007d37b57d905c706c1221c96228c4387abb8a96f994af8aae3423dc9f2a
Port: <none>
Host Port: <none>
Command:
/bin/sh
Args:
-c
cp -r /etc/cassandra/* /cassandra-base-config/
State: Terminated
Reason: Completed
Exit Code: 0
Started: Thu, 27 May 2021 11:53:52 +0300
Finished: Thu, 27 May 2021 11:53:52 +0300
Ready: True
Restart Count: 0
Environment: <none>
Mounts:
/cassandra-base-config/ from cassandra-config (rw)
/var/run/secrets/kubernetes.io/serviceaccount from default-token-mtdjk (ro)
server-config-init:
Container ID: docker://683835e66c8a9b4fd42900e0cc7f7b6930254bb042aab3e326b8b047f3665b63
Image: docker.io/datastax/cass-config-builder:1.0.4
Image ID: docker-pullable://datastax/cass-config-builder@sha256:0cfa1f1270f1c211ae4ac8eb690dd9e909cf690126e5ed5ddb08bba78902d1a1
Port: <none>
Host Port: <none>
State: Terminated
Reason: Completed
Exit Code: 0
Started: Thu, 27 May 2021 11:53:59 +0300
Finished: Thu, 27 May 2021 11:54:01 +0300
Ready: True
Restart Count: 0
Limits:
cpu: 1
memory: 256M
Requests:
cpu: 1
memory: 256M
Environment:
POD_IP: (v1:status.podIP)
HOST_IP: (v1:status.hostIP)
USE_HOST_IP_FOR_BROADCAST: false
RACK_NAME: default
PRODUCT_VERSION: 3.11.10
PRODUCT_NAME: cassandra
DSE_VERSION: 3.11.10
CONFIG_FILE_DATA: {"cassandra-yaml":{"authenticator":"PasswordAuthenticator","authorizer":"CassandraAuthorizer","credentials_update_interval_in_ms":3600000,"credentials_validity_in_ms":3600000,"num_tokens":256,"permissions_update_interval_in_ms":3600000,"permissions_validity_in_ms":3600000,"role_manager":"CassandraRoleManager","roles_update_interval_in_ms":3600000,"roles_validity_in_ms":3600000},"cluster-info":{"name":"k8ssandra","seeds":"k8ssandra-seed-service"},"datacenter-info":{"graph-enabled":0,"name":"dc1","solr-enabled":0,"spark-enabled":0},"jvm-options":{"additional-jvm-opts":["-Dcassandra.system_distributed_replication_dc_names=dc1","-Dcassandra.system_distributed_replication_per_dc=4"],"heap_size_young_generation":"24G","initial_heap_size":"24G","max_heap_size":"24G"}}
Mounts:
/config from server-config (rw)
/var/run/secrets/kubernetes.io/serviceaccount from default-token-mtdjk (ro)
jmx-credentials:
Container ID: docker://d152b98f82f2d628069b567622e7a32168169195ef9ab22b59591af37138d5cb
Image: busybox
Image ID: docker-pullable://busybox@sha256:b5fc1d7b2e4ea86a06b0cf88de915a2c43a99a00b6b3c0af731e5f4c07ae8eff
Port: <none>
Host Port: <none>
Args:
/bin/sh
-c
echo "$REAPER_JMX_USERNAME $REAPER_JMX_PASSWORD" > /config/jmxremote.password && echo "$SUPERUSER_JMX_USERNAME $SUPERUSER_JMX_PASSWORD" >> /config/jmxremote.password
State: Terminated
Reason: Completed
Exit Code: 0
Started: Thu, 27 May 2021 11:54:02 +0300
Finished: Thu, 27 May 2021 11:54:02 +0300
Ready: True
Restart Count: 0
Environment:
REAPER_JMX_USERNAME: <set to the key 'username' in secret 'k8ssandra-reaper-jmx'> Optional: false
REAPER_JMX_PASSWORD: <set to the key 'password' in secret 'k8ssandra-reaper-jmx'> Optional: false
SUPERUSER_JMX_USERNAME: <set to the key 'username' in secret 'k8ssandra-superuser'> Optional: false
SUPERUSER_JMX_PASSWORD: <set to the key 'password' in secret 'k8ssandra-superuser'> Optional: false
Mounts:
/config from server-config (rw)
/var/run/secrets/kubernetes.io/serviceaccount from default-token-mtdjk (ro)
Containers:
cassandra:
Container ID: docker://217d76c7eb3153e77000da0043bfa31b4b45f1500002f9c8aac8a8e8ab94731d
Image: k8ssandra/cass-management-api:3.11.10-v0.1.25
Image ID: docker-pullable://k8ssandra/cass-management-api@sha256:ef5e007d37b57d905c706c1221c96228c4387abb8a96f994af8aae3423dc9f2a
Ports: 9042/TCP, 9142/TCP, 7000/TCP, 7001/TCP, 7199/TCP, 8080/TCP, 9103/TCP, 9160/TCP
Host Ports: 0/TCP, 0/TCP, 0/TCP, 0/TCP, 0/TCP, 0/TCP, 0/TCP, 0/TCP
State: Running
Started: Thu, 27 May 2021 11:54:03 +0300
Ready: False
Restart Count: 0
Limits:
cpu: 3
memory: 24Gi
Requests:
cpu: 3
memory: 24Gi
Liveness: http-get http://:8080/api/v0/probes/liveness delay=15s timeout=1s period=15s #success=1 #failure=3
Readiness: http-get http://:8080/api/v0/probes/readiness delay=20s timeout=1s period=10s #success=1 #failure=3
Environment:
LOCAL_JMX: no
DS_LICENSE: accept
DSE_AUTO_CONF_OFF: all
USE_MGMT_API: true
MGMT_API_EXPLICIT_START: true
DSE_MGMT_EXPLICIT_START: true
Mounts:
/config from server-config (rw)
/etc/encryption/ from encryption-cred-storage (rw)
/var/lib/cassandra from server-data (rw)
/var/log/cassandra from server-logs (rw)
/var/run/secrets/kubernetes.io/serviceaccount from default-token-mtdjk (ro)
server-system-logger:
Container ID: docker://23b2501e7c93b5923c43d5f596ea9d9ce268f835ba0181b25364a01cc37c8c0a
Image: k8ssandra/system-logger:9c4c3692
Image ID: docker-pullable://k8ssandra/system-logger@sha256:6208a1e3d710d022c9e922c8466fe7d76ca206f97bf92902ff5327114696f8b1
Port: <none>
Host Port: <none>
State: Running
Started: Thu, 27 May 2021 11:54:07 +0300
Ready: True
Restart Count: 0
Limits:
cpu: 100m
memory: 64M
Requests:
cpu: 100m
memory: 64M
Environment: <none>
Mounts:
/var/log/cassandra from server-logs (rw)
/var/run/secrets/kubernetes.io/serviceaccount from default-token-mtdjk (ro)
Conditions:
Type Status
Initialized True
Ready False
ContainersReady False
PodScheduled True
Volumes:
server-data:
Type: PersistentVolumeClaim (a reference to a PersistentVolumeClaim in the same namespace)
ClaimName: server-data-k8ssandra-dc1-default-sts-0
ReadOnly: false
cassandra-config:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium:
SizeLimit: <unset>
server-config:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium:
SizeLimit: <unset>
server-logs:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium:
SizeLimit: <unset>
encryption-cred-storage:
Type: Secret (a volume populated by a Secret)
SecretName: dc1-keystore
Optional: false
default-token-mtdjk:
Type: Secret (a volume populated by a Secret)
SecretName: default-token-mtdjk
Optional: false
QoS Class: Burstable
Node-Selectors: <none>
Tolerations: node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Normal Scheduled 32m default-scheduler Successfully assigned k8ssandra/k8ssandra-dc1-default-sts-0 to node7
Normal SuccessfulAttachVolume 32m attachdetach-controller AttachVolume.Attach succeeded for volume "pvc-3e796c50-1dc0-4b10-a02c-94e83def42dd"
Normal Pulling 32m kubelet Pulling image "k8ssandra/cass-management-api:3.11.10-v0.1.25"
Normal Pulled 30m kubelet Successfully pulled image "k8ssandra/cass-management-api:3.11.10-v0.1.25" in 1m59.009421126s
Normal Started 30m kubelet Started container base-config-init
Normal Created 30m kubelet Created container base-config-init
Normal Pulling 30m kubelet Pulling image "docker.io/datastax/cass-config-builder:1.0.4"
Normal Pulled 30m kubelet Successfully pulled image "docker.io/datastax/cass-config-builder:1.0.4" in 6.631303128s
Normal Created 30m kubelet Created container server-config-init
Normal Started 30m kubelet Started container server-config-init
Normal Started 30m kubelet Started container jmx-credentials
Normal Created 30m kubelet Created container jmx-credentials
Normal Pulled 30m kubelet Container image "busybox" already present on machine
Normal Pulled 30m kubelet Container image "k8ssandra/cass-management-api:3.11.10-v0.1.25" already present on machine
Normal Created 30m kubelet Created container cassandra
Normal Started 30m kubelet Started container cassandra
Normal Pulling 30m kubelet Pulling image "k8ssandra/system-logger:9c4c3692"
Normal Pulled 30m kubelet Successfully pulled image "k8ssandra/system-logger:9c4c3692" in 3.859718237s
Normal Created 30m kubelet Created container server-system-logger
Normal Started 30m kubelet Started container server-system-logger
Warning Unhealthy 2m31s (x166 over 30m) kubelet Readiness probe failed: HTTP probe failed with statuscode: 500
My environment specs are:
Kubernetes: 1.20
CNI: Weave
Storage provider for pvcs: Rook-ceph