From c45062cfec3d39907f152cae915fae05af6e730b Mon Sep 17 00:00:00 2001 From: Rook Date: Fri, 25 Aug 2023 18:41:38 +0000 Subject: [PATCH] Deployed a72a4ffc7 to latest in docs/rook with MkDocs 1.5.2 and mike 1.1.2 --- .../Helm-Charts/ceph-cluster-chart/index.html | 2 +- .../Helm-Charts/operator-chart/index.html | 2 +- docs/rook/latest/search/search_index.json | 2 +- docs/rook/latest/sitemap.xml.gz | Bin 1002 -> 1002 bytes 4 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/rook/latest/Helm-Charts/ceph-cluster-chart/index.html b/docs/rook/latest/Helm-Charts/ceph-cluster-chart/index.html index 622b3e1c4..d2a9be3fd 100644 --- a/docs/rook/latest/Helm-Charts/ceph-cluster-chart/index.html +++ b/docs/rook/latest/Helm-Charts/ceph-cluster-chart/index.html @@ -3,7 +3,7 @@ 3
helm repo add rook-release https://charts.rook.io/release
 helm install --create-namespace --namespace rook-ceph rook-ceph-cluster \
    --set operatorNamespace=rook-ceph rook-release/rook-ceph-cluster -f values.yaml
Note

--namespace specifies the cephcluster namespace, which may be different from the rook operator namespace.

Configuration

The following table lists the configurable parameters of the rook-ceph-cluster chart and their default values.

Parameter Description Default
cephBlockPools A list of CephBlockPool configurations to deploy See below
cephBlockPoolsVolumeSnapshotClass Settings for the block pool snapshot class See RBD Snapshots
cephClusterSpec Cluster configuration. See below
cephFileSystemVolumeSnapshotClass Settings for the filesystem snapshot class See CephFS Snapshots
cephFileSystems A list of CephFileSystem configurations to deploy See below
cephObjectStores A list of CephObjectStore configurations to deploy See below
clusterName The metadata.name of the CephCluster CR The same as the namespace
configOverride Cluster ceph.conf override nil
ingress.dashboard Enable an ingress for the ceph-dashboard {}
kubeVersion Optional override of the target kubernetes version nil
monitoring.createPrometheusRules Whether to create the Prometheus rules for Ceph alerts false
monitoring.enabled Enable Prometheus integration; this also creates the RBAC rules needed for the operator to create ServiceMonitors. Monitoring requires Prometheus to be pre-installed false
monitoring.prometheusRule.annotations Annotations applied to PrometheusRule {}
monitoring.prometheusRule.labels Labels applied to PrometheusRule {}
monitoring.rulesNamespaceOverride The namespace in which to create the Prometheus rules, if different from the Rook cluster namespace. If you have multiple Rook Ceph clusters in the same Kubernetes cluster, set rulesNamespaceOverride to the same namespace (ideally the namespace where Prometheus is deployed) for all of them; otherwise you will get duplicate alerts with multiple alert definitions. nil
operatorNamespace Namespace of the main rook operator "rook-ceph"
pspEnable Create & use PSP resources. Set this to the same value as the rook-ceph chart. false
toolbox.affinity Toolbox affinity {}
toolbox.containerSecurityContext Toolbox container security context {"capabilities":{"drop":["ALL"]},"runAsGroup":2016,"runAsNonRoot":true,"runAsUser":2016}
toolbox.enabled Enable Ceph debugging pod deployment. See toolbox false
toolbox.image Toolbox image, defaults to the image used by the Ceph cluster nil
toolbox.priorityClassName Set the priority class for the toolbox if desired nil
toolbox.resources Toolbox resources {"limits":{"cpu":"500m","memory":"1Gi"},"requests":{"cpu":"100m","memory":"128Mi"}}
toolbox.tolerations Toolbox tolerations []
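
To illustrate the table above, a minimal values.yaml override for these top-level settings might look like the following. This is only a sketch: the parameter names come from the table, while the chosen values are assumptions for a typical deployment, not recommendations.

operatorNamespace: rook-ceph
clusterName: rook-ceph        # defaults to the release namespace if unset
configOverride: |
  [global]
  osd_pool_default_size = 3
toolbox:
  enabled: true
monitoring:
  enabled: false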

Ceph Cluster Spec

The CephCluster CRD takes its spec from cephClusterSpec.*. This is not an exhaustive list of parameters. For the full list, see the Cluster CRD topic.

The cluster spec example is for a converged cluster where all the Ceph daemons are running locally, as in the host-based example (cluster.yaml). For a different configuration such as a PVC-based cluster (cluster-on-pvc.yaml), external cluster (cluster-external.yaml), or stretch cluster (cluster-stretched.yaml), replace this entire cephClusterSpec with the specs from those examples.
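
As a rough sketch of what this section looks like for the converged, host-based case, a trimmed cephClusterSpec could resemble the following. The field names follow the CephCluster CRD; the specific values (Ceph image, mon count, device selection) are assumptions and should be taken from the full examples mentioned above.

cephClusterSpec:
  cephVersion:
    image: quay.io/ceph/ceph:v17.2.6
  dataDirHostPath: /var/lib/rook
  mon:
    count: 3
    allowMultiplePerNode: false
  dashboard:
    enabled: true
  storage:
    useAllNodes: true
    useAllDevices: true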

Ceph Block Pools

The cephBlockPools array in the values file defines a list of CephBlockPool resources as described in the table below.

Parameter Description Default
name The name of the CephBlockPool ceph-blockpool
spec The CephBlockPool spec, see the CephBlockPool documentation. {}
storageClass.enabled Whether a storage class is deployed alongside the CephBlockPool true
storageClass.isDefault Whether the storage class will be the default storage class for PVCs. See PersistentVolumeClaim documentation for details. true
storageClass.name The name of the storage class ceph-block
storageClass.parameters See Block Storage documentation or the helm values.yaml for suitable values see values.yaml
storageClass.reclaimPolicy The default Reclaim Policy to apply to PVCs created with this storage class. Delete
storageClass.allowVolumeExpansion Whether volume expansion is allowed by default. true
storageClass.mountOptions Specifies the mount options for storageClass []
storageClass.allowedTopologies Specifies the allowedTopologies for storageClass []
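
As a concrete example of the parameters above, a single-entry cephBlockPools list could be declared as follows; the pool spec shown (failure domain and replica count) is an illustrative assumption.

cephBlockPools:
  - name: ceph-blockpool
    spec:
      failureDomain: host
      replicated:
        size: 3
    storageClass:
      enabled: true
      name: ceph-block
      isDefault: true
      reclaimPolicy: Delete
      allowVolumeExpansion: true
      mountOptions: []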

Ceph File Systems

The cephFileSystems array in the values file defines a list of CephFileSystem resources as described in the table below.

Parameter Description Default
name The name of the CephFileSystem ceph-filesystem
spec The CephFileSystem spec, see the CephFilesystem CRD documentation. see values.yaml
storageClass.enabled Whether a storage class is deployed alongside the CephFileSystem true
storageClass.name The name of the storage class ceph-filesystem
storageClass.pool The name of the data pool, without the filesystem name prefix data0
storageClass.parameters See Shared Filesystem documentation or the helm values.yaml for suitable values see values.yaml
storageClass.reclaimPolicy The default Reclaim Policy to apply to PVCs created with this storage class. Delete
storageClass.mountOptions Specifies the mount options for storageClass []
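
Similarly, a sketch of a single cephFileSystems entry might look like the following; the metadata/data pool sizes and MDS settings are assumptions, not the chart defaults.

cephFileSystems:
  - name: ceph-filesystem
    spec:
      metadataPool:
        replicated:
          size: 3
      dataPools:
        - name: data0
          failureDomain: host
          replicated:
            size: 3
      metadataServer:
        activeCount: 1
        activeStandby: true
    storageClass:
      enabled: true
      name: ceph-filesystem
      pool: data0
      reclaimPolicy: Delete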

Ceph Object Stores

The cephObjectStores array in the values file defines a list of CephObjectStore resources as described in the table below.

Parameter Description Default
name The name of the CephObjectStore ceph-objectstore
spec The CephObjectStore spec, see the CephObjectStore CRD documentation. see values.yaml
storageClass.enabled Whether a storage class is deployed alongside the CephObjectStore true
storageClass.name The name of the storage class ceph-bucket
storageClass.parameters See Object Store storage class documentation or the helm values.yaml for suitable values see values.yaml
storageClass.reclaimPolicy The default Reclaim Policy to apply to PVCs created with this storage class. Delete
ingress.enabled Enable an ingress for the object store false
ingress.annotations Ingress annotations {}
ingress.host.name Ingress hostname ""
ingress.host.path Ingress path prefix /
ingress.tls Ingress tls /
ingress.ingressClassName Ingress class name ""
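
A sketch of a single cephObjectStores entry, with assumed pool and gateway settings, could look like this:

cephObjectStores:
  - name: ceph-objectstore
    spec:
      metadataPool:
        replicated:
          size: 3
      dataPool:
        erasureCoded:
          dataChunks: 2
          codingChunks: 1
      gateway:
        port: 80
        instances: 1
    storageClass:
      enabled: true
      name: ceph-bucket
      reclaimPolicy: Delete
    ingress:
      enabled: false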

Existing Clusters

If you have an existing CephCluster CR that was created without the helm chart and you want the helm chart to start managing the cluster:

  1. Extract the spec section of your existing CephCluster CR and copy it to the cephClusterSpec section in values.yaml.

  2. Add the following annotations and label to your existing CephCluster CR:

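These are the standard Helm ownership annotations and label that let a chart adopt an existing resource; the values below assume the release name rook-ceph-cluster and the rook-ceph namespace used in the install command above.

annotations:
  meta.helm.sh/release-name: rook-ceph-cluster
  meta.helm.sh/release-namespace: rook-ceph
labels:
  app.kubernetes.io/managed-by: Helm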
diff --git a/docs/rook/latest/Helm-Charts/operator-chart/index.html b/docs/rook/latest/Helm-Charts/operator-chart/index.html
index 74014a443..07569e6d1 100644
--- a/docs/rook/latest/Helm-Charts/operator-chart/index.html
+++ b/docs/rook/latest/Helm-Charts/operator-chart/index.html
@@ -1,7 +1,7 @@

Ceph Operator Helm Chart

Installs rook to create, configure, and manage Ceph clusters on Kubernetes.

Introduction

This chart bootstraps a rook-ceph-operator deployment on a Kubernetes cluster using the Helm package manager.

Prerequisites

  • Kubernetes 1.22+
  • Helm 3.x

See the Helm support matrix for more details.

Installing

The Ceph Operator helm chart will install the basic components necessary to create a storage platform for your Kubernetes cluster.

  1. Install the Helm chart
  2. Create a Rook cluster.

The helm install command deploys rook on the Kubernetes cluster in the default configuration. The configuration section lists the parameters that can be configured during installation. It is recommended that the rook operator be installed into the rook-ceph namespace (you will install your clusters into separate namespaces).

Rook currently publishes builds of the Ceph operator to the release and master channels.

Release

The release channel is the most recent release of Rook that is considered stable for the community.

helm repo add rook-release https://charts.rook.io/release
 helm install --create-namespace --namespace rook-ceph rook-ceph rook-release/rook-ceph -f values.yaml

For example settings, see the next section or values.yaml

Configuration

The following table lists the configurable parameters of the rook-operator chart and their default values.

Parameter Description Default
admissionController Set tolerations and nodeAffinity for the admission controller pod. The admission controller is best started on the same nodes as the other Ceph daemons. nil
allowLoopDevices If true, loop devices are allowed to be used for osds in test clusters false
annotations Pod annotations {}
cephCommandsTimeoutSeconds The timeout for ceph commands in seconds "15"
containerSecurityContext Set the container security context for the operator {"capabilities":{"drop":["ALL"]},"runAsGroup":2016,"runAsNonRoot":true,"runAsUser":2016}
crds.enabled Whether the helm chart should create and update the CRDs. If false, the CRDs must be managed independently with deploy/examples/crds.yaml. WARNING Only set during first deployment. If later disabled the cluster may be DESTROYED. If the CRDs are deleted in this case, see the disaster recovery guide to restore them. true
csi.allowUnsupportedVersion Allow starting an unsupported ceph-csi image false
csi.attacher.image Kubernetes CSI Attacher image registry.k8s.io/sig-storage/csi-attacher:v4.3.0
csi.cephFSAttachRequired Whether to skip any attach operation altogether for CephFS PVCs. See more details here. If cephFSAttachRequired is set to false it skips the volume attachments and makes the creation of pods using the CephFS PVC fast. WARNING It's highly discouraged to use this for CephFS RWO volumes. Refer to this issue for more details. true
csi.cephFSFSGroupPolicy Policy for modifying a volume's ownership or permissions when the CephFS PVC is being mounted. supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html "File"
csi.cephFSKernelMountOptions Set CephFS Kernel mount options to use https://docs.ceph.com/en/latest/man/8/mount.ceph/#options. Set to "ms_mode=secure" when connections.encrypted is enabled in CephCluster CR nil
csi.cephFSPluginUpdateStrategy CSI CephFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate RollingUpdate
csi.cephFSPluginUpdateStrategyMaxUnavailable A maxUnavailable parameter of CSI cephFS plugin daemonset update strategy. 1
csi.cephcsi.image Ceph CSI image quay.io/cephcsi/cephcsi:v3.9.0
csi.cephfsGrpcMetricsPort CSI CephFS driver GRPC metrics port 9091
csi.cephfsLivenessMetricsPort CSI CephFS driver metrics port 9081
csi.cephfsPodLabels Labels to add to the CSI CephFS Deployments and DaemonSets Pods nil
csi.clusterName Cluster name identifier to set as metadata on the CephFS subvolumes and RBD images. This is useful, for example, when two container orchestrator clusters (Kubernetes/OCP) are using a single Ceph cluster nil
csi.csiAddons.enabled Enable CSIAddons false
csi.csiAddons.image CSIAddons Sidecar image "quay.io/csiaddons/k8s-sidecar:v0.7.0"
csi.csiAddonsPort CSI Addons server port 9070
csi.csiCephFSPluginResource CEPH CSI CephFS plugin resource requirement list see values.yaml
csi.csiCephFSPluginVolume The volume of the CephCSI CephFS plugin DaemonSet nil
csi.csiCephFSPluginVolumeMount The volume mounts of the CephCSI CephFS plugin DaemonSet nil
csi.csiCephFSProvisionerResource CEPH CSI CephFS provisioner resource requirement list see values.yaml
csi.csiNFSPluginResource CEPH CSI NFS plugin resource requirement list see values.yaml
csi.csiNFSProvisionerResource CEPH CSI NFS provisioner resource requirement list see values.yaml
csi.csiRBDPluginResource CEPH CSI RBD plugin resource requirement list see values.yaml
csi.csiRBDPluginVolume The volume of the CephCSI RBD plugin DaemonSet nil
csi.csiRBDPluginVolumeMount The volume mounts of the CephCSI RBD plugin DaemonSet nil
csi.csiRBDProvisionerResource CEPH CSI RBD provisioner resource requirement list; csi-omap-generator resources will be applied only if enableOMAPGenerator is set to true see values.yaml
csi.enableCSIEncryption Enable Ceph CSI PVC encryption support false
csi.enableCSIHostNetwork Enable host networking for CSI CephFS and RBD nodeplugins. This may be necessary in some network configurations where the SDN does not provide access to an external cluster or there is significant drop in read/write performance true
csi.enableCephfsDriver Enable Ceph CSI CephFS driver true
csi.enableCephfsSnapshotter Enable Snapshotter in CephFS provisioner pod true
csi.enableGrpcMetrics Enable Ceph CSI GRPC Metrics false
csi.enableLiveness Enable Ceph CSI Liveness sidecar deployment false
csi.enableMetadata Enable adding volume metadata on the CephFS subvolumes and RBD images. Not all users are interested in getting volume/snapshot details as metadata on CephFS subvolumes and RBD images, so metadata is disabled by default false
csi.enableNFSSnapshotter Enable Snapshotter in NFS provisioner pod true
csi.enableOMAPGenerator The OMAP generator generates the omap mapping between the PV name and the RBD image, which helps CSI identify the RBD images for CSI operations. CSI_ENABLE_OMAP_GENERATOR needs to be enabled when using the RBD mirroring feature. The OMAP generator is disabled by default; when enabled, it is deployed as a sidecar with the CSI provisioner pod. false
csi.enablePluginSelinuxHostMount Enable Host mount for /etc/selinux directory for Ceph CSI nodeplugins false
csi.enableRBDSnapshotter Enable Snapshotter in RBD provisioner pod true
csi.enableRbdDriver Enable Ceph CSI RBD driver true
csi.forceCephFSKernelClient Enable Ceph Kernel clients on kernel < 4.17. If your kernel does not support quotas for CephFS you may want to disable this setting. However, this will cause an issue during upgrades with the FUSE client. See the upgrade guide true
csi.grpcTimeoutInSeconds Set GRPC timeout for csi containers (in seconds). It should be >= 120. If this value is not set or is invalid, it defaults to 150 150
csi.imagePullPolicy Image pull policy "IfNotPresent"
csi.kubeletDirPath Kubelet root directory path (if the Kubelet uses a different path for the --root-dir flag) /var/lib/kubelet
csi.logLevel Set logging level for cephCSI containers maintained by the cephCSI. Supported values from 0 to 5. 0 for general useful logs, 5 for trace level verbosity. 0
csi.nfs.enabled Enable the nfs csi driver false
csi.nfsAttachRequired Whether to skip any attach operation altogether for NFS PVCs. See more details here. If nfsAttachRequired is set to false it skips the volume attachments and makes the creation of pods using the NFS PVC fast. WARNING It's highly discouraged to use this for NFS RWO volumes. Refer to this issue for more details. true
csi.nfsFSGroupPolicy Policy for modifying a volume's ownership or permissions when the NFS PVC is being mounted. supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html "File"
csi.nfsPluginUpdateStrategy CSI NFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate RollingUpdate
csi.nfsPodLabels Labels to add to the CSI NFS Deployments and DaemonSets Pods nil
csi.pluginNodeAffinity The node labels for affinity of the CephCSI RBD plugin DaemonSet nil
csi.pluginPriorityClassName PriorityClassName to be set on csi driver plugin pods "system-node-critical"
csi.pluginTolerations Array of tolerations in YAML format which will be added to CephCSI plugin DaemonSet nil
csi.provisioner.image Kubernetes CSI provisioner image registry.k8s.io/sig-storage/csi-provisioner:v3.5.0
csi.provisionerNodeAffinity The node labels for affinity of the CSI provisioner deployment nil
csi.provisionerPriorityClassName PriorityClassName to be set on csi driver provisioner pods "system-cluster-critical"
csi.provisionerReplicas Set replicas for csi provisioner deployment 2
csi.provisionerTolerations Array of tolerations in YAML format which will be added to CSI provisioner deployment nil
csi.rbdAttachRequired Whether to skip any attach operation altogether for RBD PVCs. See more details here. If set to false it skips the volume attachments and makes the creation of pods using the RBD PVC fast. WARNING It's highly discouraged to use this for RWO volumes as it can cause data corruption. CSI-Addons operations like ReclaimSpace and PVC key rotation will also not be supported if set to false, since there will be no VolumeAttachments to determine which node the PVC is mounted on. Refer to this issue for more details. true
csi.rbdFSGroupPolicy Policy for modifying a volume's ownership or permissions when the RBD PVC is being mounted. supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html "File"
csi.rbdGrpcMetricsPort Ceph CSI RBD driver GRPC metrics port 9090
csi.rbdLivenessMetricsPort Ceph CSI RBD driver metrics port 8080
csi.rbdPluginUpdateStrategy CSI RBD plugin daemonset update strategy, supported values are OnDelete and RollingUpdate RollingUpdate
csi.rbdPluginUpdateStrategyMaxUnavailable A maxUnavailable parameter of CSI RBD plugin daemonset update strategy. 1
csi.rbdPodLabels Labels to add to the CSI RBD Deployments and DaemonSets Pods nil
csi.readAffinity.crushLocationLabels Define which node labels to use as CRUSH location. This should correspond to the values set in the CRUSH map. labels listed here
csi.readAffinity.enabled Enable read affinity for RBD volumes. Recommended to set to true if running kernel 5.8 or newer. false
csi.registrar.image Kubernetes CSI registrar image registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.8.0
csi.resizer.image Kubernetes CSI resizer image registry.k8s.io/sig-storage/csi-resizer:v1.8.0
csi.serviceMonitor.enabled Enable ServiceMonitor for Ceph CSI drivers false
csi.serviceMonitor.interval Service monitor scrape interval "5s"
csi.serviceMonitor.labels ServiceMonitor additional labels {}
csi.sidecarLogLevel Set logging level for Kubernetes-csi sidecar containers. Supported values from 0 to 5. 0 for general useful logs (the default), 5 for trace level verbosity. 0
csi.snapshotter.image Kubernetes CSI snapshotter image registry.k8s.io/sig-storage/csi-snapshotter:v6.2.2
csi.topology.domainLabels domainLabels define which node labels to use as domains for CSI nodeplugins to advertise their domains nil
csi.topology.enabled Enable topology based provisioning false
currentNamespaceOnly Whether the operator should watch cluster CRD in its own namespace or not false
disableAdmissionController Whether to disable the admission controller true
disableDeviceHotplug Disable automatic orchestration when new devices are discovered. false
discover.nodeAffinity The node labels for affinity of discover-agent nil
discover.podLabels Labels to add to the discover pods nil
discover.resources Add resources to discover daemon pods nil
discover.toleration Toleration for the discover pods. Options: NoSchedule, PreferNoSchedule or NoExecute nil
discover.tolerationKey The specific key of the taint to tolerate nil
discover.tolerations Array of tolerations in YAML format which will be added to discover deployment nil
discoverDaemonUdev Blacklist certain disks according to the regex provided. nil
enableDiscoveryDaemon Enable discovery daemon false
enableOBCWatchOperatorNamespace Whether the OBC provisioner should watch on the operator namespace or not, if not the namespace of the cluster will be used true
hostpathRequiresPrivileged Runs Ceph Pods as privileged to be able to write to hostPaths in OpenShift with SELinux restrictions. false
image.pullPolicy Image pull policy "IfNotPresent"
image.repository Image "rook/ceph"
image.tag Image tag master
imagePullSecrets imagePullSecrets allows pulling docker images from a private docker registry. The option will be passed to all service accounts. nil
logLevel Global log level for the operator. Options: ERROR, WARNING, INFO, DEBUG "INFO"
monitoring.enabled Enable monitoring. Requires Prometheus to be pre-installed. Enabling will also create RBAC rules to allow Operator to create ServiceMonitors false
nodeSelector Kubernetes nodeSelector to add to the Deployment. {}
priorityClassName Set the priority class for the rook operator deployment if desired nil
pspEnable If true, create & use PSP resources false
rbacEnable If true, create & use RBAC resources true
resources Pod resource requests & limits {"limits":{"cpu":"500m","memory":"512Mi"},"requests":{"cpu":"100m","memory":"128Mi"}}
scaleDownOperator If true, scale down the rook operator. This is useful for administrative actions where the rook operator must be scaled down, while using gitops style tooling to deploy your helm charts. false
tolerations List of Kubernetes tolerations to add to the Deployment. []
unreachableNodeTolerationSeconds Delay to use for the node.kubernetes.io/unreachable pod failure toleration to override the Kubernetes default of 5 minutes 5
useOperatorHostNetwork If true, run rook operator on the host network nil
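
To illustrate the table above, a small values.yaml override for the operator chart could look like the following; the specific choices are assumptions for a typical test environment, not recommendations.

image:
  repository: rook/ceph
  tag: master
crds:
  enabled: true
csi:
  enableRbdDriver: true
  enableCephfsDriver: true
  nfs:
    enabled: false
  pluginTolerations:
    - key: storage-node
      operator: Exists
      effect: NoSchedule
resources:
  limits:
    cpu: 500m
    memory: 512Mi
  requests:
    cpu: 100m
    memory: 128Mi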

Development Build

To deploy from a local build from your development environment:

  1. Build the Rook docker image: make
  2. Copy the image to your K8s cluster, such as with the docker save and docker load commands (see the sketch after the install command below)
  3. Install the helm chart:
cd deploy/charts/rook-ceph
 helm install --create-namespace --namespace rook-ceph rook-ceph .
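
For step 2 above, one way to copy a locally built image to the cluster nodes is with docker save and docker load; this sketch assumes the default rook/ceph:master tag and that the nodes run Docker.

docker save -o rook-ceph.tar rook/ceph:master
docker load -i rook-ceph.tar   # run on each Kubernetes node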
 

Uninstalling the Chart

To see the currently installed Rook chart:

helm ls --namespace rook-ceph
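
To remove the release afterwards, the standard Helm uninstall command would be as follows (the release name and namespace are assumed to match the install command above):

helm uninstall --namespace rook-ceph rook-ceph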
diff --git a/docs/rook/latest/search/search_index.json b/docs/rook/latest/search/search_index.json
index 05468d95d..b46dd6eda 100644
--- a/docs/rook/latest/search/search_index.json
+++ b/docs/rook/latest/search/search_index.json
@@ -1 +1 @@
-{"config":{"indexing":"full","lang":["en"],"min_search_length":3,"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"CRDs/ceph-client-crd/","text":"Rook allows creation and updating clients through the custom resource definitions (CRDs). For more information about user management and capabilities see the Ceph docs . Use Case: Connecting to Ceph \u00b6 Use Client CRD in case you want to integrate Rook with applications that are using LibRBD directly. For example for OpenStack deployment with Ceph backend use Client CRD to create OpenStack services users. The Client CRD is not needed for Flex or CSI driver users. The drivers create the needed users automatically. Prerequisites \u00b6 This guide assumes you have created a Rook cluster as explained in the main Quickstart guide . 1. Creating Ceph User \u00b6 To get you started, here is a simple example of a CRD to configure a Ceph client with capabilities. 1 2 3 4 5 6 7 8 9 10 --- apiVersion : ceph.rook.io/v1 kind : CephClient metadata : name : example namespace : rook-ceph spec : caps : mon : 'profile rbd, allow r' osd : 'profile rbd pool=volumes, profile rbd pool=vms, profile rbd-read-only pool=images' To use CephClient to connect to a Ceph cluster: 2. Find the generated secret for the CephClient \u00b6 Once your CephClient has been processed by Rook, it will be updated to include your secret: 1 kubectl -n rook-ceph get cephclient example -o jsonpath='{.status.info.secretName}' 3. Extract Ceph cluster credentials from the generated secret \u00b6 Extract Ceph cluster credentials from the generated secret (note that the subkey will be your original client name): 1 kubectl --namespace rook-ceph get secret rook-ceph-client-example -o jsonpath=\"{.data.example}\" | base64 -d The base64 encoded value that is returned is the password for your ceph client. 4. Retrieve the mon endpoints \u00b6 To send writes to the cluster, you must retrieve the mons in use: 1 kubectl --namespace rook-ceph get configmap rook-ceph-mon-endpoints -o jsonpath='{.data.data}' | sed 's/.=//g'` This command should produce a line that looks somewhat like this: 1 10.107.72.122:6789,10.103.244.218:6789,10.99.33.227:6789 5. (optional) Generate Ceph configuration files \u00b6 If you choose to generate files for Ceph to use you will need to generate the following files: General configuration file (ex. ceph.conf ) Keyring file (ex. ceph.keyring ) Examples of the files follow: ceph.conf 1 2 3 [global] mon_host = 10.107.72.122:6789,10.103.244.218:6789,10.99.33.227:6789 log file = /tmp/ceph-$pid.log ceph.keyring 1 2 3 4 5 6 [client.example] key = < key, decoded from k8s secret> # The caps below are for a rbd workload -- you may need to edit/modify these capabilities for other workloads # see https://docs.ceph.com/en/latest/cephfs/capabilities caps mon = 'allow r' caps osd = 'profile rbd pool=, profile rb pool=' 6. Connect to the Ceph cluster with your given client ID \u00b6 With the files we've created, you should be able to query the cluster by setting Ceph ENV variables and running ceph status : 1 2 3 4 export CEPH_CONF=/libsqliteceph/ceph.conf; export CEPH_KEYRING=/libsqliteceph/ceph.keyring; export CEPH_ARGS=--id example; ceph status With this config, the ceph tools ( ceph CLI, in-program access, etc) can connect to and utilize the Ceph cluster. Use Case: SQLite \u00b6 The Ceph project contains a SQLite VFS that interacts with RBD directly, called libcephsqlite . 
First, on your workload ensure that you have the appropriate packages installed that make libcephsqlite.so available: ceph on Alpine libsqlite3-mod-ceph on Ubuntu libcephsqlite on Fedora ceph on CentOS Without the appropriate package (or a from-scratch build of SQLite), you will be unable to load libcephsqlite.so . After creating a CephClient similar to deploy/examples/sqlitevfs-client.yaml and retrieving it's credentials, you may set the following ENV variables: 1 2 3 export CEPH_CONF=/libsqliteceph/ceph.conf; export CEPH_KEYRING=/libsqliteceph/ceph.keyring; export CEPH_ARGS=--id sqlitevfs Then start your SQLite database: 1 2 3 sqlite> .load libcephsqlite.so sqlite> .open file:///poolname:/test.db?vfs=ceph sqlite> If those lines complete without error, you have successfully set up SQLite to access Ceph. See the libcephsqlite documentation for more information on the file URL format.","title":"CephClient CRD"},{"location":"CRDs/ceph-client-crd/#use-case-connecting-to-ceph","text":"Use Client CRD in case you want to integrate Rook with applications that are using LibRBD directly. For example for OpenStack deployment with Ceph backend use Client CRD to create OpenStack services users. The Client CRD is not needed for Flex or CSI driver users. The drivers create the needed users automatically.","title":"Use Case: Connecting to Ceph"},{"location":"CRDs/ceph-client-crd/#prerequisites","text":"This guide assumes you have created a Rook cluster as explained in the main Quickstart guide .","title":"Prerequisites"},{"location":"CRDs/ceph-client-crd/#1-creating-ceph-user","text":"To get you started, here is a simple example of a CRD to configure a Ceph client with capabilities. 1 2 3 4 5 6 7 8 9 10 --- apiVersion : ceph.rook.io/v1 kind : CephClient metadata : name : example namespace : rook-ceph spec : caps : mon : 'profile rbd, allow r' osd : 'profile rbd pool=volumes, profile rbd pool=vms, profile rbd-read-only pool=images' To use CephClient to connect to a Ceph cluster:","title":"1. Creating Ceph User"},{"location":"CRDs/ceph-client-crd/#2-find-the-generated-secret-for-the-cephclient","text":"Once your CephClient has been processed by Rook, it will be updated to include your secret: 1 kubectl -n rook-ceph get cephclient example -o jsonpath='{.status.info.secretName}'","title":"2. Find the generated secret for the CephClient"},{"location":"CRDs/ceph-client-crd/#3-extract-ceph-cluster-credentials-from-the-generated-secret","text":"Extract Ceph cluster credentials from the generated secret (note that the subkey will be your original client name): 1 kubectl --namespace rook-ceph get secret rook-ceph-client-example -o jsonpath=\"{.data.example}\" | base64 -d The base64 encoded value that is returned is the password for your ceph client.","title":"3. Extract Ceph cluster credentials from the generated secret"},{"location":"CRDs/ceph-client-crd/#4-retrieve-the-mon-endpoints","text":"To send writes to the cluster, you must retrieve the mons in use: 1 kubectl --namespace rook-ceph get configmap rook-ceph-mon-endpoints -o jsonpath='{.data.data}' | sed 's/.=//g'` This command should produce a line that looks somewhat like this: 1 10.107.72.122:6789,10.103.244.218:6789,10.99.33.227:6789","title":"4. Retrieve the mon endpoints"},{"location":"CRDs/ceph-client-crd/#5-optional-generate-ceph-configuration-files","text":"If you choose to generate files for Ceph to use you will need to generate the following files: General configuration file (ex. ceph.conf ) Keyring file (ex. 
ceph.keyring ) Examples of the files follow: ceph.conf 1 2 3 [global] mon_host = 10.107.72.122:6789,10.103.244.218:6789,10.99.33.227:6789 log file = /tmp/ceph-$pid.log ceph.keyring 1 2 3 4 5 6 [client.example] key = < key, decoded from k8s secret> # The caps below are for a rbd workload -- you may need to edit/modify these capabilities for other workloads # see https://docs.ceph.com/en/latest/cephfs/capabilities caps mon = 'allow r' caps osd = 'profile rbd pool=, profile rb pool='","title":"5. (optional) Generate Ceph configuration files"},{"location":"CRDs/ceph-client-crd/#6-connect-to-the-ceph-cluster-with-your-given-client-id","text":"With the files we've created, you should be able to query the cluster by setting Ceph ENV variables and running ceph status : 1 2 3 4 export CEPH_CONF=/libsqliteceph/ceph.conf; export CEPH_KEYRING=/libsqliteceph/ceph.keyring; export CEPH_ARGS=--id example; ceph status With this config, the ceph tools ( ceph CLI, in-program access, etc) can connect to and utilize the Ceph cluster.","title":"6. Connect to the Ceph cluster with your given client ID"},{"location":"CRDs/ceph-client-crd/#use-case-sqlite","text":"The Ceph project contains a SQLite VFS that interacts with RBD directly, called libcephsqlite . First, on your workload ensure that you have the appropriate packages installed that make libcephsqlite.so available: ceph on Alpine libsqlite3-mod-ceph on Ubuntu libcephsqlite on Fedora ceph on CentOS Without the appropriate package (or a from-scratch build of SQLite), you will be unable to load libcephsqlite.so . After creating a CephClient similar to deploy/examples/sqlitevfs-client.yaml and retrieving it's credentials, you may set the following ENV variables: 1 2 3 export CEPH_CONF=/libsqliteceph/ceph.conf; export CEPH_KEYRING=/libsqliteceph/ceph.keyring; export CEPH_ARGS=--id sqlitevfs Then start your SQLite database: 1 2 3 sqlite> .load libcephsqlite.so sqlite> .open file:///poolname:/test.db?vfs=ceph sqlite> If those lines complete without error, you have successfully set up SQLite to access Ceph. See the libcephsqlite documentation for more information on the file URL format.","title":"Use Case: SQLite"},{"location":"CRDs/ceph-nfs-crd/","text":"Rook allows exporting NFS shares of a CephFilesystem or CephObjectStore through the CephNFS custom resource definition. 
Example \u00b6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 apiVersion : ceph.rook.io/v1 kind : CephNFS metadata : name : my-nfs namespace : rook-ceph spec : # Settings for the NFS server server : active : 1 placement : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : role operator : In values : - nfs-node topologySpreadConstraints : tolerations : - key : nfs-node operator : Exists podAffinity : podAntiAffinity : annotations : my-annotation : something labels : my-label : something resources : limits : cpu : \"3\" memory : \"8Gi\" requests : cpu : \"3\" memory : \"8Gi\" priorityClassName : \"\" logLevel : NIV_INFO security : kerberos : principalName : \"nfs\" domainName : \"DOMAIN1.EXAMPLE.COM\" configFiles : volumeSource : configMap : name : my-krb5-config-files keytabFile : volumeSource : secret : secretName : my-nfs-keytab defaultMode : 0600 # mode must be 0600 sssd : sidecar : image : registry.access.redhat.com/rhel7/sssd:latest sssdConfigFile : volumeSource : configMap : name : my-nfs-sssd-config defaultMode : 0600 # mode must be 0600 debugLevel : 0 resources : {} NFS Settings \u00b6 Server \u00b6 The server spec sets configuration for Rook-created NFS-Ganesha server pods. active : The number of active NFS servers. Rook supports creating more than one active NFS server, but cannot guarantee high availability. For values greater than 1, see the known issue below. placement : Kubernetes placement restrictions to apply to NFS server Pod(s). This is similar to placement defined for daemons configured by the CephCluster CRD . annotations : Kubernetes annotations to apply to NFS server Pod(s) labels : Kubernetes labels to apply to NFS server Pod(s) resources : Kubernetes resource requests and limits to set on NFS server containers priorityClassName : Set priority class name for the NFS server Pod(s) logLevel : The log level that NFS-Ganesha servers should output. Default value: NIV_INFO Supported values: NIV_NULL | NIV_FATAL | NIV_MAJ | NIV_CRIT | NIV_WARN | NIV_EVENT | NIV_INFO | NIV_DEBUG | NIV_MID_DEBUG | NIV_FULL_DEBUG | NB_LOG_LEVEL hostNetwork : Whether host networking is enabled for the NFS server pod(s). If not set, the network settings from the CephCluster CR will be applied. Security \u00b6 The security spec sets security configuration for the NFS cluster. kerberos : Kerberos configures NFS-Ganesha to secure NFS client connections with Kerberos. principalName : this value is combined with (a) the namespace and name of the CephNFS (with a hyphen between) and (b) the Realm configured in the user-provided kerberos config file(s) to determine the full service principal name: /-@ . e.g., nfs/rook-ceph-my-nfs@example.net. For full details, see the NFS security doc . domainName : this is the domain name used in the kerberos credentials. This is used to configure idmap to map the kerberos credentials to uid/gid. Without this configured, NFS-Ganesha will use the anonuid/anongid configured (default: -2) when accessing the local filesystem. eg., DOMAIN1.EXAMPLE.COM. NFS security doc . configFiles : defines where the Kerberos configuration should be sourced from. Config files will be placed into the /etc/krb5.conf.rook/ directory. For advanced usage, see the NFS security doc . 
volumeSource : this is a standard Kubernetes VolumeSource for Kerberos configuration files like what is normally used to configure Volumes for a Pod. For example, a ConfigMap, Secret, or HostPath. The volume may contain multiple files, all of which will be loaded. keytabFile : defines where the Kerberos keytab should be sourced from. The keytab file will be placed into /etc/krb5.keytab . For advanced usage, see the NFS security doc . volumeSource : this is a standard Kubernetes VolumeSource for the Kerberos keytab file like what is normally used to configure Volumes for a Pod. For example, a Secret or HostPath. There are two requirements for the source's content: The config file must be mountable via subPath: krb5.keytab . For example, in a Secret, the data item must be named krb5.keytab , or items must be defined to select the key and give it path krb5.keytab . A HostPath directory must have the krb5.keytab file. The volume or config file must have mode 0600. sssd : SSSD enables integration with System Security Services Daemon (SSSD). See also: ID mapping via SSSD . sidecar : Specifying this configuration tells Rook to run SSSD in a sidecar alongside the NFS server in each NFS pod. image : defines the container image that should be used for the SSSD sidecar. sssdConfigFile : defines where the SSSD configuration should be sourced from. The config file will be placed into /etc/sssd/sssd.conf . For advanced usage, see the NFS security doc . volumeSource : this is a standard Kubernetes VolumeSource like what is normally used to configure Volumes for a Pod. For example, a ConfigMap, Secret, or HostPath. There are two requirements for the source's content: The config file must be mountable via subPath: sssd.conf . For example, in a ConfigMap, the data item must be named sssd.conf , or items must be defined to select the key and give it path sssd.conf . A HostPath directory must have the sssd.conf file. The volume or config file must have mode 0600. additionalFiles : adds any number of additional files into the SSSD sidecar. All files will be placed into /etc/sssd/rook-additional/ and can be referenced by the SSSD config file. For example, CA and/or TLS certificates to authenticate with Kerberos. subPath : the sub-path of /etc/sssd/rook-additional to add files into. This can include / to create arbitrarily deep sub-paths if desired. If the volumeSource is a file, this will refer to a file name. volumeSource : this is a standard Kubernetes VolumeSource for additional files like what is normally used to configure Volumes for a Pod. For example, a ConfigMap, Secret, or HostPath. The volume may contain multiple files, a single file, or may be a file on its own (e.g., a host path with type: File ). debugLevel : sets the debug level for SSSD. If unset or 0 , Rook does nothing. Otherwise, this may be a value between 1 and 10. See the SSSD docs for more info. resources : Kubernetes resource requests and limits to set on NFS server containers Scaling the active server count \u00b6 It is possible to scale the size of the cluster up or down by modifying the spec.server.active field. Scaling the cluster size up can be done at will. Once the new server comes up, clients can be assigned to it immediately. The CRD always eliminates the highest index servers first, in reverse order from how they were started. Scaling down the cluster requires that clients be migrated from servers that will be eliminated to others. 
Scaling the active server count ¶

It is possible to scale the size of the cluster up or down by modifying the spec.server.active field. Scaling the cluster size up can be done at will: once the new server comes up, clients can be assigned to it immediately.

Scaling down requires more care. The CRD always eliminates the highest-index servers first, in reverse order from how they were started, so clients must be migrated from the servers that will be eliminated to the remaining servers. That migration is currently a manual process and should be performed before reducing the size of the cluster (see the kubectl sketch after the known issues below).

Warning: See the known issue below about setting this value greater than one.

Known issues ¶

server.active count greater than 1 ¶

Active-active scale-out does not work well with the NFS protocol. If one NFS server in a cluster is offline, other servers may block client requests until the offline server returns, which may not always happen due to the Kubernetes scheduler.

Workaround: It is safest to run only a single NFS server, but we do not limit this if it benefits your use case.

Ceph v17.2.1 ¶

Ceph NFS management with the Rook mgr module enabled has a breaking regression with the Ceph Quincy v17.2.1 release.

Workaround: Leave Ceph's Rook orchestrator mgr module disabled. If you have enabled it, you must disable it using the snippet below from the toolbox.

```console
ceph orch set backend ""
ceph mgr module disable rook
```
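Returning to the scaling section above: changing spec.server.active can be done by patching the CephNFS resource. The following is a minimal sketch against the my-nfs example from this page, not a prescribed procedure; remember to migrate clients off the servers that will be eliminated before scaling down.

```console
# Sketch: scale the example CephNFS "my-nfs" to 2 active servers.
kubectl -n rook-ceph patch cephnfs my-nfs --type merge \
  -p '{"spec":{"server":{"active":2}}}'

# Sketch: scale back down to 1 active server. The highest-index server is
# eliminated first, so migrate its clients before running this.
kubectl -n rook-ceph patch cephnfs my-nfs --type merge \
  -p '{"spec":{"server":{"active":1}}}'
```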
Specification ¶

Packages: ceph.rook.io/v1

ceph.rook.io/v1 ¶

Package v1 is the v1 version of the API.

Resource Types: CephBlockPool CephBlockPoolRadosNamespace CephBucketNotification CephBucketTopic CephCOSIDriver CephClient CephCluster CephFilesystem CephFilesystemMirror CephFilesystemSubVolumeGroup CephNFS CephObjectRealm CephObjectStore CephObjectStoreUser CephObjectZone CephObjectZoneGroup CephRBDMirror

CephBlockPool ¶

CephBlockPool represents a Ceph Storage Pool Field Description apiVersion string ceph.rook.io/v1 kind string CephBlockPool metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec NamedBlockPoolSpec name string (Optional) The desired name of the pool if different from the CephBlockPool CR name. PoolSpec PoolSpec (Members of PoolSpec are embedded into this type.)
The core pool configuration status CephBlockPoolStatus CephBlockPoolRadosNamespace CephBlockPoolRadosNamespace represents a Ceph BlockPool Rados Namespace Field Description apiVersion string ceph.rook.io/v1 kind string CephBlockPoolRadosNamespace metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec CephBlockPoolRadosNamespaceSpec Spec represents the specification of a Ceph BlockPool Rados Namespace blockPoolName string BlockPoolName is the name of Ceph BlockPool. Typically it\u2019s the name of the CephBlockPool CR. status CephBlockPoolRadosNamespaceStatus (Optional) Status represents the status of a CephBlockPool Rados Namespace CephBucketNotification CephBucketNotification represents a Bucket Notifications Field Description apiVersion string ceph.rook.io/v1 kind string CephBucketNotification metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec BucketNotificationSpec topic string The name of the topic associated with this notification events []BucketNotificationEvent (Optional) List of events that should trigger the notification filter NotificationFilterSpec (Optional) Spec of notification filter status Status (Optional) CephBucketTopic CephBucketTopic represents a Ceph Object Topic for Bucket Notifications Field Description apiVersion string ceph.rook.io/v1 kind string CephBucketTopic metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec BucketTopicSpec objectStoreName string The name of the object store on which to define the topic objectStoreNamespace string The namespace of the object store on which to define the topic opaqueData string (Optional) Data which is sent in each event persistent bool (Optional) Indication whether notifications to this endpoint are persistent or not endpoint TopicEndpointSpec Contains the endpoint spec of the topic status BucketTopicStatus (Optional) CephCOSIDriver CephCOSIDriver represents the CRD for the Ceph COSI Driver Deployment Field Description apiVersion string ceph.rook.io/v1 kind string CephCOSIDriver metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec CephCOSIDriverSpec Spec represents the specification of a Ceph COSI Driver image string (Optional) Image is the container image to run the Ceph COSI driver objectProvisionerImage string (Optional) ObjectProvisionerImage is the container image to run the COSI driver sidecar deploymentStrategy COSIDeploymentStrategy (Optional) DeploymentStrategy is the strategy to use to deploy the COSI driver. placement Placement (Optional) Placement is the placement strategy to use for the COSI driver resources Kubernetes core/v1.ResourceRequirements (Optional) Resources is the resource requirements for the COSI driver CephClient CephClient represents a Ceph Client Field Description apiVersion string ceph.rook.io/v1 kind string CephClient metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. 
spec ClientSpec Spec represents the specification of a Ceph Client name string (Optional) caps map[string]string status CephClientStatus (Optional) Status represents the status of a Ceph Client CephCluster CephCluster is a Ceph storage cluster Field Description apiVersion string ceph.rook.io/v1 kind string CephCluster metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec ClusterSpec cephVersion CephVersionSpec (Optional) The version information that instructs Rook to orchestrate a particular version of Ceph. storage StorageScopeSpec (Optional) A spec for available storage in the cluster and how it should be used annotations AnnotationsSpec (Optional) The annotations-related configuration to add/set on each Pod related object. labels LabelsSpec (Optional) The labels-related configuration to add/set on each Pod related object. placement PlacementSpec (Optional) The placement-related configuration to pass to kubernetes (affinity, node selector, tolerations). network NetworkSpec (Optional) Network related configuration resources ResourceSpec (Optional) Resources set resource requests and limits priorityClassNames PriorityClassNamesSpec (Optional) PriorityClassNames sets priority classes on components dataDirHostPath string (Optional) The path on the host where config and data can be persisted skipUpgradeChecks bool (Optional) SkipUpgradeChecks defines if an upgrade should be forced even if one of the check fails continueUpgradeAfterChecksEvenIfNotHealthy bool (Optional) ContinueUpgradeAfterChecksEvenIfNotHealthy defines if an upgrade should continue even if PGs are not clean waitTimeoutForHealthyOSDInMinutes time.Duration (Optional) WaitTimeoutForHealthyOSDInMinutes defines the time the operator would wait before an OSD can be stopped for upgrade or restart. If the timeout exceeds and OSD is not ok to stop, then the operator would skip upgrade for the current OSD and proceed with the next one if continueUpgradeAfterChecksEvenIfNotHealthy is false . If continueUpgradeAfterChecksEvenIfNotHealthy is true , then operator would continue with the upgrade of an OSD even if its not ok to stop after the timeout. This timeout won\u2019t be applied if skipUpgradeChecks is true . The default wait timeout is 10 minutes. disruptionManagement DisruptionManagementSpec (Optional) A spec for configuring disruption management. mon MonSpec (Optional) A spec for mon related options crashCollector CrashCollectorSpec (Optional) A spec for the crash controller dashboard DashboardSpec (Optional) Dashboard settings monitoring MonitoringSpec (Optional) Prometheus based Monitoring settings external ExternalSpec (Optional) Whether the Ceph Cluster is running external to this Kubernetes cluster mon, mgr, osd, mds, and discover daemons will not be created for external clusters. mgr MgrSpec (Optional) A spec for mgr related options removeOSDsIfOutAndSafeToRemove bool (Optional) Remove the OSD that is out and safe to remove only if this option is true cleanupPolicy CleanupPolicySpec (Optional) Indicates user intent when deleting a cluster; blocks orchestration and should not be set if cluster deletion is not imminent. 
healthCheck CephClusterHealthCheckSpec (Optional) Internal daemon healthchecks and liveness probe security SecuritySpec (Optional) Security represents security settings logCollector LogCollectorSpec (Optional) Logging represents loggings settings status ClusterStatus (Optional) CephFilesystem CephFilesystem represents a Ceph Filesystem Field Description apiVersion string ceph.rook.io/v1 kind string CephFilesystem metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec FilesystemSpec metadataPool PoolSpec The metadata pool settings dataPools []NamedPoolSpec The data pool settings, with optional predefined pool name. preservePoolsOnDelete bool (Optional) Preserve pools on filesystem deletion preserveFilesystemOnDelete bool (Optional) Preserve the fs in the cluster on CephFilesystem CR deletion. Setting this to true automatically implies PreservePoolsOnDelete is true. metadataServer MetadataServerSpec The mds pod info mirroring FSMirroringSpec (Optional) The mirroring settings statusCheck MirrorHealthCheckSpec The mirroring statusCheck status CephFilesystemStatus CephFilesystemMirror CephFilesystemMirror is the Ceph Filesystem Mirror object definition Field Description apiVersion string ceph.rook.io/v1 kind string CephFilesystemMirror metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec FilesystemMirroringSpec placement Placement (Optional) The affinity to place the rgw pods (default is to place on any available node) annotations Annotations (Optional) The annotations-related configuration to add/set on each Pod related object. labels Labels (Optional) The labels-related configuration to add/set on each Pod related object. resources Kubernetes core/v1.ResourceRequirements (Optional) The resource requirements for the cephfs-mirror pods priorityClassName string (Optional) PriorityClassName sets priority class on the cephfs-mirror pods status Status (Optional) CephFilesystemSubVolumeGroup CephFilesystemSubVolumeGroup represents a Ceph Filesystem SubVolumeGroup Field Description apiVersion string ceph.rook.io/v1 kind string CephFilesystemSubVolumeGroup metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec CephFilesystemSubVolumeGroupSpec Spec represents the specification of a Ceph Filesystem SubVolumeGroup filesystemName string FilesystemName is the name of Ceph Filesystem SubVolumeGroup volume name. Typically it\u2019s the name of the CephFilesystem CR. If not coming from the CephFilesystem CR, it can be retrieved from the list of Ceph Filesystem volumes with ceph fs volume ls . To learn more about Ceph Filesystem abstractions see https://docs.ceph.com/en/latest/cephfs/fs-volumes/#fs-volumes-and-subvolumes status CephFilesystemSubVolumeGroupStatus (Optional) Status represents the status of a CephFilesystem SubvolumeGroup CephNFS CephNFS represents a Ceph NFS Field Description apiVersion string ceph.rook.io/v1 kind string CephNFS metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. 
spec NFSGaneshaSpec rados GaneshaRADOSSpec (Optional) RADOS is the Ganesha RADOS specification server GaneshaServerSpec Server is the Ganesha Server specification security NFSSecuritySpec (Optional) Security allows specifying security configurations for the NFS cluster status Status (Optional) CephObjectRealm CephObjectRealm represents a Ceph Object Store Gateway Realm Field Description apiVersion string ceph.rook.io/v1 kind string CephObjectRealm metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec ObjectRealmSpec (Optional) pull PullSpec status Status (Optional) CephObjectStore CephObjectStore represents a Ceph Object Store Gateway Field Description apiVersion string ceph.rook.io/v1 kind string CephObjectStore metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec ObjectStoreSpec metadataPool PoolSpec (Optional) The metadata pool settings dataPool PoolSpec (Optional) The data pool settings preservePoolsOnDelete bool (Optional) Preserve pools on object store deletion gateway GatewaySpec (Optional) The rgw pod info zone ZoneSpec (Optional) The multisite info healthCheck ObjectHealthCheckSpec (Optional) The RGW health probes security ObjectStoreSecuritySpec (Optional) Security represents security settings status ObjectStoreStatus CephObjectStoreUser CephObjectStoreUser represents a Ceph Object Store Gateway User Field Description apiVersion string ceph.rook.io/v1 kind string CephObjectStoreUser metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec ObjectStoreUserSpec store string (Optional) The store the user will be created in displayName string (Optional) The display name for the ceph users capabilities ObjectUserCapSpec (Optional) quotas ObjectUserQuotaSpec (Optional) status ObjectStoreUserStatus (Optional) CephObjectZone CephObjectZone represents a Ceph Object Store Gateway Zone Field Description apiVersion string ceph.rook.io/v1 kind string CephObjectZone metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec ObjectZoneSpec zoneGroup string The display name for the ceph users metadataPool PoolSpec The metadata pool settings dataPool PoolSpec The data pool settings customEndpoints []string (Optional) If this zone cannot be accessed from other peer Ceph clusters via the ClusterIP Service endpoint created by Rook, you must set this to the externally reachable endpoint(s). You may include the port in the definition. For example: \u201c https://my-object-store.my-domain.net:443\u201d . In many cases, you should set this to the endpoint of the ingress resource that makes the CephObjectStore associated with this CephObjectStoreZone reachable to peer clusters. The list can have one or more endpoints pointing to different RGW servers in the zone. If a CephObjectStore endpoint is omitted from this list, that object store\u2019s gateways will not receive multisite replication data (see CephObjectStore.spec.gateway.disableMultisiteSyncTraffic). 
preservePoolsOnDelete bool (Optional) Preserve pools on object zone deletion status Status (Optional) CephObjectZoneGroup CephObjectZoneGroup represents a Ceph Object Store Gateway Zone Group Field Description apiVersion string ceph.rook.io/v1 kind string CephObjectZoneGroup metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec ObjectZoneGroupSpec realm string The display name for the ceph users status Status (Optional) CephRBDMirror CephRBDMirror represents a Ceph RBD Mirror Field Description apiVersion string ceph.rook.io/v1 kind string CephRBDMirror metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec RBDMirroringSpec count int Count represents the number of rbd mirror instance to run peers MirroringPeerSpec (Optional) Peers represents the peers spec placement Placement (Optional) The affinity to place the rgw pods (default is to place on any available node) annotations Annotations (Optional) The annotations-related configuration to add/set on each Pod related object. labels Labels (Optional) The labels-related configuration to add/set on each Pod related object. resources Kubernetes core/v1.ResourceRequirements (Optional) The resource requirements for the rbd mirror pods priorityClassName string (Optional) PriorityClassName sets priority class on the rbd mirror pods status Status (Optional) AMQPEndpointSpec ( Appears on: TopicEndpointSpec ) AMQPEndpointSpec represent the spec of an AMQP endpoint of a Bucket Topic Field Description uri string The URI of the AMQP endpoint to push notification to exchange string Name of the exchange that is used to route messages based on topics disableVerifySSL bool (Optional) Indicate whether the server certificate is validated by the client or not ackLevel string (Optional) The ack level required for this topic (none/broker/routeable) Annotations ( map[string]string alias) ( Appears on: FilesystemMirroringSpec , GaneshaServerSpec , GatewaySpec , MetadataServerSpec , RBDMirroringSpec , RGWServiceSpec ) Annotations are annotations AnnotationsSpec ( map[github.com/rook/rook/pkg/apis/ceph.rook.io/v1.KeyType]github.com/rook/rook/pkg/apis/ceph.rook.io/v1.Annotations alias) ( Appears on: ClusterSpec ) AnnotationsSpec is the main spec annotation for all daemons BucketNotificationEvent ( string alias) ( Appears on: BucketNotificationSpec ) BucketNotificationSpec represent the event type of the bucket notification BucketNotificationSpec ( Appears on: CephBucketNotification ) BucketNotificationSpec represent the spec of a Bucket Notification Field Description topic string The name of the topic associated with this notification events []BucketNotificationEvent (Optional) List of events that should trigger the notification filter NotificationFilterSpec (Optional) Spec of notification filter BucketTopicSpec ( Appears on: CephBucketTopic ) BucketTopicSpec represent the spec of a Bucket Topic Field Description objectStoreName string The name of the object store on which to define the topic objectStoreNamespace string The namespace of the object store on which to define the topic opaqueData string (Optional) Data which is sent in each event persistent bool (Optional) Indication whether notifications to this endpoint are persistent or not endpoint TopicEndpointSpec Contains the endpoint spec of the topic BucketTopicStatus ( Appears on: CephBucketTopic ) BucketTopicStatus represents the Status of a CephBucketTopic Field Description 
phase string (Optional) ARN string (Optional) The ARN of the topic generated by the RGW observedGeneration int64 (Optional) ObservedGeneration is the latest generation observed by the controller. COSIDeploymentStrategy ( string alias) ( Appears on: CephCOSIDriverSpec ) COSIDeploymentStrategy represents the strategy to use to deploy the Ceph COSI driver Value Description \"Always\" Always means the Ceph COSI driver will be deployed even if the object store is not present \"Auto\" Auto means the Ceph COSI driver will be deployed automatically if object store is present \"Never\" Never means the Ceph COSI driver will never deployed Capacity ( Appears on: CephStatus ) Capacity is the capacity information of a Ceph Cluster Field Description bytesTotal uint64 bytesUsed uint64 bytesAvailable uint64 lastUpdated string CephBlockPoolRadosNamespaceSpec ( Appears on: CephBlockPoolRadosNamespace ) CephBlockPoolRadosNamespaceSpec represents the specification of a CephBlockPool Rados Namespace Field Description blockPoolName string BlockPoolName is the name of Ceph BlockPool. Typically it\u2019s the name of the CephBlockPool CR. CephBlockPoolRadosNamespaceStatus ( Appears on: CephBlockPoolRadosNamespace ) CephBlockPoolRadosNamespaceStatus represents the Status of Ceph BlockPool Rados Namespace Field Description phase ConditionType (Optional) info map[string]string (Optional) CephBlockPoolStatus ( Appears on: CephBlockPool ) CephBlockPoolStatus represents the mirroring status of Ceph Storage Pool Field Description phase ConditionType (Optional) mirroringStatus MirroringStatusSpec (Optional) mirroringInfo MirroringInfoSpec (Optional) snapshotScheduleStatus SnapshotScheduleStatusSpec (Optional) info map[string]string (Optional) observedGeneration int64 (Optional) ObservedGeneration is the latest generation observed by the controller. conditions []Condition CephCOSIDriverSpec ( Appears on: CephCOSIDriver ) CephCOSIDriverSpec represents the specification of a Ceph COSI Driver Field Description image string (Optional) Image is the container image to run the Ceph COSI driver objectProvisionerImage string (Optional) ObjectProvisionerImage is the container image to run the COSI driver sidecar deploymentStrategy COSIDeploymentStrategy (Optional) DeploymentStrategy is the strategy to use to deploy the COSI driver. placement Placement (Optional) Placement is the placement strategy to use for the COSI driver resources Kubernetes core/v1.ResourceRequirements (Optional) Resources is the resource requirements for the COSI driver CephClientStatus ( Appears on: CephClient ) CephClientStatus represents the Status of Ceph Client Field Description phase ConditionType (Optional) info map[string]string (Optional) observedGeneration int64 (Optional) ObservedGeneration is the latest generation observed by the controller. 
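As a small illustration of the CephBlockPoolRadosNamespaceSpec described above, here is a minimal sketch of such a CR; the blockPoolName value replicapool is an assumption and should match an existing CephBlockPool CR in your cluster.

```yaml
apiVersion: ceph.rook.io/v1
kind: CephBlockPoolRadosNamespace
metadata:
  # The RADOS namespace to create in the pool (name is illustrative).
  name: namespace-a
  namespace: rook-ceph
spec:
  # Name of the CephBlockPool CR this namespace belongs to (assumed value).
  blockPoolName: replicapool
```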
CephClusterHealthCheckSpec ( Appears on: ClusterSpec ) CephClusterHealthCheckSpec represent the healthcheck for Ceph daemons Field Description daemonHealth DaemonHealthSpec (Optional) DaemonHealth is the health check for a given daemon livenessProbe map[github.com/rook/rook/pkg/apis/ceph.rook.io/v1.KeyType]*github.com/rook/rook/pkg/apis/ceph.rook.io/v1.ProbeSpec (Optional) LivenessProbe allows changing the livenessProbe configuration for a given daemon startupProbe map[github.com/rook/rook/pkg/apis/ceph.rook.io/v1.KeyType]*github.com/rook/rook/pkg/apis/ceph.rook.io/v1.ProbeSpec (Optional) StartupProbe allows changing the startupProbe configuration for a given daemon CephDaemonsVersions ( Appears on: CephStatus ) CephDaemonsVersions show the current ceph version for different ceph daemons Field Description mon map[string]int (Optional) Mon shows Mon Ceph version mgr map[string]int (Optional) Mgr shows Mgr Ceph version osd map[string]int (Optional) Osd shows Osd Ceph version rgw map[string]int (Optional) Rgw shows Rgw Ceph version mds map[string]int (Optional) Mds shows Mds Ceph version rbd-mirror map[string]int (Optional) RbdMirror shows RbdMirror Ceph version cephfs-mirror map[string]int (Optional) CephFSMirror shows CephFSMirror Ceph version overall map[string]int (Optional) Overall shows overall Ceph version CephFilesystemStatus ( Appears on: CephFilesystem ) CephFilesystemStatus represents the status of a Ceph Filesystem Field Description phase ConditionType (Optional) snapshotScheduleStatus FilesystemSnapshotScheduleStatusSpec (Optional) info map[string]string (Optional) Use only info and put mirroringStatus in it? mirroringStatus FilesystemMirroringInfoSpec (Optional) MirroringStatus is the filesystem mirroring status conditions []Condition observedGeneration int64 (Optional) ObservedGeneration is the latest generation observed by the controller. CephFilesystemSubVolumeGroupSpec ( Appears on: CephFilesystemSubVolumeGroup ) CephFilesystemSubVolumeGroupSpec represents the specification of a Ceph Filesystem SubVolumeGroup Field Description filesystemName string FilesystemName is the name of Ceph Filesystem SubVolumeGroup volume name. Typically it\u2019s the name of the CephFilesystem CR. If not coming from the CephFilesystem CR, it can be retrieved from the list of Ceph Filesystem volumes with ceph fs volume ls . To learn more about Ceph Filesystem abstractions see https://docs.ceph.com/en/latest/cephfs/fs-volumes/#fs-volumes-and-subvolumes CephFilesystemSubVolumeGroupStatus ( Appears on: CephFilesystemSubVolumeGroup ) CephFilesystemSubVolumeGroupStatus represents the Status of Ceph Filesystem SubVolumeGroup Field Description phase ConditionType (Optional) info map[string]string (Optional) observedGeneration int64 (Optional) ObservedGeneration is the latest generation observed by the controller. 
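Similarly, a minimal sketch of a CephFilesystemSubVolumeGroup using the CephFilesystemSubVolumeGroupSpec described above; the filesystemName value myfs is an assumption and should match an existing CephFilesystem CR (or an entry from ceph fs volume ls).

```yaml
apiVersion: ceph.rook.io/v1
kind: CephFilesystemSubVolumeGroup
metadata:
  # The subvolume group to create (name is illustrative).
  name: group-a
  namespace: rook-ceph
spec:
  # Typically the name of the CephFilesystem CR (assumed value).
  filesystemName: myfs
```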
CephHealthMessage ( Appears on: CephStatus ) CephHealthMessage represents the health message of a Ceph Cluster Field Description severity string message string CephStatus ( Appears on: ClusterStatus ) CephStatus is the details health of a Ceph Cluster Field Description health string details map[string]github.com/rook/rook/pkg/apis/ceph.rook.io/v1.CephHealthMessage lastChecked string lastChanged string previousHealth string capacity Capacity versions CephDaemonsVersions (Optional) fsid string CephStorage ( Appears on: ClusterStatus ) CephStorage represents flavors of Ceph Cluster Storage Field Description deviceClasses []DeviceClasses osd OSDStatus CephVersionSpec ( Appears on: ClusterSpec ) CephVersionSpec represents the settings for the Ceph version that Rook is orchestrating. Field Description image string (Optional) Image is the container image used to launch the ceph daemons, such as quay.io/ceph/ceph: The full list of images can be found at https://quay.io/repository/ceph/ceph?tab=tags allowUnsupported bool (Optional) Whether to allow unsupported versions (do not set to true in production) imagePullPolicy Kubernetes core/v1.PullPolicy (Optional) ImagePullPolicy describes a policy for if/when to pull a container image One of Always, Never, IfNotPresent. CleanupConfirmationProperty ( string alias) ( Appears on: CleanupPolicySpec ) CleanupConfirmationProperty represents the cleanup confirmation Value Description \"yes-really-destroy-data\" DeleteDataDirOnHostsConfirmation represents the validation to destroy dataDirHostPath CleanupPolicySpec ( Appears on: ClusterSpec ) CleanupPolicySpec represents a Ceph Cluster cleanup policy Field Description confirmation CleanupConfirmationProperty (Optional) Confirmation represents the cleanup confirmation sanitizeDisks SanitizeDisksSpec (Optional) SanitizeDisks represents way we sanitize disks allowUninstallWithVolumes bool (Optional) AllowUninstallWithVolumes defines whether we can proceed with the uninstall if they are RBD images still present ClientSpec ( Appears on: CephClient ) ClientSpec represents the specification of a Ceph Client Field Description name string (Optional) caps map[string]string ClusterSpec ( Appears on: CephCluster ) ClusterSpec represents the specification of Ceph Cluster Field Description cephVersion CephVersionSpec (Optional) The version information that instructs Rook to orchestrate a particular version of Ceph. storage StorageScopeSpec (Optional) A spec for available storage in the cluster and how it should be used annotations AnnotationsSpec (Optional) The annotations-related configuration to add/set on each Pod related object. labels LabelsSpec (Optional) The labels-related configuration to add/set on each Pod related object. placement PlacementSpec (Optional) The placement-related configuration to pass to kubernetes (affinity, node selector, tolerations). 
network NetworkSpec (Optional) Network related configuration resources ResourceSpec (Optional) Resources set resource requests and limits priorityClassNames PriorityClassNamesSpec (Optional) PriorityClassNames sets priority classes on components dataDirHostPath string (Optional) The path on the host where config and data can be persisted skipUpgradeChecks bool (Optional) SkipUpgradeChecks defines if an upgrade should be forced even if one of the check fails continueUpgradeAfterChecksEvenIfNotHealthy bool (Optional) ContinueUpgradeAfterChecksEvenIfNotHealthy defines if an upgrade should continue even if PGs are not clean waitTimeoutForHealthyOSDInMinutes time.Duration (Optional) WaitTimeoutForHealthyOSDInMinutes defines the time the operator would wait before an OSD can be stopped for upgrade or restart. If the timeout exceeds and OSD is not ok to stop, then the operator would skip upgrade for the current OSD and proceed with the next one if continueUpgradeAfterChecksEvenIfNotHealthy is false . If continueUpgradeAfterChecksEvenIfNotHealthy is true , then operator would continue with the upgrade of an OSD even if its not ok to stop after the timeout. This timeout won\u2019t be applied if skipUpgradeChecks is true . The default wait timeout is 10 minutes. disruptionManagement DisruptionManagementSpec (Optional) A spec for configuring disruption management. mon MonSpec (Optional) A spec for mon related options crashCollector CrashCollectorSpec (Optional) A spec for the crash controller dashboard DashboardSpec (Optional) Dashboard settings monitoring MonitoringSpec (Optional) Prometheus based Monitoring settings external ExternalSpec (Optional) Whether the Ceph Cluster is running external to this Kubernetes cluster mon, mgr, osd, mds, and discover daemons will not be created for external clusters. mgr MgrSpec (Optional) A spec for mgr related options removeOSDsIfOutAndSafeToRemove bool (Optional) Remove the OSD that is out and safe to remove only if this option is true cleanupPolicy CleanupPolicySpec (Optional) Indicates user intent when deleting a cluster; blocks orchestration and should not be set if cluster deletion is not imminent. healthCheck CephClusterHealthCheckSpec (Optional) Internal daemon healthchecks and liveness probe security SecuritySpec (Optional) Security represents security settings logCollector LogCollectorSpec (Optional) Logging represents loggings settings ClusterState ( string alias) ( Appears on: ClusterStatus ) ClusterState represents the state of a Ceph Cluster Value Description \"Connected\" ClusterStateConnected represents the Connected state of a Ceph Cluster \"Connecting\" ClusterStateConnecting represents the Connecting state of a Ceph Cluster \"Created\" ClusterStateCreated represents the Created state of a Ceph Cluster \"Creating\" ClusterStateCreating represents the Creating state of a Ceph Cluster \"Error\" ClusterStateError represents the Error state of a Ceph Cluster \"Updating\" ClusterStateUpdating represents the Updating state of a Ceph Cluster ClusterStatus ( Appears on: CephCluster ) ClusterStatus represents the status of a Ceph cluster Field Description state ClusterState phase ConditionType message string conditions []Condition ceph CephStatus storage CephStorage version ClusterVersion observedGeneration int64 (Optional) ObservedGeneration is the latest generation observed by the controller. 
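To show how the upgrade-related ClusterSpec fields above fit together, here is a minimal CephCluster sketch; the image tag and dataDirHostPath are assumptions for illustration, not recommendations.

```yaml
apiVersion: ceph.rook.io/v1
kind: CephCluster
metadata:
  name: rook-ceph
  namespace: rook-ceph
spec:
  cephVersion:
    # Assumed Ceph image tag; see CephVersionSpec below.
    image: quay.io/ceph/ceph:v17.2.6
    allowUnsupported: false
  # Assumed host path where config and data are persisted.
  dataDirHostPath: /var/lib/rook
  # Upgrade behavior, per the skipUpgradeChecks, continueUpgradeAfterChecksEvenIfNotHealthy,
  # and waitTimeoutForHealthyOSDInMinutes fields described above.
  skipUpgradeChecks: false
  continueUpgradeAfterChecksEvenIfNotHealthy: false
  waitTimeoutForHealthyOSDInMinutes: 10
```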
ClusterVersion ( Appears on: ClusterStatus ) ClusterVersion represents the version of a Ceph Cluster Field Description image string version string CompressionSpec ( Appears on: ConnectionsSpec ) Field Description enabled bool (Optional) Whether to compress the data in transit across the wire. The default is not set. Requires Ceph Quincy (v17) or newer. Condition ( Appears on: CephBlockPoolStatus , CephFilesystemStatus , ClusterStatus , ObjectStoreStatus , Status ) Condition represents a status condition on any Rook-Ceph Custom Resource. Field Description type ConditionType status Kubernetes core/v1.ConditionStatus reason ConditionReason message string lastHeartbeatTime Kubernetes meta/v1.Time lastTransitionTime Kubernetes meta/v1.Time ConditionReason ( string alias) ( Appears on: Condition ) ConditionReason is a reason for a condition Value Description \"ClusterConnected\" ClusterConnectedReason is cluster connected reason \"ClusterConnecting\" ClusterConnectingReason is cluster connecting reason \"ClusterCreated\" ClusterCreatedReason is cluster created reason \"ClusterDeleting\" ClusterDeletingReason is cluster deleting reason \"ClusterProgressing\" ClusterProgressingReason is cluster progressing reason \"Deleting\" DeletingReason represents when Rook has detected a resource object should be deleted. \"ObjectHasDependents\" ObjectHasDependentsReason represents when a resource object has dependents that are blocking deletion. \"ObjectHasNoDependents\" ObjectHasNoDependentsReason represents when a resource object has no dependents that are blocking deletion. \"ReconcileFailed\" ReconcileFailed represents when a resource reconciliation failed. \"ReconcileStarted\" ReconcileStarted represents when a resource reconciliation started. \"ReconcileSucceeded\" ReconcileSucceeded represents when a resource reconciliation was successful. ConditionType ( string alias) ( Appears on: CephBlockPoolRadosNamespaceStatus , CephBlockPoolStatus , CephClientStatus , CephFilesystemStatus , CephFilesystemSubVolumeGroupStatus , ClusterStatus , Condition , ObjectStoreStatus ) ConditionType represent a resource\u2019s status Value Description \"Connected\" ConditionConnected represents Connected state of an object \"Connecting\" ConditionConnecting represents Connecting state of an object \"Deleting\" ConditionDeleting represents Deleting state of an object \"DeletionIsBlocked\" ConditionDeletionIsBlocked represents when deletion of the object is blocked. \"Failure\" ConditionFailure represents Failure state of an object \"Progressing\" ConditionProgressing represents Progressing state of an object \"Ready\" ConditionReady represents Ready state of an object ConfigFileVolumeSource ( Appears on: KerberosConfigFiles , KerberosKeytabFile , SSSDSidecarAdditionalFile , SSSDSidecarConfigFile ) Represents the source of a volume to mount. Only one of its members may be specified. This is a subset of the full Kubernetes API\u2019s VolumeSource that is reduced to what is most likely to be useful for mounting config files/dirs into Rook pods. Field Description hostPath Kubernetes core/v1.HostPathVolumeSource (Optional) hostPath represents a pre-existing file or directory on the host machine that is directly exposed to the container. This is generally used for system agents or other privileged things that are allowed to see the host machine. Most containers will NOT need this. 
More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath emptyDir Kubernetes core/v1.EmptyDirVolumeSource (Optional) emptyDir represents a temporary directory that shares a pod\u2019s lifetime. More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir secret Kubernetes core/v1.SecretVolumeSource (Optional) secret represents a secret that should populate this volume. More info: https://kubernetes.io/docs/concepts/storage/volumes#secret persistentVolumeClaim Kubernetes core/v1.PersistentVolumeClaimVolumeSource (Optional) persistentVolumeClaimVolumeSource represents a reference to a PersistentVolumeClaim in the same namespace. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims configMap Kubernetes core/v1.ConfigMapVolumeSource (Optional) configMap represents a configMap that should populate this volume projected Kubernetes core/v1.ProjectedVolumeSource projected items for all in one resources secrets, configmaps, and downward API ConnectionsSpec ( Appears on: NetworkSpec ) Field Description encryption EncryptionSpec (Optional) Encryption settings for the network connections. compression CompressionSpec (Optional) Compression settings for the network connections. requireMsgr2 bool (Optional) Whether to require msgr2 (port 3300) even if compression or encryption are not enabled. If true, the msgr1 port (6789) will be disabled. Requires a kernel that supports msgr2 (kernel 5.11 or CentOS 8.4 or newer). CrashCollectorSpec ( Appears on: ClusterSpec ) CrashCollectorSpec represents options to configure the crash controller Field Description disable bool (Optional) Disable determines whether we should enable the crash collector daysToRetain uint (Optional) DaysToRetain represents the number of days to retain crash until they get pruned DaemonHealthSpec ( Appears on: CephClusterHealthCheckSpec ) DaemonHealthSpec is a daemon health check Field Description status HealthCheckSpec (Optional) Status represents the health check settings for the Ceph health mon HealthCheckSpec (Optional) Monitor represents the health check settings for the Ceph monitor osd HealthCheckSpec (Optional) ObjectStorageDaemon represents the health check settings for the Ceph OSDs DashboardSpec ( Appears on: ClusterSpec ) DashboardSpec represents the settings for the Ceph dashboard Field Description enabled bool (Optional) Enabled determines whether to enable the dashboard urlPrefix string (Optional) URLPrefix is a prefix for all URLs to use the dashboard with a reverse proxy port int (Optional) Port is the dashboard webserver port ssl bool (Optional) SSL determines whether SSL should be used Device ( Appears on: Selection ) Device represents a disk to use in the cluster Field Description name string (Optional) fullpath string (Optional) config map[string]string (Optional) DeviceClasses ( Appears on: CephStorage ) DeviceClasses represents device classes of a Ceph Cluster Field Description name string DisruptionManagementSpec ( Appears on: ClusterSpec ) DisruptionManagementSpec configures management of daemon disruptions Field Description managePodBudgets bool (Optional) This enables management of poddisruptionbudgets osdMaintenanceTimeout time.Duration (Optional) OSDMaintenanceTimeout sets how many additional minutes the DOWN/OUT interval is for drained failure domains it only works if managePodBudgets is true. 
the default is 30 minutes pgHealthCheckTimeout time.Duration (Optional) PGHealthCheckTimeout is the time (in minutes) that the operator will wait for the placement groups to become healthy (active+clean) after a drain was completed and OSDs came back up. Rook will continue with the next drain if the timeout exceeds. It only works if managePodBudgets is true. No values or 0 means that the operator will wait until the placement groups are healthy before unblocking the next drain. manageMachineDisruptionBudgets bool (Optional) Deprecated. This enables management of machinedisruptionbudgets. machineDisruptionBudgetNamespace string (Optional) Deprecated. Namespace to look for MDBs by the machineDisruptionBudgetController EncryptionSpec ( Appears on: ConnectionsSpec ) Field Description enabled bool (Optional) Whether to encrypt the data in transit across the wire to prevent eavesdropping the data on the network. The default is not set. Even if encryption is not enabled, clients still establish a strong initial authentication for the connection and data integrity is still validated with a crc check. When encryption is enabled, all communication between clients and Ceph daemons, or between Ceph daemons will be encrypted. EndpointAddress ( Appears on: GatewaySpec ) EndpointAddress is a tuple that describes a single IP address or host name. This is a subset of Kubernetes\u2019s v1.EndpointAddress. Field Description ip string (Optional) The IP of this endpoint. As a legacy behavior, this supports being given a DNS-adressable hostname as well. hostname string (Optional) The DNS-addressable Hostname of this endpoint. This field will be preferred over IP if both are given. ErasureCodedSpec ( Appears on: PoolSpec ) ErasureCodedSpec represents the spec for erasure code in a pool Field Description codingChunks uint Number of coding chunks per object in an erasure coded storage pool (required for erasure-coded pool type). This is the number of OSDs that can be lost simultaneously before data cannot be recovered. dataChunks uint Number of data chunks per object in an erasure coded storage pool (required for erasure-coded pool type). The number of chunks required to recover an object when any single OSD is lost is the same as dataChunks so be aware that the larger the number of data chunks, the higher the cost of recovery. algorithm string (Optional) The algorithm for erasure coding ExternalSpec ( Appears on: ClusterSpec ) ExternalSpec represents the options supported by an external cluster Field Description enable bool (Optional) Enable determines whether external mode is enabled or not FSMirroringSpec ( Appears on: FilesystemSpec ) FSMirroringSpec represents the setting for a mirrored filesystem Field Description enabled bool (Optional) Enabled whether this filesystem is mirrored or not peers MirroringPeerSpec (Optional) Peers represents the peers spec snapshotSchedules []SnapshotScheduleSpec (Optional) SnapshotSchedules is the scheduling of snapshot for mirrored filesystems snapshotRetention []SnapshotScheduleRetentionSpec (Optional) Retention is the retention policy for a snapshot schedule One path has exactly one retention policy. 
A policy can however contain multiple count-time period pairs in order to specify complex retention policies FilesystemMirrorInfoPeerSpec ( Appears on: FilesystemsSpec ) FilesystemMirrorInfoPeerSpec is the specification of a filesystem peer mirror Field Description uuid string (Optional) UUID is the peer unique identifier remote PeerRemoteSpec (Optional) Remote are the remote cluster information stats PeerStatSpec (Optional) Stats are the stat a peer mirror FilesystemMirroringInfo ( Appears on: FilesystemMirroringInfoSpec ) FilesystemMirrorInfoSpec is the filesystem mirror status of a given filesystem Field Description daemon_id int (Optional) DaemonID is the cephfs-mirror name filesystems []FilesystemsSpec (Optional) Filesystems is the list of filesystems managed by a given cephfs-mirror daemon FilesystemMirroringInfoSpec ( Appears on: CephFilesystemStatus ) FilesystemMirroringInfo is the status of the pool mirroring Field Description daemonsStatus []FilesystemMirroringInfo (Optional) PoolMirroringStatus is the mirroring status of a filesystem lastChecked string (Optional) LastChecked is the last time time the status was checked lastChanged string (Optional) LastChanged is the last time time the status last changed details string (Optional) Details contains potential status errors FilesystemMirroringSpec ( Appears on: CephFilesystemMirror ) FilesystemMirroringSpec is the filesystem mirroring specification Field Description placement Placement (Optional) The affinity to place the rgw pods (default is to place on any available node) annotations Annotations (Optional) The annotations-related configuration to add/set on each Pod related object. labels Labels (Optional) The labels-related configuration to add/set on each Pod related object. resources Kubernetes core/v1.ResourceRequirements (Optional) The resource requirements for the cephfs-mirror pods priorityClassName string (Optional) PriorityClassName sets priority class on the cephfs-mirror pods FilesystemSnapshotScheduleStatusRetention ( Appears on: FilesystemSnapshotSchedulesSpec ) FilesystemSnapshotScheduleStatusRetention is the retention specification for a filesystem snapshot schedule Field Description start string (Optional) Start is when the snapshot schedule starts created string (Optional) Created is when the snapshot schedule was created first string (Optional) First is when the first snapshot schedule was taken last string (Optional) Last is when the last snapshot schedule was taken last_pruned string (Optional) LastPruned is when the last snapshot schedule was pruned created_count int (Optional) CreatedCount is total amount of snapshots pruned_count int (Optional) PrunedCount is total amount of pruned snapshots active bool (Optional) Active is whether the scheduled is active or not FilesystemSnapshotScheduleStatusSpec ( Appears on: CephFilesystemStatus ) FilesystemSnapshotScheduleStatusSpec is the status of the snapshot schedule Field Description snapshotSchedules []FilesystemSnapshotSchedulesSpec (Optional) SnapshotSchedules is the list of snapshots scheduled lastChecked string (Optional) LastChecked is the last time time the status was checked lastChanged string (Optional) LastChanged is the last time time the status last changed details string (Optional) Details contains potential status errors FilesystemSnapshotSchedulesSpec ( Appears on: FilesystemSnapshotScheduleStatusSpec ) FilesystemSnapshotSchedulesSpec is the list of snapshot scheduled for images in a pool Field Description fs string (Optional) Fs is the name of the 
Ceph Filesystem subvol string (Optional) Subvol is the name of the sub volume path string (Optional) Path is the path on the filesystem rel_path string (Optional) schedule string (Optional) retention FilesystemSnapshotScheduleStatusRetention (Optional) FilesystemSpec ( Appears on: CephFilesystem ) FilesystemSpec represents the spec of a file system Field Description metadataPool PoolSpec The metadata pool settings dataPools []NamedPoolSpec The data pool settings, with optional predefined pool name. preservePoolsOnDelete bool (Optional) Preserve pools on filesystem deletion preserveFilesystemOnDelete bool (Optional) Preserve the fs in the cluster on CephFilesystem CR deletion. Setting this to true automatically implies PreservePoolsOnDelete is true. metadataServer MetadataServerSpec The mds pod info mirroring FSMirroringSpec (Optional) The mirroring settings statusCheck MirrorHealthCheckSpec The mirroring statusCheck FilesystemsSpec ( Appears on: FilesystemMirroringInfo ) FilesystemsSpec is spec for the mirrored filesystem Field Description filesystem_id int (Optional) FilesystemID is the filesystem identifier name string (Optional) Name is name of the filesystem directory_count int (Optional) DirectoryCount is the number of directories in the filesystem peers []FilesystemMirrorInfoPeerSpec (Optional) Peers represents the mirroring peers GaneshaRADOSSpec ( Appears on: NFSGaneshaSpec ) GaneshaRADOSSpec represents the specification of a Ganesha RADOS object Field Description pool string (Optional) The Ceph pool used store the shared configuration for NFS-Ganesha daemons. This setting is required for Ceph v15 and ignored for Ceph v16. As of Ceph Pacific 16.2.7+, this is internally hardcoded to \u201c.nfs\u201d. namespace string (Optional) The namespace inside the Ceph pool (set by \u2018pool\u2019) where shared NFS-Ganesha config is stored. This setting is required for Ceph v15 and ignored for Ceph v16. As of Ceph Pacific v16+, this is internally set to the name of the CephNFS. GaneshaServerSpec ( Appears on: NFSGaneshaSpec ) GaneshaServerSpec represents the specification of a Ganesha Server Field Description active int The number of active Ganesha servers placement Placement (Optional) The affinity to place the ganesha pods annotations Annotations (Optional) The annotations-related configuration to add/set on each Pod related object. labels Labels (Optional) The labels-related configuration to add/set on each Pod related object. resources Kubernetes core/v1.ResourceRequirements (Optional) Resources set resource requests and limits priorityClassName string (Optional) PriorityClassName sets the priority class on the pods logLevel string (Optional) LogLevel set logging level hostNetwork bool (Optional) Whether host networking is enabled for the Ganesha server. If not set, the network settings from the cluster CR will be applied. GatewaySpec ( Appears on: ObjectStoreSpec ) GatewaySpec represents the specification of Ceph Object Store Gateway Field Description port int32 (Optional) The port the rgw service will be listening on (http) securePort int32 (Optional) The port the rgw service will be listening on (https) instances int32 (Optional) The number of pods in the rgw replicaset. sslCertificateRef string (Optional) The name of the secret that stores the ssl certificate for secure rgw connections caBundleRef string (Optional) The name of the secret that stores custom ca-bundle with root and intermediate certificates. 
placement Placement (Optional) The affinity to place the rgw pods (default is to place on any available node) disableMultisiteSyncTraffic bool (Optional) DisableMultisiteSyncTraffic, when true, prevents this object store\u2019s gateways from transmitting multisite replication data. Note that this value does not affect whether gateways receive multisite replication traffic: see ObjectZone.spec.customEndpoints for that. If false or unset, this object store\u2019s gateways will be able to transmit multisite replication data. annotations Annotations (Optional) The annotations-related configuration to add/set on each Pod related object. labels Labels (Optional) The labels-related configuration to add/set on each Pod related object. resources Kubernetes core/v1.ResourceRequirements (Optional) The resource requirements for the rgw pods priorityClassName string (Optional) PriorityClassName sets priority classes on the rgw pods externalRgwEndpoints []EndpointAddress (Optional) ExternalRgwEndpoints points to external RGW endpoint(s). Multiple endpoints can be given, but for stability of ObjectBucketClaims, we highly recommend that users give only a single external RGW endpoint that is a load balancer that sends requests to the multiple RGWs. service RGWServiceSpec (Optional) The configuration related to add/set on each rgw service. hostNetwork bool (Optional) Whether host networking is enabled for the rgw daemon. If not set, the network settings from the cluster CR will be applied. dashboardEnabled bool (Optional) Whether rgw dashboard is enabled for the rgw daemon. If not set, the rgw dashboard will be enabled. HTTPEndpointSpec ( Appears on: TopicEndpointSpec ) HTTPEndpointSpec represent the spec of an HTTP endpoint of a Bucket Topic Field Description uri string The URI of the HTTP endpoint to push notification to disableVerifySSL bool (Optional) Indicate whether the server certificate is validated by the client or not sendCloudEvents bool (Optional) Send the notifications with the CloudEvents header: https://github.com/cloudevents/spec/blob/main/cloudevents/adapters/aws-s3.md Supported for Ceph Quincy (v17) or newer. HealthCheckSpec ( Appears on: DaemonHealthSpec , MirrorHealthCheckSpec ) HealthCheckSpec represents the health check of an object store bucket Field Description disabled bool (Optional) interval Kubernetes meta/v1.Duration (Optional) Interval is the internal in second or minute for the health check to run like 60s for 60 seconds timeout string (Optional) HybridStorageSpec ( Appears on: ReplicatedSpec ) HybridStorageSpec represents the settings for hybrid storage pool Field Description primaryDeviceClass string PrimaryDeviceClass represents high performance tier (for example SSD or NVME) for Primary OSD secondaryDeviceClass string SecondaryDeviceClass represents low performance tier (for example HDDs) for remaining OSDs IPFamilyType ( string alias) ( Appears on: NetworkSpec ) IPFamilyType represents the single stack Ipv4 or Ipv6 protocol. 
Value Description \"IPv4\" IPv4 internet protocol version \"IPv6\" IPv6 internet protocol version KafkaEndpointSpec ( Appears on: TopicEndpointSpec ) KafkaEndpointSpec represent the spec of a Kafka endpoint of a Bucket Topic Field Description uri string The URI of the Kafka endpoint to push notification to useSSL bool (Optional) Indicate whether to use SSL when communicating with the broker disableVerifySSL bool (Optional) Indicate whether the server certificate is validated by the client or not ackLevel string (Optional) The ack level required for this topic (none/broker) KerberosConfigFiles ( Appears on: KerberosSpec ) KerberosConfigFiles represents the source(s) from which Kerberos configuration should come. Field Description volumeSource ConfigFileVolumeSource VolumeSource accepts a pared down version of the standard Kubernetes VolumeSource for Kerberos configuration files like what is normally used to configure Volumes for a Pod. For example, a ConfigMap, Secret, or HostPath. The volume may contain multiple files, all of which will be loaded. KerberosKeytabFile ( Appears on: KerberosSpec ) KerberosKeytabFile represents the source(s) from which the Kerberos keytab file should come. Field Description volumeSource ConfigFileVolumeSource VolumeSource accepts a pared down version of the standard Kubernetes VolumeSource for the Kerberos keytab file like what is normally used to configure Volumes for a Pod. For example, a Secret or HostPath. There are two requirements for the source\u2019s content: 1. The config file must be mountable via subPath: krb5.keytab . For example, in a Secret, the data item must be named krb5.keytab , or items must be defined to select the key and give it path krb5.keytab . A HostPath directory must have the krb5.keytab file. 2. The volume or config file must have mode 0600. KerberosSpec ( Appears on: NFSSecuritySpec ) KerberosSpec represents configuration for Kerberos. Field Description principalName string (Optional) PrincipalName corresponds directly to NFS-Ganesha\u2019s NFS_KRB5:PrincipalName config. In practice, this is the service prefix of the principal name. The default is \u201cnfs\u201d. This value is combined with (a) the namespace and name of the CephNFS (with a hyphen between) and (b) the Realm configured in the user-provided krb5.conf to determine the full principal name: / - @ . e.g., nfs/rook-ceph-my-nfs@example.net. See https://github.com/nfs-ganesha/nfs-ganesha/wiki/RPCSEC_GSS for more detail. domainName string (Optional) DomainName should be set to the Kerberos Realm. configFiles KerberosConfigFiles (Optional) ConfigFiles defines where the Kerberos configuration should be sourced from. Config files will be placed into the /etc/krb5.conf.rook/ directory. If this is left empty, Rook will not add any files. This allows you to manage the files yourself however you wish. For example, you may build them into your custom Ceph container image or use the Vault agent injector to securely add the files via annotations on the CephNFS spec (passed to the NFS server pods). Rook configures Kerberos to log to stderr. We suggest removing logging sections from config files to avoid consuming unnecessary disk space from logging to files. keytabFile KerberosKeytabFile (Optional) KeytabFile defines where the Kerberos keytab should be sourced from. The keytab file will be placed into /etc/krb5.keytab . If this is left empty, Rook will not add the file. This allows you to manage the krb5.keytab file yourself however you wish. 
For example, you may build it into your custom Ceph container image or use the Vault agent injector to securely add the file via annotations on the CephNFS spec (passed to the NFS server pods). KeyManagementServiceSpec ( Appears on: ObjectStoreSecuritySpec , SecuritySpec ) KeyManagementServiceSpec represent various details of the KMS server Field Description connectionDetails map[string]string (Optional) ConnectionDetails contains the KMS connection details (address, port etc) tokenSecretName string (Optional) TokenSecretName is the kubernetes secret containing the KMS token KeyRotationSpec ( Appears on: SecuritySpec ) KeyRotationSpec represents the settings for Key Rotation. Field Description enabled bool (Optional) Enabled represents whether the key rotation is enabled. schedule string (Optional) Schedule represents the cron schedule for key rotation. KeyType ( string alias) KeyType type safety Value Description \"exporter\" \"cleanup\" \"clusterMetadata\" \"crashcollector\" \"mds\" \"mgr\" \"mon\" \"arbiter\" \"monitoring\" \"osd\" \"prepareosd\" \"rgw\" \"keyrotation\" Labels ( map[string]string alias) ( Appears on: FilesystemMirroringSpec , GaneshaServerSpec , GatewaySpec , MetadataServerSpec , RBDMirroringSpec ) Labels are label for a given daemons LabelsSpec ( map[github.com/rook/rook/pkg/apis/ceph.rook.io/v1.KeyType]github.com/rook/rook/pkg/apis/ceph.rook.io/v1.Labels alias) ( Appears on: ClusterSpec ) LabelsSpec is the main spec label for all daemons LogCollectorSpec ( Appears on: ClusterSpec ) LogCollectorSpec is the logging spec Field Description enabled bool (Optional) Enabled represents whether the log collector is enabled periodicity string (Optional) Periodicity is the periodicity of the log rotation. maxLogSize k8s.io/apimachinery/pkg/api/resource.Quantity (Optional) MaxLogSize is the maximum size of the log per ceph daemons. Must be at least 1M. MetadataServerSpec ( Appears on: FilesystemSpec ) MetadataServerSpec represents the specification of a Ceph Metadata Server Field Description activeCount int32 The number of metadata servers that are active. The remaining servers in the cluster will be in standby mode. activeStandby bool (Optional) Whether each active MDS instance will have an active standby with a warm metadata cache for faster failover. If false, standbys will still be available, but will not have a warm metadata cache. placement Placement (Optional) The affinity to place the mds pods (default is to place on all available node) with a daemonset annotations Annotations (Optional) The annotations-related configuration to add/set on each Pod related object. labels Labels (Optional) The labels-related configuration to add/set on each Pod related object. 
resources Kubernetes core/v1.ResourceRequirements (Optional) The resource requirements for the mds pods priorityClassName string (Optional) PriorityClassName sets priority classes on components livenessProbe ProbeSpec (Optional) startupProbe ProbeSpec (Optional) MgrSpec ( Appears on: ClusterSpec ) MgrSpec represents options to configure a ceph mgr Field Description count int (Optional) Count is the number of managers to run allowMultiplePerNode bool (Optional) AllowMultiplePerNode allows running multiple managers on the same node (not recommended) modules []Module (Optional) Modules is the list of ceph manager modules to enable/disable MirrorHealthCheckSpec ( Appears on: FilesystemSpec , PoolSpec ) MirrorHealthCheckSpec represents the health specification of a Ceph Storage Pool mirror Field Description mirror HealthCheckSpec (Optional) MirroringInfoSpec ( Appears on: CephBlockPoolStatus ) MirroringInfoSpec is the status of the pool mirroring Field Description PoolMirroringInfo PoolMirroringInfo (Members of PoolMirroringInfo are embedded into this type.) (Optional) lastChecked string (Optional) lastChanged string (Optional) details string (Optional) MirroringPeerSpec ( Appears on: FSMirroringSpec , MirroringSpec , RBDMirroringSpec ) MirroringPeerSpec represents the specification of a mirror peer Field Description secretNames []string (Optional) SecretNames represents the Kubernetes Secret names to add rbd-mirror or cephfs-mirror peers MirroringSpec ( Appears on: PoolSpec ) MirroringSpec represents the settings for a mirrored pool Field Description enabled bool (Optional) Enabled determines whether this pool is mirrored or not mode string (Optional) Mode is the mirroring mode: either pool or image snapshotSchedules []SnapshotScheduleSpec (Optional) SnapshotSchedules is the scheduling of snapshots for mirrored images/pools peers MirroringPeerSpec (Optional) Peers represents the peers spec MirroringStatusSpec ( Appears on: CephBlockPoolStatus ) MirroringStatusSpec is the status of the pool mirroring Field Description PoolMirroringStatus PoolMirroringStatus (Members of PoolMirroringStatus are embedded into this type.) 
(Optional) PoolMirroringStatus is the mirroring status of a pool lastChecked string (Optional) LastChecked is the last time the status was checked lastChanged string (Optional) LastChanged is the last time the status changed details string (Optional) Details contains potential status errors Module ( Appears on: MgrSpec ) Module represents mgr modules that the user wants to enable or disable Field Description name string (Optional) Name is the name of the ceph manager module enabled bool (Optional) Enabled determines whether a module should be enabled or not MonSpec ( Appears on: ClusterSpec ) MonSpec represents the specification of the monitor Field Description count int (Optional) Count is the number of Ceph monitors allowMultiplePerNode bool (Optional) AllowMultiplePerNode determines if we can run multiple monitors on the same node (not recommended) failureDomainLabel string (Optional) zones []MonZoneSpec (Optional) Zones are specified when we want to provide zonal awareness to mons stretchCluster StretchClusterSpec (Optional) StretchCluster is the stretch cluster specification volumeClaimTemplate Kubernetes core/v1.PersistentVolumeClaim (Optional) VolumeClaimTemplate is the PVC definition MonZoneSpec ( Appears on: MonSpec , StretchClusterSpec ) MonZoneSpec represents the specification of a zone in a Ceph Cluster Field Description name string (Optional) Name is the name of the zone arbiter bool (Optional) Arbiter determines if the zone contains the arbiter used for stretch cluster mode volumeClaimTemplate Kubernetes core/v1.PersistentVolumeClaim (Optional) VolumeClaimTemplate is the PVC template MonitoringSpec ( Appears on: ClusterSpec ) MonitoringSpec represents the settings for Prometheus based Ceph monitoring Field Description enabled bool (Optional) Enabled determines whether to create the prometheus rules for the ceph cluster. If true, the prometheus types must exist or the creation will fail. Default is false. metricsDisabled bool (Optional) Whether to disable the metrics reported by Ceph. If false, the prometheus mgr module and Ceph exporter are enabled. If true, the prometheus mgr module and Ceph exporter are both disabled. Default is false. externalMgrEndpoints []Kubernetes core/v1.EndpointAddress (Optional) ExternalMgrEndpoints points to an existing Ceph prometheus exporter endpoint externalMgrPrometheusPort uint16 (Optional) ExternalMgrPrometheusPort is the Prometheus exporter port port int (Optional) Port is the prometheus server port interval Kubernetes meta/v1.Duration (Optional) Interval determines the prometheus scrape interval MultiClusterServiceSpec ( Appears on: NetworkSpec ) Field Description enabled bool (Optional) Enable multiClusterService to export the mon and OSD services to peer clusters. Ensure that peer clusters are connected using an MCS API compatible application, like Globalnet Submariner. clusterID string ClusterID uniquely identifies a cluster. It is used as a prefix to nslookup exported services. For example: . . 
.svc.clusterset.local NFSGaneshaSpec ( Appears on: CephNFS ) NFSGaneshaSpec represents the spec of an nfs ganesha server Field Description rados GaneshaRADOSSpec (Optional) RADOS is the Ganesha RADOS specification server GaneshaServerSpec Server is the Ganesha Server specification security NFSSecuritySpec (Optional) Security allows specifying security configurations for the NFS cluster NFSSecuritySpec ( Appears on: NFSGaneshaSpec ) NFSSecuritySpec represents security configurations for an NFS server pod Field Description sssd SSSDSpec (Optional) SSSD enables integration with System Security Services Daemon (SSSD). SSSD can be used to provide user ID mapping from a number of sources. See https://sssd.io for more information about the SSSD project. kerberos KerberosSpec (Optional) Kerberos configures NFS-Ganesha to secure NFS client connections with Kerberos. NamedBlockPoolSpec ( Appears on: CephBlockPool ) NamedBlockPoolSpec allows a block pool to be created with a non-default name. This is more specific than the NamedPoolSpec so we get schema validation on the allowed pool names that can be specified. Field Description name string (Optional) The desired name of the pool if different from the CephBlockPool CR name. PoolSpec PoolSpec (Members of PoolSpec are embedded into this type.) The core pool configuration NamedPoolSpec ( Appears on: FilesystemSpec ) NamedPoolSpec represents the named ceph pool spec Field Description name string Name of the pool PoolSpec PoolSpec (Members of PoolSpec are embedded into this type.) PoolSpec represents the spec of ceph pool NetworkSpec ( Appears on: ClusterSpec ) NetworkSpec for Ceph includes backward compatibility code Field Description provider string (Optional) Provider is what provides network connectivity to the cluster e.g. \u201chost\u201d or \u201cmultus\u201d selectors map[string]string (Optional) Selectors string values describe what networks will be used to connect the cluster. Meanwhile the keys describe each network respective responsibilities or any metadata storage provider decide. connections ConnectionsSpec (Optional) Settings for network connections such as compression and encryption across the wire. hostNetwork bool (Optional) HostNetwork to enable host network ipFamily IPFamilyType (Optional) IPFamily is the single stack IPv6 or IPv4 protocol dualStack bool (Optional) DualStack determines whether Ceph daemons should listen on both IPv4 and IPv6 multiClusterService MultiClusterServiceSpec (Optional) Enable multiClusterService to export the Services between peer clusters Node ( Appears on: StorageScopeSpec ) Node is a storage nodes Field Description name string (Optional) resources Kubernetes core/v1.ResourceRequirements (Optional) config map[string]string (Optional) Selection Selection (Members of Selection are embedded into this type.) 
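To make the NetworkSpec fields above concrete, the following is a minimal, hypothetical sketch of the network section of a CephCluster CR. The provider, selector values, and connection settings are illustrative assumptions rather than defaults; adjust them to your environment and the networks available in your cluster.

apiVersion: ceph.rook.io/v1
kind: CephCluster
metadata:
  name: rook-ceph
  namespace: rook-ceph
spec:
  # other cluster settings omitted for brevity
  network:
    provider: multus                  # e.g. "host" or "multus"
    selectors:
      public: rook-ceph/public-net    # assumed NetworkAttachmentDefinition names; keys name each network's responsibility
      cluster: rook-ceph/cluster-net
    connections:
      encryption:
        enabled: true                 # encrypt traffic across the wire (assumed sub-field layout)
      compression:
        enabled: false
    ipFamily: IPv6                    # single stack IPv4 or IPv6
    dualStack: false                  # set true to listen on both IPv4 and IPv6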
NodesByName ( []github.com/rook/rook/pkg/apis/ceph.rook.io/v1.Node alias) NodesByName implements an interface to sort nodes by name NotificationFilterRule ( Appears on: NotificationFilterSpec ) NotificationFilterRule represent a single rule in the Notification Filter spec Field Description name string Name of the metadata or tag value string Value to filter on NotificationFilterSpec ( Appears on: BucketNotificationSpec ) NotificationFilterSpec represent the spec of a Bucket Notification filter Field Description keyFilters []NotificationKeyFilterRule (Optional) Filters based on the object\u2019s key metadataFilters []NotificationFilterRule (Optional) Filters based on the object\u2019s metadata tagFilters []NotificationFilterRule (Optional) Filters based on the object\u2019s tags NotificationKeyFilterRule ( Appears on: NotificationFilterSpec ) NotificationKeyFilterRule represent a single key rule in the Notification Filter spec Field Description name string Name of the filter - prefix/suffix/regex value string Value to filter on OSDStatus ( Appears on: CephStorage ) OSDStatus represents OSD status of the ceph Cluster Field Description storeType map[string]int StoreType is a mapping between the OSD backend stores and number of OSDs using these stores OSDStore ( Appears on: StorageScopeSpec ) OSDStore is the backend storage type used for creating the OSDs Field Description type string (Optional) Type of backend storage to be used while creating OSDs. If empty, then bluestore will be used updateStore string (Optional) UpdateStore updates the backend store for existing OSDs. It destroys each OSD one at a time, cleans up the backing disk and prepares same OSD on that disk ObjectEndpoints ( Appears on: ObjectStoreStatus ) Field Description insecure []string (Optional) secure []string (Optional) ObjectHealthCheckSpec ( Appears on: ObjectStoreSpec ) ObjectHealthCheckSpec represents the health check of an object store Field Description readinessProbe ProbeSpec (Optional) startupProbe ProbeSpec (Optional) ObjectRealmSpec ( Appears on: CephObjectRealm ) ObjectRealmSpec represent the spec of an ObjectRealm Field Description pull PullSpec ObjectStoreSecuritySpec ( Appears on: ObjectStoreSpec ) ObjectStoreSecuritySpec is spec to define security features like encryption Field Description SecuritySpec SecuritySpec (Optional) s3 KeyManagementServiceSpec (Optional) The settings for supporting AWS-SSE:S3 with RGW ObjectStoreSpec ( Appears on: CephObjectStore ) ObjectStoreSpec represent the spec of a pool Field Description metadataPool PoolSpec (Optional) The metadata pool settings dataPool PoolSpec (Optional) The data pool settings preservePoolsOnDelete bool (Optional) Preserve pools on object store deletion gateway GatewaySpec (Optional) The rgw pod info zone ZoneSpec (Optional) The multisite info healthCheck ObjectHealthCheckSpec (Optional) The RGW health probes security ObjectStoreSecuritySpec (Optional) Security represents security settings ObjectStoreStatus ( Appears on: CephObjectStore ) ObjectStoreStatus represents the status of a Ceph Object Store resource Field Description phase ConditionType (Optional) message string (Optional) endpoints ObjectEndpoints (Optional) info map[string]string (Optional) conditions []Condition observedGeneration int64 (Optional) ObservedGeneration is the latest generation observed by the controller. 
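As a worked example of the ObjectStoreSpec and GatewaySpec fields above, here is a hedged sketch of a CephObjectStore CR. The store name, pool sizing, and gateway values are illustrative assumptions only; the field names follow the reference entries in this section.

apiVersion: ceph.rook.io/v1
kind: CephObjectStore
metadata:
  name: my-store                # hypothetical name
  namespace: rook-ceph
spec:
  metadataPool:
    failureDomain: host
    replicated:
      size: 3
  dataPool:
    failureDomain: host
    erasureCoded:
      dataChunks: 2
      codingChunks: 1
  preservePoolsOnDelete: false
  gateway:
    port: 80                    # http port the rgw service listens on
    # securePort: 443           # https port; requires sslCertificateRef
    instances: 2                # number of pods in the rgw replicaset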
ObjectStoreUserSpec ( Appears on: CephObjectStoreUser ) ObjectStoreUserSpec represent the spec of an Objectstoreuser Field Description store string (Optional) The store the user will be created in displayName string (Optional) The display name for the ceph users capabilities ObjectUserCapSpec (Optional) quotas ObjectUserQuotaSpec (Optional) ObjectStoreUserStatus ( Appears on: CephObjectStoreUser ) ObjectStoreUserStatus represents the status Ceph Object Store Gateway User Field Description phase string (Optional) info map[string]string (Optional) observedGeneration int64 (Optional) ObservedGeneration is the latest generation observed by the controller. ObjectUserCapSpec ( Appears on: ObjectStoreUserSpec ) Additional admin-level capabilities for the Ceph object store user Field Description user string (Optional) Admin capabilities to read/write Ceph object store users. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities users string (Optional) Admin capabilities to read/write Ceph object store users. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities bucket string (Optional) Admin capabilities to read/write Ceph object store buckets. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities buckets string (Optional) Admin capabilities to read/write Ceph object store buckets. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities metadata string (Optional) Admin capabilities to read/write Ceph object store metadata. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities usage string (Optional) Admin capabilities to read/write Ceph object store usage. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities zone string (Optional) Admin capabilities to read/write Ceph object store zones. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities roles string (Optional) Admin capabilities to read/write roles for user. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities info string (Optional) Admin capabilities to read/write information about the user. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities amz-cache string (Optional) Add capabilities for user to send request to RGW Cache API header. Documented in https://docs.ceph.com/en/quincy/radosgw/rgw-cache/#cache-api bilog string (Optional) Add capabilities for user to change bucket index logging. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities mdlog string (Optional) Add capabilities for user to change metadata logging. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities datalog string (Optional) Add capabilities for user to change data logging. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities user-policy string (Optional) Add capabilities for user to change user policies. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities oidc-provider string (Optional) Add capabilities for user to change oidc provider. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities ratelimit string (Optional) Add capabilities for user to set rate limiter for user and bucket. 
Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities ObjectUserQuotaSpec ( Appears on: ObjectStoreUserSpec ) ObjectUserQuotaSpec can be used to set quotas for the object store user to limit their usage. See the Ceph docs for more info. Field Description maxBuckets int (Optional) Maximum bucket limit for the ceph user maxSize k8s.io/apimachinery/pkg/api/resource.Quantity (Optional) Maximum size limit of all objects across all the user\u2019s buckets See https://pkg.go.dev/k8s.io/apimachinery/pkg/api/resource#Quantity for more info. maxObjects int64 (Optional) Maximum number of objects across all the user\u2019s buckets ObjectZoneGroupSpec ( Appears on: CephObjectZoneGroup ) ObjectZoneGroupSpec represents the spec of an ObjectZoneGroup Field Description realm string The name of the realm the zone group belongs to ObjectZoneSpec ( Appears on: CephObjectZone ) ObjectZoneSpec represents the spec of an ObjectZone Field Description zoneGroup string The name of the zone group the zone belongs to metadataPool PoolSpec The metadata pool settings dataPool PoolSpec The data pool settings customEndpoints []string (Optional) If this zone cannot be accessed from other peer Ceph clusters via the ClusterIP Service endpoint created by Rook, you must set this to the externally reachable endpoint(s). You may include the port in the definition. For example: \u201c https://my-object-store.my-domain.net:443\u201d . In many cases, you should set this to the endpoint of the ingress resource that makes the CephObjectStore associated with this CephObjectStoreZone reachable to peer clusters. The list can have one or more endpoints pointing to different RGW servers in the zone. If a CephObjectStore endpoint is omitted from this list, that object store\u2019s gateways will not receive multisite replication data (see CephObjectStore.spec.gateway.disableMultisiteSyncTraffic). 
preservePoolsOnDelete bool (Optional) Preserve pools on object zone deletion PeerRemoteSpec ( Appears on: FilesystemMirrorInfoPeerSpec ) Field Description client_name string (Optional) ClientName is cephx name cluster_name string (Optional) ClusterName is the name of the cluster fs_name string (Optional) FsName is the filesystem name PeerStatSpec ( Appears on: FilesystemMirrorInfoPeerSpec ) PeerStatSpec are the mirror stat with a given peer Field Description failure_count int (Optional) FailureCount is the number of mirroring failure recovery_count int (Optional) RecoveryCount is the number of recovery attempted after failures PeersSpec ( Appears on: PoolMirroringInfo ) PeersSpec contains peer details Field Description uuid string (Optional) UUID is the peer UUID direction string (Optional) Direction is the peer mirroring direction site_name string (Optional) SiteName is the current site name mirror_uuid string (Optional) MirrorUUID is the mirror UUID client_name string (Optional) ClientName is the CephX user used to connect to the peer Placement ( Appears on: CephCOSIDriverSpec , FilesystemMirroringSpec , GaneshaServerSpec , GatewaySpec , MetadataServerSpec , RBDMirroringSpec , StorageClassDeviceSet ) Placement is the placement for an object Field Description nodeAffinity Kubernetes core/v1.NodeAffinity (Optional) NodeAffinity is a group of node affinity scheduling rules podAffinity Kubernetes core/v1.PodAffinity (Optional) PodAffinity is a group of inter pod affinity scheduling rules podAntiAffinity Kubernetes core/v1.PodAntiAffinity (Optional) PodAntiAffinity is a group of inter pod anti affinity scheduling rules tolerations []Kubernetes core/v1.Toleration (Optional) The pod this Toleration is attached to tolerates any taint that matches the triple using the matching operator topologySpreadConstraints []Kubernetes core/v1.TopologySpreadConstraint (Optional) TopologySpreadConstraint specifies how to spread matching pods among the given topology PlacementSpec ( map[github.com/rook/rook/pkg/apis/ceph.rook.io/v1.KeyType]github.com/rook/rook/pkg/apis/ceph.rook.io/v1.Placement alias) ( Appears on: ClusterSpec ) PlacementSpec is the placement for core ceph daemons part of the CephCluster CRD PoolMirroringInfo ( Appears on: MirroringInfoSpec ) PoolMirroringInfo is the mirroring info of a given pool Field Description mode string (Optional) Mode is the mirroring mode site_name string (Optional) SiteName is the current site name peers []PeersSpec (Optional) Peers are the list of peer sites connected to that cluster PoolMirroringStatus ( Appears on: MirroringStatusSpec ) PoolMirroringStatus is the pool mirror status Field Description summary PoolMirroringStatusSummarySpec (Optional) Summary is the mirroring status summary PoolMirroringStatusSummarySpec ( Appears on: PoolMirroringStatus ) PoolMirroringStatusSummarySpec is the summary output of the command Field Description health string (Optional) Health is the mirroring health daemon_health string (Optional) DaemonHealth is the health of the mirroring daemon image_health string (Optional) ImageHealth is the health of the mirrored image states StatesSpec (Optional) States is the various state for all mirrored images PoolSpec ( Appears on: FilesystemSpec , NamedBlockPoolSpec , NamedPoolSpec , ObjectStoreSpec , ObjectZoneSpec ) PoolSpec represents the spec of ceph pool Field Description failureDomain string (Optional) The failure domain: osd/host/(region or zone if available) - technically also any type in the crush map crushRoot string (Optional) The 
root of the crush hierarchy utilized by the pool deviceClass string (Optional) The device class the OSD should set to for use in the pool compressionMode string (Optional) DEPRECATED: use Parameters instead, e.g., Parameters[\u201ccompression_mode\u201d] = \u201cforce\u201d The inline compression mode in Bluestore OSD to set to (options are: none, passive, aggressive, force) Do NOT set a default value for kubebuilder as this will override the Parameters replicated ReplicatedSpec (Optional) The replication settings erasureCoded ErasureCodedSpec (Optional) The erasure code settings parameters map[string]string (Optional) Parameters is a list of properties to enable on a given pool enableRBDStats bool EnableRBDStats is used to enable gathering of statistics for all RBD images in the pool mirroring MirroringSpec The mirroring settings statusCheck MirrorHealthCheckSpec The mirroring statusCheck quotas QuotaSpec (Optional) The quota settings PriorityClassNamesSpec ( map[github.com/rook/rook/pkg/apis/ceph.rook.io/v1.KeyType]string alias) ( Appears on: ClusterSpec ) PriorityClassNamesSpec is a map of priority class names to be assigned to components ProbeSpec ( Appears on: MetadataServerSpec , ObjectHealthCheckSpec ) ProbeSpec is a wrapper around Probe so it can be enabled or disabled for a Ceph daemon Field Description disabled bool (Optional) Disabled determines whether the probe is disabled or not probe Kubernetes core/v1.Probe (Optional) Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. PullSpec ( Appears on: ObjectRealmSpec ) PullSpec represents the pulling specification of a Ceph Object Storage Gateway Realm Field Description endpoint string QuotaSpec ( Appears on: PoolSpec ) QuotaSpec represents the spec for quotas in a pool Field Description maxBytes uint64 (Optional) MaxBytes represents the quota in bytes Deprecated in favor of MaxSize maxSize string (Optional) MaxSize represents the quota in bytes as a string maxObjects uint64 (Optional) MaxObjects represents the quota in objects RBDMirroringSpec ( Appears on: CephRBDMirror ) RBDMirroringSpec represents the specification of an RBD mirror daemon Field Description count int Count represents the number of rbd mirror instances to run peers MirroringPeerSpec (Optional) Peers represents the peers spec placement Placement (Optional) The affinity to place the rbd mirror pods (default is to place on any available node) annotations Annotations (Optional) The annotations-related configuration to add/set on each Pod related object. labels Labels (Optional) The labels-related configuration to add/set on each Pod related object. resources Kubernetes core/v1.ResourceRequirements (Optional) The resource requirements for the rbd mirror pods priorityClassName string (Optional) PriorityClassName sets priority class on the rbd mirror pods RGWServiceSpec ( Appears on: GatewaySpec ) RGWServiceSpec represents the spec for the RGW service Field Description annotations Annotations The annotations-related configuration to add/set on each rgw service. 
nullable optional ReplicatedSpec ( Appears on: PoolSpec ) ReplicatedSpec represents the spec for replication in a pool Field Description size uint Size - Number of copies per object in a replicated storage pool, including the object itself (required for replicated pool type) targetSizeRatio float64 (Optional) TargetSizeRatio gives a hint (%) to Ceph in terms of expected consumption of the total cluster capacity requireSafeReplicaSize bool (Optional) RequireSafeReplicaSize if false allows you to set replica 1 replicasPerFailureDomain uint (Optional) ReplicasPerFailureDomain the number of replicas in the specified failure domain subFailureDomain string (Optional) SubFailureDomain the name of the sub-failure domain hybridStorage HybridStorageSpec (Optional) HybridStorage represents hybrid storage tier settings ResourceSpec ( map[string]k8s.io/api/core/v1.ResourceRequirements alias) ( Appears on: ClusterSpec ) ResourceSpec is a collection of ResourceRequirements that describes the compute resource requirements SSSDSidecar ( Appears on: SSSDSpec ) SSSDSidecar represents configuration when SSSD is run in a sidecar. Field Description image string Image defines the container image that should be used for the SSSD sidecar. sssdConfigFile SSSDSidecarConfigFile (Optional) SSSDConfigFile defines where the SSSD configuration should be sourced from. The config file will be placed into /etc/sssd/sssd.conf . If this is left empty, Rook will not add the file. This allows you to manage the sssd.conf file yourself however you wish. For example, you may build it into your custom Ceph container image or use the Vault agent injector to securely add the file via annotations on the CephNFS spec (passed to the NFS server pods). additionalFiles []SSSDSidecarAdditionalFile (Optional) AdditionalFiles defines any number of additional files that should be mounted into the SSSD sidecar. These files may be referenced by the sssd.conf config file. resources Kubernetes core/v1.ResourceRequirements (Optional) Resources allow specifying resource requests/limits on the SSSD sidecar container. debugLevel int (Optional) DebugLevel sets the debug level for SSSD. If unset or set to 0, Rook does nothing. Otherwise, this may be a value between 1 and 10. See SSSD docs for more info: https://sssd.io/troubleshooting/basics.html#sssd-debug-logs SSSDSidecarAdditionalFile ( Appears on: SSSDSidecar ) SSSDSidecarAdditionalFile represents the source from which additional files for the SSSD configuration should come and be made available. Field Description subPath string SubPath defines the sub-path in /etc/sssd/rook-additional/ where the additional file(s) will be placed. Each subPath definition must be unique and must not contain \u2018:\u2019. volumeSource ConfigFileVolumeSource VolumeSource accepts a pared down version of the standard Kubernetes VolumeSource for the additional file(s) like what is normally used to configure Volumes for a Pod. For example, a ConfigMap, Secret, or HostPath. Each VolumeSource adds one or more additional files to the SSSD sidecar container in the /etc/sssd/rook-additional/ directory. Be aware that some files may need to have a specific file mode like 0600 due to requirements by SSSD for some files. For example, CA or TLS certificates. SSSDSidecarConfigFile ( Appears on: SSSDSidecar ) SSSDSidecarConfigFile represents the source(s) from which the SSSD configuration should come. 
Field Description volumeSource ConfigFileVolumeSource VolumeSource accepts a pared down version of the standard Kubernetes VolumeSource for the SSSD configuration file like what is normally used to configure Volumes for a Pod. For example, a ConfigMap, Secret, or HostPath. There are two requirements for the source\u2019s content: 1. The config file must be mountable via subPath: sssd.conf . For example, in a ConfigMap, the data item must be named sssd.conf , or items must be defined to select the key and give it path sssd.conf . A HostPath directory must have the sssd.conf file. 2. The volume or config file must have mode 0600. SSSDSpec ( Appears on: NFSSecuritySpec ) SSSDSpec represents configuration for System Security Services Daemon (SSSD). Field Description sidecar SSSDSidecar (Optional) Sidecar tells Rook to run SSSD in a sidecar alongside the NFS-Ganesha server in each NFS pod. SanitizeDataSourceProperty ( string alias) ( Appears on: SanitizeDisksSpec ) SanitizeDataSourceProperty represents a sanitizing data source Value Description \"random\" SanitizeDataSourceRandom uses `shred\u2019s default entropy source \"zero\" SanitizeDataSourceZero uses /dev/zero as sanitize source SanitizeDisksSpec ( Appears on: CleanupPolicySpec ) SanitizeDisksSpec represents a disk sanitizing specification Field Description method SanitizeMethodProperty (Optional) Method is the method we use to sanitize disks dataSource SanitizeDataSourceProperty (Optional) DataSource is the data source to use to sanitize the disk with iteration int32 (Optional) Iteration is the number of pass to apply the sanitizing SanitizeMethodProperty ( string alias) ( Appears on: SanitizeDisksSpec ) SanitizeMethodProperty represents a disk sanitizing method Value Description \"complete\" SanitizeMethodComplete will sanitize everything on the disk \"quick\" SanitizeMethodQuick will sanitize metadata only on the disk SecuritySpec ( Appears on: ClusterSpec , ObjectStoreSecuritySpec ) SecuritySpec is security spec to include various security items such as kms Field Description kms KeyManagementServiceSpec (Optional) KeyManagementService is the main Key Management option keyRotation KeyRotationSpec (Optional) KeyRotation defines options for Key Rotation. 
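To illustrate the SecuritySpec, KeyManagementServiceSpec, and KeyRotationSpec fields above, here is a minimal sketch of a security block as it might appear under a cluster or object store spec. The Vault-style connectionDetails keys, the secret name, and the rotation schedule are assumptions for illustration only; consult your KMS provider's documentation for the exact keys it expects.

security:
  kms:
    connectionDetails:                   # KMS connection details (address, port, etc.)
      KMS_PROVIDER: vault                # assumed provider key/value
      VAULT_ADDR: https://vault.default.svc:8200
    tokenSecretName: rook-vault-token    # Kubernetes Secret containing the KMS token (assumed name)
  keyRotation:
    enabled: true
    schedule: "0 3 * * 0"                # cron schedule for key rotation (assumed weekly)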
Selection ( Appears on: Node , StorageScopeSpec ) Field Description useAllDevices bool (Optional) Whether to consume all the storage devices found on a machine deviceFilter string (Optional) A regular expression to allow more fine-grained selection of devices on nodes across the cluster devicePathFilter string (Optional) A regular expression to allow more fine-grained selection of devices with path names devices []Device (Optional) List of devices to use as storage devices volumeClaimTemplates []Kubernetes core/v1.PersistentVolumeClaim (Optional) PersistentVolumeClaims to use as storage SnapshotSchedule ( Appears on: SnapshotSchedulesSpec ) SnapshotSchedule is a schedule Field Description interval string (Optional) Interval is the interval at which snapshots will be taken start_time string (Optional) StartTime is the snapshot starting time SnapshotScheduleRetentionSpec ( Appears on: FSMirroringSpec ) SnapshotScheduleRetentionSpec is a retention policy Field Description path string (Optional) Path is the path to snapshot duration string (Optional) Duration represents the retention duration for a snapshot SnapshotScheduleSpec ( Appears on: FSMirroringSpec , MirroringSpec ) SnapshotScheduleSpec represents the snapshot scheduling settings of a mirrored pool Field Description path string (Optional) Path is the path to snapshot, only valid for CephFS interval string (Optional) Interval represents the periodicity of the snapshot. startTime string (Optional) StartTime indicates when to start the snapshot SnapshotScheduleStatusSpec ( Appears on: CephBlockPoolStatus ) SnapshotScheduleStatusSpec is the status of the snapshot schedule Field Description snapshotSchedules []SnapshotSchedulesSpec (Optional) SnapshotSchedules is the list of snapshots scheduled lastChecked string (Optional) LastChecked is the last time the status was checked lastChanged string (Optional) LastChanged is the last time the status changed details string (Optional) Details contains potential status errors SnapshotSchedulesSpec ( Appears on: SnapshotScheduleStatusSpec ) SnapshotSchedulesSpec is the list of snapshots scheduled for images in a pool Field Description pool string (Optional) Pool is the pool name namespace string (Optional) Namespace is the RADOS namespace the image is part of image string (Optional) Image is the mirrored image items []SnapshotSchedule (Optional) Items is the list of scheduled times for a given snapshot StatesSpec ( Appears on: PoolMirroringStatusSummarySpec ) StatesSpec is the rbd image mirroring state Field Description starting_replay int (Optional) StartingReplay is when the replay of the mirroring journal starts replaying int (Optional) Replaying is when the replay of the mirroring journal is on-going syncing int (Optional) Syncing is when the image is syncing stopping_replay int (Optional) StopReplaying is when the replay of the mirroring journal stops stopped int (Optional) Stopped is when the mirroring state is stopped unknown int (Optional) Unknown is when the mirroring state is unknown error int (Optional) Error is when the mirroring state is errored Status ( Appears on: CephBucketNotification , CephFilesystemMirror , CephNFS , CephObjectRealm , CephObjectZone , CephObjectZoneGroup , CephRBDMirror ) Status represents the status of an object Field Description phase string (Optional) observedGeneration int64 (Optional) ObservedGeneration is the latest generation observed by the controller. 
conditions []Condition StatusConditionGetter A StatusConditionGetter allows getting a pointer to an object\u2019s conditions. StorageClassDeviceSet ( Appears on: StorageScopeSpec ) StorageClassDeviceSet is a storage class device set Field Description name string Name is a unique identifier for the set count int Count is the number of devices in this set resources Kubernetes core/v1.ResourceRequirements (Optional) placement Placement (Optional) preparePlacement Placement (Optional) config map[string]string (Optional) Provider-specific device configuration volumeClaimTemplates []Kubernetes core/v1.PersistentVolumeClaim VolumeClaimTemplates is a list of PVC templates for the underlying storage devices portable bool (Optional) Portable represents OSD portability across the hosts tuneDeviceClass bool (Optional) TuneSlowDeviceClass Tune the OSD when running on a slow Device Class tuneFastDeviceClass bool (Optional) TuneFastDeviceClass Tune the OSD when running on a fast Device Class schedulerName string (Optional) Scheduler name for OSD pod placement encrypted bool (Optional) Whether to encrypt the deviceSet StorageScopeSpec ( Appears on: ClusterSpec ) Field Description nodes []Node (Optional) useAllNodes bool (Optional) onlyApplyOSDPlacement bool (Optional) config map[string]string (Optional) Selection Selection (Members of Selection are embedded into this type.) storageClassDeviceSets []StorageClassDeviceSet (Optional) store OSDStore (Optional) StoreType ( string alias) Value Description \"bluestore\" StoreTypeBlueStore is the bluestore backend storage for OSDs \"bluestore-rdr\" StoreTypeBlueStoreRDR is the bluestore-rdr backed storage for OSDs StretchClusterSpec ( Appears on: MonSpec ) StretchClusterSpec represents the specification of a stretched Ceph Cluster Field Description failureDomainLabel string (Optional) FailureDomainLabel the failure domain name (e,g: zone) subFailureDomain string (Optional) SubFailureDomain is the failure domain within a zone zones []MonZoneSpec (Optional) Zones is the list of zones TopicEndpointSpec ( Appears on: BucketTopicSpec ) TopicEndpointSpec contains exactly one of the endpoint specs of a Bucket Topic Field Description http HTTPEndpointSpec (Optional) Spec of HTTP endpoint amqp AMQPEndpointSpec (Optional) Spec of AMQP endpoint kafka KafkaEndpointSpec (Optional) Spec of Kafka endpoint ZoneSpec ( Appears on: ObjectStoreSpec ) ZoneSpec represents a Ceph Object Store Gateway Zone specification Field Description name string RGW Zone the Object Store is in Generated with gen-crd-api-reference-docs .","title":"Specification"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/","text":"Rook allows creation and customization of storage pools through the custom resource definitions (CRDs). The following settings are available for pools. Examples \u00b6 Replicated \u00b6 For optimal performance, while also adding redundancy, this sample will configure Ceph to make three full copies of the data on multiple nodes. Note This sample requires at least 1 OSD per node , with each OSD located on 3 different nodes . Each OSD must be located on a different node, because the failureDomain is set to host and the replicated.size is set to 3 . 1 2 3 4 5 6 7 8 9 10 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : failureDomain : host replicated : size : 3 deviceClass : hdd Hybrid Storage Pools \u00b6 Hybrid storage is a combination of two different storage tiers. For example, SSD and HDD. 
This helps to improve the read performance of cluster by placing, say, 1st copy of data on the higher performance tier (SSD or NVME) and remaining replicated copies on lower cost tier (HDDs). WARNING Hybrid storage pools are likely to suffer from lower availability if a node goes down. The data across the two tiers may actually end up on the same node, instead of being spread across unique nodes (or failure domains) as expected. Instead of using hybrid pools, consider configuring primary affinity from the toolbox. 1 2 3 4 5 6 7 8 9 10 11 12 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : failureDomain : host replicated : size : 3 hybridStorage : primaryDeviceClass : ssd secondaryDeviceClass : hdd Important The device classes primaryDeviceClass and secondaryDeviceClass must have at least one OSD associated with them or else the pool creation will fail. Erasure Coded \u00b6 This sample will lower the overall storage capacity requirement, while also adding redundancy by using erasure coding . Note This sample requires at least 3 bluestore OSDs . The OSDs can be located on a single Ceph node or spread across multiple nodes, because the failureDomain is set to osd and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : ecpool namespace : rook-ceph spec : failureDomain : osd erasureCoded : dataChunks : 2 codingChunks : 1 deviceClass : hdd High performance applications typically will not use erasure coding due to the performance overhead of creating and distributing the chunks in the cluster. When creating an erasure-coded pool, it is highly recommended to create the pool when you have bluestore OSDs in your cluster (see the OSD configuration settings . Filestore OSDs have limitations that are unsafe and lower performance. Mirroring \u00b6 RADOS Block Device (RBD) mirroring is a process of asynchronous replication of Ceph block device images between two or more Ceph clusters. Mirroring ensures point-in-time consistent replicas of all changes to an image, including reads and writes, block device resizing, snapshots, clones and flattening. It is generally useful when planning for Disaster Recovery. Mirroring is for clusters that are geographically distributed and stretching a single cluster is not possible due to high latencies. The following will enable mirroring of the pool at the image level: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : replicated : size : 3 mirroring : enabled : true mode : image # schedule(s) of snapshot snapshotSchedules : - interval : 24h # daily snapshots startTime : 14:00:00-05:00 Once mirroring is enabled, Rook will by default create its own bootstrap peer token so that it can be used by another cluster. The bootstrap peer token can be found in a Kubernetes Secret. 
The name of the Secret is present in the Status field of the CephBlockPool CR: 1 2 3 status : info : rbdMirrorBootstrapPeerSecretName : pool-peer-token-replicapool This secret can then be fetched like so: 1 2 kubectl get secret -n rook-ceph pool-peer-token-replicapool -o jsonpath='{.data.token}'|base64 -d eyJmc2lkIjoiOTFlYWUwZGQtMDZiMS00ZDJjLTkxZjMtMTMxMWM5ZGYzODJiIiwiY2xpZW50X2lkIjoicmJkLW1pcnJvci1wZWVyIiwia2V5IjoiQVFEN1psOWZ3V1VGRHhBQWdmY0gyZi8xeUhYeGZDUTU5L1N0NEE9PSIsIm1vbl9ob3N0IjoiW3YyOjEwLjEwMS4xOC4yMjM6MzMwMCx2MToxMC4xMDEuMTguMjIzOjY3ODldIn0= The secret must be decoded. The result will be another base64 encoded blob that you will import in the destination cluster: 1 external-cluster-console # rbd mirror pool peer bootstrap import  See the official rbd mirror documentation on how to add a bootstrap peer . Data spread across subdomains \u00b6 Imagine the following topology with datacenters containing racks and then hosts: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 . \u251c\u2500\u2500 datacenter-1 \u2502 \u251c\u2500\u2500 rack-1 \u2502 \u2502 \u251c\u2500\u2500 host-1 \u2502 \u2502 \u251c\u2500\u2500 host-2 \u2502 \u2514\u2500\u2500 rack-2 \u2502 \u251c\u2500\u2500 host-3 \u2502 \u251c\u2500\u2500 host-4 \u2514\u2500\u2500 datacenter-2 \u251c\u2500\u2500 rack-3 \u2502 \u251c\u2500\u2500 host-5 \u2502 \u251c\u2500\u2500 host-6 \u2514\u2500\u2500 rack-4 \u251c\u2500\u2500 host-7 \u2514\u2500\u2500 host-8 As an administrator I would like to place 4 copies across both datacenter where each copy inside a datacenter is across a rack. This can be achieved by the following: 1 2 3 4 5 6 7 8 9 10 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : replicated : size : 4 replicasPerFailureDomain : 2 subFailureDomain : rack Pool Settings \u00b6 Metadata \u00b6 name : The name of the pool to create. namespace : The namespace of the Rook cluster where the pool is created. Spec \u00b6 replicated : Settings for a replicated pool. If specified, erasureCoded settings must not be specified. size : The desired number of copies to make of the data in the pool. requireSafeReplicaSize : set to false if you want to create a pool with size 1, setting pool size 1 could lead to data loss without recovery. Make sure you are ABSOLUTELY CERTAIN that is what you want. replicasPerFailureDomain : Sets up the number of replicas to place in a given failure domain. For instance, if the failure domain is a datacenter (cluster is stretched) then you will have 2 replicas per datacenter where each replica ends up on a different host. This gives you a total of 4 replicas and for this, the size must be set to 4. The default is 1. subFailureDomain : Name of the CRUSH bucket representing a sub-failure domain. In a stretched configuration this option represent the \"last\" bucket where replicas will end up being written. Imagine the cluster is stretched across two datacenters, you can then have 2 copies per datacenter and each copy on a different CRUSH bucket. The default is \"host\". erasureCoded : Settings for an erasure-coded pool. If specified, replicated settings must not be specified. See below for more details on erasure coding . dataChunks : Number of chunks to divide the original object into codingChunks : Number of coding chunks to generate failureDomain : The failure domain across which the data will be spread. This can be set to a value of either osd or host , with host being the default setting. A failure domain can also be set to a different type (e.g. 
rack ), if the OSDs are created on nodes with the supported topology labels . If the failureDomain is changed on the pool, the operator will create a new CRUSH rule and update the pool. If a replicated pool of size 3 is configured and the failureDomain is set to host , all three copies of the replicated data will be placed on OSDs located on 3 different Ceph hosts. This case is guaranteed to tolerate a failure of two hosts without a loss of data. Similarly, a failure domain set to osd , can tolerate a loss of two OSD devices. If erasure coding is used, the data and coding chunks are spread across the configured failure domain. Caution Neither Rook, nor Ceph, prevent the creation of a cluster where the replicated data (or Erasure Coded chunks) can be written safely. By design, Ceph will delay checking for suitable OSDs until a write request is made and this write can hang if there are not sufficient OSDs to satisfy the request. deviceClass : Sets up the CRUSH rule for the pool to distribute data only on the specified device class. If left empty or unspecified, the pool will use the cluster's default CRUSH root, which usually distributes data over all OSDs, regardless of their class. If deviceClass is specified on any pool, ensure that it is added to every pool in the cluster, otherwise Ceph will warn about pools with overlapping roots. crushRoot : The root in the crush map to be used by the pool. If left empty or unspecified, the default root will be used. Creating a crush hierarchy for the OSDs currently requires the Rook toolbox to run the Ceph tools described here . enableRBDStats : Enables collecting RBD per-image IO statistics by enabling dynamic OSD performance counters. Defaults to false. For more info see the ceph documentation . name : The name of Ceph pools is based on the metadata.name of the CephBlockPool CR. Some built-in Ceph pools require names that are incompatible with K8s resource names. These special pools can be configured by setting this name to override the name of the Ceph pool that is created instead of using the metadata.name for the pool. Only the following pool names are supported: device_health_metrics , .nfs , and .mgr . See the example builtin mgr pool . parameters : Sets any parameters listed to the given pool target_size_ratio: gives a hint (%) to Ceph in terms of expected consumption of the total cluster capacity of a given pool, for more info see the ceph documentation compression_mode : Sets up the pool for inline compression when using a Bluestore OSD. If left unspecified does not setup any compression mode for the pool. Values supported are the same as Bluestore inline compression modes , such as none , passive , aggressive , and force . mirroring : Sets up mirroring of the pool enabled : whether mirroring is enabled on that pool (default: false) mode : mirroring mode to run, possible values are \"pool\" or \"image\" (required). Refer to the mirroring modes Ceph documentation for more details. snapshotSchedules : schedule(s) snapshot at the pool level. One or more schedules are supported. interval : frequency of the snapshots. The interval can be specified in days, hours, or minutes using d, h, m suffix respectively. startTime : optional, determines at what time the snapshot process starts, specified using the ISO 8601 time format. peers : to configure mirroring peers. See the prerequisite RBD Mirror documentation first. secretNames : a list of peers to connect to. Currently only a single peer is supported where a peer represents a Ceph cluster. 
statusCheck : Sets up pool mirroring status mirror : displays the mirroring status disabled : whether to enable or disable pool mirroring status interval : time interval to refresh the mirroring status (default 60s) quotas : Set byte and object quotas. See the ceph documentation for more info. maxSize : quota in bytes as a string with quantity suffixes (e.g. \"10Gi\") maxObjects : quota in objects as an integer Note A value of 0 disables the quota. Add specific pool properties \u00b6 With poolProperties you can set any pool property: 1 2 3 spec : parameters :  :  For instance: 1 2 3 spec : parameters : min_size : 1 Erasure Coding \u00b6 Erasure coding allows you to keep your data safe while reducing the storage overhead. Instead of creating multiple replicas of the data, erasure coding divides the original data into chunks of equal size, then generates extra chunks of that same size for redundancy. For example, if you have an object of size 2MB, the simplest erasure coding with two data chunks would divide the object into two chunks of size 1MB each (data chunks). One more chunk (coding chunk) of size 1MB will be generated. In total, 3MB will be stored in the cluster. The object will be able to suffer the loss of any one of the chunks and still be able to reconstruct the original object. The number of data and coding chunks you choose will depend on your resiliency to loss and how much storage overhead is acceptable in your storage cluster. Here are some examples to illustrate how the number of chunks affects the storage and loss toleration. Data chunks (k) Coding chunks (m) Total storage Losses Tolerated OSDs required 2 1 1.5x 1 3 2 2 2x 2 4 4 2 1.5x 2 6 16 4 1.25x 4 20 The failureDomain must be also be taken into account when determining the number of chunks. The failure domain determines the level in the Ceph CRUSH hierarchy where the chunks must be uniquely distributed. This decision will impact whether node losses or disk losses are tolerated. There could also be performance differences of placing the data across nodes or osds. host : All chunks will be placed on unique hosts osd : All chunks will be placed on unique OSDs If you do not have a sufficient number of hosts or OSDs for unique placement the pool can be created, writing to the pool will hang. Rook currently only configures two levels in the CRUSH map. It is also possible to configure other levels such as rack with by adding topology labels to the nodes.","title":"CephBlockPool CRD"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#examples","text":"","title":"Examples"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#replicated","text":"For optimal performance, while also adding redundancy, this sample will configure Ceph to make three full copies of the data on multiple nodes. Note This sample requires at least 1 OSD per node , with each OSD located on 3 different nodes . Each OSD must be located on a different node, because the failureDomain is set to host and the replicated.size is set to 3 . 1 2 3 4 5 6 7 8 9 10 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : failureDomain : host replicated : size : 3 deviceClass : hdd","title":"Replicated"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#hybrid-storage-pools","text":"Hybrid storage is a combination of two different storage tiers. For example, SSD and HDD. 
This helps to improve the read performance of cluster by placing, say, 1st copy of data on the higher performance tier (SSD or NVME) and remaining replicated copies on lower cost tier (HDDs). WARNING Hybrid storage pools are likely to suffer from lower availability if a node goes down. The data across the two tiers may actually end up on the same node, instead of being spread across unique nodes (or failure domains) as expected. Instead of using hybrid pools, consider configuring primary affinity from the toolbox. 1 2 3 4 5 6 7 8 9 10 11 12 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : failureDomain : host replicated : size : 3 hybridStorage : primaryDeviceClass : ssd secondaryDeviceClass : hdd Important The device classes primaryDeviceClass and secondaryDeviceClass must have at least one OSD associated with them or else the pool creation will fail.","title":"Hybrid Storage Pools"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#erasure-coded","text":"This sample will lower the overall storage capacity requirement, while also adding redundancy by using erasure coding . Note This sample requires at least 3 bluestore OSDs . The OSDs can be located on a single Ceph node or spread across multiple nodes, because the failureDomain is set to osd and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : ecpool namespace : rook-ceph spec : failureDomain : osd erasureCoded : dataChunks : 2 codingChunks : 1 deviceClass : hdd High performance applications typically will not use erasure coding due to the performance overhead of creating and distributing the chunks in the cluster. When creating an erasure-coded pool, it is highly recommended to create the pool when you have bluestore OSDs in your cluster (see the OSD configuration settings . Filestore OSDs have limitations that are unsafe and lower performance.","title":"Erasure Coded"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#mirroring","text":"RADOS Block Device (RBD) mirroring is a process of asynchronous replication of Ceph block device images between two or more Ceph clusters. Mirroring ensures point-in-time consistent replicas of all changes to an image, including reads and writes, block device resizing, snapshots, clones and flattening. It is generally useful when planning for Disaster Recovery. Mirroring is for clusters that are geographically distributed and stretching a single cluster is not possible due to high latencies. The following will enable mirroring of the pool at the image level: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : replicated : size : 3 mirroring : enabled : true mode : image # schedule(s) of snapshot snapshotSchedules : - interval : 24h # daily snapshots startTime : 14:00:00-05:00 Once mirroring is enabled, Rook will by default create its own bootstrap peer token so that it can be used by another cluster. The bootstrap peer token can be found in a Kubernetes Secret. 
The name of the Secret is present in the Status field of the CephBlockPool CR: 1 2 3 status : info : rbdMirrorBootstrapPeerSecretName : pool-peer-token-replicapool This secret can then be fetched like so: 1 2 kubectl get secret -n rook-ceph pool-peer-token-replicapool -o jsonpath='{.data.token}'|base64 -d eyJmc2lkIjoiOTFlYWUwZGQtMDZiMS00ZDJjLTkxZjMtMTMxMWM5ZGYzODJiIiwiY2xpZW50X2lkIjoicmJkLW1pcnJvci1wZWVyIiwia2V5IjoiQVFEN1psOWZ3V1VGRHhBQWdmY0gyZi8xeUhYeGZDUTU5L1N0NEE9PSIsIm1vbl9ob3N0IjoiW3YyOjEwLjEwMS4xOC4yMjM6MzMwMCx2MToxMC4xMDEuMTguMjIzOjY3ODldIn0= The secret must be decoded. The result will be another base64 encoded blob that you will import in the destination cluster: 1 external-cluster-console # rbd mirror pool peer bootstrap import  See the official rbd mirror documentation on how to add a bootstrap peer .","title":"Mirroring"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#data-spread-across-subdomains","text":"Imagine the following topology with datacenters containing racks and then hosts: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 . \u251c\u2500\u2500 datacenter-1 \u2502 \u251c\u2500\u2500 rack-1 \u2502 \u2502 \u251c\u2500\u2500 host-1 \u2502 \u2502 \u251c\u2500\u2500 host-2 \u2502 \u2514\u2500\u2500 rack-2 \u2502 \u251c\u2500\u2500 host-3 \u2502 \u251c\u2500\u2500 host-4 \u2514\u2500\u2500 datacenter-2 \u251c\u2500\u2500 rack-3 \u2502 \u251c\u2500\u2500 host-5 \u2502 \u251c\u2500\u2500 host-6 \u2514\u2500\u2500 rack-4 \u251c\u2500\u2500 host-7 \u2514\u2500\u2500 host-8 As an administrator I would like to place 4 copies across both datacenter where each copy inside a datacenter is across a rack. This can be achieved by the following: 1 2 3 4 5 6 7 8 9 10 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : replicated : size : 4 replicasPerFailureDomain : 2 subFailureDomain : rack","title":"Data spread across subdomains"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#pool-settings","text":"","title":"Pool Settings"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#metadata","text":"name : The name of the pool to create. namespace : The namespace of the Rook cluster where the pool is created.","title":"Metadata"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#spec","text":"replicated : Settings for a replicated pool. If specified, erasureCoded settings must not be specified. size : The desired number of copies to make of the data in the pool. requireSafeReplicaSize : set to false if you want to create a pool with size 1, setting pool size 1 could lead to data loss without recovery. Make sure you are ABSOLUTELY CERTAIN that is what you want. replicasPerFailureDomain : Sets up the number of replicas to place in a given failure domain. For instance, if the failure domain is a datacenter (cluster is stretched) then you will have 2 replicas per datacenter where each replica ends up on a different host. This gives you a total of 4 replicas and for this, the size must be set to 4. The default is 1. subFailureDomain : Name of the CRUSH bucket representing a sub-failure domain. In a stretched configuration this option represent the \"last\" bucket where replicas will end up being written. Imagine the cluster is stretched across two datacenters, you can then have 2 copies per datacenter and each copy on a different CRUSH bucket. The default is \"host\". erasureCoded : Settings for an erasure-coded pool. If specified, replicated settings must not be specified. See below for more details on erasure coding . 
dataChunks : Number of chunks to divide the original object into codingChunks : Number of coding chunks to generate failureDomain : The failure domain across which the data will be spread. This can be set to a value of either osd or host , with host being the default setting. A failure domain can also be set to a different type (e.g. rack ), if the OSDs are created on nodes with the supported topology labels . If the failureDomain is changed on the pool, the operator will create a new CRUSH rule and update the pool. If a replicated pool of size 3 is configured and the failureDomain is set to host , all three copies of the replicated data will be placed on OSDs located on 3 different Ceph hosts. This case is guaranteed to tolerate a failure of two hosts without a loss of data. Similarly, a failure domain set to osd can tolerate a loss of two OSD devices. If erasure coding is used, the data and coding chunks are spread across the configured failure domain. Caution Neither Rook nor Ceph prevents the creation of a cluster where the replicated data (or Erasure Coded chunks) cannot be written safely. By design, Ceph will delay checking for suitable OSDs until a write request is made and this write can hang if there are not sufficient OSDs to satisfy the request. deviceClass : Sets up the CRUSH rule for the pool to distribute data only on the specified device class. If left empty or unspecified, the pool will use the cluster's default CRUSH root, which usually distributes data over all OSDs, regardless of their class. If deviceClass is specified on any pool, ensure that it is added to every pool in the cluster, otherwise Ceph will warn about pools with overlapping roots. crushRoot : The root in the crush map to be used by the pool. If left empty or unspecified, the default root will be used. Creating a crush hierarchy for the OSDs currently requires the Rook toolbox to run the Ceph tools described here . enableRBDStats : Enables collecting RBD per-image IO statistics by enabling dynamic OSD performance counters. Defaults to false. For more info see the ceph documentation . name : The name of Ceph pools is based on the metadata.name of the CephBlockPool CR. Some built-in Ceph pools require names that are incompatible with K8s resource names. These special pools can be configured by setting this name to override the name of the Ceph pool that is created instead of using the metadata.name for the pool. Only the following pool names are supported: device_health_metrics , .nfs , and .mgr . See the example builtin mgr pool . parameters : Sets any parameters listed to the given pool target_size_ratio: gives a hint (%) to Ceph in terms of expected consumption of the total cluster capacity of a given pool, for more info see the ceph documentation compression_mode : Sets up the pool for inline compression when using a Bluestore OSD. If left unspecified, no compression mode is set up for the pool. Values supported are the same as Bluestore inline compression modes , such as none , passive , aggressive , and force . mirroring : Sets up mirroring of the pool enabled : whether mirroring is enabled on that pool (default: false) mode : mirroring mode to run, possible values are \"pool\" or \"image\" (required). Refer to the mirroring modes Ceph documentation for more details. snapshotSchedules : snapshot schedule(s) at the pool level. One or more schedules are supported. interval : frequency of the snapshots. The interval can be specified in days, hours, or minutes using the d, h, or m suffix respectively. 
startTime : optional, determines at what time the snapshot process starts, specified using the ISO 8601 time format. peers : to configure mirroring peers. See the prerequisite RBD Mirror documentation first. secretNames : a list of peers to connect to. Currently only a single peer is supported where a peer represents a Ceph cluster. statusCheck : Sets up pool mirroring status mirror : displays the mirroring status disabled : whether to enable or disable pool mirroring status interval : time interval to refresh the mirroring status (default 60s) quotas : Set byte and object quotas. See the ceph documentation for more info. maxSize : quota in bytes as a string with quantity suffixes (e.g. \"10Gi\") maxObjects : quota in objects as an integer Note A value of 0 disables the quota.","title":"Spec"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#add-specific-pool-properties","text":"With poolProperties you can set any pool property: 1 2 3 spec : parameters :  :  For instance: 1 2 3 spec : parameters : min_size : 1","title":"Add specific pool properties"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#erasure-coding","text":"Erasure coding allows you to keep your data safe while reducing the storage overhead. Instead of creating multiple replicas of the data, erasure coding divides the original data into chunks of equal size, then generates extra chunks of that same size for redundancy. For example, if you have an object of size 2MB, the simplest erasure coding with two data chunks would divide the object into two chunks of size 1MB each (data chunks). One more chunk (coding chunk) of size 1MB will be generated. In total, 3MB will be stored in the cluster. The object will be able to suffer the loss of any one of the chunks and still be able to reconstruct the original object. The number of data and coding chunks you choose will depend on your resiliency to loss and how much storage overhead is acceptable in your storage cluster. Here are some examples to illustrate how the number of chunks affects the storage and loss toleration. Data chunks (k) Coding chunks (m) Total storage Losses Tolerated OSDs required 2 1 1.5x 1 3 2 2 2x 2 4 4 2 1.5x 2 6 16 4 1.25x 4 20 The failureDomain must also be taken into account when determining the number of chunks. The failure domain determines the level in the Ceph CRUSH hierarchy where the chunks must be uniquely distributed. This decision will impact whether node losses or disk losses are tolerated. There could also be performance differences when placing the data across nodes or osds. host : All chunks will be placed on unique hosts osd : All chunks will be placed on unique OSDs If you do not have a sufficient number of hosts or OSDs for unique placement, the pool can still be created, but writing to the pool will hang. Rook currently only configures two levels in the CRUSH map. It is also possible to configure other levels such as rack by adding topology labels to the nodes.","title":"Erasure Coding"},{"location":"CRDs/Block-Storage/ceph-block-pool-rados-namespace-crd/","text":"This guide assumes you have created a Rook cluster as explained in the main Quickstart guide RADOS currently uses pools both for data distribution (pools are sharded into PGs, which map to OSDs) and as the granularity for security (capabilities can restrict access by pool). Overloading pools for both purposes makes it hard to do multi-tenancy because it is not a good idea to have a very large number of pools. A namespace would be a division of a pool into separate logical namespaces. 
For more information about BlockPool and namespace refer to the Ceph docs Having multiple namespaces in a pool would allow multiple Kubernetes clusters to share one unique ceph cluster without creating a pool per kubernetes cluster and it will also allow to have tenant isolation between multiple tenants in a single Kubernetes cluster without creating multiple pools for tenants. Rook allows creation of Ceph BlockPool RadosNamespaces through the custom resource definitions (CRDs). Example \u00b6 To get you started, here is a simple example of a CR to create a CephBlockPoolRadosNamespace on the CephBlockPool \"replicapool\". 1 2 3 4 5 6 7 8 apiVersion : ceph.rook.io/v1 kind : CephBlockPoolRadosNamespace metadata : name : namespace-a namespace : rook-ceph # namespace:cluster spec : # The name of the CephBlockPool CR where the namespace is created. blockPoolName : replicapool Settings \u00b6 If any setting is unspecified, a suitable default will be used automatically. Metadata \u00b6 name : The name that will be used for the Ceph BlockPool rados namespace. Spec \u00b6 blockPoolName : The metadata name of the CephBlockPool CR where the rados namespace will be created.","title":"CephBlockPoolRados Namespace CRD"},{"location":"CRDs/Block-Storage/ceph-block-pool-rados-namespace-crd/#example","text":"To get you started, here is a simple example of a CR to create a CephBlockPoolRadosNamespace on the CephBlockPool \"replicapool\". 1 2 3 4 5 6 7 8 apiVersion : ceph.rook.io/v1 kind : CephBlockPoolRadosNamespace metadata : name : namespace-a namespace : rook-ceph # namespace:cluster spec : # The name of the CephBlockPool CR where the namespace is created. blockPoolName : replicapool","title":"Example"},{"location":"CRDs/Block-Storage/ceph-block-pool-rados-namespace-crd/#settings","text":"If any setting is unspecified, a suitable default will be used automatically.","title":"Settings"},{"location":"CRDs/Block-Storage/ceph-block-pool-rados-namespace-crd/#metadata","text":"name : The name that will be used for the Ceph BlockPool rados namespace.","title":"Metadata"},{"location":"CRDs/Block-Storage/ceph-block-pool-rados-namespace-crd/#spec","text":"blockPoolName : The metadata name of the CephBlockPool CR where the rados namespace will be created.","title":"Spec"},{"location":"CRDs/Block-Storage/ceph-rbd-mirror-crd/","text":"Rook allows creation and updating rbd-mirror daemon(s) through the custom resource definitions (CRDs). RBD images can be asynchronously mirrored between two Ceph clusters. For more information about user management and capabilities see the Ceph docs . Creating daemons \u00b6 To get you started, here is a simple example of a CRD to deploy an rbd-mirror daemon. 1 2 3 4 5 6 7 apiVersion : ceph.rook.io/v1 kind : CephRBDMirror metadata : name : my-rbd-mirror namespace : rook-ceph spec : count : 1 Prerequisites \u00b6 This guide assumes you have created a Rook cluster as explained in the main Quickstart guide Settings \u00b6 If any setting is unspecified, a suitable default will be used automatically. RBDMirror metadata \u00b6 name : The name that will be used for the Ceph RBD Mirror daemon. namespace : The Kubernetes namespace that will be created for the Rook cluster. The services, pods, and other resources created by the operator will be added to this namespace. RBDMirror Settings \u00b6 count : The number of rbd mirror instance to run. 
placement : The rbd mirror pods can be given standard Kubernetes placement restrictions with nodeAffinity , tolerations , podAffinity , and podAntiAffinity similar to placement defined for daemons configured by the cluster CRD .. annotations : Key value pair list of annotations to add. labels : Key value pair list of labels to add. resources : The resource requirements for the rbd mirror pods. priorityClassName : The priority class to set on the rbd mirror pods. Configuring mirroring peers \u00b6 Configure mirroring peers individually for each CephBlockPool. Refer to the CephBlockPool documentation for more detail.","title":"CephRBDMirror CRD"},{"location":"CRDs/Block-Storage/ceph-rbd-mirror-crd/#creating-daemons","text":"To get you started, here is a simple example of a CRD to deploy an rbd-mirror daemon. 1 2 3 4 5 6 7 apiVersion : ceph.rook.io/v1 kind : CephRBDMirror metadata : name : my-rbd-mirror namespace : rook-ceph spec : count : 1","title":"Creating daemons"},{"location":"CRDs/Block-Storage/ceph-rbd-mirror-crd/#prerequisites","text":"This guide assumes you have created a Rook cluster as explained in the main Quickstart guide","title":"Prerequisites"},{"location":"CRDs/Block-Storage/ceph-rbd-mirror-crd/#settings","text":"If any setting is unspecified, a suitable default will be used automatically.","title":"Settings"},{"location":"CRDs/Block-Storage/ceph-rbd-mirror-crd/#rbdmirror-metadata","text":"name : The name that will be used for the Ceph RBD Mirror daemon. namespace : The Kubernetes namespace that will be created for the Rook cluster. The services, pods, and other resources created by the operator will be added to this namespace.","title":"RBDMirror metadata"},{"location":"CRDs/Block-Storage/ceph-rbd-mirror-crd/#rbdmirror-settings","text":"count : The number of rbd mirror instance to run. placement : The rbd mirror pods can be given standard Kubernetes placement restrictions with nodeAffinity , tolerations , podAffinity , and podAntiAffinity similar to placement defined for daemons configured by the cluster CRD .. annotations : Key value pair list of annotations to add. labels : Key value pair list of labels to add. resources : The resource requirements for the rbd mirror pods. priorityClassName : The priority class to set on the rbd mirror pods.","title":"RBDMirror Settings"},{"location":"CRDs/Block-Storage/ceph-rbd-mirror-crd/#configuring-mirroring-peers","text":"Configure mirroring peers individually for each CephBlockPool. Refer to the CephBlockPool documentation for more detail.","title":"Configuring mirroring peers"},{"location":"CRDs/Cluster/ceph-cluster-crd/","text":"Rook allows creation and customization of storage clusters through the custom resource definitions (CRDs). There are primarily four different modes in which to create your cluster. Host Storage Cluster : Consume storage from host paths and raw devices PVC Storage Cluster : Dynamically provision storage underneath Rook by specifying the storage class Rook should use to consume storage (via PVCs) Stretched Storage Cluster : Distribute Ceph mons across three zones, while storage (OSDs) is only configured in two zones External Ceph Cluster : Connect your K8s applications to an external Ceph cluster See the separate topics for a description and examples of each of these scenarios. Settings \u00b6 Settings can be specified at the global level to apply to the cluster as a whole, while other settings can be specified at more fine-grained levels. 
If any setting is unspecified, a suitable default will be used automatically. Cluster metadata \u00b6 name : The name that will be used internally for the Ceph cluster. Most commonly the name is the same as the namespace since multiple clusters are not supported in the same namespace. namespace : The Kubernetes namespace that will be created for the Rook cluster. The services, pods, and other resources created by the operator will be added to this namespace. The common scenario is to create a single Rook cluster. If multiple clusters are created, they must not have conflicting devices or host paths. Cluster Settings \u00b6 external : enable : if true , the cluster will not be managed by Rook but via an external entity. This mode is intended to connect to an existing cluster. In this case, Rook will only consume the external cluster. However, Rook will be able to deploy various daemons in Kubernetes such as object gateways, mds and nfs if an image is provided and will refuse otherwise. If this setting is enabled all the other options will be ignored except cephVersion.image and dataDirHostPath . See external cluster configuration . If cephVersion.image is left blank, Rook will refuse the creation of extra CRs like object, file and nfs. cephVersion : The version information for launching the ceph daemons. image : The image used for running the ceph daemons. For example, quay.io/ceph/ceph:v16.2.11 or v17.2.6 . For more details read the container images section . For the latest ceph images, see the Ceph DockerHub . To ensure a consistent version of the image is running across all nodes in the cluster, it is recommended to use a very specific image version. Tags also exist that would give the latest version, but they are only recommended for test environments. For example, the tag v17 will be updated each time a new Quincy build is released. Using the v17 tag is not recommended in production because it may lead to inconsistent versions of the image running across different nodes in the cluster. allowUnsupported : If true , allow an unsupported major version of the Ceph release. Currently pacific and quincy are supported. Future versions such as reef (v18) would require this to be set to true . Should be set to false in production. imagePullPolicy : The image pull policy for the ceph daemon pods. Possible values are Always , IfNotPresent , and Never . The default is IfNotPresent . dataDirHostPath : The path on the host ( hostPath ) where config and data should be stored for each of the services. If the directory does not exist, it will be created. Because this directory persists on the host, it will remain after pods are deleted. Following paths and any of their subpaths must not be used : /etc/ceph , /rook or /var/log/ceph . WARNING : For test scenarios, if you delete a cluster and start a new cluster on the same hosts, the path used by dataDirHostPath must be deleted. Otherwise, stale keys and other config will remain from the previous cluster and the new mons will fail to start. If this value is empty, each pod will get an ephemeral directory to store their config files that is tied to the lifetime of the pod running on that node. More details can be found in the Kubernetes empty dir docs . skipUpgradeChecks : if set to true Rook won't perform any upgrade checks on Ceph daemons during an upgrade. Use this at YOUR OWN RISK , only if you know what you're doing. To understand Rook's upgrade process of Ceph, read the upgrade doc . 
continueUpgradeAfterChecksEvenIfNotHealthy : if set to true Rook will continue the OSD daemon upgrade process even if the PGs are not clean, or continue with the MDS upgrade even if the file system is not healthy. dashboard : Settings for the Ceph dashboard. To view the dashboard in your browser see the dashboard guide . enabled : Whether to enable the dashboard to view cluster status urlPrefix : Allows serving the dashboard under a subpath (useful when you are accessing the dashboard via a reverse proxy) port : Allows changing the default port where the dashboard is served ssl : Whether to serve the dashboard via SSL, ignored on Ceph versions older than 13.2.2 monitoring : Settings for monitoring Ceph using Prometheus. To enable monitoring on your cluster see the monitoring guide . enabled : Whether to enable the prometheus service monitor for an internal cluster. For an external cluster, whether to create an endpoint port for the metrics. Default is false. metricsDisabled : Whether to disable the metrics reported by Ceph. If false, the prometheus mgr module and Ceph exporter are enabled. If true, the prometheus mgr module and Ceph exporter are both disabled. Default is false. externalMgrEndpoints : external cluster manager endpoints externalMgrPrometheusPort : external prometheus manager module port. See external cluster configuration for more details. port : The internal prometheus manager module port where the prometheus mgr module listens. The port may need to be configured when host networking is enabled. interval : The interval for the prometheus module to scrape targets. network : For the network settings for the cluster, refer to the network configuration settings mon : contains mon related options mon settings For more details on the mons and when to choose a number other than 3 , see the mon health doc . mgr : manager top level section count : set the number of ceph managers, between 1 and 2 . The default value is 2. If there are two managers, it is important for all mgr services to point to the active mgr and not the standby mgr. Rook automatically updates the label mgr_role on the mgr pods to be either active or standby . Therefore, services just need to add the label mgr_role=active to their selector to point to the active mgr. This applies to all services that rely on the ceph mgr such as the dashboard or the prometheus metrics collector. modules : the list of Ceph manager modules to enable crashCollector : The settings for crash collector daemon(s). disable : if set to true , the crash collector will not run on any node where a Ceph daemon runs daysToRetain : specifies the number of days to keep crash entries in the Ceph cluster. By default the entries are kept indefinitely. logCollector : The settings for log collector daemon. enabled : if set to true , the log collector will run as a side-car next to each Ceph daemon. The Ceph configuration option log_to_file will be turned on, meaning Ceph daemons will log to files in addition to still logging to the container's stdout. These logs will be rotated. In case a daemon terminates with a segfault, the coredump files will commonly be generated in the /var/lib/systemd/coredump directory on the host, depending on the underlying OS location. (default: true ) periodicity : how often to rotate the daemon's log. (default: 24h). Specified with a time suffix which may be h for hours or d for days. Rotating too often will slightly impact the daemon's performance since the signal briefly interrupts the program. 
annotations : annotations configuration settings labels : labels configuration settings placement : placement configuration settings resources : resources configuration settings priorityClassNames : priority class names configuration settings storage : Storage selection and configuration that will be used across the cluster. Note that these settings can be overridden for specific nodes. useAllNodes : true or false , indicating if all nodes in the cluster should be used for storage according to the cluster level storage selection and configuration values. If individual nodes are specified under the nodes field, then useAllNodes must be set to false . nodes : Names of individual nodes in the cluster that should have their storage included in accordance with either the cluster level configuration specified above or any node specific overrides described in the next section below. useAllNodes must be set to false to use specific nodes and their config. See node settings below. config : Config settings applied to all OSDs on the node unless overridden by devices . See the config settings below. storage selection settings Storage Class Device Sets onlyApplyOSDPlacement : Whether the placement specific for OSDs is merged with the all placement. If false , the OSD placement will be merged with the all placement. If true, the OSD placement will be applied and the all placement will be ignored. The placement for OSDs is computed from several different places depending on the type of OSD: For non-PVCs: placement.all and placement.osd For PVCs: placement.all and inside the storageClassDeviceSets from the placement or preparePlacement disruptionManagement : The section for configuring management of daemon disruptions managePodBudgets : if true , the operator will create and manage PodDisruptionBudgets for OSD, Mon, RGW, and MDS daemons. OSD PDBs are managed dynamically via the strategy outlined in the design . The operator will block eviction of OSDs by default and unblock them safely when drains are detected. osdMaintenanceTimeout : is a duration in minutes that determines how long an entire failureDomain like region/zone/host will be held in noout (in addition to the default DOWN/OUT interval) when it is draining. This is only relevant when managePodBudgets is true . The default value is 30 minutes. removeOSDsIfOutAndSafeToRemove : If true the operator will remove the OSDs that are down and whose data has been restored to other OSDs. In Ceph terms, the OSDs are out and safe-to-destroy when they are removed. cleanupPolicy : cleanup policy settings security : security page for key management configuration Ceph container images \u00b6 Official releases of Ceph Container images are available from Docker Hub . These are general purpose Ceph container with all necessary daemons and dependencies installed. TAG MEANING vRELNUM Latest release in this series (e.g., v17 = Quincy) vRELNUM.Y Latest stable release in this stable series (e.g., v17.2) vRELNUM.Y.Z A specific release (e.g., v17.2.6) vRELNUM.Y.Z-YYYYMMDD A specific build (e.g., v17.2.6-20230410) A specific will contain a specific release of Ceph as well as security fixes from the Operating System. Mon Settings \u00b6 count : Set the number of mons to be started. The number must be between 1 and 9 . The recommended value is most commonly 3 . For highest availability, an odd number of mons should be specified. For higher durability in case of mon loss, an even number can be specified although availability may be lower. 
To maintain quorum a majority of mons must be up. For example, if there are three mons, two must be up. If there are four mons, three must be up. If there are two mons, both must be up. If quorum is lost, see the disaster recovery guide to restore quorum from a single mon. allowMultiplePerNode : Whether to allow the placement of multiple mons on a single node. Default is false for production. Should only be set to true in test environments. volumeClaimTemplate : A PersistentVolumeSpec used by Rook to create PVCs for monitor storage. This field is optional, and when not provided, HostPath volume mounts are used. The current set of fields from template that are used are storageClassName and the storage resource request and limit. The default storage size request for new PVCs is 10Gi . Ensure that associated storage class is configured to use volumeBindingMode: WaitForFirstConsumer . This setting only applies to new monitors that are created when the requested number of monitors increases, or when a monitor fails and is recreated. An example CRD configuration is provided below . failureDomainLabel : The label that is expected on each node where the mons are expected to be deployed. The labels must be found in the list of well-known topology labels . zones : The failure domain names where the Mons are expected to be deployed. There must be at least three zones specified in the list. Each zone can be backed by a different storage class by specifying the volumeClaimTemplate . name : The name of the zone, which is the value of the domain label. volumeClaimTemplate : A PersistentVolumeSpec used by Rook to create PVCs for monitor storage. This field is optional, and when not provided, HostPath volume mounts are used. The current set of fields from template that are used are storageClassName and the storage resource request and limit. The default storage size request for new PVCs is 10Gi . Ensure that associated storage class is configured to use volumeBindingMode: WaitForFirstConsumer . This setting only applies to new monitors that are created when the requested number of monitors increases, or when a monitor fails and is recreated. An example CRD configuration is provided below . stretchCluster : The stretch cluster settings that define the zones (or other failure domain labels) across which to configure the cluster. failureDomainLabel : The label that is expected on each node where the cluster is expected to be deployed. The labels must be found in the list of well-known topology labels . subFailureDomain : With a zone, the data replicas must be spread across OSDs in the subFailureDomain. The default is host . zones : The failure domain names where the Mons and OSDs are expected to be deployed. There must be three zones specified in the list. This element is always named zone even if a non-default failureDomainLabel is specified. The elements have two values: name : The name of the zone, which is the value of the domain label. arbiter : Whether the zone is expected to be the arbiter zone which only runs a single mon. Exactly one zone must be labeled true . volumeClaimTemplate : A PersistentVolumeSpec used by Rook to create PVCs for monitor storage. This field is optional, and when not provided, HostPath volume mounts are used. The current set of fields from template that are used are storageClassName and the storage resource request and limit. The default storage size request for new PVCs is 10Gi . 
Ensure that associated storage class is configured to use volumeBindingMode: WaitForFirstConsumer . This setting only applies to new monitors that are created when the requested number of monitors increases, or when a monitor fails and is recreated. An example CRD configuration is provided below . The two zones that are not the arbiter zone are expected to have OSDs deployed. If these settings are changed in the CRD the operator will update the number of mons during a periodic check of the mon health, which by default is every 45 seconds. To change the defaults that the operator uses to determine the mon health and whether to failover a mon, refer to the health settings . The intervals should be small enough that you have confidence the mons will maintain quorum, while also being long enough to ignore network blips where mons are failed over too often. Mgr Settings \u00b6 You can use the cluster CR to enable or disable any manager module. This can be configured like so: 1 2 3 4 mgr : modules : - name :  enabled : true Some modules will have special configuration to ensure the module is fully functional after being enabled. Specifically: pg_autoscaler : Rook will configure all new pools with PG autoscaling by setting: osd_pool_default_pg_autoscale_mode = on Network Configuration Settings \u00b6 If not specified, the default SDN will be used. Configure the network that will be enabled for the cluster and services. provider : Specifies the network provider that will be used to connect the network interface. You can choose between host , and multus . selectors : List the network selector(s) that will be used associated by a key. ipFamily : Specifies the network stack Ceph daemons should listen on. dualStack : Specifies that Ceph daemon should listen on both IPv4 and IPv6 network stacks. connections : Settings for network connections using Ceph's msgr2 protocol requireMsgr2 : Whether to require communication over msgr2. If true, the msgr v1 port (6789) will be disabled and clients will be required to connect to the Ceph cluster with the v2 port (3300). Requires a kernel that supports msgr2 (kernel 5.11 or CentOS 8.4 or newer). Default is false. encryption : Settings for encryption on the wire to Ceph daemons enabled : Whether to encrypt the data in transit across the wire to prevent eavesdropping the data on the network. The default is false. When encryption is enabled, all communication between clients and Ceph daemons, or between Ceph daemons will be encrypted. When encryption is not enabled, clients still establish a strong initial authentication and data integrity is still validated with a crc check. IMPORTANT : Encryption requires the 5.11 kernel for the latest nbd and cephfs drivers. Alternatively for testing only, set \"mounter: rbd-nbd\" in the rbd storage class, or \"mounter: fuse\" in the cephfs storage class. The nbd and fuse drivers are not recommended in production since restarting the csi driver pod will disconnect the volumes. If this setting is enabled, CephFS volumes also require setting CSI_CEPHFS_KERNEL_MOUNT_OPTIONS to \"ms_mode=secure\" in operator.yaml. compression : enabled : Whether to compress the data in transit across the wire. The default is false. Requires Ceph Quincy (v17) or newer. Also see the kernel requirements above for encryption. Caution Changing networking configuration after a Ceph cluster has been deployed is NOT supported and will result in a non-functioning cluster. Host Networking \u00b6 To use host networking, set provider: host . 
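For illustration only, a minimal sketch of the network section with host networking enabled and all other network settings left at their defaults: 1 2 network : provider : host 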
If the host networking setting is changed in a cluster where mons are already running, the existing mons will remain running with the same network settings with which they were created. To complete the conversion to or from host networking after you update this setting, you will need to failover the mons in order to have mons on the desired network configuration. Multus \u00b6 Rook supports addition of public and cluster network for ceph using Multus The selector keys are required to be public and cluster where each represent: public : client communications with the cluster (reads/writes) cluster : internal Ceph replication network If you want to learn more, please read: Ceph Networking reference . Multus documentation Based on the configuration, the operator will do the following: If only the public selector is specified, all communication will happen on that network 1 2 3 4 network : provider : multus selectors : public : rook-ceph/rook-public-nw If only the cluster selector is specified, the internal cluster traffic* will happen on that network. All other traffic to mons, OSDs, and other daemons will be on the default network. 1 2 3 4 network : provider : multus selectors : cluster : rook-ceph/rook-cluster-nw If both public and cluster selectors are specified the first one will run all the communication network and the second the internal cluster network* 1 2 3 4 5 network : provider : multus selectors : public : rook-ceph/rook-public-nw cluster : rook-ceph/rook-cluster-nw * Internal cluster traffic includes OSD heartbeats, data replication, and data recovery Only OSD pods will have both Public and Cluster networks attached. The rest of the Ceph component pods and CSI pods will only have the Public network attached. Rook Ceph operator will not have any networks attached as it proxies the required commands via a sidecar container in the mgr pod. In order to work, each selector value must match a NetworkAttachmentDefinition object name in Multus. For multus network provider, an already working cluster with Multus networking is required. Network attachment definition that later will be attached to the cluster needs to be created before the Cluster CRD. The Network attachment definitions should be using whereabouts cni. If Rook cannot find the provided Network attachment definition it will fail running the Ceph OSD pods. You can add the Multus network attachment selection annotation selecting the created network attachment definition on selectors . A valid NetworkAttachmentDefinition will look like following: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 apiVersion : \"k8s.cni.cncf.io/v1\" kind : NetworkAttachmentDefinition metadata : name : rook-public-nw spec : config : '{ \"cniVersion\": \"0.3.0\", \"name\": \"public-nad\", \"type\": \"macvlan\", \"master\": \"ens5\", \"mode\": \"bridge\", \"ipam\": { \"type\": \"whereabouts\", \"range\": \"192.168.1.0/24\" } }' Ensure that master matches the network interface of the host that you want to use. IPAM type whereabouts is required because it makes sure that all the pods get a unique IP address from the multus network. The NetworkAttachmentDefinition should be referenced along with the namespace in which it is present like public: / . e.g., the network attachment definition are in default namespace: 1 2 public : default/rook-public-nw cluster : default/rook-cluster-nw 1 2 * This format is required in order to use the NetworkAttachmentDefinition across namespaces. 
* In Openshift, to use a NetworkAttachmentDefinition (NAD) across namespaces, the NAD must be deployed in the `default` namespace. The NAD is then referenced with the namespace: `default/rook-public-nw` Validating Multus configuration \u00b6 We highly recommend validating your Multus configuration before you install Rook. A tool exists to facilitate validating the Multus configuration. After installing the Rook operator and before installing any Custom Resources, run the tool from the operator pod. The tool's CLI is designed to be as helpful as possible. Get help text for the multus validation tool like so: 1 kubectl --namespace rook-ceph exec -it deploy/rook-ceph-operator -- rook multus validation run --help Then, update the args in the multus-validation job template. Minimally, add the NAD names(s) for public and/or cluster as needed and and then, create the job to validate the Multus configuration. If the tool fails, it will suggest what things may be preventing Multus networks from working properly, and it will request the logs and outputs that will help debug issues. Check the logs of the pod created by the job to know the status of the validation test. Known limitations with Multus \u00b6 Daemons leveraging Kubernetes service IPs (Monitors, Managers, Rados Gateways) are not listening on the NAD specified in the selectors . Instead the daemon listens on the default network, however the NAD is attached to the container, allowing the daemon to communicate with the rest of the cluster. There is work in progress to fix this issue in the multus-service repository. At the time of writing it's unclear when this will be supported. IPFamily \u00b6 Provide single-stack IPv4 or IPv6 protocol to assign corresponding addresses to pods and services. This field is optional. Possible inputs are IPv6 and IPv4. Empty value will be treated as IPv4. Kubernetes version should be at least v1.13 to run IPv6. Dual-stack is supported as of ceph Pacific. To turn on dual stack see the network configuration section . Node Settings \u00b6 In addition to the cluster level settings specified above, each individual node can also specify configuration to override the cluster level settings and defaults. If a node does not specify any configuration then it will inherit the cluster level settings. name : The name of the node, which should match its kubernetes.io/hostname label. config : Config settings applied to all OSDs on the node unless overridden by devices . See the config settings below. storage selection settings When useAllNodes is set to true , Rook attempts to make Ceph cluster management as hands-off as possible while still maintaining reasonable data safety. If a usable node comes online, Rook will begin to use it automatically. To maintain a balance between hands-off usability and data safety, Nodes are removed from Ceph as OSD hosts only (1) if the node is deleted from Kubernetes itself or (2) if the node has its taints or affinities modified in such a way that the node is no longer usable by Rook. Any changes to taints or affinities, intentional or unintentional, may affect the data reliability of the Ceph cluster. In order to help protect against this somewhat, deletion of nodes by taint or affinity modifications must be \"confirmed\" by deleting the Rook Ceph operator pod and allowing the operator deployment to restart the pod. 
For production clusters, we recommend that useAllNodes is set to false to prevent the Ceph cluster from suffering reduced data reliability unintentionally due to a user mistake. When useAllNodes is set to false , Rook relies on the user to be explicit about when nodes are added to or removed from the Ceph cluster. Nodes are only added to the Ceph cluster if the node is added to the Ceph cluster resource. Similarly, nodes are only removed if the node is removed from the Ceph cluster resource. Node Updates \u00b6 Nodes can be added and removed over time by updating the Cluster CRD, for example with kubectl -n rook-ceph edit cephcluster rook-ceph . This will bring up your default text editor and allow you to add and remove storage nodes from the cluster. This feature is only available when useAllNodes has been set to false . Storage Selection Settings \u00b6 Below are the settings for host-based cluster. This type of cluster can specify devices for OSDs, both at the cluster and individual node level, for selecting which storage resources will be included in the cluster. useAllDevices : true or false , indicating whether all devices found on nodes in the cluster should be automatically consumed by OSDs. Not recommended unless you have a very controlled environment where you will not risk formatting of devices with existing data. When true , all devices and partitions will be used. Is overridden by deviceFilter if specified. LVM logical volumes are not picked by useAllDevices . deviceFilter : A regular expression for short kernel names of devices (e.g. sda ) that allows selection of devices and partitions to be consumed by OSDs. LVM logical volumes are not picked by deviceFilter .If individual devices have been specified for a node then this filter will be ignored. This field uses golang regular expression syntax . For example: sdb : Only selects the sdb device if found ^sd. : Selects all devices starting with sd ^sd[a-d] : Selects devices starting with sda , sdb , sdc , and sdd if found ^s : Selects all devices that start with s ^[^r] : Selects all devices that do not start with r devicePathFilter : A regular expression for device paths (e.g. /dev/disk/by-path/pci-0:1:2:3-scsi-1 ) that allows selection of devices and partitions to be consumed by OSDs. LVM logical volumes are not picked by devicePathFilter .If individual devices or deviceFilter have been specified for a node then this filter will be ignored. This field uses golang regular expression syntax . For example: ^/dev/sd. : Selects all devices starting with sd ^/dev/disk/by-path/pci-.* : Selects all devices which are connected to PCI bus devices : A list of individual device names belonging to this node to include in the storage cluster. name : The name of the devices and partitions (e.g., sda ). The full udev path can also be specified for devices, partitions, and logical volumes (e.g. /dev/disk/by-id/ata-ST4000DM004-XXXX - this will not change after reboots). config : Device-specific config settings. See the config settings below Host-based cluster supports raw device, partition, and logical volume. Be sure to see the quickstart doc prerequisites for additional considerations. Below are the settings for a PVC-based cluster. storageClassDeviceSets : Explained in Storage Class Device Sets Storage Class Device Sets \u00b6 The following are the settings for Storage Class Device Sets which can be configured to create OSDs that are backed by block mode PVs. name : A name for the set. count : The number of devices in the set. 
resources : The CPU and RAM requests/limits for the devices. (Optional) placement : The placement criteria for the devices. (Optional) Default is no placement criteria. The syntax is the same as for other placement configuration . It supports nodeAffinity , podAffinity , podAntiAffinity and tolerations keys. It is recommended to configure the placement such that the OSDs will be as evenly spread across nodes as possible. At a minimum, anti-affinity should be added so at least one OSD will be placed on each available nodes. However, if there are more OSDs than nodes, this anti-affinity will not be effective. Another placement scheme to consider is to add labels to the nodes in such a way that the OSDs can be grouped on those nodes, create multiple storageClassDeviceSets, and add node affinity to each of the device sets that will place the OSDs in those sets of nodes. Rook will automatically add required nodeAffinity to the OSD daemons to match the topology labels that are found on the nodes where the OSD prepare jobs ran. To ensure data durability, the OSDs are required to run in the same topology that the Ceph CRUSH map expects. For example, if the nodes are labeled with rack topology labels, the OSDs will be constrained to a certain rack. Without the topology labels, Rook will not constrain the OSDs beyond what is required by the PVs, for example to run in the zone where provisioned. See the OSD Topology section for the related labels. preparePlacement : The placement criteria for the preparation of the OSD devices. Creating OSDs is a two-step process and the prepare job may require different placement than the OSD daemons. If the preparePlacement is not specified, the placement will instead be applied for consistent placement for the OSD prepare jobs and OSD deployments. The preparePlacement is only useful for portable OSDs in the device sets. OSDs that are not portable will be tied to the host where the OSD prepare job initially runs. For example, provisioning may require topology spread constraints across zones, but the OSD daemons may require constraints across hosts within the zones. portable : If true , the OSDs will be allowed to move between nodes during failover. This requires a storage class that supports portability (e.g. aws-ebs , but not the local storage provisioner). If false , the OSDs will be assigned to a node permanently. Rook will configure Ceph's CRUSH map to support the portability. tuneDeviceClass : For example, Ceph cannot detect AWS volumes as HDDs from the storage class \"gp2\", so you can improve Ceph performance by setting this to true. tuneFastDeviceClass : For example, Ceph cannot detect Azure disks as SSDs from the storage class \"managed-premium\", so you can improve Ceph performance by setting this to true.. volumeClaimTemplates : A list of PVC templates to use for provisioning the underlying storage devices. resources.requests.storage : The desired capacity for the underlying storage devices. storageClassName : The StorageClass to provision PVCs from. Default would be to use the cluster-default StorageClass. This StorageClass should provide a raw block device, multipath device, or logical volume. Other types are not supported. If you want to use logical volume, please see known issue of OSD on LV-backed PVC volumeMode : The volume mode to be set for the PVC. Which should be Block accessModes : The access mode for the PVC to be bound by OSD. schedulerName : Scheduler name for OSD pod placement. 
(Optional) encrypted : whether to encrypt all the OSDs in a given storageClassDeviceSet OSD Configuration Settings \u00b6 The following storage selection settings are specific to Ceph and do not apply to other backends. All variables are key-value pairs represented as strings. metadataDevice : Name of a device or lvm to use for the metadata of OSDs on each node. Performance can be improved by using a low latency device (such as SSD or NVMe) as the metadata device, while other spinning platter (HDD) devices on a node are used to store data. Provisioning will fail if the user specifies a metadataDevice but that device is not used as a metadata device by Ceph. Notably, ceph-volume will not use a device of the same device class (HDD, SSD, NVMe) as OSD devices for metadata, resulting in this failure. databaseSizeMB : The size in MB of a bluestore database. Include quotes around the size. walSizeMB : The size in MB of a bluestore write ahead log (WAL). Include quotes around the size. deviceClass : The CRUSH device class to use for this selection of storage devices. (By default, if a device's class has not already been set, OSDs will automatically set a device's class to either hdd , ssd , or nvme based on the hardware properties exposed by the Linux kernel.) These storage classes can then be used to select the devices backing a storage pool by specifying them as the value of the pool spec's deviceClass field . initialWeight : The initial OSD weight in TiB units. By default, this value is derived from OSD's capacity. primaryAffinity : The primary-affinity value of an OSD, within range [0, 1] (default: 1 ). osdsPerDevice **: The number of OSDs to create on each device. High performance devices such as NVMe can handle running multiple OSDs. If desired, this can be overridden for each node and each device. encryptedDevice **: Encrypt OSD volumes using dmcrypt (\"true\" or \"false\"). By default this option is disabled. See encryption for more information on encryption in Ceph. crushRoot : The value of the root CRUSH map label. The default is default . Generally, you should not need to change this. However, if any of your topology labels may have the value default , you need to change crushRoot to avoid conflicts, since CRUSH map values need to be unique. Annotations and Labels \u00b6 Annotations and Labels can be specified so that the Rook components will have those annotations / labels added to them. You can set annotations / labels for Rook components for the list of key value pairs: all : Set annotations / labels for all components except clusterMetadata . mgr : Set annotations / labels for MGRs mon : Set annotations / labels for mons osd : Set annotations / labels for OSDs prepareosd : Set annotations / labels for OSD Prepare Jobs monitoring : Set annotations / labels for service monitor crashcollector : Set annotations / labels for crash collectors clusterMetadata : Set annotations only to rook-ceph-mon-endpoints configmap and the rook-ceph-mon and rook-ceph-admin-keyring secrets. These annotations will not be merged with the all annotations. The common usage is for backing up these critical resources with kubed . Note the clusterMetadata annotation will not be merged with the all annotation. When other keys are set, all will be merged together with the specific component. Placement Configuration Settings \u00b6 Placement configuration for the cluster services. It includes the following keys: mgr , mon , arbiter , osd , prepareosd , cleanup , and all . 
Each service will have its placement configuration generated by merging the generic configuration under all with the most specific one (which will override any attributes). In stretch clusters, if the arbiter placement is specified, that placement will only be applied to the arbiter. Neither will the arbiter placement be merged with the all placement to allow the arbiter to be fully independent of other daemon placement. The remaining mons will still use the mon and/or all sections. Note Placement of OSD pods is controlled using the Storage Class Device Set , not the general placement configuration. A Placement configuration is specified (according to the kubernetes PodSpec) as: nodeAffinity : kubernetes NodeAffinity podAffinity : kubernetes PodAffinity podAntiAffinity : kubernetes PodAntiAffinity tolerations : list of kubernetes Toleration topologySpreadConstraints : kubernetes TopologySpreadConstraints If you use labelSelector for osd pods, you must write two rules both for rook-ceph-osd and rook-ceph-osd-prepare like the example configuration . It comes from the design that there are these two pods for an OSD. For more detail, see the osd design doc and the related issue . The Rook Ceph operator creates a Job called rook-ceph-detect-version to detect the full Ceph version used by the given cephVersion.image . The placement from the mon section is used for the Job except for the PodAntiAffinity field. Placement Example \u00b6 To control where various services will be scheduled by kubernetes, use the placement configuration sections below. The example under 'all' would have all services scheduled on kubernetes nodes labeled with 'role=storage-node . Specific node affinity and tolerations that only apply to the mon daemons in this example require the label role=storage-mon-node` and also tolerate the control plane taint. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false # enable the ceph dashboard for viewing cluster status dashboard : enabled : true placement : all : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : role operator : In values : - storage-node mon : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : role operator : In values : - storage-mon-node tolerations : - effect : NoSchedule key : node-role.kubernetes.io/control-plane operator : Exists Cluster-wide Resources Configuration Settings \u00b6 Resources should be specified so that the Rook components are handled after Kubernetes Pod Quality of Service classes . This allows to keep Rook components running when for example a node runs out of memory and the Rook components are not killed depending on their Quality of Service class. You can set resource requests/limits for Rook components through the Resource Requirements/Limits structure in the following keys: mon : Set resource requests/limits for mons osd : Set resource requests/limits for OSDs. This key applies for all OSDs regardless of their device classes. In case of need to apply resource requests/limits for OSDs with particular device class use specific osd keys below. 
If the memory resource is declared Rook will automatically set the OSD configuration osd_memory_target to the same value. This aims to ensure that the actual OSD memory consumption is consistent with the OSD pods' resource declaration. osd- : Set resource requests/limits for OSDs on a specific device class. Rook will automatically detect hdd , ssd , or nvme device classes. Custom device classes can also be set. mgr : Set resource requests/limits for MGRs mgr-sidecar : Set resource requests/limits for the MGR sidecar, which is only created when mgr.count: 2 . The sidecar requires very few resources since it only executes every 15 seconds to query Ceph for the active mgr and update the mgr services if the active mgr changed. prepareosd : Set resource requests/limits for OSD prepare job crashcollector : Set resource requests/limits for crash. This pod runs wherever there is a Ceph pod running. It scrapes for Ceph daemon core dumps and sends them to the Ceph manager crash module so that core dumps are centralized and can be easily listed/accessed. You can read more about the Ceph Crash module . logcollector : Set resource requests/limits for the log collector. When enabled, this container runs as side-car to each Ceph daemons. cleanup : Set resource requests/limits for cleanup job, responsible for wiping cluster's data after uninstall exporter : Set resource requests/limits for Ceph exporter. In order to provide the best possible experience running Ceph in containers, Rook internally recommends minimum memory limits if resource limits are passed. If a user configures a limit or request value that is too low, Rook will still run the pod(s) and print a warning to the operator log. mon : 1024MB mgr : 512MB osd : 2048MB crashcollector : 60MB mgr-sidecar : 100MB limit, 40MB requests prepareosd : no limits (see the note) exporter : 128MB limit, 50MB requests Note We recommend not setting memory limits on the OSD prepare job to prevent OSD provisioning failure due to memory constraints. The OSD prepare job bursts memory usage during the OSD provisioning depending on the size of the device, typically 1-2Gi for large disks. The OSD prepare job only bursts a single time per OSD. All future runs of the OSD prepare job will detect the OSD is already provisioned and skip the provisioning. Hint The resources for MDS daemons are not configured in the Cluster. Refer to the Ceph Filesystem CRD instead. Resource Requirements/Limits \u00b6 For more information on resource requests/limits see the official Kubernetes documentation: Kubernetes - Managing Compute Resources for Containers requests : Requests for cpu or memory. cpu : Request for CPU (example: one CPU core 1 , 50% of one CPU core 500m ). memory : Limit for Memory (example: one gigabyte of memory 1Gi , half a gigabyte of memory 512Mi ). limits : Limits for cpu or memory. cpu : Limit for CPU (example: one CPU core 1 , 50% of one CPU core 500m ). memory : Limit for Memory (example: one gigabyte of memory 1Gi , half a gigabyte of memory 512Mi ). Warning Before setting resource requests/limits, please take a look at the Ceph documentation for recommendations for each component: Ceph - Hardware Recommendations . Node Specific Resources for OSDs \u00b6 This example shows that you can override these requests/limits for OSDs per node when using useAllNodes: false in the node item in the nodes list. 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false storage : useAllNodes : false nodes : - name : \"172.17.4.201\" resources : limits : cpu : \"2\" memory : \"4096Mi\" requests : cpu : \"2\" memory : \"4096Mi\" Priority Class Names \u00b6 Priority class names can be specified so that the Rook components will have those priority class names added to them. You can set priority class names for Rook components for the list of key value pairs: all : Set priority class names for MGRs, Mons, OSDs, and crashcollectors. mgr : Set priority class names for MGRs. Examples default to system-cluster-critical. mon : Set priority class names for Mons. Examples default to system-node-critical. osd : Set priority class names for OSDs. Examples default to system-node-critical. crashcollector : Set priority class names for crashcollectors. The specific component keys will act as overrides to all . Health settings \u00b6 The Rook Ceph operator will monitor the state of the CephCluster on various components by default. The following CRD settings are available: healthCheck : main ceph cluster health monitoring section Currently three health checks are implemented: mon : health check on the ceph monitors, basically check whether monitors are members of the quorum. If after a certain timeout a given monitor has not joined the quorum back it will be failed over and replace by a new monitor. osd : health check on the ceph osds status : ceph health status check, periodically check the Ceph health state and reflects it in the CephCluster CR status field. The liveness probe and startup probe of each daemon can also be controlled via livenessProbe and startupProbe respectively. The settings are valid for mon , mgr and osd . Here is a complete example for both daemonHealth , livenessProbe , and startupProbe : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 healthCheck : daemonHealth : mon : disabled : false interval : 45s timeout : 600s osd : disabled : false interval : 60s status : disabled : false livenessProbe : mon : disabled : false mgr : disabled : false osd : disabled : false startupProbe : mon : disabled : false mgr : disabled : false osd : disabled : false The probe's timing values and thresholds (but not the probe itself) can also be overridden. For more info, refer to the Kubernetes documentation . For example, you could change the mgr probe by applying: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 healthCheck : startupProbe : mgr : disabled : false probe : initialDelaySeconds : 3 periodSeconds : 3 failureThreshold : 30 livenessProbe : mgr : disabled : false probe : initialDelaySeconds : 3 periodSeconds : 3 Changing the liveness probe is an advanced operation and should rarely be necessary. If you want to change these settings then modify the desired settings. Status \u00b6 The operator is regularly configuring and checking the health of the cluster. The results of the configuration and health checks can be seen in the status section of the CephCluster CR. 1 kubectl -n rook-ceph get CephCluster -o yaml 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 [ ... 
] status : ceph : health : HEALTH_OK lastChecked : \"2021-03-02T21:22:11Z\" capacity : bytesAvailable : 22530293760 bytesTotal : 25757220864 bytesUsed : 3226927104 lastUpdated : \"2021-03-02T21:22:11Z\" message : Cluster created successfully phase : Ready state : Created storage : deviceClasses : - name : hdd version : image : quay.io/ceph/ceph:v17.2.6 version : 16.2.6-0 conditions : - lastHeartbeatTime : \"2021-03-02T21:22:11Z\" lastTransitionTime : \"2021-03-02T21:21:09Z\" message : Cluster created successfully reason : ClusterCreated status : \"True\" type : Ready Ceph Status \u00b6 Ceph is constantly monitoring the health of the data plane and reporting back if there are any warnings or errors. If everything is healthy from Ceph's perspective, you will see HEALTH_OK . If Ceph reports any warnings or errors, the details will be printed to the status. If further troubleshooting is needed to resolve these issues, the toolbox will likely be needed, where you can run ceph commands to find more details. The capacity of the cluster is reported, including bytes available, total, and used. The available space will be less than you may expect due to overhead in the OSDs. Conditions \u00b6 The conditions represent the status of the Rook operator. If the cluster is fully configured and the operator is stable, the Ready condition is raised with the ClusterCreated reason and no other conditions. The cluster will remain in the Ready condition after the first successful configuration since it is expected the storage is consumable from this point on. If there are issues preventing the storage layer from working, they are expected to show as Ceph health errors. If the cluster is externally connected successfully, the Ready condition will have the reason ClusterConnected . If the operator is currently being configured or the operator is checking for updates, there will be a Progressing condition. If there was a failure, the condition(s) status will be false and the message will give a summary of the error. See the operator log for more details. Other Status \u00b6 There are several other properties for the overall status including: message , phase , and state : A summary of the overall current state of the cluster, which is somewhat duplicated from the conditions for backward compatibility. storage.deviceClasses : The names of the types of storage devices that Ceph discovered in the cluster. These types will be ssd or hdd unless they have been overridden with the crushDeviceClass in the storageClassDeviceSets . version : The version of the Ceph image currently deployed. OSD Topology \u00b6 The topology of the cluster is important in production environments where you want your data spread across failure domains. The topology can be controlled by adding labels to the nodes. When the labels are found on a node at first OSD deployment, Rook will add them to the desired level in the CRUSH map .
The complete list of labels in hierarchy order from highest to lowest is: 1 2 3 4 5 6 7 8 9 topology.kubernetes.io/region topology.kubernetes.io/zone topology.rook.io/datacenter topology.rook.io/room topology.rook.io/pod topology.rook.io/pdu topology.rook.io/row topology.rook.io/rack topology.rook.io/chassis For example, if the following labels were added to a node: 1 2 kubectl label node mynode topology.kubernetes.io/zone=zone1 kubectl label node mynode topology.rook.io/rack=zone1-rack1 These labels would result in the following hierarchy for OSDs on that node (this command can be run in the Rook toolbox): 1 2 3 4 5 6 7 8 $ ceph osd tree ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF -1 0.01358 root default -5 0.01358 zone zone1 -4 0.01358 rack rack1 -3 0.01358 host mynode 0 hdd 0.00679 osd.0 up 1.00000 1.00000 1 hdd 0.00679 osd.1 up 1.00000 1.00000 Ceph requires unique names at every level in the hierarchy (CRUSH map). For example, you cannot have two racks with the same name that are in different zones. Racks in different zones must be named uniquely. Note that the host is added automatically to the hierarchy by Rook. The host cannot be specified with a topology label. All topology labels are optional. Hint When setting the node labels prior to CephCluster creation, these settings take immediate effect. However, applying this to an already deployed CephCluster requires removing each node from the cluster first and then re-adding it with new configuration to take effect. Do this node by node to keep your data safe! Check the result with ceph osd tree from the Rook Toolbox . The OSD tree should display the hierarchy for the nodes that already have been re-added. To utilize the failureDomain based on the node labels, specify the corresponding option in the CephBlockPool 1 2 3 4 5 6 7 8 9 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : failureDomain : rack # this matches the topology labels on nodes replicated : size : 3 This configuration will split the replication of volumes across unique racks in the data center setup. Deleting a CephCluster \u00b6 During deletion of a CephCluster resource, Rook protects against accidental or premature destruction of user data by blocking deletion if there are any other Rook Ceph Custom Resources that reference the CephCluster being deleted. Rook will warn about which other resources are blocking deletion in three ways until all blocking resources are deleted: An event will be registered on the CephCluster resource A status condition will be added to the CephCluster resource An error will be added to the Rook Ceph operator log Cleanup policy \u00b6 Rook has the ability to cleanup resources and data that were deployed when a CephCluster is removed. The policy settings indicate which data should be forcibly deleted and in what way the data should be wiped. The cleanupPolicy has several fields: confirmation : Only an empty string and yes-really-destroy-data are valid values for this field. If this setting is empty, the cleanupPolicy settings will be ignored and Rook will not cleanup any resources during cluster removal. To reinstall the cluster, the admin would then be required to follow the cleanup guide to delete the data on hosts. If this setting is yes-really-destroy-data , the operator will automatically delete the data on hosts. 
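For illustration, a minimal sketch of how the confirmation field sits in the CephCluster spec (only this field is shown; the value is the literal string the operator expects):

spec:
  cleanupPolicy:
    # WARNING: setting this value tells Rook to wipe host data when the cluster is deleted
    confirmation: "yes-really-destroy-data"
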
Because this cleanup policy is destructive, after the confirmation is set to yes-really-destroy-data Rook will stop configuring the cluster as if the cluster is about to be destroyed. sanitizeDisks : sanitizeDisks represents advanced settings that can be used to delete data on drives. method : indicates if the entire disk should be sanitized or simply ceph's metadata. Possible choices are quick (default) or complete dataSource : indicate where to get random bytes from to write on the disk. Possible choices are zero (default) or random . Using random sources will consume entropy from the system and will take much more time then the zero source iteration : overwrite N times instead of the default (1). Takes an integer value allowUninstallWithVolumes : If set to true, then the cephCluster deletion doesn't wait for the PVCs to be deleted. Default is false . To automate activation of the cleanup, you can use the following command. WARNING: DATA WILL BE PERMANENTLY DELETED : 1 kubectl -n rook-ceph patch cephcluster rook-ceph --type merge -p '{\"spec\":{\"cleanupPolicy\":{\"confirmation\":\"yes-really-destroy-data\"}}}' Nothing will happen until the deletion of the CR is requested, so this can still be reverted. However, all new configuration by the operator will be blocked with this cleanup policy enabled. Rook waits for the deletion of PVs provisioned using the cephCluster before proceeding to delete the cephCluster. To force deletion of the cephCluster without waiting for the PVs to be deleted, you can set the allowUninstallWithVolumes to true under spec.CleanupPolicy .","title":"CephCluster CRD"},{"location":"CRDs/Cluster/ceph-cluster-crd/#settings","text":"Settings can be specified at the global level to apply to the cluster as a whole, while other settings can be specified at more fine-grained levels. If any setting is unspecified, a suitable default will be used automatically.","title":"Settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#cluster-metadata","text":"name : The name that will be used internally for the Ceph cluster. Most commonly the name is the same as the namespace since multiple clusters are not supported in the same namespace. namespace : The Kubernetes namespace that will be created for the Rook cluster. The services, pods, and other resources created by the operator will be added to this namespace. The common scenario is to create a single Rook cluster. If multiple clusters are created, they must not have conflicting devices or host paths.","title":"Cluster metadata"},{"location":"CRDs/Cluster/ceph-cluster-crd/#cluster-settings","text":"external : enable : if true , the cluster will not be managed by Rook but via an external entity. This mode is intended to connect to an existing cluster. In this case, Rook will only consume the external cluster. However, Rook will be able to deploy various daemons in Kubernetes such as object gateways, mds and nfs if an image is provided and will refuse otherwise. If this setting is enabled all the other options will be ignored except cephVersion.image and dataDirHostPath . See external cluster configuration . If cephVersion.image is left blank, Rook will refuse the creation of extra CRs like object, file and nfs. cephVersion : The version information for launching the ceph daemons. image : The image used for running the ceph daemons. For example, quay.io/ceph/ceph:v16.2.11 or v17.2.6 . For more details read the container images section . For the latest ceph images, see the Ceph DockerHub . 
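As a small sketch (the tag shown is only an example taken from the other examples in this document), pinning a specific image in the cluster spec looks like:

spec:
  cephVersion:
    # pin an exact release tag rather than a moving tag such as v17
    image: quay.io/ceph/ceph:v17.2.6
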
To ensure a consistent version of the image is running across all nodes in the cluster, it is recommended to use a very specific image version. Tags also exist that would give the latest version, but they are only recommended for test environments. For example, the tag v17 will be updated each time a new Quincy build is released. Using the v17 tag is not recommended in production because it may lead to inconsistent versions of the image running across different nodes in the cluster. allowUnsupported : If true , allow an unsupported major version of the Ceph release. Currently pacific and quincy are supported. Future versions such as reef (v18) would require this to be set to true . Should be set to false in production. imagePullPolicy : The image pull policy for the ceph daemon pods. Possible values are Always , IfNotPresent , and Never . The default is IfNotPresent . dataDirHostPath : The path on the host ( hostPath ) where config and data should be stored for each of the services. If the directory does not exist, it will be created. Because this directory persists on the host, it will remain after pods are deleted. Following paths and any of their subpaths must not be used : /etc/ceph , /rook or /var/log/ceph . WARNING : For test scenarios, if you delete a cluster and start a new cluster on the same hosts, the path used by dataDirHostPath must be deleted. Otherwise, stale keys and other config will remain from the previous cluster and the new mons will fail to start. If this value is empty, each pod will get an ephemeral directory to store their config files that is tied to the lifetime of the pod running on that node. More details can be found in the Kubernetes empty dir docs . skipUpgradeChecks : if set to true Rook won't perform any upgrade checks on Ceph daemons during an upgrade. Use this at YOUR OWN RISK , only if you know what you're doing. To understand Rook's upgrade process of Ceph, read the upgrade doc . continueUpgradeAfterChecksEvenIfNotHealthy : if set to true Rook will continue the OSD daemon upgrade process even if the PGs are not clean, or continue with the MDS upgrade even the file system is not healthy. dashboard : Settings for the Ceph dashboard. To view the dashboard in your browser see the dashboard guide . enabled : Whether to enable the dashboard to view cluster status urlPrefix : Allows to serve the dashboard under a subpath (useful when you are accessing the dashboard via a reverse proxy) port : Allows to change the default port where the dashboard is served ssl : Whether to serve the dashboard via SSL, ignored on Ceph versions older than 13.2.2 monitoring : Settings for monitoring Ceph using Prometheus. To enable monitoring on your cluster see the monitoring guide . enabled : Whether to enable the prometheus service monitor for an internal cluster. For an external cluster, whether to create an endpoint port for the metrics. Default is false. metricsDisabled : Whether to disable the metrics reported by Ceph. If false, the prometheus mgr module and Ceph exporter are enabled. If true, the prometheus mgr module and Ceph exporter are both disabled. Default is false. externalMgrEndpoints : external cluster manager endpoints externalMgrPrometheusPort : external prometheus manager module port. See external cluster configuration for more details. port : The internal prometheus manager module port where the prometheus mgr module listens. The port may need to be configured when host networking is enabled. 
interval : The interval for the prometheus module to scrape targets. network : For the network settings for the cluster, refer to the network configuration settings mon : contains mon related options mon settings For more details on the mons and when to choose a number other than 3 , see the mon health doc . mgr : manager top level section count : set the number of ceph managers between 1 and 2 . The default value is 2. If there are two managers, it is important that all mgr services point to the active mgr and not the standby mgr. Rook automatically updates the label mgr_role on the mgr pods to be either active or standby . Therefore, services just need to add the label mgr_role=active to their selector to point to the active mgr. This applies to all services that rely on the ceph mgr such as the dashboard or the prometheus metrics collector. modules : is the list of Ceph manager modules to enable crashCollector : The settings for crash collector daemon(s). disable : if set to true , the crash collector will not run on any node where a Ceph daemon runs daysToRetain : specifies the number of days to keep crash entries in the Ceph cluster. By default the entries are kept indefinitely. logCollector : The settings for the log collector daemon. enabled : if set to true , the log collector will run as a side-car next to each Ceph daemon. The Ceph configuration option log_to_file will be turned on, meaning Ceph daemons will log to files in addition to still logging to the container's stdout. These logs will be rotated. In case a daemon terminates with a segfault, the coredump files will commonly be generated in the /var/lib/systemd/coredump directory on the host, depending on the underlying OS location. (default: true ) periodicity : how often to rotate the daemon's logs. (default: 24h). Specified with a time suffix which may be h for hours or d for days. Rotating too often will slightly impact the daemon's performance since the signal briefly interrupts the program. annotations : annotations configuration settings labels : labels configuration settings placement : placement configuration settings resources : resources configuration settings priorityClassNames : priority class names configuration settings storage : Storage selection and configuration that will be used across the cluster. Note that these settings can be overridden for specific nodes. useAllNodes : true or false , indicating if all nodes in the cluster should be used for storage according to the cluster level storage selection and configuration values. If individual nodes are specified under the nodes field, then useAllNodes must be set to false . nodes : Names of individual nodes in the cluster that should have their storage included in accordance with either the cluster level configuration specified above or any node specific overrides described in the next section below. useAllNodes must be set to false to use specific nodes and their config. See node settings below. config : Config settings applied to all OSDs on the node unless overridden by devices . See the config settings below. storage selection settings Storage Class Device Sets onlyApplyOSDPlacement : Whether the placement specific for OSDs is merged with the all placement. If false , the OSD placement will be merged with the all placement. If true , the OSD placement will be applied and the all placement will be ignored.
The placement for OSDs is computed from several different places depending on the type of OSD: For non-PVCs: placement.all and placement.osd For PVCs: placement.all and inside the storageClassDeviceSets from the placement or preparePlacement disruptionManagement : The section for configuring management of daemon disruptions managePodBudgets : if true , the operator will create and manage PodDisruptionBudgets for OSD, Mon, RGW, and MDS daemons. OSD PDBs are managed dynamically via the strategy outlined in the design . The operator will block eviction of OSDs by default and unblock them safely when drains are detected. osdMaintenanceTimeout : is a duration in minutes that determines how long an entire failureDomain like region/zone/host will be held in noout (in addition to the default DOWN/OUT interval) when it is draining. This is only relevant when managePodBudgets is true . The default value is 30 minutes. removeOSDsIfOutAndSafeToRemove : If true the operator will remove the OSDs that are down and whose data has been restored to other OSDs. In Ceph terms, the OSDs are out and safe-to-destroy when they are removed. cleanupPolicy : cleanup policy settings security : security page for key management configuration","title":"Cluster Settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#ceph-container-images","text":"Official releases of Ceph Container images are available from Docker Hub . These are general purpose Ceph container with all necessary daemons and dependencies installed. TAG MEANING vRELNUM Latest release in this series (e.g., v17 = Quincy) vRELNUM.Y Latest stable release in this stable series (e.g., v17.2) vRELNUM.Y.Z A specific release (e.g., v17.2.6) vRELNUM.Y.Z-YYYYMMDD A specific build (e.g., v17.2.6-20230410) A specific will contain a specific release of Ceph as well as security fixes from the Operating System.","title":"Ceph container images"},{"location":"CRDs/Cluster/ceph-cluster-crd/#mon-settings","text":"count : Set the number of mons to be started. The number must be between 1 and 9 . The recommended value is most commonly 3 . For highest availability, an odd number of mons should be specified. For higher durability in case of mon loss, an even number can be specified although availability may be lower. To maintain quorum a majority of mons must be up. For example, if there are three mons, two must be up. If there are four mons, three must be up. If there are two mons, both must be up. If quorum is lost, see the disaster recovery guide to restore quorum from a single mon. allowMultiplePerNode : Whether to allow the placement of multiple mons on a single node. Default is false for production. Should only be set to true in test environments. volumeClaimTemplate : A PersistentVolumeSpec used by Rook to create PVCs for monitor storage. This field is optional, and when not provided, HostPath volume mounts are used. The current set of fields from template that are used are storageClassName and the storage resource request and limit. The default storage size request for new PVCs is 10Gi . Ensure that associated storage class is configured to use volumeBindingMode: WaitForFirstConsumer . This setting only applies to new monitors that are created when the requested number of monitors increases, or when a monitor fails and is recreated. An example CRD configuration is provided below . failureDomainLabel : The label that is expected on each node where the mons are expected to be deployed. The labels must be found in the list of well-known topology labels . 
zones : The failure domain names where the Mons are expected to be deployed. There must be at least three zones specified in the list. Each zone can be backed by a different storage class by specifying the volumeClaimTemplate . name : The name of the zone, which is the value of the domain label. volumeClaimTemplate : A PersistentVolumeSpec used by Rook to create PVCs for monitor storage. This field is optional, and when not provided, HostPath volume mounts are used. The current set of fields from template that are used are storageClassName and the storage resource request and limit. The default storage size request for new PVCs is 10Gi . Ensure that associated storage class is configured to use volumeBindingMode: WaitForFirstConsumer . This setting only applies to new monitors that are created when the requested number of monitors increases, or when a monitor fails and is recreated. An example CRD configuration is provided below . stretchCluster : The stretch cluster settings that define the zones (or other failure domain labels) across which to configure the cluster. failureDomainLabel : The label that is expected on each node where the cluster is expected to be deployed. The labels must be found in the list of well-known topology labels . subFailureDomain : With a zone, the data replicas must be spread across OSDs in the subFailureDomain. The default is host . zones : The failure domain names where the Mons and OSDs are expected to be deployed. There must be three zones specified in the list. This element is always named zone even if a non-default failureDomainLabel is specified. The elements have two values: name : The name of the zone, which is the value of the domain label. arbiter : Whether the zone is expected to be the arbiter zone which only runs a single mon. Exactly one zone must be labeled true . volumeClaimTemplate : A PersistentVolumeSpec used by Rook to create PVCs for monitor storage. This field is optional, and when not provided, HostPath volume mounts are used. The current set of fields from template that are used are storageClassName and the storage resource request and limit. The default storage size request for new PVCs is 10Gi . Ensure that associated storage class is configured to use volumeBindingMode: WaitForFirstConsumer . This setting only applies to new monitors that are created when the requested number of monitors increases, or when a monitor fails and is recreated. An example CRD configuration is provided below . The two zones that are not the arbiter zone are expected to have OSDs deployed. If these settings are changed in the CRD the operator will update the number of mons during a periodic check of the mon health, which by default is every 45 seconds. To change the defaults that the operator uses to determine the mon health and whether to failover a mon, refer to the health settings . The intervals should be small enough that you have confidence the mons will maintain quorum, while also being long enough to ignore network blips where mons are failed over too often.","title":"Mon Settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#mgr-settings","text":"You can use the cluster CR to enable or disable any manager module. This can be configured like so: 1 2 3 4 mgr : modules : - name :  enabled : true Some modules will have special configuration to ensure the module is fully functional after being enabled. 
Specifically: pg_autoscaler : Rook will configure all new pools with PG autoscaling by setting: osd_pool_default_pg_autoscale_mode = on","title":"Mgr Settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#network-configuration-settings","text":"If not specified, the default SDN will be used. Configure the network that will be enabled for the cluster and services. provider : Specifies the network provider that will be used to connect the network interface. You can choose between host and multus . selectors : List of the network selector(s) that will be used, associated by a key. ipFamily : Specifies the network stack Ceph daemons should listen on. dualStack : Specifies that Ceph daemons should listen on both IPv4 and IPv6 network stacks. connections : Settings for network connections using Ceph's msgr2 protocol. requireMsgr2 : Whether to require communication over msgr2. If true, the msgr v1 port (6789) will be disabled and clients will be required to connect to the Ceph cluster with the v2 port (3300). Requires a kernel that supports msgr2 (kernel 5.11 or CentOS 8.4 or newer). Default is false. encryption : Settings for encryption on the wire to Ceph daemons. enabled : Whether to encrypt the data in transit across the wire to prevent eavesdropping on the data on the network. The default is false. When encryption is enabled, all communication between clients and Ceph daemons, or between Ceph daemons, will be encrypted. When encryption is not enabled, clients still establish a strong initial authentication and data integrity is still validated with a crc check. IMPORTANT : Encryption requires the 5.11 kernel for the latest nbd and cephfs drivers. Alternatively for testing only, set \"mounter: rbd-nbd\" in the rbd storage class, or \"mounter: fuse\" in the cephfs storage class. The nbd and fuse drivers are not recommended in production since restarting the csi driver pod will disconnect the volumes. If this setting is enabled, CephFS volumes also require setting CSI_CEPHFS_KERNEL_MOUNT_OPTIONS to \"ms_mode=secure\" in operator.yaml. compression : enabled : Whether to compress the data in transit across the wire. The default is false. Requires Ceph Quincy (v17) or newer. Also see the kernel requirements above for encryption. Caution Changing networking configuration after a Ceph cluster has been deployed is NOT supported and will result in a non-functioning cluster.","title":"Network Configuration Settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#host-networking","text":"To use host networking, set provider: host . If the host networking setting is changed in a cluster where mons are already running, the existing mons will remain running with the same network settings with which they were created. To complete the conversion to or from host networking after you update this setting, you will need to fail over the mons in order to have mons on the desired network configuration.","title":"Host Networking"},{"location":"CRDs/Cluster/ceph-cluster-crd/#multus","text":"Rook supports the addition of public and cluster networks for Ceph using Multus . The selector keys are required to be public and cluster , where each represents: public : client communications with the cluster (reads/writes) cluster : internal Ceph replication network If you want to learn more, please read: Ceph Networking reference .
Multus documentation Based on the configuration, the operator will do the following: If only the public selector is specified, all communication will happen on that network 1 2 3 4 network : provider : multus selectors : public : rook-ceph/rook-public-nw If only the cluster selector is specified, the internal cluster traffic* will happen on that network. All other traffic to mons, OSDs, and other daemons will be on the default network. 1 2 3 4 network : provider : multus selectors : cluster : rook-ceph/rook-cluster-nw If both public and cluster selectors are specified the first one will run all the communication network and the second the internal cluster network* 1 2 3 4 5 network : provider : multus selectors : public : rook-ceph/rook-public-nw cluster : rook-ceph/rook-cluster-nw * Internal cluster traffic includes OSD heartbeats, data replication, and data recovery Only OSD pods will have both Public and Cluster networks attached. The rest of the Ceph component pods and CSI pods will only have the Public network attached. Rook Ceph operator will not have any networks attached as it proxies the required commands via a sidecar container in the mgr pod. In order to work, each selector value must match a NetworkAttachmentDefinition object name in Multus. For multus network provider, an already working cluster with Multus networking is required. Network attachment definition that later will be attached to the cluster needs to be created before the Cluster CRD. The Network attachment definitions should be using whereabouts cni. If Rook cannot find the provided Network attachment definition it will fail running the Ceph OSD pods. You can add the Multus network attachment selection annotation selecting the created network attachment definition on selectors . A valid NetworkAttachmentDefinition will look like following: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 apiVersion : \"k8s.cni.cncf.io/v1\" kind : NetworkAttachmentDefinition metadata : name : rook-public-nw spec : config : '{ \"cniVersion\": \"0.3.0\", \"name\": \"public-nad\", \"type\": \"macvlan\", \"master\": \"ens5\", \"mode\": \"bridge\", \"ipam\": { \"type\": \"whereabouts\", \"range\": \"192.168.1.0/24\" } }' Ensure that master matches the network interface of the host that you want to use. IPAM type whereabouts is required because it makes sure that all the pods get a unique IP address from the multus network. The NetworkAttachmentDefinition should be referenced along with the namespace in which it is present like public: / . e.g., the network attachment definition are in default namespace: 1 2 public : default/rook-public-nw cluster : default/rook-cluster-nw 1 2 * This format is required in order to use the NetworkAttachmentDefinition across namespaces. * In Openshift, to use a NetworkAttachmentDefinition (NAD) across namespaces, the NAD must be deployed in the `default` namespace. The NAD is then referenced with the namespace: `default/rook-public-nw`","title":"Multus"},{"location":"CRDs/Cluster/ceph-cluster-crd/#validating-multus-configuration","text":"We highly recommend validating your Multus configuration before you install Rook. A tool exists to facilitate validating the Multus configuration. After installing the Rook operator and before installing any Custom Resources, run the tool from the operator pod. The tool's CLI is designed to be as helpful as possible. 
Get help text for the multus validation tool like so: 1 kubectl --namespace rook-ceph exec -it deploy/rook-ceph-operator -- rook multus validation run --help Then, update the args in the multus-validation job template. Minimally, add the NAD name(s) for public and/or cluster as needed, and then create the job to validate the Multus configuration. If the tool fails, it will suggest what may be preventing Multus networks from working properly, and it will request the logs and outputs that will help debug issues. Check the logs of the pod created by the job to know the status of the validation test.","title":"Validating Multus configuration"},{"location":"CRDs/Cluster/ceph-cluster-crd/#known-limitations-with-multus","text":"Daemons leveraging Kubernetes service IPs (Monitors, Managers, Rados Gateways) are not listening on the NAD specified in the selectors . Instead, the daemon listens on the default network; however, the NAD is attached to the container, allowing the daemon to communicate with the rest of the cluster. There is work in progress to fix this issue in the multus-service repository. At the time of writing it's unclear when this will be supported.","title":"Known limitations with Multus"},{"location":"CRDs/Cluster/ceph-cluster-crd/#ipfamily","text":"Provide single-stack IPv4 or IPv6 protocol to assign corresponding addresses to pods and services. This field is optional. Possible inputs are IPv6 and IPv4. An empty value will be treated as IPv4. The Kubernetes version should be at least v1.13 to run IPv6. Dual-stack is supported as of Ceph Pacific. To turn on dual stack see the network configuration section .","title":"IPFamily"},{"location":"CRDs/Cluster/ceph-cluster-crd/#node-settings","text":"In addition to the cluster level settings specified above, each individual node can also specify configuration to override the cluster level settings and defaults. If a node does not specify any configuration then it will inherit the cluster level settings. name : The name of the node, which should match its kubernetes.io/hostname label. config : Config settings applied to all OSDs on the node unless overridden by devices . See the config settings below. storage selection settings When useAllNodes is set to true , Rook attempts to make Ceph cluster management as hands-off as possible while still maintaining reasonable data safety. If a usable node comes online, Rook will begin to use it automatically. To maintain a balance between hands-off usability and data safety, nodes are removed from Ceph as OSD hosts only (1) if the node is deleted from Kubernetes itself or (2) if the node has its taints or affinities modified in such a way that the node is no longer usable by Rook. Any changes to taints or affinities, intentional or unintentional, may affect the data reliability of the Ceph cluster. In order to help protect against this somewhat, deletion of nodes by taint or affinity modifications must be \"confirmed\" by deleting the Rook Ceph operator pod and allowing the operator deployment to restart the pod. For production clusters, we recommend that useAllNodes is set to false to prevent the Ceph cluster from suffering reduced data reliability unintentionally due to a user mistake. When useAllNodes is set to false , Rook relies on the user to be explicit about when nodes are added to or removed from the Ceph cluster. Nodes are only added to the Ceph cluster if the node is added to the Ceph cluster resource.
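For example, a minimal storage section that explicitly lists nodes might look like the following sketch (the node names are hypothetical and must match each node's kubernetes.io/hostname label):

spec:
  storage:
    useAllNodes: false
    nodes:
    - name: "worker-1"   # hypothetical node name
    - name: "worker-2"
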
Similarly, nodes are only removed if the node is removed from the Ceph cluster resource.","title":"Node Settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#node-updates","text":"Nodes can be added and removed over time by updating the Cluster CRD, for example with kubectl -n rook-ceph edit cephcluster rook-ceph . This will bring up your default text editor and allow you to add and remove storage nodes from the cluster. This feature is only available when useAllNodes has been set to false .","title":"Node Updates"},{"location":"CRDs/Cluster/ceph-cluster-crd/#storage-selection-settings","text":"Below are the settings for host-based cluster. This type of cluster can specify devices for OSDs, both at the cluster and individual node level, for selecting which storage resources will be included in the cluster. useAllDevices : true or false , indicating whether all devices found on nodes in the cluster should be automatically consumed by OSDs. Not recommended unless you have a very controlled environment where you will not risk formatting of devices with existing data. When true , all devices and partitions will be used. Is overridden by deviceFilter if specified. LVM logical volumes are not picked by useAllDevices . deviceFilter : A regular expression for short kernel names of devices (e.g. sda ) that allows selection of devices and partitions to be consumed by OSDs. LVM logical volumes are not picked by deviceFilter .If individual devices have been specified for a node then this filter will be ignored. This field uses golang regular expression syntax . For example: sdb : Only selects the sdb device if found ^sd. : Selects all devices starting with sd ^sd[a-d] : Selects devices starting with sda , sdb , sdc , and sdd if found ^s : Selects all devices that start with s ^[^r] : Selects all devices that do not start with r devicePathFilter : A regular expression for device paths (e.g. /dev/disk/by-path/pci-0:1:2:3-scsi-1 ) that allows selection of devices and partitions to be consumed by OSDs. LVM logical volumes are not picked by devicePathFilter .If individual devices or deviceFilter have been specified for a node then this filter will be ignored. This field uses golang regular expression syntax . For example: ^/dev/sd. : Selects all devices starting with sd ^/dev/disk/by-path/pci-.* : Selects all devices which are connected to PCI bus devices : A list of individual device names belonging to this node to include in the storage cluster. name : The name of the devices and partitions (e.g., sda ). The full udev path can also be specified for devices, partitions, and logical volumes (e.g. /dev/disk/by-id/ata-ST4000DM004-XXXX - this will not change after reboots). config : Device-specific config settings. See the config settings below Host-based cluster supports raw device, partition, and logical volume. Be sure to see the quickstart doc prerequisites for additional considerations. Below are the settings for a PVC-based cluster. storageClassDeviceSets : Explained in Storage Class Device Sets","title":"Storage Selection Settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#storage-class-device-sets","text":"The following are the settings for Storage Class Device Sets which can be configured to create OSDs that are backed by block mode PVs. name : A name for the set. count : The number of devices in the set. resources : The CPU and RAM requests/limits for the devices. (Optional) placement : The placement criteria for the devices. (Optional) Default is no placement criteria. 
The syntax is the same as for other placement configuration . It supports nodeAffinity , podAffinity , podAntiAffinity and tolerations keys. It is recommended to configure the placement such that the OSDs will be as evenly spread across nodes as possible. At a minimum, anti-affinity should be added so at least one OSD will be placed on each available nodes. However, if there are more OSDs than nodes, this anti-affinity will not be effective. Another placement scheme to consider is to add labels to the nodes in such a way that the OSDs can be grouped on those nodes, create multiple storageClassDeviceSets, and add node affinity to each of the device sets that will place the OSDs in those sets of nodes. Rook will automatically add required nodeAffinity to the OSD daemons to match the topology labels that are found on the nodes where the OSD prepare jobs ran. To ensure data durability, the OSDs are required to run in the same topology that the Ceph CRUSH map expects. For example, if the nodes are labeled with rack topology labels, the OSDs will be constrained to a certain rack. Without the topology labels, Rook will not constrain the OSDs beyond what is required by the PVs, for example to run in the zone where provisioned. See the OSD Topology section for the related labels. preparePlacement : The placement criteria for the preparation of the OSD devices. Creating OSDs is a two-step process and the prepare job may require different placement than the OSD daemons. If the preparePlacement is not specified, the placement will instead be applied for consistent placement for the OSD prepare jobs and OSD deployments. The preparePlacement is only useful for portable OSDs in the device sets. OSDs that are not portable will be tied to the host where the OSD prepare job initially runs. For example, provisioning may require topology spread constraints across zones, but the OSD daemons may require constraints across hosts within the zones. portable : If true , the OSDs will be allowed to move between nodes during failover. This requires a storage class that supports portability (e.g. aws-ebs , but not the local storage provisioner). If false , the OSDs will be assigned to a node permanently. Rook will configure Ceph's CRUSH map to support the portability. tuneDeviceClass : For example, Ceph cannot detect AWS volumes as HDDs from the storage class \"gp2\", so you can improve Ceph performance by setting this to true. tuneFastDeviceClass : For example, Ceph cannot detect Azure disks as SSDs from the storage class \"managed-premium\", so you can improve Ceph performance by setting this to true.. volumeClaimTemplates : A list of PVC templates to use for provisioning the underlying storage devices. resources.requests.storage : The desired capacity for the underlying storage devices. storageClassName : The StorageClass to provision PVCs from. Default would be to use the cluster-default StorageClass. This StorageClass should provide a raw block device, multipath device, or logical volume. Other types are not supported. If you want to use logical volume, please see known issue of OSD on LV-backed PVC volumeMode : The volume mode to be set for the PVC. Which should be Block accessModes : The access mode for the PVC to be bound by OSD. schedulerName : Scheduler name for OSD pod placement. 
(Optional) encrypted : whether to encrypt all the OSDs in a given storageClassDeviceSet","title":"Storage Class Device Sets"},{"location":"CRDs/Cluster/ceph-cluster-crd/#osd-configuration-settings","text":"The following storage selection settings are specific to Ceph and do not apply to other backends. All variables are key-value pairs represented as strings. metadataDevice : Name of a device or lvm to use for the metadata of OSDs on each node. Performance can be improved by using a low latency device (such as SSD or NVMe) as the metadata device, while other spinning platter (HDD) devices on a node are used to store data. Provisioning will fail if the user specifies a metadataDevice but that device is not used as a metadata device by Ceph. Notably, ceph-volume will not use a device of the same device class (HDD, SSD, NVMe) as OSD devices for metadata, resulting in this failure. databaseSizeMB : The size in MB of a bluestore database. Include quotes around the size. walSizeMB : The size in MB of a bluestore write ahead log (WAL). Include quotes around the size. deviceClass : The CRUSH device class to use for this selection of storage devices. (By default, if a device's class has not already been set, OSDs will automatically set a device's class to either hdd , ssd , or nvme based on the hardware properties exposed by the Linux kernel.) These storage classes can then be used to select the devices backing a storage pool by specifying them as the value of the pool spec's deviceClass field . initialWeight : The initial OSD weight in TiB units. By default, this value is derived from OSD's capacity. primaryAffinity : The primary-affinity value of an OSD, within range [0, 1] (default: 1 ). osdsPerDevice **: The number of OSDs to create on each device. High performance devices such as NVMe can handle running multiple OSDs. If desired, this can be overridden for each node and each device. encryptedDevice **: Encrypt OSD volumes using dmcrypt (\"true\" or \"false\"). By default this option is disabled. See encryption for more information on encryption in Ceph. crushRoot : The value of the root CRUSH map label. The default is default . Generally, you should not need to change this. However, if any of your topology labels may have the value default , you need to change crushRoot to avoid conflicts, since CRUSH map values need to be unique.","title":"OSD Configuration Settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#annotations-and-labels","text":"Annotations and Labels can be specified so that the Rook components will have those annotations / labels added to them. You can set annotations / labels for Rook components for the list of key value pairs: all : Set annotations / labels for all components except clusterMetadata . mgr : Set annotations / labels for MGRs mon : Set annotations / labels for mons osd : Set annotations / labels for OSDs prepareosd : Set annotations / labels for OSD Prepare Jobs monitoring : Set annotations / labels for service monitor crashcollector : Set annotations / labels for crash collectors clusterMetadata : Set annotations only to rook-ceph-mon-endpoints configmap and the rook-ceph-mon and rook-ceph-admin-keyring secrets. These annotations will not be merged with the all annotations. The common usage is for backing up these critical resources with kubed . Note the clusterMetadata annotation will not be merged with the all annotation. 
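As an illustrative sketch (the annotation keys shown are hypothetical), annotations can be applied per component like so:

spec:
  annotations:
    all:
      example.com/team: "storage"     # hypothetical annotation applied to all components
    clusterMetadata:
      example.com/backup: "true"      # applied only to the mon endpoints configmap and keyring secrets
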
When other keys are set, all will be merged together with the specific component.","title":"Annotations and Labels"},{"location":"CRDs/Cluster/ceph-cluster-crd/#placement-configuration-settings","text":"Placement configuration for the cluster services. It includes the following keys: mgr , mon , arbiter , osd , prepareosd , cleanup , and all . Each service will have its placement configuration generated by merging the generic configuration under all with the most specific one (which will override any attributes). In stretch clusters, if the arbiter placement is specified, that placement will only be applied to the arbiter. Neither will the arbiter placement be merged with the all placement to allow the arbiter to be fully independent of other daemon placement. The remaining mons will still use the mon and/or all sections. Note Placement of OSD pods is controlled using the Storage Class Device Set , not the general placement configuration. A Placement configuration is specified (according to the kubernetes PodSpec) as: nodeAffinity : kubernetes NodeAffinity podAffinity : kubernetes PodAffinity podAntiAffinity : kubernetes PodAntiAffinity tolerations : list of kubernetes Toleration topologySpreadConstraints : kubernetes TopologySpreadConstraints If you use labelSelector for osd pods, you must write two rules both for rook-ceph-osd and rook-ceph-osd-prepare like the example configuration . It comes from the design that there are these two pods for an OSD. For more detail, see the osd design doc and the related issue . The Rook Ceph operator creates a Job called rook-ceph-detect-version to detect the full Ceph version used by the given cephVersion.image . The placement from the mon section is used for the Job except for the PodAntiAffinity field.","title":"Placement Configuration Settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#placement-example","text":"To control where various services will be scheduled by kubernetes, use the placement configuration sections below. The example under 'all' would have all services scheduled on kubernetes nodes labeled with 'role=storage-node . Specific node affinity and tolerations that only apply to the mon daemons in this example require the label role=storage-mon-node` and also tolerate the control plane taint. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false # enable the ceph dashboard for viewing cluster status dashboard : enabled : true placement : all : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : role operator : In values : - storage-node mon : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : role operator : In values : - storage-mon-node tolerations : - effect : NoSchedule key : node-role.kubernetes.io/control-plane operator : Exists","title":"Placement Example"},{"location":"CRDs/Cluster/ceph-cluster-crd/#cluster-wide-resources-configuration-settings","text":"Resources should be specified so that the Rook components are handled after Kubernetes Pod Quality of Service classes . 
This allows to keep Rook components running when for example a node runs out of memory and the Rook components are not killed depending on their Quality of Service class. You can set resource requests/limits for Rook components through the Resource Requirements/Limits structure in the following keys: mon : Set resource requests/limits for mons osd : Set resource requests/limits for OSDs. This key applies for all OSDs regardless of their device classes. In case of need to apply resource requests/limits for OSDs with particular device class use specific osd keys below. If the memory resource is declared Rook will automatically set the OSD configuration osd_memory_target to the same value. This aims to ensure that the actual OSD memory consumption is consistent with the OSD pods' resource declaration. osd- : Set resource requests/limits for OSDs on a specific device class. Rook will automatically detect hdd , ssd , or nvme device classes. Custom device classes can also be set. mgr : Set resource requests/limits for MGRs mgr-sidecar : Set resource requests/limits for the MGR sidecar, which is only created when mgr.count: 2 . The sidecar requires very few resources since it only executes every 15 seconds to query Ceph for the active mgr and update the mgr services if the active mgr changed. prepareosd : Set resource requests/limits for OSD prepare job crashcollector : Set resource requests/limits for crash. This pod runs wherever there is a Ceph pod running. It scrapes for Ceph daemon core dumps and sends them to the Ceph manager crash module so that core dumps are centralized and can be easily listed/accessed. You can read more about the Ceph Crash module . logcollector : Set resource requests/limits for the log collector. When enabled, this container runs as side-car to each Ceph daemons. cleanup : Set resource requests/limits for cleanup job, responsible for wiping cluster's data after uninstall exporter : Set resource requests/limits for Ceph exporter. In order to provide the best possible experience running Ceph in containers, Rook internally recommends minimum memory limits if resource limits are passed. If a user configures a limit or request value that is too low, Rook will still run the pod(s) and print a warning to the operator log. mon : 1024MB mgr : 512MB osd : 2048MB crashcollector : 60MB mgr-sidecar : 100MB limit, 40MB requests prepareosd : no limits (see the note) exporter : 128MB limit, 50MB requests Note We recommend not setting memory limits on the OSD prepare job to prevent OSD provisioning failure due to memory constraints. The OSD prepare job bursts memory usage during the OSD provisioning depending on the size of the device, typically 1-2Gi for large disks. The OSD prepare job only bursts a single time per OSD. All future runs of the OSD prepare job will detect the OSD is already provisioned and skip the provisioning. Hint The resources for MDS daemons are not configured in the Cluster. Refer to the Ceph Filesystem CRD instead.","title":"Cluster-wide Resources Configuration Settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#resource-requirementslimits","text":"For more information on resource requests/limits see the official Kubernetes documentation: Kubernetes - Managing Compute Resources for Containers requests : Requests for cpu or memory. cpu : Request for CPU (example: one CPU core 1 , 50% of one CPU core 500m ). memory : Limit for Memory (example: one gigabyte of memory 1Gi , half a gigabyte of memory 512Mi ). limits : Limits for cpu or memory. 
cpu : Limit for CPU (example: one CPU core 1 , 50% of one CPU core 500m ). memory : Limit for Memory (example: one gigabyte of memory 1Gi , half a gigabyte of memory 512Mi ). Warning Before setting resource requests/limits, please take a look at the Ceph documentation for recommendations for each component: Ceph - Hardware Recommendations .","title":"Resource Requirements/Limits"},{"location":"CRDs/Cluster/ceph-cluster-crd/#node-specific-resources-for-osds","text":"This example shows that you can override these requests/limits for OSDs per node when using useAllNodes: false in the node item in the nodes list. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false storage : useAllNodes : false nodes : - name : \"172.17.4.201\" resources : limits : cpu : \"2\" memory : \"4096Mi\" requests : cpu : \"2\" memory : \"4096Mi\"","title":"Node Specific Resources for OSDs"},{"location":"CRDs/Cluster/ceph-cluster-crd/#priority-class-names","text":"Priority class names can be specified so that the Rook components will have those priority class names added to them. You can set priority class names for Rook components for the list of key value pairs: all : Set priority class names for MGRs, Mons, OSDs, and crashcollectors. mgr : Set priority class names for MGRs. Examples default to system-cluster-critical. mon : Set priority class names for Mons. Examples default to system-node-critical. osd : Set priority class names for OSDs. Examples default to system-node-critical. crashcollector : Set priority class names for crashcollectors. The specific component keys will act as overrides to all .","title":"Priority Class Names"},{"location":"CRDs/Cluster/ceph-cluster-crd/#health-settings","text":"The Rook Ceph operator will monitor the state of the CephCluster on various components by default. The following CRD settings are available: healthCheck : main ceph cluster health monitoring section Currently three health checks are implemented: mon : health check on the ceph monitors, basically check whether monitors are members of the quorum. If after a certain timeout a given monitor has not joined the quorum back it will be failed over and replace by a new monitor. osd : health check on the ceph osds status : ceph health status check, periodically check the Ceph health state and reflects it in the CephCluster CR status field. The liveness probe and startup probe of each daemon can also be controlled via livenessProbe and startupProbe respectively. The settings are valid for mon , mgr and osd . Here is a complete example for both daemonHealth , livenessProbe , and startupProbe : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 healthCheck : daemonHealth : mon : disabled : false interval : 45s timeout : 600s osd : disabled : false interval : 60s status : disabled : false livenessProbe : mon : disabled : false mgr : disabled : false osd : disabled : false startupProbe : mon : disabled : false mgr : disabled : false osd : disabled : false The probe's timing values and thresholds (but not the probe itself) can also be overridden. For more info, refer to the Kubernetes documentation . 
For example, you could change the mgr probe by applying: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 healthCheck : startupProbe : mgr : disabled : false probe : initialDelaySeconds : 3 periodSeconds : 3 failureThreshold : 30 livenessProbe : mgr : disabled : false probe : initialDelaySeconds : 3 periodSeconds : 3 Changing the liveness probe is an advanced operation and should rarely be necessary. If you want to change these settings then modify the desired settings.","title":"Health settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#status","text":"The operator is regularly configuring and checking the health of the cluster. The results of the configuration and health checks can be seen in the status section of the CephCluster CR. 1 kubectl -n rook-ceph get CephCluster -o yaml 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 [ ... ] status : ceph : health : HEALTH_OK lastChecked : \"2021-03-02T21:22:11Z\" capacity : bytesAvailable : 22530293760 bytesTotal : 25757220864 bytesUsed : 3226927104 lastUpdated : \"2021-03-02T21:22:11Z\" message : Cluster created successfully phase : Ready state : Created storage : deviceClasses : - name : hdd version : image : quay.io/ceph/ceph:v17.2.6 version : 16.2.6-0 conditions : - lastHeartbeatTime : \"2021-03-02T21:22:11Z\" lastTransitionTime : \"2021-03-02T21:21:09Z\" message : Cluster created successfully reason : ClusterCreated status : \"True\" type : Ready","title":"Status"},{"location":"CRDs/Cluster/ceph-cluster-crd/#ceph-status","text":"Ceph is constantly monitoring the health of the data plane and reporting back if there are any warnings or errors. If everything is healthy from Ceph's perspective, you will see HEALTH_OK . If Ceph reports any warnings or errors, the details will be printed to the status. If further troubleshooting is needed to resolve these issues, the toolbox will likely be needed where you can run ceph commands to find more details. The capacity of the cluster is reported, including bytes available, total, and used. The available space will be less that you may expect due to overhead in the OSDs.","title":"Ceph Status"},{"location":"CRDs/Cluster/ceph-cluster-crd/#conditions","text":"The conditions represent the status of the Rook operator. If the cluster is fully configured and the operator is stable, the Ready condition is raised with ClusterCreated reason and no other conditions. The cluster will remain in the Ready condition after the first successful configuration since it is expected the storage is consumable from this point on. If there are issues preventing the storage layer from working, they are expected to show as Ceph health errors. If the cluster is externally connected successfully, the Ready condition will have the reason ClusterConnected . If the operator is currently being configured or the operator is checking for update, there will be a Progressing condition. If there was a failure, the condition(s) status will be false and the message will give a summary of the error. See the operator log for more details.","title":"Conditions"},{"location":"CRDs/Cluster/ceph-cluster-crd/#other-status","text":"There are several other properties for the overall status including: message , phase , and state : A summary of the overall current state of the cluster, which is somewhat duplicated from the conditions for backward compatibility. storage.deviceClasses : The names of the types of storage devices that Ceph discovered in the cluster. 
These types will be ssd or hdd unless they have been overridden with the crushDeviceClass in the storageClassDeviceSets . version : The version of the Ceph image currently deployed.","title":"Other Status"},{"location":"CRDs/Cluster/ceph-cluster-crd/#osd-topology","text":"The topology of the cluster is important in production environments where you want your data spread across failure domains. The topology can be controlled by adding labels to the nodes. When the labels are found on a node at first OSD deployment, Rook will add them to the desired level in the CRUSH map . The complete list of labels in hierarchy order from highest to lowest is: 1 2 3 4 5 6 7 8 9 topology.kubernetes.io/region topology.kubernetes.io/zone topology.rook.io/datacenter topology.rook.io/room topology.rook.io/pod topology.rook.io/pdu topology.rook.io/row topology.rook.io/rack topology.rook.io/chassis For example, if the following labels were added to a node: 1 2 kubectl label node mynode topology.kubernetes.io/zone=zone1 kubectl label node mynode topology.rook.io/rack=zone1-rack1 These labels would result in the following hierarchy for OSDs on that node (this command can be run in the Rook toolbox): 1 2 3 4 5 6 7 8 $ ceph osd tree ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF -1 0.01358 root default -5 0.01358 zone zone1 -4 0.01358 rack rack1 -3 0.01358 host mynode 0 hdd 0.00679 osd.0 up 1.00000 1.00000 1 hdd 0.00679 osd.1 up 1.00000 1.00000 Ceph requires unique names at every level in the hierarchy (CRUSH map). For example, you cannot have two racks with the same name that are in different zones. Racks in different zones must be named uniquely. Note that the host is added automatically to the hierarchy by Rook. The host cannot be specified with a topology label. All topology labels are optional. Hint When setting the node labels prior to CephCluster creation, these settings take immediate effect. However, applying this to an already deployed CephCluster requires removing each node from the cluster first and then re-adding it with new configuration to take effect. Do this node by node to keep your data safe! Check the result with ceph osd tree from the Rook Toolbox . The OSD tree should display the hierarchy for the nodes that already have been re-added. To utilize the failureDomain based on the node labels, specify the corresponding option in the CephBlockPool 1 2 3 4 5 6 7 8 9 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : failureDomain : rack # this matches the topology labels on nodes replicated : size : 3 This configuration will split the replication of volumes across unique racks in the data center setup.","title":"OSD Topology"},{"location":"CRDs/Cluster/ceph-cluster-crd/#deleting-a-cephcluster","text":"During deletion of a CephCluster resource, Rook protects against accidental or premature destruction of user data by blocking deletion if there are any other Rook Ceph Custom Resources that reference the CephCluster being deleted. Rook will warn about which other resources are blocking deletion in three ways until all blocking resources are deleted: An event will be registered on the CephCluster resource A status condition will be added to the CephCluster resource An error will be added to the Rook Ceph operator log","title":"Deleting a CephCluster"},{"location":"CRDs/Cluster/ceph-cluster-crd/#cleanup-policy","text":"Rook has the ability to cleanup resources and data that were deployed when a CephCluster is removed. 
The policy settings indicate which data should be forcibly deleted and in what way the data should be wiped. The cleanupPolicy has several fields: confirmation : Only an empty string and yes-really-destroy-data are valid values for this field. If this setting is empty, the cleanupPolicy settings will be ignored and Rook will not clean up any resources during cluster removal. To reinstall the cluster, the admin would then be required to follow the cleanup guide to delete the data on hosts. If this setting is yes-really-destroy-data , the operator will automatically delete the data on hosts. Because this cleanup policy is destructive, after the confirmation is set to yes-really-destroy-data , Rook will stop configuring the cluster as if the cluster is about to be destroyed. sanitizeDisks : sanitizeDisks represents advanced settings that can be used to delete data on drives. method : indicates whether the entire disk should be sanitized or only Ceph's metadata. Possible choices are quick (default) or complete dataSource : indicates where the bytes written to the disk come from. Possible choices are zero (default) or random . Using the random source will consume entropy from the system and will take much more time than the zero source iteration : overwrite N times instead of the default (1). Takes an integer value allowUninstallWithVolumes : If set to true, then the cephCluster deletion doesn't wait for the PVCs to be deleted. Default is false . (A consolidated cleanupPolicy example is shown at the end of this section.) To automate activation of the cleanup, you can use the following command. WARNING: DATA WILL BE PERMANENTLY DELETED : 1 kubectl -n rook-ceph patch cephcluster rook-ceph --type merge -p '{\"spec\":{\"cleanupPolicy\":{\"confirmation\":\"yes-really-destroy-data\"}}}' Nothing will happen until the deletion of the CR is requested, so this can still be reverted. However, all new configuration by the operator will be blocked with this cleanup policy enabled. Rook waits for the deletion of PVs provisioned using the cephCluster before proceeding to delete the cephCluster. To force deletion of the cephCluster without waiting for the PVs to be deleted, you can set allowUninstallWithVolumes to true under spec.cleanupPolicy .","title":"Cleanup policy"},
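For reference, the cleanupPolicy fields described above can be combined in the CephCluster spec roughly as follows. This is an illustrative sketch only; the sanitizeDisks values shown are the defaults mentioned above, and confirmation is left empty so that no cleanup is triggered:

spec:
  cleanupPolicy:
    # Leave empty to disable cleanup; set to yes-really-destroy-data to wipe host data on deletion
    confirmation: ""
    sanitizeDisks:
      method: quick       # quick (default) or complete
      dataSource: zero    # zero (default) or random
      iteration: 1        # number of overwrite passes
    allowUninstallWithVolumes: false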
{"location":"CRDs/Cluster/external-cluster/","text":"An external cluster is a Ceph configuration that is managed outside of the local K8s cluster. The external cluster could be managed by cephadm, or it could be another Rook cluster that is configured to allow access (usually configured with host networking). In external mode, Rook will provide the configuration for the CSI driver and other basic resources that allow your applications to connect to Ceph in the external cluster. External configuration \u00b6 Source cluster: The cluster providing the data, usually configured by cephadm Consumer cluster: The K8s cluster that will be consuming the external source cluster Prerequisites \u00b6 Create the desired types of storage in the provider Ceph cluster: RBD pools CephFS filesystem Commands on the source Ceph cluster \u00b6 To configure an external Ceph cluster with Rook, some information must be extracted from that cluster so that Rook can connect to it. 1. Create all users and keys \u00b6 Run the python script create-external-cluster-resources.py for creating all users and keys. 1 python3 create-external-cluster-resources.py --rbd-data-pool-name  --cephfs-filesystem-name  --rgw-endpoint  --namespace  --format bash --namespace : Namespace where CephCluster will run, for example rook-ceph-external --format bash : The format of the output --rbd-data-pool-name : The name of the RBD data pool --alias-rbd-data-pool-name : Provides an alias for the RBD data pool name, necessary if a special character is present in the pool name such as a period or underscore --rgw-endpoint : (optional) The RADOS Gateway endpoint in the format <IP>:<PORT> or <FQDN>:<PORT> . --rgw-pool-prefix : (optional) The prefix of the RGW pools. If not specified, the default prefix is default --rgw-tls-cert-path : (optional) RADOS Gateway endpoint TLS certificate file path --rgw-skip-tls : (optional) Ignore TLS certification validation when a self-signed certificate is provided (NOT RECOMMENDED) --rbd-metadata-ec-pool-name : (optional) Provides the name of the erasure coded RBD metadata pool, used for creating ECRBDStorageClass. --monitoring-endpoint : (optional) Ceph Manager prometheus exporter endpoints (comma separated list of entries of active and standby mgrs) --monitoring-endpoint-port : (optional) Ceph Manager prometheus exporter port --skip-monitoring-endpoint : (optional) Skip prometheus exporter endpoints, even if they are available. Useful if the prometheus module is not enabled --ceph-conf : (optional) Provide a Ceph conf file --keyring : (optional) Path to Ceph keyring file, to be used with --ceph-conf --cluster-name : (optional) Ceph cluster name --output : (optional) Output will be stored into the provided file --dry-run : (optional) Prints the executed commands without running them --run-as-user : (optional) Provides a user name to check the cluster's health status, must be prefixed by client . --cephfs-metadata-pool-name : (optional) Provides the name of the cephfs metadata pool --cephfs-filesystem-name : (optional) The name of the filesystem, used for creating CephFS StorageClass --cephfs-data-pool-name : (optional) Provides the name of the CephFS data pool, used for creating CephFS StorageClass --rados-namespace : (optional) Divides a pool into separate logical namespaces, used for creating RBD PVC in a RadosNamespaces --subvolume-group : (optional) Provides the name of the subvolume group, used for creating CephFS PVC in a subvolumeGroup --rgw-realm-name : (optional) Provides the name of the rgw-realm --rgw-zone-name : (optional) Provides the name of the rgw-zone --rgw-zonegroup-name : (optional) Provides the name of the rgw-zone-group --upgrade : (optional) Upgrades the Ceph CSI keyrings (for example: client.csi-cephfs-provisioner) with the new permissions needed for the new cluster version; the older permissions will still be applied. --restricted-auth-permission : (optional) Restrict cephCSIKeyrings auth permissions to specific pools and cluster. Mandatory flags that need to be set are --rbd-data-pool-name and --cluster-name . The --cephfs-filesystem-name flag can also be passed in case of CephFS user restriction, so that it restricts users to a particular CephFS filesystem. --v2-port-enable : (optional) Enables the v2 mon port (3300) for mons. Multi-tenancy \u00b6 To enable multi-tenancy, run the script with the --restricted-auth-permission flag and pass the mandatory flags with it. It will generate the secrets which you can use for creating a new Consumer cluster deployment using the same Source cluster (ceph cluster). So you would be running different isolated consumer clusters on top of a single Source cluster . 
Note Restricting the csi-users per pool, and per cluster will require creating new csi-users and new secrets for that csi-users. So apply these secrets only to new Consumer cluster deployment while using the same Source cluster . 1 python3 create-external-cluster-resources.py --cephfs-filesystem-name  --rbd-data-pool-name  --cluster-name  --restricted-auth-permission true --format  --rgw-endpoint  --namespace  RGW Multisite \u00b6 Pass the --rgw-realm-name , --rgw-zonegroup-name and --rgw-zone-name flags to create the admin ops user in a master zone, zonegroup and realm. See the Multisite doc for creating a zone, zonegroup and realm. 1 python3 create-external-cluster-resources.py --rbd-data-pool-name  --format bash --rgw-endpoint  --rgw-realm-name > --rgw-zonegroup-name  --rgw-zone-name > Upgrade Example \u00b6 1) If consumer cluster doesn't have restricted caps, this will upgrade all the default csi-users (non-restricted): 1 python3 create-external-cluster-resources.py --upgrade 2) If the consumer cluster has restricted caps: Restricted users created using --restricted-auth-permission flag need to pass mandatory flags: ' --rbd-data-pool-name (if it is a rbd user), --cluster-name and --run-as-user ' flags while upgrading, in case of cephfs users if you have passed --cephfs-filesystem-name flag while creating csi-users then while upgrading it will be mandatory too. In this example the user would be client.csi-rbd-node-rookstorage-replicapool (following the pattern csi-user-clusterName-poolName ) 1 python3 create-external-cluster-resources.py --upgrade --rbd-data-pool-name replicapool --cluster-name rookstorage --run-as-user client.csi-rbd-node-rookstorage-replicapool Note An existing non-restricted user cannot be converted to a restricted user by upgrading. The upgrade flag should only be used to append new permissions to users. It shouldn't be used for changing a csi user already applied permissions. For example, you shouldn't change the pool(s) a user has access to. 2. Copy the bash output \u00b6 Example Output: 1 2 3 4 5 6 7 8 9 10 11 export ROOK_EXTERNAL_FSID=797f411a-aafe-11ec-a254-fa163e1539f5 export ROOK_EXTERNAL_USERNAME=client.healthchecker export ROOK_EXTERNAL_CEPH_MON_DATA=ceph-rados-upstream-w4pdvq-node1-installer=10.0.210.83:6789 export ROOK_EXTERNAL_USER_SECRET=AQAdm0FilZDSJxAAMucfuu/j0ZYYP4Bia8Us+w== export ROOK_EXTERNAL_DASHBOARD_LINK=https://10.0.210.83:8443/ export CSI_RBD_NODE_SECRET=AQC1iDxip45JDRAAVahaBhKz1z0WW98+ACLqMQ== export CSI_RBD_PROVISIONER_SECRET=AQC1iDxiMM+LLhAA0PucjNZI8sG9Eh+pcvnWhQ== export MONITORING_ENDPOINT=10.0.210.83 export MONITORING_ENDPOINT_PORT=9283 export RBD_POOL_NAME=replicated_2g export RGW_POOL_PREFIX=default Commands on the K8s consumer cluster \u00b6 Import the Source Data \u00b6 Paste the above output from create-external-cluster-resources.py into your current shell to allow importing the source data. Run the import script. !!! note If your Rook cluster nodes are running a kernel earlier than or equivalent to 5.4, remove fast-diff,object-map,deep-flatten,exclusive-lock from the imageFeatures line. 1 2 3 ```console . import-external-cluster.sh ``` Helm Installation \u00b6 To install with Helm, the rook cluster helm chart will configure the necessary resources for the external cluster with the example values-external.yaml . 
1 2 3 4 5 6 7 clusterNamespace=rook-ceph operatorNamespace=rook-ceph cd deploy/examples/charts/rook-ceph-cluster helm repo add rook-release https://charts.rook.io/release helm install --create-namespace --namespace $clusterNamespace rook-ceph rook-release/rook-ceph -f values.yaml helm install --create-namespace --namespace $clusterNamespace rook-ceph-cluster \\ --set operatorNamespace=$operatorNamespace rook-release/rook-ceph-cluster -f values-external.yaml Skip the manifest installation section and continue with Cluster Verification . Manifest Installation \u00b6 If not installing with Helm, here are the steps to install with manifests. Deploy Rook, create common.yaml , crds.yaml and operator.yaml manifests. Create common-external.yaml and cluster-external.yaml Cluster Verification \u00b6 Verify the consumer cluster is connected to the source ceph cluster: 1 2 3 $ kubectl -n rook-ceph-external get CephCluster NAME DATADIRHOSTPATH MONCOUNT AGE STATE HEALTH rook-ceph-external /var/lib/rook 162m Connected HEALTH_OK Verify the creation of the storage class depending on the rbd pools and filesystem provided. ceph-rbd and cephfs would be the respective names for the RBD and CephFS storage classes. 1 kubectl -n rook-ceph-external get sc Then you can now create a persistent volume based on these StorageClass. Connect to an External Object Store \u00b6 Create the object store resources: Create the external object store CR to configure connection to external gateways. Create an Object store user for credentials to access the S3 endpoint. Create a bucket storage class where a client can request creating buckets. Create the Object Bucket Claim , which will create an individual bucket for reading and writing objects. 1 2 3 4 5 cd deploy/examples kubectl create -f object-external.yaml kubectl create -f object-user.yaml kubectl create -f storageclass-bucket-delete.yaml kubectl create -f object-bucket-claim-delete.yaml Hint For more details see the Object Store topic Connect to v2 mon port \u00b6 If encryption or compression on the wire is needed, specify the --v2-port-enable flag. If the v2 address type is present in the ceph quorum_status , then the output of 'ceph mon data' i.e, ROOK_EXTERNAL_CEPH_MON_DATA will use the v2 port( 3300 ). Exporting Rook to another cluster \u00b6 If you have multiple K8s clusters running, and want to use the local rook-ceph cluster as the central storage, you can export the settings from this cluster with the following steps. 1) Copy create-external-cluster-resources.py into the directory /etc/ceph/ of the toolbox. 1 2 toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}') kubectl -n rook-ceph cp deploy/examples/create-external-cluster-resources.py $toolbox:/etc/ceph 2) Exec to the toolbox pod and execute create-external-cluster-resources.py with needed options to create required users and keys . Important For other clusters to connect to storage in this cluster, Rook must be configured with a networking configuration that is accessible from other clusters. 
Most commonly this is done by enabling host networking in the CephCluster CR so the Ceph daemons will be addressable by their host IPs.","title":"External Storage Cluster"},{"location":"CRDs/Cluster/external-cluster/#external-configuration","text":"Source cluster: The cluster providing the data, usually configured by cephadm Consumer cluster: The K8s cluster that will be consuming the external source cluster","title":"External configuration"},{"location":"CRDs/Cluster/external-cluster/#prerequisites","text":"Create the desired types of storage in the provider Ceph cluster: RBD pools CephFS filesystem","title":"Prerequisites"},{"location":"CRDs/Cluster/external-cluster/#commands-on-the-source-ceph-cluster","text":"In order to configure an external Ceph cluster with Rook, we need to extract some information in order to connect to that cluster.","title":"Commands on the source Ceph cluster"},{"location":"CRDs/Cluster/external-cluster/#1-create-all-users-and-keys","text":"Run the python script create-external-cluster-resources.py for creating all users and keys. 1 python3 create-external-cluster-resources.py --rbd-data-pool-name  --cephfs-filesystem-name  --rgw-endpoint  --namespace  --format bash --namespace : Namespace where CephCluster will run, for example rook-ceph-external --format bash : The format of the output --rbd-data-pool-name : The name of the RBD data pool --alias-rbd-data-pool-name : Provides an alias for the RBD data pool name, necessary if a special character is present in the pool name such as a period or underscore --rgw-endpoint : (optional) The RADOS Gateway endpoint in the format : or : . --rgw-pool-prefix : (optional) The prefix of the RGW pools. If not specified, the default prefix is default --rgw-tls-cert-path : (optional) RADOS Gateway endpoint TLS certificate file path --rgw-skip-tls : (optional) Ignore TLS certification validation when a self-signed certificate is provided (NOT RECOMMENDED) --rbd-metadata-ec-pool-name : (optional) Provides the name of erasure coded RBD metadata pool, used for creating ECRBDStorageClass. --monitoring-endpoint : (optional) Ceph Manager prometheus exporter endpoints (comma separated list of entries of active and standby mgrs) --monitoring-endpoint-port : (optional) Ceph Manager prometheus exporter port --skip-monitoring-endpoint : (optional) Skip prometheus exporter endpoints, even if they are available. Useful if the prometheus module is not enabled --ceph-conf : (optional) Provide a Ceph conf file --keyring : (optional) Path to Ceph keyring file, to be used with --ceph-conf --cluster-name : (optional) Ceph cluster name --output : (optional) Output will be stored into the provided file --dry-run : (optional) Prints the executed commands without running them --run-as-user : (optional) Provides a user name to check the cluster's health status, must be prefixed by client . 
--cephfs-metadata-pool-name : (optional) Provides the name of the cephfs metadata pool --cephfs-filesystem-name : (optional) The name of the filesystem, used for creating CephFS StorageClass --cephfs-data-pool-name : (optional) Provides the name of the CephFS data pool, used for creating CephFS StorageClass --rados-namespace : (optional) Divides a pool into separate logical namespaces, used for creating RBD PVC in a RadosNamespaces --subvolume-group : (optional) Provides the name of the subvolume group, used for creating CephFS PVC in a subvolumeGroup --rgw-realm-name : (optional) Provides the name of the rgw-realm --rgw-zone-name : (optional) Provides the name of the rgw-zone --rgw-zonegroup-name : (optional) Provides the name of the rgw-zone-group --upgrade : (optional) Upgrades the 'Ceph CSI keyrings (For example: client.csi-cephfs-provisioner) with new permissions needed for the new cluster version and older permission will still be applied. --restricted-auth-permission : (optional) Restrict cephCSIKeyrings auth permissions to specific pools, and cluster. Mandatory flags that need to be set are --rbd-data-pool-name , and --cluster-name . --cephfs-filesystem-name flag can also be passed in case of CephFS user restriction, so it can restrict users to particular CephFS filesystem. --v2-port-enable : (optional) Enables the v2 mon port (3300) for mons.","title":"1. Create all users and keys"},{"location":"CRDs/Cluster/external-cluster/#multi-tenancy","text":"To enable multi-tenancy, run the script with the --restricted-auth-permission flag and pass the mandatory flags with it, It will generate the secrets which you can use for creating new Consumer cluster deployment using the same Source cluster (ceph cluster). So you would be running different isolated consumer clusters on top of single Source cluster . Note Restricting the csi-users per pool, and per cluster will require creating new csi-users and new secrets for that csi-users. So apply these secrets only to new Consumer cluster deployment while using the same Source cluster . 1 python3 create-external-cluster-resources.py --cephfs-filesystem-name  --rbd-data-pool-name  --cluster-name  --restricted-auth-permission true --format  --rgw-endpoint  --namespace ","title":"Multi-tenancy"},{"location":"CRDs/Cluster/external-cluster/#rgw-multisite","text":"Pass the --rgw-realm-name , --rgw-zonegroup-name and --rgw-zone-name flags to create the admin ops user in a master zone, zonegroup and realm. See the Multisite doc for creating a zone, zonegroup and realm. 1 python3 create-external-cluster-resources.py --rbd-data-pool-name  --format bash --rgw-endpoint  --rgw-realm-name > --rgw-zonegroup-name  --rgw-zone-name >","title":"RGW Multisite"},{"location":"CRDs/Cluster/external-cluster/#upgrade-example","text":"1) If consumer cluster doesn't have restricted caps, this will upgrade all the default csi-users (non-restricted): 1 python3 create-external-cluster-resources.py --upgrade 2) If the consumer cluster has restricted caps: Restricted users created using --restricted-auth-permission flag need to pass mandatory flags: ' --rbd-data-pool-name (if it is a rbd user), --cluster-name and --run-as-user ' flags while upgrading, in case of cephfs users if you have passed --cephfs-filesystem-name flag while creating csi-users then while upgrading it will be mandatory too. 
In this example the user would be client.csi-rbd-node-rookstorage-replicapool (following the pattern csi-user-clusterName-poolName ) 1 python3 create-external-cluster-resources.py --upgrade --rbd-data-pool-name replicapool --cluster-name rookstorage --run-as-user client.csi-rbd-node-rookstorage-replicapool Note An existing non-restricted user cannot be converted to a restricted user by upgrading. The upgrade flag should only be used to append new permissions to users. It shouldn't be used for changing a csi user already applied permissions. For example, you shouldn't change the pool(s) a user has access to.","title":"Upgrade Example"},{"location":"CRDs/Cluster/external-cluster/#2-copy-the-bash-output","text":"Example Output: 1 2 3 4 5 6 7 8 9 10 11 export ROOK_EXTERNAL_FSID=797f411a-aafe-11ec-a254-fa163e1539f5 export ROOK_EXTERNAL_USERNAME=client.healthchecker export ROOK_EXTERNAL_CEPH_MON_DATA=ceph-rados-upstream-w4pdvq-node1-installer=10.0.210.83:6789 export ROOK_EXTERNAL_USER_SECRET=AQAdm0FilZDSJxAAMucfuu/j0ZYYP4Bia8Us+w== export ROOK_EXTERNAL_DASHBOARD_LINK=https://10.0.210.83:8443/ export CSI_RBD_NODE_SECRET=AQC1iDxip45JDRAAVahaBhKz1z0WW98+ACLqMQ== export CSI_RBD_PROVISIONER_SECRET=AQC1iDxiMM+LLhAA0PucjNZI8sG9Eh+pcvnWhQ== export MONITORING_ENDPOINT=10.0.210.83 export MONITORING_ENDPOINT_PORT=9283 export RBD_POOL_NAME=replicated_2g export RGW_POOL_PREFIX=default","title":"2. Copy the bash output"},{"location":"CRDs/Cluster/external-cluster/#commands-on-the-k8s-consumer-cluster","text":"","title":"Commands on the K8s consumer cluster"},{"location":"CRDs/Cluster/external-cluster/#import-the-source-data","text":"Paste the above output from create-external-cluster-resources.py into your current shell to allow importing the source data. Run the import script. !!! note If your Rook cluster nodes are running a kernel earlier than or equivalent to 5.4, remove fast-diff,object-map,deep-flatten,exclusive-lock from the imageFeatures line. 1 2 3 ```console . import-external-cluster.sh ```","title":"Import the Source Data"},{"location":"CRDs/Cluster/external-cluster/#helm-installation","text":"To install with Helm, the rook cluster helm chart will configure the necessary resources for the external cluster with the example values-external.yaml . 1 2 3 4 5 6 7 clusterNamespace=rook-ceph operatorNamespace=rook-ceph cd deploy/examples/charts/rook-ceph-cluster helm repo add rook-release https://charts.rook.io/release helm install --create-namespace --namespace $clusterNamespace rook-ceph rook-release/rook-ceph -f values.yaml helm install --create-namespace --namespace $clusterNamespace rook-ceph-cluster \\ --set operatorNamespace=$operatorNamespace rook-release/rook-ceph-cluster -f values-external.yaml Skip the manifest installation section and continue with Cluster Verification .","title":"Helm Installation"},{"location":"CRDs/Cluster/external-cluster/#manifest-installation","text":"If not installing with Helm, here are the steps to install with manifests. Deploy Rook, create common.yaml , crds.yaml and operator.yaml manifests. Create common-external.yaml and cluster-external.yaml","title":"Manifest Installation"},{"location":"CRDs/Cluster/external-cluster/#cluster-verification","text":"Verify the consumer cluster is connected to the source ceph cluster: 1 2 3 $ kubectl -n rook-ceph-external get CephCluster NAME DATADIRHOSTPATH MONCOUNT AGE STATE HEALTH rook-ceph-external /var/lib/rook 162m Connected HEALTH_OK Verify the creation of the storage class depending on the rbd pools and filesystem provided. 
ceph-rbd and cephfs would be the respective names for the RBD and CephFS storage classes. 1 kubectl -n rook-ceph-external get sc Then you can now create a persistent volume based on these StorageClass.","title":"Cluster Verification"},{"location":"CRDs/Cluster/external-cluster/#connect-to-an-external-object-store","text":"Create the object store resources: Create the external object store CR to configure connection to external gateways. Create an Object store user for credentials to access the S3 endpoint. Create a bucket storage class where a client can request creating buckets. Create the Object Bucket Claim , which will create an individual bucket for reading and writing objects. 1 2 3 4 5 cd deploy/examples kubectl create -f object-external.yaml kubectl create -f object-user.yaml kubectl create -f storageclass-bucket-delete.yaml kubectl create -f object-bucket-claim-delete.yaml Hint For more details see the Object Store topic","title":"Connect to an External Object Store"},{"location":"CRDs/Cluster/external-cluster/#connect-to-v2-mon-port","text":"If encryption or compression on the wire is needed, specify the --v2-port-enable flag. If the v2 address type is present in the ceph quorum_status , then the output of 'ceph mon data' i.e, ROOK_EXTERNAL_CEPH_MON_DATA will use the v2 port( 3300 ).","title":"Connect to v2 mon port"},{"location":"CRDs/Cluster/external-cluster/#exporting-rook-to-another-cluster","text":"If you have multiple K8s clusters running, and want to use the local rook-ceph cluster as the central storage, you can export the settings from this cluster with the following steps. 1) Copy create-external-cluster-resources.py into the directory /etc/ceph/ of the toolbox. 1 2 toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}') kubectl -n rook-ceph cp deploy/examples/create-external-cluster-resources.py $toolbox:/etc/ceph 2) Exec to the toolbox pod and execute create-external-cluster-resources.py with needed options to create required users and keys . Important For other clusters to connect to storage in this cluster, Rook must be configured with a networking configuration that is accessible from other clusters. Most commonly this is done by enabling host networking in the CephCluster CR so the Ceph daemons will be addressable by their host IPs.","title":"Exporting Rook to another cluster"},{"location":"CRDs/Cluster/host-cluster/","text":"A host storage cluster is one where Rook configures Ceph to store data directly on the host. The Ceph mons will store the metadata on the host (at a path defined by the dataDirHostPath ), and the OSDs will consume raw devices or partitions. The Ceph persistent data is stored directly on a host path (Ceph Mons) and on raw devices (Ceph OSDs). To get you started, here are several example of the Cluster CR to configure the host. All Devices \u00b6 For the simplest possible configuration, this example shows that all devices or partitions should be consumed by Ceph. The mons will store the metadata on the host node under /var/lib/rook . 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : # see the \"Cluster Settings\" section below for more details on which image of ceph to run image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false storage : useAllNodes : true useAllDevices : true Node and Device Filters \u00b6 More commonly, you will want to be more specific about which nodes and devices where Rook should configure the storage. The placement settings are very flexible to add node affinity, anti-affinity, or tolerations. For more options, see the placement documentation . In this example, Rook will only configure Ceph daemons to run on nodes that are labeled with role=rook-node , and more specifically the OSDs will only be created on nodes labeled with role=rook-osd-node . 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false dashboard : enabled : true # cluster level storage configuration and selection storage : useAllNodes : true useAllDevices : true # Only create OSDs on devices that match the regular expression filter, \"sdb\" in this example deviceFilter : sdb # To control where various services will be scheduled by kubernetes, use the placement configuration sections below. # The example under 'all' would have all services scheduled on kubernetes nodes labeled with 'role=rook-node' and # the OSDs would specifically only be created on nodes labeled with roke=rook-osd-node. placement : all : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : role operator : In values : - rook-node osd : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : role operator : In values : - rook-osd-node Specific Nodes and Devices \u00b6 If you need fine-grained control for every node and every device that is being configured, individual nodes and their config can be specified. In this example, we see that specific node names and devices can be specified. Hint Each node's 'name' field should match their 'kubernetes.io/hostname' label. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false dashboard : enabled : true # cluster level storage configuration and selection storage : useAllNodes : false useAllDevices : false deviceFilter : config : metadataDevice : databaseSizeMB : \"1024\" # this value can be removed for environments with normal sized disks (100 GB or larger) nodes : - name : \"172.17.4.201\" devices : # specific devices to use for storage can be specified for each node - name : \"sdb\" # Whole storage device - name : \"sdc1\" # One specific partition. Should not have a file system on it. 
- name : \"/dev/disk/by-id/ata-ST4000DM004-XXXX\" # both device name and explicit udev links are supported config : # configuration can be specified at the node level which overrides the cluster level config - name : \"172.17.4.301\" deviceFilter : \"^sd.\"","title":"Host Storage Cluster"},{"location":"CRDs/Cluster/host-cluster/#all-devices","text":"For the simplest possible configuration, this example shows that all devices or partitions should be consumed by Ceph. The mons will store the metadata on the host node under /var/lib/rook . 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : # see the \"Cluster Settings\" section below for more details on which image of ceph to run image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false storage : useAllNodes : true useAllDevices : true","title":"All Devices"},{"location":"CRDs/Cluster/host-cluster/#node-and-device-filters","text":"More commonly, you will want to be more specific about which nodes and devices where Rook should configure the storage. The placement settings are very flexible to add node affinity, anti-affinity, or tolerations. For more options, see the placement documentation . In this example, Rook will only configure Ceph daemons to run on nodes that are labeled with role=rook-node , and more specifically the OSDs will only be created on nodes labeled with role=rook-osd-node . 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false dashboard : enabled : true # cluster level storage configuration and selection storage : useAllNodes : true useAllDevices : true # Only create OSDs on devices that match the regular expression filter, \"sdb\" in this example deviceFilter : sdb # To control where various services will be scheduled by kubernetes, use the placement configuration sections below. # The example under 'all' would have all services scheduled on kubernetes nodes labeled with 'role=rook-node' and # the OSDs would specifically only be created on nodes labeled with roke=rook-osd-node. placement : all : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : role operator : In values : - rook-node osd : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : role operator : In values : - rook-osd-node","title":"Node and Device Filters"},{"location":"CRDs/Cluster/host-cluster/#specific-nodes-and-devices","text":"If you need fine-grained control for every node and every device that is being configured, individual nodes and their config can be specified. In this example, we see that specific node names and devices can be specified. Hint Each node's 'name' field should match their 'kubernetes.io/hostname' label. 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false dashboard : enabled : true # cluster level storage configuration and selection storage : useAllNodes : false useAllDevices : false deviceFilter : config : metadataDevice : databaseSizeMB : \"1024\" # this value can be removed for environments with normal sized disks (100 GB or larger) nodes : - name : \"172.17.4.201\" devices : # specific devices to use for storage can be specified for each node - name : \"sdb\" # Whole storage device - name : \"sdc1\" # One specific partition. Should not have a file system on it. - name : \"/dev/disk/by-id/ata-ST4000DM004-XXXX\" # both device name and explicit udev links are supported config : # configuration can be specified at the node level which overrides the cluster level config - name : \"172.17.4.301\" deviceFilter : \"^sd.\"","title":"Specific Nodes and Devices"},{"location":"CRDs/Cluster/pvc-cluster/","text":"In a \"PVC-based cluster\", the Ceph persistent data is stored on volumes requested from a storage class of your choice. This type of cluster is recommended in a cloud environment where volumes can be dynamically created and also in clusters where a local PV provisioner is available. AWS Storage Example \u00b6 In this example, the mon and OSD volumes are provisioned from the AWS gp2 storage class. This storage class can be replaced by any storage class that provides file mode (for mons) and block mode (for OSDs). 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false volumeClaimTemplate : spec : storageClassName : gp2 resources : requests : storage : 10Gi storage : storageClassDeviceSets : - name : set1 count : 3 portable : false encrypted : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce onlyApplyOSDPlacement : false Local Storage Example \u00b6 In the CRD specification below, 3 OSDs (having specific placement and resource values) and 3 mons with each using a 10Gi PVC, are created by Rook using the local-storage storage class. 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false volumeClaimTemplate : spec : storageClassName : local-storage resources : requests : storage : 10Gi cephVersion : image : quay.io/ceph/ceph:v17.2.6 allowUnsupported : false dashboard : enabled : true network : hostNetwork : false storage : storageClassDeviceSets : - name : set1 count : 3 portable : false resources : limits : cpu : \"500m\" memory : \"4Gi\" requests : cpu : \"500m\" memory : \"4Gi\" placement : podAntiAffinity : preferredDuringSchedulingIgnoredDuringExecution : - weight : 100 podAffinityTerm : labelSelector : matchExpressions : - key : \"rook.io/cluster\" operator : In values : - cluster1 topologyKey : \"topology.kubernetes.io/zone\" volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi storageClassName : local-storage volumeMode : Block accessModes : - ReadWriteOnce PVC storage only for monitors \u00b6 In the CRD specification below three monitors are created each using a 10Gi PVC created by Rook using the local-storage storage class. Even while the mons consume PVCs, the OSDs in this example will still consume raw devices on the host. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false volumeClaimTemplate : spec : storageClassName : local-storage resources : requests : storage : 10Gi dashboard : enabled : true storage : useAllNodes : true useAllDevices : true Dedicated metadata and wal device for OSD on PVC \u00b6 In the simplest case, Ceph OSD BlueStore consumes a single (primary) storage device. BlueStore is the engine used by the OSD to store data. The storage device is normally used as a whole, occupying the full device that is managed directly by BlueStore. It is also possible to deploy BlueStore across additional devices such as a DB device. This device can be used for storing BlueStore\u2019s internal metadata. BlueStore (or rather, the embedded RocksDB) will put as much metadata as it can on the DB device to improve performance. If the DB device fills up, metadata will spill back onto the primary device (where it would have been otherwise). Again, it is only helpful to provision a DB device if it is faster than the primary device. You can have multiple volumeClaimTemplates where each might either represent a device or a metadata device. An example of the storage section when specifying the metadata device is: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 storage : storageClassDeviceSets : - name : set1 count : 3 portable : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. 
local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce - metadata : name : metadata spec : resources : requests : # Find the right size https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#sizing storage : 5Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, io1) storageClassName : io1 volumeMode : Block accessModes : - ReadWriteOnce Note Note that Rook only supports three naming convention for a given template: \"data\": represents the main OSD block device, where your data is being stored. \"metadata\": represents the metadata (including block.db and block.wal) device used to store the Ceph Bluestore database for an OSD. \"wal\": represents the block.wal device used to store the Ceph Bluestore database for an OSD. If this device is set, \"metadata\" device will refer specifically to block.db device. It is recommended to use a faster storage class for the metadata or wal device, with a slower device for the data. Otherwise, having a separate metadata device will not improve the performance. The bluestore partition has the following reference combinations supported by the ceph-volume utility: A single \"data\" device. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 storage : storageClassDeviceSets : - name : set1 count : 3 portable : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce A \"data\" device and a \"metadata\" device. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 storage : storageClassDeviceSets : - name : set1 count : 3 portable : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce - metadata : name : metadata spec : resources : requests : # Find the right size https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#sizing storage : 5Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, io1) storageClassName : io1 volumeMode : Block accessModes : - ReadWriteOnce A \"data\" device and a \"wal\" device. A WAL device can be used for BlueStore\u2019s internal journal or write-ahead log (block.wal), it is only useful to use a WAL device if the device is faster than the primary device (data device). There is no separate \"metadata\" device in this case, the data of main OSD block and block.db located in \"data\" device. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 storage : storageClassDeviceSets : - name : set1 count : 3 portable : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce - metadata : name : wal spec : resources : requests : # Find the right size https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#sizing storage : 5Gi # IMPORTANT: Change the storage class depending on your environment (e.g. 
local-storage, io1) storageClassName : io1 volumeMode : Block accessModes : - ReadWriteOnce A \"data\" device, a \"metadata\" device and a \"wal\" device. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 storage : storageClassDeviceSets : - name : set1 count : 3 portable : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce - metadata : name : metadata spec : resources : requests : # Find the right size https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#sizing storage : 5Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, io1) storageClassName : io1 volumeMode : Block accessModes : - ReadWriteOnce - metadata : name : wal spec : resources : requests : # Find the right size https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#sizing storage : 5Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, io1) storageClassName : io1 volumeMode : Block accessModes : - ReadWriteOnce To determine the size of the metadata block follow the official Ceph sizing guide . With the present configuration, each OSD will have its main block allocated a 10GB device as well a 5GB device to act as a bluestore database.","title":"PVC Storage Cluster"},{"location":"CRDs/Cluster/pvc-cluster/#aws-storage-example","text":"In this example, the mon and OSD volumes are provisioned from the AWS gp2 storage class. This storage class can be replaced by any storage class that provides file mode (for mons) and block mode (for OSDs). 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false volumeClaimTemplate : spec : storageClassName : gp2 resources : requests : storage : 10Gi storage : storageClassDeviceSets : - name : set1 count : 3 portable : false encrypted : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce onlyApplyOSDPlacement : false","title":"AWS Storage Example"},{"location":"CRDs/Cluster/pvc-cluster/#local-storage-example","text":"In the CRD specification below, 3 OSDs (having specific placement and resource values) and 3 mons with each using a 10Gi PVC, are created by Rook using the local-storage storage class. 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false volumeClaimTemplate : spec : storageClassName : local-storage resources : requests : storage : 10Gi cephVersion : image : quay.io/ceph/ceph:v17.2.6 allowUnsupported : false dashboard : enabled : true network : hostNetwork : false storage : storageClassDeviceSets : - name : set1 count : 3 portable : false resources : limits : cpu : \"500m\" memory : \"4Gi\" requests : cpu : \"500m\" memory : \"4Gi\" placement : podAntiAffinity : preferredDuringSchedulingIgnoredDuringExecution : - weight : 100 podAffinityTerm : labelSelector : matchExpressions : - key : \"rook.io/cluster\" operator : In values : - cluster1 topologyKey : \"topology.kubernetes.io/zone\" volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi storageClassName : local-storage volumeMode : Block accessModes : - ReadWriteOnce","title":"Local Storage Example"},{"location":"CRDs/Cluster/pvc-cluster/#pvc-storage-only-for-monitors","text":"In the CRD specification below three monitors are created each using a 10Gi PVC created by Rook using the local-storage storage class. Even while the mons consume PVCs, the OSDs in this example will still consume raw devices on the host. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false volumeClaimTemplate : spec : storageClassName : local-storage resources : requests : storage : 10Gi dashboard : enabled : true storage : useAllNodes : true useAllDevices : true","title":"PVC storage only for monitors"},{"location":"CRDs/Cluster/pvc-cluster/#dedicated-metadata-and-wal-device-for-osd-on-pvc","text":"In the simplest case, Ceph OSD BlueStore consumes a single (primary) storage device. BlueStore is the engine used by the OSD to store data. The storage device is normally used as a whole, occupying the full device that is managed directly by BlueStore. It is also possible to deploy BlueStore across additional devices such as a DB device. This device can be used for storing BlueStore\u2019s internal metadata. BlueStore (or rather, the embedded RocksDB) will put as much metadata as it can on the DB device to improve performance. If the DB device fills up, metadata will spill back onto the primary device (where it would have been otherwise). Again, it is only helpful to provision a DB device if it is faster than the primary device. You can have multiple volumeClaimTemplates where each might either represent a device or a metadata device. An example of the storage section when specifying the metadata device is: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 storage : storageClassDeviceSets : - name : set1 count : 3 portable : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. 
local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce - metadata : name : metadata spec : resources : requests : # Find the right size https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#sizing storage : 5Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, io1) storageClassName : io1 volumeMode : Block accessModes : - ReadWriteOnce Note Note that Rook only supports three naming convention for a given template: \"data\": represents the main OSD block device, where your data is being stored. \"metadata\": represents the metadata (including block.db and block.wal) device used to store the Ceph Bluestore database for an OSD. \"wal\": represents the block.wal device used to store the Ceph Bluestore database for an OSD. If this device is set, \"metadata\" device will refer specifically to block.db device. It is recommended to use a faster storage class for the metadata or wal device, with a slower device for the data. Otherwise, having a separate metadata device will not improve the performance. The bluestore partition has the following reference combinations supported by the ceph-volume utility: A single \"data\" device. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 storage : storageClassDeviceSets : - name : set1 count : 3 portable : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce A \"data\" device and a \"metadata\" device. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 storage : storageClassDeviceSets : - name : set1 count : 3 portable : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce - metadata : name : metadata spec : resources : requests : # Find the right size https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#sizing storage : 5Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, io1) storageClassName : io1 volumeMode : Block accessModes : - ReadWriteOnce A \"data\" device and a \"wal\" device. A WAL device can be used for BlueStore\u2019s internal journal or write-ahead log (block.wal), it is only useful to use a WAL device if the device is faster than the primary device (data device). There is no separate \"metadata\" device in this case, the data of main OSD block and block.db located in \"data\" device. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 storage : storageClassDeviceSets : - name : set1 count : 3 portable : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce - metadata : name : wal spec : resources : requests : # Find the right size https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#sizing storage : 5Gi # IMPORTANT: Change the storage class depending on your environment (e.g. 
local-storage, io1) storageClassName : io1 volumeMode : Block accessModes : - ReadWriteOnce A \"data\" device, a \"metadata\" device and a \"wal\" device. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 storage : storageClassDeviceSets : - name : set1 count : 3 portable : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce - metadata : name : metadata spec : resources : requests : # Find the right size https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#sizing storage : 5Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, io1) storageClassName : io1 volumeMode : Block accessModes : - ReadWriteOnce - metadata : name : wal spec : resources : requests : # Find the right size https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#sizing storage : 5Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, io1) storageClassName : io1 volumeMode : Block accessModes : - ReadWriteOnce To determine the size of the metadata block follow the official Ceph sizing guide . With the present configuration, each OSD will have its main block allocated a 10GB device as well a 5GB device to act as a bluestore database.","title":"Dedicated metadata and wal device for OSD on PVC"},{"location":"CRDs/Cluster/stretch-cluster/","text":"For environments that only have two failure domains available where data can be replicated, consider the case where one failure domain is down and the data is still fully available in the remaining failure domain. To support this scenario, Ceph has integrated support for \"stretch\" clusters. Rook requires three zones. Two zones (A and B) will each run all types of Rook pods, which we call the \"data\" zones. Two mons run in each of the two data zones, while two replicas of the data are in each zone for a total of four data replicas. The third zone (arbiter) runs a single mon. No other Rook or Ceph daemons need to be run in the arbiter zone. For this example, we assume the desired failure domain is a zone. Another failure domain can also be specified with a known topology node label which is already being used for OSD failure domains. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : dataDirHostPath : /var/lib/rook mon : # Five mons must be created for stretch mode count : 5 allowMultiplePerNode : false stretchCluster : failureDomainLabel : topology.kubernetes.io/zone subFailureDomain : host zones : - name : a arbiter : true - name : b - name : c cephVersion : # Stretch cluster is supported in Ceph Pacific or newer. image : quay.io/ceph/ceph:v17.2.6 allowUnsupported : true # Either storageClassDeviceSets or the storage section can be specified for creating OSDs. # This example uses all devices for simplicity. 
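# (Illustrative sketch only, not part of the original sample: instead of the
# "storage" section below, OSDs on PVCs could be requested with a
# storageClassDeviceSets section like the PVC examples earlier on this page.)
# storage:
#   storageClassDeviceSets:
#     - name: set1
#       count: 2
#       portable: false
#       volumeClaimTemplates:
#         - metadata:
#             name: data
#           spec:
#             resources:
#               requests:
#                 storage: 10Gi
#             storageClassName: local-storage  # assumption: use a class available in your cluster
#             volumeMode: Block
#             accessModes:
#               - ReadWriteOnce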
storage : useAllNodes : true useAllDevices : true deviceFilter : \"\" # OSD placement is expected to include the non-arbiter zones placement : osd : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : topology.kubernetes.io/zone operator : In values : - b - c For more details, see the Stretch Cluster design doc .","title":"Stretch Storage Cluster"},{"location":"CRDs/Object-Storage/ceph-object-realm-crd/","text":"Rook allows creation of a realm in a Ceph Object Multisite configuration through a CRD. The following settings are available for Ceph object store realms. Example \u00b6 1 2 3 4 5 6 7 8 9 apiVersion : ceph.rook.io/v1 kind : CephObjectRealm metadata : name : realm-a namespace : rook-ceph # This endpoint in this section needs is an endpoint from the master zone in the master zone group of realm-a. See object-multisite.md for more details. spec : pull : endpoint : http://10.2.105.133:80 Settings \u00b6 Metadata \u00b6 name : The name of the object realm to create namespace : The namespace of the Rook cluster where the object realm is created. Spec \u00b6 pull : This optional section is for the pulling the realm for another ceph cluster. endpoint : The endpoint in the realm from another ceph cluster you want to pull from. This endpoint must be in the master zone of the master zone group of the realm.","title":"CephObjectRealm CRD"},{"location":"CRDs/Object-Storage/ceph-object-realm-crd/#example","text":"1 2 3 4 5 6 7 8 9 apiVersion : ceph.rook.io/v1 kind : CephObjectRealm metadata : name : realm-a namespace : rook-ceph # This endpoint in this section needs is an endpoint from the master zone in the master zone group of realm-a. See object-multisite.md for more details. spec : pull : endpoint : http://10.2.105.133:80","title":"Example"},{"location":"CRDs/Object-Storage/ceph-object-realm-crd/#settings","text":"","title":"Settings"},{"location":"CRDs/Object-Storage/ceph-object-realm-crd/#metadata","text":"name : The name of the object realm to create namespace : The namespace of the Rook cluster where the object realm is created.","title":"Metadata"},{"location":"CRDs/Object-Storage/ceph-object-realm-crd/#spec","text":"pull : This optional section is for the pulling the realm for another ceph cluster. endpoint : The endpoint in the realm from another ceph cluster you want to pull from. This endpoint must be in the master zone of the master zone group of the realm.","title":"Spec"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/","text":"Rook allows creation and customization of object stores through the custom resource definitions (CRDs). The following settings are available for Ceph object stores. Example \u00b6 Erasure Coded \u00b6 Erasure coded pools can only be used with dataPools . The metadataPool must use a replicated pool. Note This sample requires at least 3 bluestore OSDs , with each OSD located on a different node . The OSDs must be located on different nodes, because the failureDomain is set to host and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : my-store namespace : rook-ceph spec : metadataPool : failureDomain : host replicated : size : 3 dataPool : failureDomain : host erasureCoded : dataChunks : 2 codingChunks : 1 preservePoolsOnDelete : true gateway : # sslCertificateRef: # caBundleRef: port : 80 # securePort: 443 instances : 1 # A key/value list of annotations annotations : # key: value placement : # nodeAffinity: # requiredDuringSchedulingIgnoredDuringExecution: # nodeSelectorTerms: # - matchExpressions: # - key: role # operator: In # values: # - rgw-node # tolerations: # - key: rgw-node # operator: Exists # podAffinity: # podAntiAffinity: # topologySpreadConstraints: resources : # limits: # cpu: \"500m\" # memory: \"1024Mi\" # requests: # cpu: \"500m\" # memory: \"1024Mi\" #zone: #name: zone-a Object Store Settings \u00b6 Metadata \u00b6 name : The name of the object store to create, which will be reflected in the pool and other resource names. namespace : The namespace of the Rook cluster where the object store is created. Pools \u00b6 The pools allow all of the settings defined in the Block Pool CRD spec. For more details, see the Block Pool CRD settings. In the example above, there must be at least three hosts (size 3) and at least three devices (2 data + 1 coding chunks) in the cluster. When the zone section is set pools with the object stores name will not be created since the object-store will the using the pools created by the ceph-object-zone. metadataPool : The settings used to create all of the object store metadata pools. Must use replication. dataPool : The settings to create the object store data pool. Can use replication or erasure coding. preservePoolsOnDelete : If it is set to 'true' the pools used to support the object store will remain when the object store will be deleted. This is a security measure to avoid accidental loss of data. It is set to 'false' by default. If not specified is also deemed as 'false'. Gateway Settings \u00b6 The gateway settings correspond to the RGW daemon settings. type : S3 is supported sslCertificateRef : If specified, this is the name of the Kubernetes secret( opaque or tls type) that contains the TLS certificate to be used for secure connections to the object store. If it is an opaque Kubernetes Secret, Rook will look in the secret provided at the cert key name. The value of the cert key must be in the format expected by the RGW service : \"The server key, server certificate, and any other CA or intermediate certificates be supplied in one file. Each of these items must be in PEM form.\" They are scenarios where the certificate DNS is set for a particular domain that does not include the local Kubernetes DNS, namely the object store DNS service endpoint. If adding the service DNS name to the certificate is not empty another key can be specified in the secret's data: insecureSkipVerify: true to skip the certificate verification. It is not recommended to enable this option since TLS is susceptible to machine-in-the-middle attacks unless custom verification is used. caBundleRef : If specified, this is the name of the Kubernetes secret (type opaque ) that contains additional custom ca-bundle to use. The secret must be in the same namespace as the Rook cluster. Rook will look in the secret provided at the cabundle key name. 
hostNetwork : Whether host networking is enabled for the rgw daemon. If not set, the network settings from the cluster CR will be applied. port : The port on which the Object service will be reachable. If host networking is enabled, the RGW daemons will also listen on that port. If running on SDN, the RGW daemon listening port will be 8080 internally. securePort : The secure port on which RGW pods will be listening. A TLS certificate must be specified either via sslCerticateRef or service.annotations instances : The number of pods that will be started to load balance this object store. externalRgwEndpoints : A list of IP addresses to connect to external existing Rados Gateways (works with external mode). This setting will be ignored if the CephCluster does not have external spec enabled. Refer to the external cluster section for more details. Multiple endpoints can be given, but for stability of ObjectBucketClaims, we highly recommend that users give only a single external RGW endpoint that is a load balancer that sends requests to the multiple RGWs. annotations : Key value pair list of annotations to add. labels : Key value pair list of labels to add. placement : The Kubernetes placement settings to determine where the RGW pods should be started in the cluster. resources : Set resource requests/limits for the Gateway Pod(s), see Resource Requirements/Limits . priorityClassName : Set priority class name for the Gateway Pod(s) service : The annotations to set on to the Kubernetes Service of RGW. The service serving cert feature supported in Openshift is enabled by the following example: 1 2 3 4 gateway : service : annotations : service.beta.openshift.io/serving-cert-secret-name :  Example of external rgw endpoints to connect to: 1 2 3 4 5 gateway : port : 80 externalRgwEndpoints : - ip : 192.168.39.182 # hostname: example.com This will create a service with the endpoint 192.168.39.182 on port 80 , pointing to the Ceph object external gateway. All the other settings from the gateway section will be ignored, except for securePort . Zone Settings \u00b6 The zone settings allow the object store to join custom created ceph-object-zone . name : the name of the ceph-object-zone the object store will be in. Runtime settings \u00b6 MIME types \u00b6 Rook provides a default mime.types file for each Ceph object store. This file is stored in a Kubernetes ConfigMap with the name rook-ceph-rgw--mime-types . For most users, the default file should suffice, however, the option is available to users to edit the mime.types file in the ConfigMap as they desire. Users may have their own special file types, and particularly security conscious users may wish to pare down the file to reduce the possibility of a file type execution attack. Rook will not overwrite an existing mime.types ConfigMap so that user modifications will not be destroyed. If the object store is destroyed and recreated, the ConfigMap will also be destroyed and created anew. Health settings \u00b6 Rook will be default monitor the state of the object store endpoints. The following CRD settings are available: healthCheck : main object store health monitoring section startupProbe : Disable, or override timing and threshold values of the object gateway startup probe. readinessProbe : Disable, or override timing and threshold values of the object gateway readiness probe. 
Here is a complete example: 1 2 3 4 5 6 7 healthCheck : startupProbe : disabled : false readinessProbe : disabled : false periodSeconds : 5 failureThreshold : 2 You can monitor the health of a CephObjectStore by monitoring the gateway deployments it creates. The primary deployment created is named rook-ceph-rgw--a where store-name is the name of the CephObjectStore (don't forget the -a at the end). Security settings \u00b6 Ceph RGW supports Server Side Encryption as defined in AWS S3 protocol with three different modes: AWS-SSE:C, AWS-SSE:KMS and AWS-SSE:S3. The last two modes require a Key Management System (KMS) like HashiCorp Vault. Currently, Vault is the only supported KMS backend for CephObjectStore. Refer to the Vault KMS section for details about Vault. If these settings are defined, then RGW will establish a connection between Vault and whenever S3 client sends request with Server Side Encryption. Ceph's Vault documentation has more details. The security section contains settings related to KMS encryption of the RGW. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 security : kms : connectionDetails : KMS_PROVIDER : vault VAULT_ADDR : http://vault.default.svc.cluster.local:8200 VAULT_BACKEND_PATH : rgw VAULT_SECRET_ENGINE : kv VAULT_BACKEND : v2 # name of the k8s secret containing the kms authentication token tokenSecretName : rgw-vault-kms-token s3 : connectionDetails : KMS_PROVIDER : vault VAULT_ADDR : http://vault.default.svc.cluster.local:8200 VAULT_BACKEND_PATH : rgw VAULT_SECRET_ENGINE : transit # name of the k8s secret containing the kms authentication token tokenSecretName : rgw-vault-s3-token For RGW, please note the following: VAULT_SECRET_ENGINE : the secret engine which Vault should use. Currently supports kv and transit . AWS-SSE:KMS supports transit engine and kv engine version 2. AWS-SSE:S3 only supports transit engine. The Storage administrator needs to create a secret in the Vault server so that S3 clients use that key for encryption for AWS-SSE:KMS 1 2 vault kv put rook/ key=$(openssl rand -base64 32) # kv engine vault write -f transit/keys/ exportable=true # transit engine TLS authentication with custom certificates between Vault and CephObjectStore RGWs are supported from ceph v16.2.6 onwards tokenSecretName can be (and often will be) the same for both kms and s3 configurations. AWS-SSE:S3 requires Ceph Quincy (v17.2.3) and later. Deleting a CephObjectStore \u00b6 During deletion of a CephObjectStore resource, Rook protects against accidental or premature destruction of user data by blocking deletion if there are any object buckets in the object store being deleted. Buckets may have been created by users or by ObjectBucketClaims. For deletion to be successful, all buckets in the object store must be removed. This may require manual deletion or removal of all ObjectBucketClaims. Alternately, the cephobjectstore.ceph.rook.io finalizer on the CephObjectStore can be removed to remove the Kubernetes Custom Resource, but the Ceph pools which store the data will not be removed in this case. Rook will warn about which buckets are blocking deletion in three ways: An event will be registered on the CephObjectStore resource A status condition will be added to the CephObjectStore resource An error will be added to the Rook Ceph Operator log If the CephObjectStore is configured in a multisite setup the above conditions are applicable only to stores that belong to a single master zone. Otherwise the conditions are ignored. 
Even if the store is removed the user can access the data from a peer object store.","title":"CephObjectStore CRD"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#example","text":"","title":"Example"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#erasure-coded","text":"Erasure coded pools can only be used with dataPools . The metadataPool must use a replicated pool. Note This sample requires at least 3 bluestore OSDs , with each OSD located on a different node . The OSDs must be located on different nodes, because the failureDomain is set to host and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : my-store namespace : rook-ceph spec : metadataPool : failureDomain : host replicated : size : 3 dataPool : failureDomain : host erasureCoded : dataChunks : 2 codingChunks : 1 preservePoolsOnDelete : true gateway : # sslCertificateRef: # caBundleRef: port : 80 # securePort: 443 instances : 1 # A key/value list of annotations annotations : # key: value placement : # nodeAffinity: # requiredDuringSchedulingIgnoredDuringExecution: # nodeSelectorTerms: # - matchExpressions: # - key: role # operator: In # values: # - rgw-node # tolerations: # - key: rgw-node # operator: Exists # podAffinity: # podAntiAffinity: # topologySpreadConstraints: resources : # limits: # cpu: \"500m\" # memory: \"1024Mi\" # requests: # cpu: \"500m\" # memory: \"1024Mi\" #zone: #name: zone-a","title":"Erasure Coded"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#object-store-settings","text":"","title":"Object Store Settings"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#metadata","text":"name : The name of the object store to create, which will be reflected in the pool and other resource names. namespace : The namespace of the Rook cluster where the object store is created.","title":"Metadata"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#pools","text":"The pools allow all of the settings defined in the Block Pool CRD spec. For more details, see the Block Pool CRD settings. In the example above, there must be at least three hosts (size 3) and at least three devices (2 data + 1 coding chunks) in the cluster. When the zone section is set pools with the object stores name will not be created since the object-store will the using the pools created by the ceph-object-zone. metadataPool : The settings used to create all of the object store metadata pools. Must use replication. dataPool : The settings to create the object store data pool. Can use replication or erasure coding. preservePoolsOnDelete : If it is set to 'true' the pools used to support the object store will remain when the object store will be deleted. This is a security measure to avoid accidental loss of data. It is set to 'false' by default. If not specified is also deemed as 'false'.","title":"Pools"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#gateway-settings","text":"The gateway settings correspond to the RGW daemon settings. type : S3 is supported sslCertificateRef : If specified, this is the name of the Kubernetes secret( opaque or tls type) that contains the TLS certificate to be used for secure connections to the object store. If it is an opaque Kubernetes Secret, Rook will look in the secret provided at the cert key name. 
The value of the cert key must be in the format expected by the RGW service : \"The server key, server certificate, and any other CA or intermediate certificates be supplied in one file. Each of these items must be in PEM form.\" They are scenarios where the certificate DNS is set for a particular domain that does not include the local Kubernetes DNS, namely the object store DNS service endpoint. If adding the service DNS name to the certificate is not empty another key can be specified in the secret's data: insecureSkipVerify: true to skip the certificate verification. It is not recommended to enable this option since TLS is susceptible to machine-in-the-middle attacks unless custom verification is used. caBundleRef : If specified, this is the name of the Kubernetes secret (type opaque ) that contains additional custom ca-bundle to use. The secret must be in the same namespace as the Rook cluster. Rook will look in the secret provided at the cabundle key name. hostNetwork : Whether host networking is enabled for the rgw daemon. If not set, the network settings from the cluster CR will be applied. port : The port on which the Object service will be reachable. If host networking is enabled, the RGW daemons will also listen on that port. If running on SDN, the RGW daemon listening port will be 8080 internally. securePort : The secure port on which RGW pods will be listening. A TLS certificate must be specified either via sslCerticateRef or service.annotations instances : The number of pods that will be started to load balance this object store. externalRgwEndpoints : A list of IP addresses to connect to external existing Rados Gateways (works with external mode). This setting will be ignored if the CephCluster does not have external spec enabled. Refer to the external cluster section for more details. Multiple endpoints can be given, but for stability of ObjectBucketClaims, we highly recommend that users give only a single external RGW endpoint that is a load balancer that sends requests to the multiple RGWs. annotations : Key value pair list of annotations to add. labels : Key value pair list of labels to add. placement : The Kubernetes placement settings to determine where the RGW pods should be started in the cluster. resources : Set resource requests/limits for the Gateway Pod(s), see Resource Requirements/Limits . priorityClassName : Set priority class name for the Gateway Pod(s) service : The annotations to set on to the Kubernetes Service of RGW. The service serving cert feature supported in Openshift is enabled by the following example: 1 2 3 4 gateway : service : annotations : service.beta.openshift.io/serving-cert-secret-name :  Example of external rgw endpoints to connect to: 1 2 3 4 5 gateway : port : 80 externalRgwEndpoints : - ip : 192.168.39.182 # hostname: example.com This will create a service with the endpoint 192.168.39.182 on port 80 , pointing to the Ceph object external gateway. All the other settings from the gateway section will be ignored, except for securePort .","title":"Gateway Settings"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#zone-settings","text":"The zone settings allow the object store to join custom created ceph-object-zone . 
name : the name of the ceph-object-zone the object store will be in.","title":"Zone Settings"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#runtime-settings","text":"","title":"Runtime settings"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#mime-types","text":"Rook provides a default mime.types file for each Ceph object store. This file is stored in a Kubernetes ConfigMap with the name rook-ceph-rgw--mime-types . For most users, the default file should suffice, however, the option is available to users to edit the mime.types file in the ConfigMap as they desire. Users may have their own special file types, and particularly security conscious users may wish to pare down the file to reduce the possibility of a file type execution attack. Rook will not overwrite an existing mime.types ConfigMap so that user modifications will not be destroyed. If the object store is destroyed and recreated, the ConfigMap will also be destroyed and created anew.","title":"MIME types"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#health-settings","text":"Rook will be default monitor the state of the object store endpoints. The following CRD settings are available: healthCheck : main object store health monitoring section startupProbe : Disable, or override timing and threshold values of the object gateway startup probe. readinessProbe : Disable, or override timing and threshold values of the object gateway readiness probe. Here is a complete example: 1 2 3 4 5 6 7 healthCheck : startupProbe : disabled : false readinessProbe : disabled : false periodSeconds : 5 failureThreshold : 2 You can monitor the health of a CephObjectStore by monitoring the gateway deployments it creates. The primary deployment created is named rook-ceph-rgw--a where store-name is the name of the CephObjectStore (don't forget the -a at the end).","title":"Health settings"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#security-settings","text":"Ceph RGW supports Server Side Encryption as defined in AWS S3 protocol with three different modes: AWS-SSE:C, AWS-SSE:KMS and AWS-SSE:S3. The last two modes require a Key Management System (KMS) like HashiCorp Vault. Currently, Vault is the only supported KMS backend for CephObjectStore. Refer to the Vault KMS section for details about Vault. If these settings are defined, then RGW will establish a connection between Vault and whenever S3 client sends request with Server Side Encryption. Ceph's Vault documentation has more details. The security section contains settings related to KMS encryption of the RGW. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 security : kms : connectionDetails : KMS_PROVIDER : vault VAULT_ADDR : http://vault.default.svc.cluster.local:8200 VAULT_BACKEND_PATH : rgw VAULT_SECRET_ENGINE : kv VAULT_BACKEND : v2 # name of the k8s secret containing the kms authentication token tokenSecretName : rgw-vault-kms-token s3 : connectionDetails : KMS_PROVIDER : vault VAULT_ADDR : http://vault.default.svc.cluster.local:8200 VAULT_BACKEND_PATH : rgw VAULT_SECRET_ENGINE : transit # name of the k8s secret containing the kms authentication token tokenSecretName : rgw-vault-s3-token For RGW, please note the following: VAULT_SECRET_ENGINE : the secret engine which Vault should use. Currently supports kv and transit . AWS-SSE:KMS supports transit engine and kv engine version 2. AWS-SSE:S3 only supports transit engine. 
The Storage administrator needs to create a secret in the Vault server so that S3 clients use that key for encryption for AWS-SSE:KMS 1 2 vault kv put rook/ key=$(openssl rand -base64 32) # kv engine vault write -f transit/keys/ exportable=true # transit engine TLS authentication with custom certificates between Vault and CephObjectStore RGWs are supported from ceph v16.2.6 onwards tokenSecretName can be (and often will be) the same for both kms and s3 configurations. AWS-SSE:S3 requires Ceph Quincy (v17.2.3) and later.","title":"Security settings"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#deleting-a-cephobjectstore","text":"During deletion of a CephObjectStore resource, Rook protects against accidental or premature destruction of user data by blocking deletion if there are any object buckets in the object store being deleted. Buckets may have been created by users or by ObjectBucketClaims. For deletion to be successful, all buckets in the object store must be removed. This may require manual deletion or removal of all ObjectBucketClaims. Alternately, the cephobjectstore.ceph.rook.io finalizer on the CephObjectStore can be removed to remove the Kubernetes Custom Resource, but the Ceph pools which store the data will not be removed in this case. Rook will warn about which buckets are blocking deletion in three ways: An event will be registered on the CephObjectStore resource A status condition will be added to the CephObjectStore resource An error will be added to the Rook Ceph Operator log If the CephObjectStore is configured in a multisite setup the above conditions are applicable only to stores that belong to a single master zone. Otherwise the conditions are ignored. Even if the store is removed the user can access the data from a peer object store.","title":"Deleting a CephObjectStore"},{"location":"CRDs/Object-Storage/ceph-object-store-user-crd/","text":"Rook allows creation and customization of object store users through the custom resource definitions (CRDs). The following settings are available for Ceph object store users. Example \u00b6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 apiVersion : ceph.rook.io/v1 kind : CephObjectStoreUser metadata : name : my-user namespace : rook-ceph spec : store : my-store displayName : my-display-name quotas : maxBuckets : 100 maxSize : 10G maxObjects : 10000 capabilities : user : \"*\" bucket : \"*\" Object Store User Settings \u00b6 Metadata \u00b6 name : The name of the object store user to create, which will be reflected in the secret and other resource names. namespace : The namespace of the Rook cluster where the object store user is created. Spec \u00b6 store : The object store in which the user will be created. This matches the name of the objectstore CRD. displayName : The display name which will be passed to the radosgw-admin user create command. quotas : This represents quota limitation can be set on the user. Please refer here for details. maxBuckets : The maximum bucket limit for the user. maxSize : Maximum size limit of all objects across all the user's buckets. maxObjects : Maximum number of objects across all the user's buckets. capabilities : Ceph allows users to be given additional permissions. Due to missing APIs in go-ceph for updating the user capabilities, this setting can currently only be used during the creation of the object store user. If a user's capabilities need modified, the user must be deleted and re-created. See the Ceph docs for more info. 
Rook supports adding read , write , read, write , or * permissions for the following resources: users buckets usage metadata zone roles info amz-cache bilog mdlog datalog user-policy odic-provider ratelimit","title":"CephObjectStoreUser CRD"},{"location":"CRDs/Object-Storage/ceph-object-store-user-crd/#example","text":"1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 apiVersion : ceph.rook.io/v1 kind : CephObjectStoreUser metadata : name : my-user namespace : rook-ceph spec : store : my-store displayName : my-display-name quotas : maxBuckets : 100 maxSize : 10G maxObjects : 10000 capabilities : user : \"*\" bucket : \"*\"","title":"Example"},{"location":"CRDs/Object-Storage/ceph-object-store-user-crd/#object-store-user-settings","text":"","title":"Object Store User Settings"},{"location":"CRDs/Object-Storage/ceph-object-store-user-crd/#metadata","text":"name : The name of the object store user to create, which will be reflected in the secret and other resource names. namespace : The namespace of the Rook cluster where the object store user is created.","title":"Metadata"},{"location":"CRDs/Object-Storage/ceph-object-store-user-crd/#spec","text":"store : The object store in which the user will be created. This matches the name of the objectstore CRD. displayName : The display name which will be passed to the radosgw-admin user create command. quotas : This represents quota limitation can be set on the user. Please refer here for details. maxBuckets : The maximum bucket limit for the user. maxSize : Maximum size limit of all objects across all the user's buckets. maxObjects : Maximum number of objects across all the user's buckets. capabilities : Ceph allows users to be given additional permissions. Due to missing APIs in go-ceph for updating the user capabilities, this setting can currently only be used during the creation of the object store user. If a user's capabilities need modified, the user must be deleted and re-created. See the Ceph docs for more info. Rook supports adding read , write , read, write , or * permissions for the following resources: users buckets usage metadata zone roles info amz-cache bilog mdlog datalog user-policy odic-provider ratelimit","title":"Spec"},{"location":"CRDs/Object-Storage/ceph-object-zone-crd/","text":"Rook allows creation of zones in a ceph cluster for a Ceph Object Multisite configuration through a CRD. The following settings are available for Ceph object store zones. Example \u00b6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 apiVersion : ceph.rook.io/v1 kind : CephObjectZone metadata : name : zone-a namespace : rook-ceph spec : zoneGroup : zonegroup-a metadataPool : failureDomain : host replicated : size : 3 dataPool : failureDomain : osd erasureCoded : dataChunks : 2 codingChunks : 1 customEndpoints : - \"http://rgw-a.fqdn\" preservePoolsOnDelete : true Settings \u00b6 Metadata \u00b6 name : The name of the object zone to create namespace : The namespace of the Rook cluster where the object zone is created. Pools \u00b6 The pools allow all of the settings defined in the Pool CRD spec. For more details, see the Pool CRD settings. In the example above, there must be at least three hosts (size 3) and at least three devices (2 data + 1 coding chunks) in the cluster. Spec \u00b6 zonegroup : The object zonegroup in which the zone will be created. This matches the name of the object zone group CRD. metadataPool : The settings used to create all of the object store metadata pools. Must use replication. dataPool : The settings to create the object store data pool. 
Can use replication or erasure coding. customEndpoints : Specify the endpoint(s) that will accept multisite replication traffic for this zone. You may include the port in the definition if necessary. For example: \" https://my-object-store.my-domain.net:443 \". By default, Rook will set this to the DNS name of the ClusterIP Service created for the CephObjectStore that corresponds to this zone. Most multisite configurations will not exist within the same Kubernetes cluster, meaning the default value will not be useful. In these cases, you will be required to create your own custom ingress resource for the CephObjectStore in order to make the zone available for replication. You must add the endpoint for your custom ingress resource to this list to allow the store to accept replication traffic. In the case of multiple stores (or multiple endpoints for a single store), you are not required to put all endpoints in this list. Only specify the endpoints that should be used for replication traffic. If you update customEndpoints to return to an empty list, you must the Rook operator to automatically add the CephObjectStore service endpoint to Ceph's internal configuration. preservePoolsOnDelete : If it is set to 'true' the pools used to support the CephObjectZone will remain when it is deleted. This is a security measure to avoid accidental loss of data. It is set to 'true' by default. It is better to check whether data synced with other peer zones before triggering the deletion to avoid accidental loss of data via steps mentioned here When deleting a CephObjectZone, deletion will be blocked until all CephObjectStores belonging to the zone are removed.","title":"CephObjectZone CRD"},{"location":"CRDs/Object-Storage/ceph-object-zone-crd/#example","text":"1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 apiVersion : ceph.rook.io/v1 kind : CephObjectZone metadata : name : zone-a namespace : rook-ceph spec : zoneGroup : zonegroup-a metadataPool : failureDomain : host replicated : size : 3 dataPool : failureDomain : osd erasureCoded : dataChunks : 2 codingChunks : 1 customEndpoints : - \"http://rgw-a.fqdn\" preservePoolsOnDelete : true","title":"Example"},{"location":"CRDs/Object-Storage/ceph-object-zone-crd/#settings","text":"","title":"Settings"},{"location":"CRDs/Object-Storage/ceph-object-zone-crd/#metadata","text":"name : The name of the object zone to create namespace : The namespace of the Rook cluster where the object zone is created.","title":"Metadata"},{"location":"CRDs/Object-Storage/ceph-object-zone-crd/#pools","text":"The pools allow all of the settings defined in the Pool CRD spec. For more details, see the Pool CRD settings. In the example above, there must be at least three hosts (size 3) and at least three devices (2 data + 1 coding chunks) in the cluster.","title":"Pools"},{"location":"CRDs/Object-Storage/ceph-object-zone-crd/#spec","text":"zonegroup : The object zonegroup in which the zone will be created. This matches the name of the object zone group CRD. metadataPool : The settings used to create all of the object store metadata pools. Must use replication. dataPool : The settings to create the object store data pool. Can use replication or erasure coding. customEndpoints : Specify the endpoint(s) that will accept multisite replication traffic for this zone. You may include the port in the definition if necessary. For example: \" https://my-object-store.my-domain.net:443 \". 
By default, Rook will set this to the DNS name of the ClusterIP Service created for the CephObjectStore that corresponds to this zone. Most multisite configurations will not exist within the same Kubernetes cluster, meaning the default value will not be useful. In these cases, you will be required to create your own custom ingress resource for the CephObjectStore in order to make the zone available for replication. You must add the endpoint for your custom ingress resource to this list to allow the store to accept replication traffic. In the case of multiple stores (or multiple endpoints for a single store), you are not required to put all endpoints in this list. Only specify the endpoints that should be used for replication traffic. If you update customEndpoints to return to an empty list, you must the Rook operator to automatically add the CephObjectStore service endpoint to Ceph's internal configuration. preservePoolsOnDelete : If it is set to 'true' the pools used to support the CephObjectZone will remain when it is deleted. This is a security measure to avoid accidental loss of data. It is set to 'true' by default. It is better to check whether data synced with other peer zones before triggering the deletion to avoid accidental loss of data via steps mentioned here When deleting a CephObjectZone, deletion will be blocked until all CephObjectStores belonging to the zone are removed.","title":"Spec"},{"location":"CRDs/Object-Storage/ceph-object-zonegroup-crd/","text":"Rook allows creation of zone groups in a Ceph Object Multisite configuration through a CRD. The following settings are available for Ceph object store zone groups. Example \u00b6 1 2 3 4 5 6 7 apiVersion : ceph.rook.io/v1 kind : CephObjectZoneGroup metadata : name : zonegroup-a namespace : rook-ceph spec : realm : realm-a Settings \u00b6 Metadata \u00b6 name : The name of the object zone group to create namespace : The namespace of the Rook cluster where the object zone group is created. Spec \u00b6 realm : The object realm in which the zone group will be created. This matches the name of the object realm CRD.","title":"CephObjectZoneGroup CRD"},{"location":"CRDs/Object-Storage/ceph-object-zonegroup-crd/#example","text":"1 2 3 4 5 6 7 apiVersion : ceph.rook.io/v1 kind : CephObjectZoneGroup metadata : name : zonegroup-a namespace : rook-ceph spec : realm : realm-a","title":"Example"},{"location":"CRDs/Object-Storage/ceph-object-zonegroup-crd/#settings","text":"","title":"Settings"},{"location":"CRDs/Object-Storage/ceph-object-zonegroup-crd/#metadata","text":"name : The name of the object zone group to create namespace : The namespace of the Rook cluster where the object zone group is created.","title":"Metadata"},{"location":"CRDs/Object-Storage/ceph-object-zonegroup-crd/#spec","text":"realm : The object realm in which the zone group will be created. This matches the name of the object realm CRD.","title":"Spec"},{"location":"CRDs/Shared-Filesystem/ceph-filesystem-crd/","text":"Rook allows creation and customization of shared filesystems through the custom resource definitions (CRDs). The following settings are available for Ceph filesystems. Examples \u00b6 Replicated \u00b6 Note This sample requires at least 1 OSD per node , with each OSD located on 3 different nodes . Each OSD must be located on a different node, because both of the defined pools set the failureDomain to host and the replicated.size to 3 . The failureDomain can also be set to another location type (e.g. 
rack ), if it has been added as a location in the Storage Selection Settings . 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 apiVersion : ceph.rook.io/v1 kind : CephFilesystem metadata : name : myfs namespace : rook-ceph spec : metadataPool : failureDomain : host replicated : size : 3 dataPools : - name : replicated failureDomain : host replicated : size : 3 preserveFilesystemOnDelete : true metadataServer : activeCount : 1 activeStandby : true # A key/value list of annotations annotations : # key: value placement : # nodeAffinity: # requiredDuringSchedulingIgnoredDuringExecution: # nodeSelectorTerms: # - matchExpressions: # - key: role # operator: In # values: # - mds-node # tolerations: # - key: mds-node # operator: Exists # podAffinity: # podAntiAffinity: # topologySpreadConstraints: resources : # limits: # cpu: \"500m\" # memory: \"1024Mi\" # requests: # cpu: \"500m\" # memory: \"1024Mi\" (These definitions can also be found in the filesystem.yaml file) Erasure Coded \u00b6 Erasure coded pools require the OSDs to use bluestore for the configured storeType . Additionally, erasure coded pools can only be used with dataPools . The metadataPool must use a replicated pool. Note This sample requires at least 3 bluestore OSDs , with each OSD located on a different node . The OSDs must be located on different nodes, because the failureDomain will be set to host by default, and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 apiVersion : ceph.rook.io/v1 kind : CephFilesystem metadata : name : myfs-ec namespace : rook-ceph spec : metadataPool : replicated : size : 3 dataPools : - name : default replicated : size : 3 - name : erasurecoded erasureCoded : dataChunks : 2 codingChunks : 1 metadataServer : activeCount : 1 activeStandby : true IMPORTANT : For erasure coded pools, we have to create a replicated pool as the default data pool and an erasure-coded pool as a secondary pool. (These definitions can also be found in the filesystem-ec.yaml file. Also see an example in the storageclass-ec.yaml for how to configure the volume.) Filesystem Settings \u00b6 Metadata \u00b6 name : The name of the filesystem to create, which will be reflected in the pool and other resource names. namespace : The namespace of the Rook cluster where the filesystem is created. Pools \u00b6 The pools allow all of the settings defined in the Pool CRD spec. For more details, see the Pool CRD settings. In the example above, there must be at least three hosts (size 3) and at least eight devices (6 data + 2 coding chunks) in the cluster. metadataPool : The settings used to create the filesystem metadata pool. Must use replication. dataPools : The settings to create the filesystem data pools. Optionally (and we highly recommend), a pool name can be specified with the name field to override the default generated name; see more below. If multiple pools are specified, Rook will add the pools to the filesystem. Assigning users or files to a pool is left as an exercise for the reader with the CephFS documentation . The data pools can use replication or erasure coding. If erasure coding pools are specified, the cluster must be running with bluestore enabled on the OSDs. name : (optional, and highly recommended) Override the default generated name of the pool. The final pool name will consist of the filesystem name and pool name, e.g., - . 
We highly recommend to specify name to prevent issues that can arise from modifying the spec in a way that causes Rook to lose the original pool ordering. preserveFilesystemOnDelete : If it is set to 'true' the filesystem will remain when the CephFilesystem resource is deleted. This is a security measure to avoid loss of data if the CephFilesystem resource is deleted accidentally. The default value is 'false'. This option replaces preservePoolsOnDelete which should no longer be set. (deprecated) preservePoolsOnDelete : This option is replaced by the above preserveFilesystemOnDelete . For backwards compatibility and upgradeability, if this is set to 'true', Rook will treat preserveFilesystemOnDelete as being set to 'true'. Metadata Server Settings \u00b6 The metadata server settings correspond to the MDS daemon settings. activeCount : The number of active MDS instances. As load increases, CephFS will automatically partition the filesystem across the MDS instances. Rook will create double the number of MDS instances as requested by the active count. The extra instances will be in standby mode for failover. activeStandby : If true, the extra MDS instances will be in active standby mode and will keep a warm cache of the filesystem metadata for faster failover. The instances will be assigned by CephFS in failover pairs. If false, the extra MDS instances will all be on passive standby mode and will not maintain a warm cache of the metadata. mirroring : Sets up mirroring of the filesystem enabled : whether mirroring is enabled on that filesystem (default: false) peers : to configure mirroring peers secretNames : a list of peers to connect to. Currently (Ceph Pacific release) only a single peer is supported where a peer represents a Ceph cluster. snapshotSchedules : schedule(s) snapshot.One or more schedules are supported. path : filesystem source path to take the snapshot on interval : frequency of the snapshots. The interval can be specified in days, hours, or minutes using d, h, m suffix respectively. startTime : optional, determines at what time the snapshot process starts, specified using the ISO 8601 time format. snapshotRetention : allow to manage retention policies: path : filesystem source path to apply the retention on duration : annotations : Key value pair list of annotations to add. labels : Key value pair list of labels to add. placement : The mds pods can be given standard Kubernetes placement restrictions with nodeAffinity , tolerations , podAffinity , and podAntiAffinity similar to placement defined for daemons configured by the cluster CRD . resources : Set resource requests/limits for the Filesystem MDS Pod(s), see MDS Resources Configuration Settings priorityClassName : Set priority class name for the Filesystem MDS Pod(s) startupProbe : Disable, or override timing and threshold values of the Filesystem MDS startup probe livenessProbe : Disable, or override timing and threshold values of the Filesystem MDS livenessProbe. MDS Resources Configuration Settings \u00b6 The format of the resource requests/limits structure is the same as described in the Ceph Cluster CRD documentation . If the memory resource limit is declared Rook will automatically set the MDS configuration mds_cache_memory_limit . The configuration value is calculated with the aim that the actual MDS memory consumption remains consistent with the MDS pods' resource declaration. 
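For example, a hypothetical metadataServer fragment (values are illustrative, not defaults) that declares a memory limit could look like the sketch below; once the limit is declared, Rook derives mds_cache_memory_limit from it so that the daemon stays within the pod's declaration:

```yaml
# Hypothetical CephFilesystem spec fragment; 4Gi mirrors the minimum Rook
# recommends for MDS daemons and is not a required value.
metadataServer:
  activeCount: 1
  activeStandby: true
  resources:
    requests:
      cpu: "500m"
      memory: "4Gi"
    limits:
      memory: "4Gi"   # used by Rook to set mds_cache_memory_limit
```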
In order to provide the best possible experience running Ceph in containers, Rook internally recommends the memory for MDS daemons to be at least 4096MB. If a user configures a limit or request value that is too low, Rook will still run the pod(s) and print a warning to the operator log.","title":"CephFilesystem CRD"},{"location":"CRDs/Shared-Filesystem/ceph-filesystem-crd/#examples","text":"","title":"Examples"},{"location":"CRDs/Shared-Filesystem/ceph-filesystem-crd/#replicated","text":"Note This sample requires at least 1 OSD per node , with each OSD located on 3 different nodes . Each OSD must be located on a different node, because both of the defined pools set the failureDomain to host and the replicated.size to 3 . The failureDomain can also be set to another location type (e.g. rack ), if it has been added as a location in the Storage Selection Settings . 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 apiVersion : ceph.rook.io/v1 kind : CephFilesystem metadata : name : myfs namespace : rook-ceph spec : metadataPool : failureDomain : host replicated : size : 3 dataPools : - name : replicated failureDomain : host replicated : size : 3 preserveFilesystemOnDelete : true metadataServer : activeCount : 1 activeStandby : true # A key/value list of annotations annotations : # key: value placement : # nodeAffinity: # requiredDuringSchedulingIgnoredDuringExecution: # nodeSelectorTerms: # - matchExpressions: # - key: role # operator: In # values: # - mds-node # tolerations: # - key: mds-node # operator: Exists # podAffinity: # podAntiAffinity: # topologySpreadConstraints: resources : # limits: # cpu: \"500m\" # memory: \"1024Mi\" # requests: # cpu: \"500m\" # memory: \"1024Mi\" (These definitions can also be found in the filesystem.yaml file)","title":"Replicated"},{"location":"CRDs/Shared-Filesystem/ceph-filesystem-crd/#erasure-coded","text":"Erasure coded pools require the OSDs to use bluestore for the configured storeType . Additionally, erasure coded pools can only be used with dataPools . The metadataPool must use a replicated pool. Note This sample requires at least 3 bluestore OSDs , with each OSD located on a different node . The OSDs must be located on different nodes, because the failureDomain will be set to host by default, and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 apiVersion : ceph.rook.io/v1 kind : CephFilesystem metadata : name : myfs-ec namespace : rook-ceph spec : metadataPool : replicated : size : 3 dataPools : - name : default replicated : size : 3 - name : erasurecoded erasureCoded : dataChunks : 2 codingChunks : 1 metadataServer : activeCount : 1 activeStandby : true IMPORTANT : For erasure coded pools, we have to create a replicated pool as the default data pool and an erasure-coded pool as a secondary pool. (These definitions can also be found in the filesystem-ec.yaml file. Also see an example in the storageclass-ec.yaml for how to configure the volume.)","title":"Erasure Coded"},{"location":"CRDs/Shared-Filesystem/ceph-filesystem-crd/#filesystem-settings","text":"","title":"Filesystem Settings"},{"location":"CRDs/Shared-Filesystem/ceph-filesystem-crd/#metadata","text":"name : The name of the filesystem to create, which will be reflected in the pool and other resource names. 
namespace : The namespace of the Rook cluster where the filesystem is created.","title":"Metadata"},{"location":"CRDs/Shared-Filesystem/ceph-filesystem-crd/#pools","text":"The pools allow all of the settings defined in the Pool CRD spec. For more details, see the Pool CRD settings. In the example above, there must be at least three hosts (size 3) and at least eight devices (6 data + 2 coding chunks) in the cluster. metadataPool : The settings used to create the filesystem metadata pool. Must use replication. dataPools : The settings to create the filesystem data pools. Optionally (and we highly recommend), a pool name can be specified with the name field to override the default generated name; see more below. If multiple pools are specified, Rook will add the pools to the filesystem. Assigning users or files to a pool is left as an exercise for the reader with the CephFS documentation . The data pools can use replication or erasure coding. If erasure coding pools are specified, the cluster must be running with bluestore enabled on the OSDs. name : (optional, and highly recommended) Override the default generated name of the pool. The final pool name will consist of the filesystem name and pool name, e.g., - . We highly recommend to specify name to prevent issues that can arise from modifying the spec in a way that causes Rook to lose the original pool ordering. preserveFilesystemOnDelete : If it is set to 'true' the filesystem will remain when the CephFilesystem resource is deleted. This is a security measure to avoid loss of data if the CephFilesystem resource is deleted accidentally. The default value is 'false'. This option replaces preservePoolsOnDelete which should no longer be set. (deprecated) preservePoolsOnDelete : This option is replaced by the above preserveFilesystemOnDelete . For backwards compatibility and upgradeability, if this is set to 'true', Rook will treat preserveFilesystemOnDelete as being set to 'true'.","title":"Pools"},{"location":"CRDs/Shared-Filesystem/ceph-filesystem-crd/#metadata-server-settings","text":"The metadata server settings correspond to the MDS daemon settings. activeCount : The number of active MDS instances. As load increases, CephFS will automatically partition the filesystem across the MDS instances. Rook will create double the number of MDS instances as requested by the active count. The extra instances will be in standby mode for failover. activeStandby : If true, the extra MDS instances will be in active standby mode and will keep a warm cache of the filesystem metadata for faster failover. The instances will be assigned by CephFS in failover pairs. If false, the extra MDS instances will all be on passive standby mode and will not maintain a warm cache of the metadata. mirroring : Sets up mirroring of the filesystem enabled : whether mirroring is enabled on that filesystem (default: false) peers : to configure mirroring peers secretNames : a list of peers to connect to. Currently (Ceph Pacific release) only a single peer is supported where a peer represents a Ceph cluster. snapshotSchedules : schedule(s) snapshot.One or more schedules are supported. path : filesystem source path to take the snapshot on interval : frequency of the snapshots. The interval can be specified in days, hours, or minutes using d, h, m suffix respectively. startTime : optional, determines at what time the snapshot process starts, specified using the ISO 8601 time format. 
snapshotRetention : allow to manage retention policies: path : filesystem source path to apply the retention on duration : annotations : Key value pair list of annotations to add. labels : Key value pair list of labels to add. placement : The mds pods can be given standard Kubernetes placement restrictions with nodeAffinity , tolerations , podAffinity , and podAntiAffinity similar to placement defined for daemons configured by the cluster CRD . resources : Set resource requests/limits for the Filesystem MDS Pod(s), see MDS Resources Configuration Settings priorityClassName : Set priority class name for the Filesystem MDS Pod(s) startupProbe : Disable, or override timing and threshold values of the Filesystem MDS startup probe livenessProbe : Disable, or override timing and threshold values of the Filesystem MDS livenessProbe.","title":"Metadata Server Settings"},{"location":"CRDs/Shared-Filesystem/ceph-filesystem-crd/#mds-resources-configuration-settings","text":"The format of the resource requests/limits structure is the same as described in the Ceph Cluster CRD documentation . If the memory resource limit is declared Rook will automatically set the MDS configuration mds_cache_memory_limit . The configuration value is calculated with the aim that the actual MDS memory consumption remains consistent with the MDS pods' resource declaration. In order to provide the best possible experience running Ceph in containers, Rook internally recommends the memory for MDS daemons to be at least 4096MB. If a user configures a limit or request value that is too low, Rook will still run the pod(s) and print a warning to the operator log.","title":"MDS Resources Configuration Settings"},{"location":"CRDs/Shared-Filesystem/ceph-fs-mirror-crd/","text":"This guide assumes you have created a Rook cluster as explained in the main Quickstart guide Rook allows creation and updating the fs-mirror daemon through the custom resource definitions (CRDs). CephFS will support asynchronous replication of snapshots to a remote (different Ceph cluster) CephFS file system via cephfs-mirror tool. Snapshots are synchronized by mirroring snapshot data followed by creating a snapshot with the same name (for a given directory on the remote file system) as the snapshot being synchronized. For more information about user management and capabilities see the Ceph docs . Creating daemon \u00b6 To get you started, here is a simple example of a CRD to deploy an cephfs-mirror daemon. 1 2 3 4 5 6 apiVersion : ceph.rook.io/v1 kind : CephFilesystemMirror metadata : name : my-fs-mirror namespace : rook-ceph spec : {} Settings \u00b6 If any setting is unspecified, a suitable default will be used automatically. FilesystemMirror metadata \u00b6 name : The name that will be used for the Ceph cephfs-mirror daemon. namespace : The Kubernetes namespace that will be created for the Rook cluster. The services, pods, and other resources created by the operator will be added to this namespace. FilesystemMirror Settings \u00b6 placement : The cephfs-mirror pods can be given standard Kubernetes placement restrictions with nodeAffinity , tolerations , podAffinity , and podAntiAffinity similar to placement defined for daemons configured by the cluster CRD . annotations : Key value pair list of annotations to add. labels : Key value pair list of labels to add. resources : The resource requirements for the cephfs-mirror pods. priorityClassName : The priority class to set on the cephfs-mirror pods. 
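As a hedged illustration of how these settings combine (all values below are placeholders, not defaults), a CephFilesystemMirror resource with placement, resources, and a priority class might look like:

```yaml
apiVersion: ceph.rook.io/v1
kind: CephFilesystemMirror
metadata:
  name: my-fs-mirror
  namespace: rook-ceph
spec:
  placement:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
              - key: role            # assumed node label; adjust to your cluster
                operator: In
                values:
                  - storage-node
  annotations:
    example.io/owner: storage-team   # illustrative annotation
  resources:
    requests:
      cpu: "100m"
      memory: "128Mi"
    limits:
      memory: "256Mi"
  priorityClassName: system-cluster-critical   # any existing PriorityClass can be used
```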
Configuring mirroring peers \u00b6 In order to configure mirroring peers, please refer to the CephFilesystem documentation .","title":"CephFilesystemMirror CRD"},{"location":"CRDs/Shared-Filesystem/ceph-fs-mirror-crd/#creating-daemon","text":"To get you started, here is a simple example of a CRD to deploy an cephfs-mirror daemon. 1 2 3 4 5 6 apiVersion : ceph.rook.io/v1 kind : CephFilesystemMirror metadata : name : my-fs-mirror namespace : rook-ceph spec : {}","title":"Creating daemon"},{"location":"CRDs/Shared-Filesystem/ceph-fs-mirror-crd/#settings","text":"If any setting is unspecified, a suitable default will be used automatically.","title":"Settings"},{"location":"CRDs/Shared-Filesystem/ceph-fs-mirror-crd/#filesystemmirror-metadata","text":"name : The name that will be used for the Ceph cephfs-mirror daemon. namespace : The Kubernetes namespace that will be created for the Rook cluster. The services, pods, and other resources created by the operator will be added to this namespace.","title":"FilesystemMirror metadata"},{"location":"CRDs/Shared-Filesystem/ceph-fs-mirror-crd/#filesystemmirror-settings","text":"placement : The cephfs-mirror pods can be given standard Kubernetes placement restrictions with nodeAffinity , tolerations , podAffinity , and podAntiAffinity similar to placement defined for daemons configured by the cluster CRD . annotations : Key value pair list of annotations to add. labels : Key value pair list of labels to add. resources : The resource requirements for the cephfs-mirror pods. priorityClassName : The priority class to set on the cephfs-mirror pods.","title":"FilesystemMirror Settings"},{"location":"CRDs/Shared-Filesystem/ceph-fs-mirror-crd/#configuring-mirroring-peers","text":"In order to configure mirroring peers, please refer to the CephFilesystem documentation .","title":"Configuring mirroring peers"},{"location":"CRDs/Shared-Filesystem/ceph-fs-subvolumegroup-crd/","text":"Info This guide assumes you have created a Rook cluster as explained in the main Quickstart guide Rook allows creation of Ceph Filesystem SubVolumeGroups through the custom resource definitions (CRDs). Filesystem subvolume groups are an abstraction for a directory level higher than Filesystem subvolumes to effect policies (e.g., File layouts) across a set of subvolumes. For more information about CephFS volume, subvolumegroup and subvolume refer to the Ceph docs . Creating daemon \u00b6 To get you started, here is a simple example of a CRD to create a subvolumegroup on the CephFilesystem \"myfs\". 1 2 3 4 5 6 7 8 apiVersion : ceph.rook.io/v1 kind : CephFilesystemSubVolumeGroup metadata : name : group-a namespace : rook-ceph # namespace:cluster spec : # filesystemName is the metadata name of the CephFilesystem CR where the subvolume group will be created filesystemName : myfs Settings \u00b6 If any setting is unspecified, a suitable default will be used automatically. CephFilesystemSubVolumeGroup metadata \u00b6 name : The name that will be used for the Ceph Filesystem subvolume group. CephFilesystemSubVolumeGroup spec \u00b6 filesystemName : The metadata name of the CephFilesystem CR where the subvolume group will be created.","title":"FilesystemSubVolumeGroup CRD"},{"location":"CRDs/Shared-Filesystem/ceph-fs-subvolumegroup-crd/#creating-daemon","text":"To get you started, here is a simple example of a CRD to create a subvolumegroup on the CephFilesystem \"myfs\". 
1 2 3 4 5 6 7 8 apiVersion : ceph.rook.io/v1 kind : CephFilesystemSubVolumeGroup metadata : name : group-a namespace : rook-ceph # namespace:cluster spec : # filesystemName is the metadata name of the CephFilesystem CR where the subvolume group will be created filesystemName : myfs","title":"Creating daemon"},{"location":"CRDs/Shared-Filesystem/ceph-fs-subvolumegroup-crd/#settings","text":"If any setting is unspecified, a suitable default will be used automatically.","title":"Settings"},{"location":"CRDs/Shared-Filesystem/ceph-fs-subvolumegroup-crd/#cephfilesystemsubvolumegroup-metadata","text":"name : The name that will be used for the Ceph Filesystem subvolume group.","title":"CephFilesystemSubVolumeGroup metadata"},{"location":"CRDs/Shared-Filesystem/ceph-fs-subvolumegroup-crd/#cephfilesystemsubvolumegroup-spec","text":"filesystemName : The metadata name of the CephFilesystem CR where the subvolume group will be created.","title":"CephFilesystemSubVolumeGroup spec"},{"location":"Contributing/ci-configuration/","text":"This page contains information regarding the CI configuration used for the Rook project to test, build and release the project. Secrets \u00b6 Snyk (Security Scan): SNYK_TOKEN - API Token for the snyk security scanner (workflow file: synk.yaml ). Testing: IBM_INSTANCE_ID : Used for KMS (Key Management System) IBM Key Protect access (see .github/workflows/encryption-pvc-kms-ibm-kp/action.yml ). IBM_SERVICE_API_KEY : Used for KMS (Key Management System) IBM Key Protect access (see .github/workflows/encryption-pvc-kms-ibm-kp/action.yml ). Publishing: DOCKER_USERNAME + DOCKER_PASSWORD : Username and password of registry. DOCKER_REGISTRY : Target registry namespace (e.g., rook ) AWS_USR + AWS_PSW : AWS credentials with access to S3 for Helm chart publishing. GIT_API_TOKEN : GitHub access token, used to push docs changes to the docs repositories gh-pages branch.","title":"CI Configuration"},{"location":"Contributing/ci-configuration/#secrets","text":"Snyk (Security Scan): SNYK_TOKEN - API Token for the snyk security scanner (workflow file: synk.yaml ). Testing: IBM_INSTANCE_ID : Used for KMS (Key Management System) IBM Key Protect access (see .github/workflows/encryption-pvc-kms-ibm-kp/action.yml ). IBM_SERVICE_API_KEY : Used for KMS (Key Management System) IBM Key Protect access (see .github/workflows/encryption-pvc-kms-ibm-kp/action.yml ). Publishing: DOCKER_USERNAME + DOCKER_PASSWORD : Username and password of registry. DOCKER_REGISTRY : Target registry namespace (e.g., rook ) AWS_USR + AWS_PSW : AWS credentials with access to S3 for Helm chart publishing. GIT_API_TOKEN : GitHub access token, used to push docs changes to the docs repositories gh-pages branch.","title":"Secrets"},{"location":"Contributing/development-environment/","text":"Install Kubernetes \u00b6 You can choose any Kubernetes install of your choice. The test framework only depends on kubectl being configured. To install kubectl , please see the official guide . Minikube \u00b6 The developers of Rook are working on Minikube and thus it is the recommended way to quickly get Rook up and running. Minikube should not be used for production but the Rook authors consider it great for development. While other tools such as k3d/kind are great, users have faced issues deploying Rook. Always use a virtual machine when testing Rook. Never use your host system where local devices may mistakenly be consumed. To install Minikube follow the official guide . 
It is recommended to use the kvm2 driver when running on a Linux machine and the hyperkit driver when running on a MacOS. Both allow to create and attach additional disks to the virtual machine. This is required for the Ceph OSD to consume one drive. We don't recommend any other drivers for Rook. You will need a Minikube version 1.23 or higher. Starting the cluster on Minikube is as simple as running: 1 2 3 4 5 6 7 8 # On Linux minikube start --disk-size=40g --extra-disks=1 --driver kvm2 # On MacOS with Intel processor minikube start --disk-size=40g --extra-disks=1 --driver hyperkit # On MacOS with Apple silicon minikube start --disk-size=40g --extra-disks 1 --driver qemu It is recommended to install a Docker client on your host system too. Depending on your operating system follow the official guide . Stopping the cluster and destroying the Minikube virtual machine can be done with: 1 minikube delete Install Helm \u00b6 Use helm.sh to install Helm and set up Rook charts defined under _output/charts (generated by build): To install and set up Helm charts for Rook run tests/scripts/helm.sh up . To clean up tests/scripts/helm.sh clean . Note These helper scripts depend on some artifacts under the _output/ directory generated during build time. These scripts should be run from the project root. Note If Helm is not available in your PATH , Helm will be downloaded to a temporary directory ( /tmp/rook-tests-scripts-helm ) and used from that directory.","title":"Developer Environment"},{"location":"Contributing/development-environment/#install-kubernetes","text":"You can choose any Kubernetes install of your choice. The test framework only depends on kubectl being configured. To install kubectl , please see the official guide .","title":"Install Kubernetes"},{"location":"Contributing/development-environment/#minikube","text":"The developers of Rook are working on Minikube and thus it is the recommended way to quickly get Rook up and running. Minikube should not be used for production but the Rook authors consider it great for development. While other tools such as k3d/kind are great, users have faced issues deploying Rook. Always use a virtual machine when testing Rook. Never use your host system where local devices may mistakenly be consumed. To install Minikube follow the official guide . It is recommended to use the kvm2 driver when running on a Linux machine and the hyperkit driver when running on a MacOS. Both allow to create and attach additional disks to the virtual machine. This is required for the Ceph OSD to consume one drive. We don't recommend any other drivers for Rook. You will need a Minikube version 1.23 or higher. Starting the cluster on Minikube is as simple as running: 1 2 3 4 5 6 7 8 # On Linux minikube start --disk-size=40g --extra-disks=1 --driver kvm2 # On MacOS with Intel processor minikube start --disk-size=40g --extra-disks=1 --driver hyperkit # On MacOS with Apple silicon minikube start --disk-size=40g --extra-disks 1 --driver qemu It is recommended to install a Docker client on your host system too. Depending on your operating system follow the official guide . Stopping the cluster and destroying the Minikube virtual machine can be done with: 1 minikube delete","title":"Minikube"},{"location":"Contributing/development-environment/#install-helm","text":"Use helm.sh to install Helm and set up Rook charts defined under _output/charts (generated by build): To install and set up Helm charts for Rook run tests/scripts/helm.sh up . To clean up tests/scripts/helm.sh clean . 
Note These helper scripts depend on some artifacts under the _output/ directory generated during build time. These scripts should be run from the project root. Note If Helm is not available in your PATH , Helm will be downloaded to a temporary directory ( /tmp/rook-tests-scripts-helm ) and used from that directory.","title":"Install Helm"},{"location":"Contributing/development-flow/","text":"Thank you for your time and effort to help us improve Rook! Here are a few steps to get started. If you have any questions, don't hesitate to reach out to us on our Slack dev channel. Prerequisites \u00b6 GO 1.20 or greater installed Git client installed GitHub account Initial Setup \u00b6 Create a Fork \u00b6 Navigate to http://github.com/rook/rook and click the \"Fork\" button. Clone Your Fork \u00b6 In a console window: 1 2 3 4 5 6 7 8 # Create the rook repo path mkdir -p $GOPATH/src/github.com/rook # Navigate to the local repo path cd $GOPATH/src/github.com/rook # Clone your fork, where  is your GitHub account name git clone https://github.com//rook.git Add Upstream Remote \u00b6 Add the upstream remote to your local git: 1 2 3 4 5 6 # Add 'upstream' to the list of remotes cd rook git remote add upstream https://github.com/rook/rook.git # Verify the remote was added git remote -v Two remotes should be available: origin and upstream . Build \u00b6 Before building the project, fetch the remotes to synchronize tags. 1 2 3 # Fetch all remotes git fetch -a make build Tip If in a Linux environment and make build command throws an error like unknown revision for some imports, add export GOPROXY=https://proxy.golang.org,direct to ~/.bashrc . Reload your environment and confirm with go env that GOPROXY is set. Hint Make will automatically pick up podman if docker packages are not available on your machine. Development Settings \u00b6 For consistent whitespace and other formatting in .go and other source files, apply the following settings in your IDE: Format with the goreturns tool Trim trailing whitespace Markdown Table of Contents is correctly updated automatically VS Code \u00b6 Tip VS Code will prompt you automatically with some recommended extensions to install, such as Markdown, Go, YAML validator, and ShellCheck. VS Code will automatically use the recommended settings in the .vscode/settings.json file. Self assign Issue \u00b6 To self-assign an issue that is not yet assigned to anyone else, add a comment in the issue with /assign in the body. 
Layout \u00b6 The overall source code layout is summarized: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 rook \u251c\u2500\u2500 build # build makefiles and logic to build, publish and release all Rook artifacts \u251c\u2500\u2500 cluster \u2502 \u251c\u2500\u2500 charts # Helm charts \u2502 \u2502 \u2514\u2500\u2500 rook-ceph \u2502 \u2502 \u2514\u2500\u2500 rook-ceph-cluster \u2502 \u2514\u2500\u2500 examples # Sample manifestes to configure the cluster \u2502 \u251c\u2500\u2500 cmd \u2502 \u251c\u2500\u2500 rook # Main command entry points for operators and daemons \u2502 \u251c\u2500\u2500 design # Design documents \u251c\u2500\u2500 Documentation # Documentation that is published to rook.io \u251c\u2500\u2500 images # Rook docker image sources \u2502 \u251c\u2500\u2500 pkg \u2502 \u251c\u2500\u2500 apis \u2502 \u2502 \u251c\u2500\u2500 ceph.rook.io # ceph specs used in the CRDs \u2502 \u2502 \u2502 \u251c\u2500\u2500 v1 \u2502 \u251c\u2500\u2500 client # auto-generated strongly typed client code to access Rook APIs \u2502 \u251c\u2500\u2500 clusterd \u2502 \u251c\u2500\u2500 daemon # daemons for configuring ceph \u2502 \u2502 \u251c\u2500\u2500 ceph \u2502 \u2502 \u2514\u2500\u2500 discover \u2502 \u251c\u2500\u2500 operator # all reconcile logic and custom controllers \u2502 \u2502 \u251c\u2500\u2500 ceph \u2502 \u2502 \u251c\u2500\u2500 discover \u2502 \u2502 \u251c\u2500\u2500 k8sutil \u2502 \u251c\u2500\u2500 util \u2502 \u2514\u2500\u2500 version \u2514\u2500\u2500 tests \u251c\u2500\u2500 framework # integration test framework \u2502 \u251c\u2500\u2500 clients \u2502 \u251c\u2500\u2500 installer \u2502 \u2514\u2500\u2500 utils \u251c\u2500\u2500 integration # integration test cases that will be invoked during golang testing \u2514\u2500\u2500 scripts # scripts for setting up integration and manual testing environments Development \u00b6 To submit a change, create a branch in your fork and then submit a pull request (PR) from the branch. Design Document \u00b6 For new features of significant scope and complexity, a design document is recommended before work begins on the implementation. Create a design document if: Adding a new CRD Adding a significant feature. For smaller, straightforward features and bug fixes, there is no need for a design document. Authoring a design document has many advantages: Forces the author to think critically about the feature and identify potential issues early in the design Obtain agreement amongst the community before code is written to avoid wasted effort in the wrong direction Newcomers may more quickly understand the feature Note Writing code to prototype the feature while working on the design may be very useful to help flesh out the approach. A design document should be written as a markdown file in the design folder . Follow the process outlined in the design template . There are many examples of previous design documents in that folder. Submit a pull request for the design to be discussed and approved by the community, just like any other change to the repository. Create a Branch \u00b6 From a console, create a new branch based on your fork where changes will be developed: 1 2 3 4 5 6 # Update the remotes git fetch --all # Create a new branch that is based off upstream master. Give it a simple, but descriptive name. # Generally it will be two to three words separated by dashes and without numbers. 
git checkout -b feature-name upstream/master Updating Your Fork \u00b6 During the development lifecycle, keep your branch(es) updated with the latest upstream master. As others on the team push changes, rebase your commits on top of the latest. This avoids unnecessary merge commits and keeps the commit history clean. Whenever an update is needed to the local repository, never perform a merge, always rebase. This will avoid merge commits in the git history. If there are any modified files, first stash them with git stash . 1 2 git fetch --all git rebase upstream/master Rebasing is a very powerful feature of Git. You need to understand how it works to avoid risking losing your work. Read about it in the Git documentation . Briefly, rebasing does the following: \"Unwinds\" the local commits. The local commits are removed temporarily from the history. The latest changes from upstream are added to the history The local commits are re-applied one by one If there are merge conflicts, there will be a prompt to fix them before continuing. Read the output closely. It will instruct how to complete the rebase. When rebasing is completed, all of the commits are restored in the history. Submitting a Pull Request \u00b6 After a feature or bug fix is completed in your branch, open a Pull Request (PR) to the upstream Rook repository . Before opening the PR: If there are code changes, add unit tests and verify that all unit tests are passing. See Unit Tests below on running unit tests. Rebase on the latest upstream changes Regression Testing \u00b6 All pull requests must pass all continuous integration (CI) tests before they can be merged. These tests automatically run against every pull request. The results of these tests along with code review feedback determine whether your request will be merged. Unit Tests \u00b6 From the root of your local Rook repo execute the following to run all of the unit tests: 1 make test Unit tests for individual packages can be run with the standard go test command. To see code coverage on the packages that you changed, view the coverage.html in a browser to inspect your new code. 1 2 go test -coverprofile=coverage.out go tool cover -html=coverage.out -o coverage.html Writing unit tests \u00b6 Good unit tests start with easily testable code. Small chunks (\"units\") of code can be easily tested for every possible input. Higher-level code units that are built from smaller, already-tested units can more easily verify that the units are combined together correctly. Common cases that may need tests: the feature is enabled the feature is disabled the feature is only partially enabled, for every possible way it can be partially enabled every error that can be encountered during execution of the feature the feature can be disabled (including partially) after it was enabled the feature can be modified (including partially) after it was enabled if there is a slice/array involved, test length = 0, length = 1, length = 3, length == max, length > max an input is not specified, for each input an input is specified incorrectly, for each input a resource the code relies on doesn't exist, for each dependency Integration Tests \u00b6 Rook's upstream continuous integration (CI) tests will run integration tests against your changes automatically. Integration tests will be run in Github actions. If an integration test fails, a tmate session will be available for troubleshooting for a short time. See the action details for an ssh connection to the Github runner. 
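When the rebase described above stops on merge conflicts, finishing it usually looks like the following generic git sequence (not a Rook-specific script; the file path is a placeholder):

    # See which files are conflicted
    git status
    # Edit the conflicted files, then mark them resolved
    git add path/to/conflicted-file
    # Replay the remaining commits (or abort to start over)
    git rebase --continue
    # Restore any work that was stashed before the rebase
    git stash pop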
Commit structure \u00b6 Rook maintainers value clear, lengthy and explanatory commit messages. Requirements for commits: A commit prefix from the list of known prefixes At least one paragraph that explains the original issue and the changes in the commit The Signed-off-by tag is at the end of the commit message, achieved by committing with git commit -s An example acceptable commit message: 1 2 3 4 5 6 component: commit title This is the commit message. Here I'm explaining what the bug was along with its root cause. Then I'm explaining how I fixed it. Signed-off-by: FirstName LastName  Commit History \u00b6 To prepare your branch to open a PR, the minimal number of logical commits is preferred to maintain a clean commit history. Most commonly a PR will include a single commit where all changes are squashed, although sometimes there will be multiple logical commits. 1 2 # Inspect your commit history to determine if you need to squash commits git log To squash multiple commits or make other changes to the commit history, use git rebase : 1 2 3 # # In this example, the last 5 commits will be opened in the git rebase tool. git rebase -i HEAD~5 Once your commit history is clean, ensure the branch is rebased on the latest upstream before opening the PR. Submitting \u00b6 Go to the Rook github to open the PR. If you have pushed recently to a branch, you will see an obvious link to open the PR. If you have not pushed recently, go to the Pull Request tab and select your fork and branch for the PR. After the PR is open, make changes simply by pushing new commits. The PR will track the changes in your fork and rerun the CI automatically. Always open a pull request against master. Never open a pull request against a released branch (e.g. release-1.2) unless working directly with a maintainer. Backporting to a Release Branch \u00b6 The flow for getting a fix into a release branch is: Open a PR to merge changes to master following the process outlined above Add the backport label to that PR such as backport-release-1.11 After the PR is merged to master, the mergify bot will automatically open a PR with the commits backported to the release branch After the CI is green and a maintainer has approved the PR, the bot will automatically merge the backport PR Debugging issues in Ceph manager modules \u00b6 The Ceph manager modules are written in Python and can be individually and dynamically loaded from the manager. We can take advantage of this feature in order to test changes and to debug issues in the modules. This is just a hack to debug any modification in the manager modules. The dashboard and the rook orchestrator modules are the two modules most commonly have modifications that need to be tested. Make modifications directly in the manager module and reload: Update the cluster so only a single mgr pod is running. Set the mgr.count: 1 in the CephCluster CR if it is not already. Shell into the manager container: 1 kubectl exec -n rook-ceph --stdin --tty $(kubectl get pod -n rook-ceph -l ceph_daemon_type=mgr,instance=a -o jsonpath='{.items[0].metadata.name}') -c mgr -- /bin/bash Make the modifications needed in the required manager module. The manager module source code is found in /usr/share/ceph/mgr/ . 
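As a reminder for the first step above, running a single mgr is controlled by the CephCluster CR. A minimal excerpt, with all other spec fields omitted, might look like:

    apiVersion: ceph.rook.io/v1
    kind: CephCluster
    metadata:
      name: rook-ceph
      namespace: rook-ceph
    spec:
      # run only one mgr so the module changes land in the active manager
      mgr:
        count: 1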
Note If the manager pod is restarted, all modifications made in the mgr container will be lost Restart the modified manager module to test the modifications: Example for restarting the rook manager module with the krew plugin : 1 2 kubectl rook-ceph ceph mgr module disable rook kubectl rook-ceph ceph mgr module enable rook Once the module is restarted the modifications will be running in the active manager. View the manager pod log or other changed behavior to validate the changes.","title":"Development Flow"},{"location":"Contributing/development-flow/#prerequisites","text":"GO 1.20 or greater installed Git client installed GitHub account","title":"Prerequisites"},{"location":"Contributing/development-flow/#initial-setup","text":"","title":"Initial Setup"},{"location":"Contributing/development-flow/#create-a-fork","text":"Navigate to http://github.com/rook/rook and click the \"Fork\" button.","title":"Create a Fork"},{"location":"Contributing/development-flow/#clone-your-fork","text":"In a console window: 1 2 3 4 5 6 7 8 # Create the rook repo path mkdir -p $GOPATH/src/github.com/rook # Navigate to the local repo path cd $GOPATH/src/github.com/rook # Clone your fork, where  is your GitHub account name git clone https://github.com//rook.git","title":"Clone Your Fork"},{"location":"Contributing/development-flow/#add-upstream-remote","text":"Add the upstream remote to your local git: 1 2 3 4 5 6 # Add 'upstream' to the list of remotes cd rook git remote add upstream https://github.com/rook/rook.git # Verify the remote was added git remote -v Two remotes should be available: origin and upstream .","title":"Add Upstream Remote"},{"location":"Contributing/development-flow/#build","text":"Before building the project, fetch the remotes to synchronize tags. 1 2 3 # Fetch all remotes git fetch -a make build Tip If in a Linux environment and make build command throws an error like unknown revision for some imports, add export GOPROXY=https://proxy.golang.org,direct to ~/.bashrc . Reload your environment and confirm with go env that GOPROXY is set. Hint Make will automatically pick up podman if docker packages are not available on your machine.","title":"Build"},{"location":"Contributing/development-flow/#development-settings","text":"For consistent whitespace and other formatting in .go and other source files, apply the following settings in your IDE: Format with the goreturns tool Trim trailing whitespace Markdown Table of Contents is correctly updated automatically","title":"Development Settings"},{"location":"Contributing/development-flow/#vs-code","text":"Tip VS Code will prompt you automatically with some recommended extensions to install, such as Markdown, Go, YAML validator, and ShellCheck. 
VS Code will automatically use the recommended settings in the .vscode/settings.json file.","title":"VS Code"},{"location":"Contributing/development-flow/#self-assign-issue","text":"To self-assign an issue that is not yet assigned to anyone else, add a comment in the issue with /assign in the body.","title":"Self assign Issue"},{"location":"Contributing/development-flow/#layout","text":"The overall source code layout is summarized: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 rook \u251c\u2500\u2500 build # build makefiles and logic to build, publish and release all Rook artifacts \u251c\u2500\u2500 cluster \u2502 \u251c\u2500\u2500 charts # Helm charts \u2502 \u2502 \u2514\u2500\u2500 rook-ceph \u2502 \u2502 \u2514\u2500\u2500 rook-ceph-cluster \u2502 \u2514\u2500\u2500 examples # Sample manifestes to configure the cluster \u2502 \u251c\u2500\u2500 cmd \u2502 \u251c\u2500\u2500 rook # Main command entry points for operators and daemons \u2502 \u251c\u2500\u2500 design # Design documents \u251c\u2500\u2500 Documentation # Documentation that is published to rook.io \u251c\u2500\u2500 images # Rook docker image sources \u2502 \u251c\u2500\u2500 pkg \u2502 \u251c\u2500\u2500 apis \u2502 \u2502 \u251c\u2500\u2500 ceph.rook.io # ceph specs used in the CRDs \u2502 \u2502 \u2502 \u251c\u2500\u2500 v1 \u2502 \u251c\u2500\u2500 client # auto-generated strongly typed client code to access Rook APIs \u2502 \u251c\u2500\u2500 clusterd \u2502 \u251c\u2500\u2500 daemon # daemons for configuring ceph \u2502 \u2502 \u251c\u2500\u2500 ceph \u2502 \u2502 \u2514\u2500\u2500 discover \u2502 \u251c\u2500\u2500 operator # all reconcile logic and custom controllers \u2502 \u2502 \u251c\u2500\u2500 ceph \u2502 \u2502 \u251c\u2500\u2500 discover \u2502 \u2502 \u251c\u2500\u2500 k8sutil \u2502 \u251c\u2500\u2500 util \u2502 \u2514\u2500\u2500 version \u2514\u2500\u2500 tests \u251c\u2500\u2500 framework # integration test framework \u2502 \u251c\u2500\u2500 clients \u2502 \u251c\u2500\u2500 installer \u2502 \u2514\u2500\u2500 utils \u251c\u2500\u2500 integration # integration test cases that will be invoked during golang testing \u2514\u2500\u2500 scripts # scripts for setting up integration and manual testing environments","title":"Layout"},{"location":"Contributing/development-flow/#development","text":"To submit a change, create a branch in your fork and then submit a pull request (PR) from the branch.","title":"Development"},{"location":"Contributing/development-flow/#design-document","text":"For new features of significant scope and complexity, a design document is recommended before work begins on the implementation. Create a design document if: Adding a new CRD Adding a significant feature. For smaller, straightforward features and bug fixes, there is no need for a design document. Authoring a design document has many advantages: Forces the author to think critically about the feature and identify potential issues early in the design Obtain agreement amongst the community before code is written to avoid wasted effort in the wrong direction Newcomers may more quickly understand the feature Note Writing code to prototype the feature while working on the design may be very useful to help flesh out the approach. A design document should be written as a markdown file in the design folder . Follow the process outlined in the design template . There are many examples of previous design documents in that folder. 
Submit a pull request for the design to be discussed and approved by the community, just like any other change to the repository.","title":"Design Document"},{"location":"Contributing/development-flow/#create-a-branch","text":"From a console, create a new branch based on your fork where changes will be developed: 1 2 3 4 5 6 # Update the remotes git fetch --all # Create a new branch that is based off upstream master. Give it a simple, but descriptive name. # Generally it will be two to three words separated by dashes and without numbers. git checkout -b feature-name upstream/master","title":"Create a Branch"},{"location":"Contributing/development-flow/#updating-your-fork","text":"During the development lifecycle, keep your branch(es) updated with the latest upstream master. As others on the team push changes, rebase your commits on top of the latest. This avoids unnecessary merge commits and keeps the commit history clean. Whenever an update is needed to the local repository, never perform a merge, always rebase. This will avoid merge commits in the git history. If there are any modified files, first stash them with git stash . 1 2 git fetch --all git rebase upstream/master Rebasing is a very powerful feature of Git. You need to understand how it works to avoid risking losing your work. Read about it in the Git documentation . Briefly, rebasing does the following: \"Unwinds\" the local commits. The local commits are removed temporarily from the history. The latest changes from upstream are added to the history The local commits are re-applied one by one If there are merge conflicts, there will be a prompt to fix them before continuing. Read the output closely. It will instruct how to complete the rebase. When rebasing is completed, all of the commits are restored in the history.","title":"Updating Your Fork"},{"location":"Contributing/development-flow/#submitting-a-pull-request","text":"After a feature or bug fix is completed in your branch, open a Pull Request (PR) to the upstream Rook repository . Before opening the PR: If there are code changes, add unit tests and verify that all unit tests are passing. See Unit Tests below on running unit tests. Rebase on the latest upstream changes","title":"Submitting a Pull Request"},{"location":"Contributing/development-flow/#regression-testing","text":"All pull requests must pass all continuous integration (CI) tests before they can be merged. These tests automatically run against every pull request. The results of these tests along with code review feedback determine whether your request will be merged.","title":"Regression Testing"},{"location":"Contributing/development-flow/#unit-tests","text":"From the root of your local Rook repo execute the following to run all of the unit tests: 1 make test Unit tests for individual packages can be run with the standard go test command. To see code coverage on the packages that you changed, view the coverage.html in a browser to inspect your new code. 1 2 go test -coverprofile=coverage.out go tool cover -html=coverage.out -o coverage.html","title":"Unit Tests"},{"location":"Contributing/development-flow/#writing-unit-tests","text":"Good unit tests start with easily testable code. Small chunks (\"units\") of code can be easily tested for every possible input. Higher-level code units that are built from smaller, already-tested units can more easily verify that the units are combined together correctly. 
Common cases that may need tests: the feature is enabled the feature is disabled the feature is only partially enabled, for every possible way it can be partially enabled every error that can be encountered during execution of the feature the feature can be disabled (including partially) after it was enabled the feature can be modified (including partially) after it was enabled if there is a slice/array involved, test length = 0, length = 1, length = 3, length == max, length > max an input is not specified, for each input an input is specified incorrectly, for each input a resource the code relies on doesn't exist, for each dependency","title":"Writing unit tests"},{"location":"Contributing/development-flow/#integration-tests","text":"Rook's upstream continuous integration (CI) tests will run integration tests against your changes automatically. Integration tests will be run in Github actions. If an integration test fails, a tmate session will be available for troubleshooting for a short time. See the action details for an ssh connection to the Github runner.","title":"Integration Tests"},{"location":"Contributing/development-flow/#commit-structure","text":"Rook maintainers value clear, lengthy and explanatory commit messages. Requirements for commits: A commit prefix from the list of known prefixes At least one paragraph that explains the original issue and the changes in the commit The Signed-off-by tag is at the end of the commit message, achieved by committing with git commit -s An example acceptable commit message: 1 2 3 4 5 6 component: commit title This is the commit message. Here I'm explaining what the bug was along with its root cause. Then I'm explaining how I fixed it. Signed-off-by: FirstName LastName ","title":"Commit structure"},{"location":"Contributing/development-flow/#commit-history","text":"To prepare your branch to open a PR, the minimal number of logical commits is preferred to maintain a clean commit history. Most commonly a PR will include a single commit where all changes are squashed, although sometimes there will be multiple logical commits. 1 2 # Inspect your commit history to determine if you need to squash commits git log To squash multiple commits or make other changes to the commit history, use git rebase : 1 2 3 # # In this example, the last 5 commits will be opened in the git rebase tool. git rebase -i HEAD~5 Once your commit history is clean, ensure the branch is rebased on the latest upstream before opening the PR.","title":"Commit History"},{"location":"Contributing/development-flow/#submitting","text":"Go to the Rook github to open the PR. If you have pushed recently to a branch, you will see an obvious link to open the PR. If you have not pushed recently, go to the Pull Request tab and select your fork and branch for the PR. After the PR is open, make changes simply by pushing new commits. The PR will track the changes in your fork and rerun the CI automatically. Always open a pull request against master. Never open a pull request against a released branch (e.g. 
release-1.2) unless working directly with a maintainer.","title":"Submitting"},{"location":"Contributing/development-flow/#backporting-to-a-release-branch","text":"The flow for getting a fix into a release branch is: Open a PR to merge changes to master following the process outlined above Add the backport label to that PR such as backport-release-1.11 After the PR is merged to master, the mergify bot will automatically open a PR with the commits backported to the release branch After the CI is green and a maintainer has approved the PR, the bot will automatically merge the backport PR","title":"Backporting to a Release Branch"},{"location":"Contributing/development-flow/#debugging-issues-in-ceph-manager-modules","text":"The Ceph manager modules are written in Python and can be individually and dynamically loaded from the manager. We can take advantage of this feature in order to test changes and to debug issues in the modules. This is just a hack to debug any modification in the manager modules. The dashboard and the rook orchestrator modules are the two modules most commonly have modifications that need to be tested. Make modifications directly in the manager module and reload: Update the cluster so only a single mgr pod is running. Set the mgr.count: 1 in the CephCluster CR if it is not already. Shell into the manager container: 1 kubectl exec -n rook-ceph --stdin --tty $(kubectl get pod -n rook-ceph -l ceph_daemon_type=mgr,instance=a -o jsonpath='{.items[0].metadata.name}') -c mgr -- /bin/bash Make the modifications needed in the required manager module. The manager module source code is found in /usr/share/ceph/mgr/ . Note If the manager pod is restarted, all modifications made in the mgr container will be lost Restart the modified manager module to test the modifications: Example for restarting the rook manager module with the krew plugin : 1 2 kubectl rook-ceph ceph mgr module disable rook kubectl rook-ceph ceph mgr module enable rook Once the module is restarted the modifications will be running in the active manager. View the manager pod log or other changed behavior to validate the changes.","title":"Debugging issues in Ceph manager modules"},{"location":"Contributing/documentation/","text":"We are using MkDocs with the Material for MkDocs theme . Markdown Extensions \u00b6 Thanks to the MkDocs Material theme we have certain \"markdown syntax extensions\" available: Admonitions Footnotes Icons, Emojis Task lists And more .. For a whole list of features Reference - Material for MkDocs . Local Preview \u00b6 To locally preview the documentation, you can run the following command (in the root of the repository): 1 make docs-preview When previewing, now you can navigate your browser to http://127.0.0.1:8000/ to open the preview of the documentation. Hint Should you encounter a command not found error while trying to preview the docs for the first time on a machine, you probably need to install the dependencies for MkDocs and extensions used. 1 pip3 install -r build/release/requirements_docs.txt Please make sure that your Python binary path is included in your PATH . Running helm-docs \u00b6 helm-docs is a tool that generates the documentation for a helm chart automatically. 
If there are changes in the helm chart, you need to run helm-docs manually, and check in the resulting autogenerated md files at the path /Documentation/Helm-Charts 1 make helm-docs","title":"Documentation"},{"location":"Contributing/documentation/#markdown-extensions","text":"Thanks to the MkDocs Material theme we have certain \"markdown syntax extensions\" available: Admonitions Footnotes Icons, Emojis Task lists And more .. For a whole list of features Reference - Material for MkDocs .","title":"Markdown Extensions"},{"location":"Contributing/documentation/#local-preview","text":"To locally preview the documentation, you can run the following command (in the root of the repository): 1 make docs-preview When previewing, now you can navigate your browser to http://127.0.0.1:8000/ to open the preview of the documentation. Hint Should you encounter a command not found error while trying to preview the docs for the first time on a machine, you probably need to install the dependencies for MkDocs and extensions used. 1 pip3 install -r build/release/requirements_docs.txt Please make sure that your Python binary path is included in your PATH .","title":"Local Preview"},{"location":"Contributing/documentation/#running-helm-docs","text":"helm-docs is a tool that generates the documentation for a helm chart automatically. If there are changes in the helm chart, you need to run helm-docs manually, and check in the resulting autogenerated md files at the path /Documentation/Helm-Charts 1 make helm-docs","title":"Running helm-docs"},{"location":"Contributing/rook-test-framework/","text":"Integration Tests \u00b6 The integration tests run end-to-end tests on Rook in a running instance of Kubernetes. The framework includes scripts for starting Kubernetes so users can quickly spin up a Kubernetes cluster. The tests are generally designed to install Rook, run tests, and uninstall Rook. The CI runs the integration tests with each PR and each master or release branch build. If the tests fail in a PR, a tmate session is started which will allow you to connect via ssh and troubleshoot the failure. The CI is the most efficient way to troubleshoot the tests since the environment is started automatically and you will only need to connect to investigate. This document will outline the steps to run the integration tests locally in a minikube environment, should the CI not be sufficient to troubleshoot. Hint The CI is generally much simpler to troubleshoot than running these tests locally. Running the tests locally is rarely necessary. Warning A risk of running the tests locally is that a local disk is required during the tests. If not running in a VM, your laptop or other test machine could be destroyed. Install Minikube \u00b6 Follow Rook's developer guide to install Minikube. Build Rook image \u00b6 Now that the Kubernetes cluster is running we need to populate the Docker registry to allow local image builds to be easily used inside Minikube. 1 eval $(minikube docker-env -p minikube) make build will now build and push the images to the Docker registry inside the Minikube virtual machine. 1 make build Tag the newly built images to rook/ceph:local-build for running tests, or rook/ceph:master if creating example manifests:: 1 2 docker tag $(docker images|awk '/build-/ {print $1}') rook/ceph:local-build docker tag rook/ceph:local-build rook/ceph:master Run integration tests \u00b6 Some settings are available to run the tests under different environments. The settings are all configured with environment variables. 
See environment.go for the available environment variables. Set the following variables: 1 2 3 export TEST_HELM_PATH=/tmp/rook-tests-scripts-helm/linux-amd64/helm export TEST_BASE_DIR=WORKING_DIR export TEST_SCRATCH_DEVICE=/dev/vdb Set TEST_SCRATCH_DEVICE to the correct block device name based on the driver that's being used. Hint If using the virtualbox minikube driver, the device should be /dev/sdb Warning The integration tests erase the contents of TEST_SCRATCH_DEVICE when the test is completed To run a specific suite, specify the suite name: 1 go test -v -timeout 1800s -run CephSmokeSuite github.com/rook/rook/tests/integration After running tests, see test logs under tests/integration/_output . To run specific tests inside a suite: 1 go test -v -timeout 1800s -run CephSmokeSuite github.com/rook/rook/tests/integration -testify.m TestARookClusterInstallation_SmokeTest Info Only the golang test suites are documented to run locally. Canary and other tests have only ever been supported in the CI. Running tests on OpenShift \u00b6 Setup OpenShift environment and export KUBECONFIG Make sure oc executable file is in the PATH. Only the CephSmokeSuite is currently supported on OpenShift. Set the following environment variables depending on the environment: 1 2 3 export TEST_ENV_NAME=openshift export TEST_STORAGE_CLASS=gp2 export TEST_BASE_DIR=/tmp Run the integration tests","title":"Rook Test Framework"},{"location":"Contributing/rook-test-framework/#integration-tests","text":"The integration tests run end-to-end tests on Rook in a running instance of Kubernetes. The framework includes scripts for starting Kubernetes so users can quickly spin up a Kubernetes cluster. The tests are generally designed to install Rook, run tests, and uninstall Rook. The CI runs the integration tests with each PR and each master or release branch build. If the tests fail in a PR, a tmate session is started which will allow you to connect via ssh and troubleshoot the failure. The CI is the most efficient way to troubleshoot the tests since the environment is started automatically and you will only need to connect to investigate. This document will outline the steps to run the integration tests locally in a minikube environment, should the CI not be sufficient to troubleshoot. Hint The CI is generally much simpler to troubleshoot than running these tests locally. Running the tests locally is rarely necessary. Warning A risk of running the tests locally is that a local disk is required during the tests. If not running in a VM, your laptop or other test machine could be destroyed.","title":"Integration Tests"},{"location":"Contributing/rook-test-framework/#install-minikube","text":"Follow Rook's developer guide to install Minikube.","title":"Install Minikube"},{"location":"Contributing/rook-test-framework/#build-rook-image","text":"Now that the Kubernetes cluster is running we need to populate the Docker registry to allow local image builds to be easily used inside Minikube. 1 eval $(minikube docker-env -p minikube) make build will now build and push the images to the Docker registry inside the Minikube virtual machine. 
1 make build Tag the newly built images to rook/ceph:local-build for running tests, or rook/ceph:master if creating example manifests:: 1 2 docker tag $(docker images|awk '/build-/ {print $1}') rook/ceph:local-build docker tag rook/ceph:local-build rook/ceph:master","title":"Build Rook image"},{"location":"Contributing/rook-test-framework/#run-integration-tests","text":"Some settings are available to run the tests under different environments. The settings are all configured with environment variables. See environment.go for the available environment variables. Set the following variables: 1 2 3 export TEST_HELM_PATH=/tmp/rook-tests-scripts-helm/linux-amd64/helm export TEST_BASE_DIR=WORKING_DIR export TEST_SCRATCH_DEVICE=/dev/vdb Set TEST_SCRATCH_DEVICE to the correct block device name based on the driver that's being used. Hint If using the virtualbox minikube driver, the device should be /dev/sdb Warning The integration tests erase the contents of TEST_SCRATCH_DEVICE when the test is completed To run a specific suite, specify the suite name: 1 go test -v -timeout 1800s -run CephSmokeSuite github.com/rook/rook/tests/integration After running tests, see test logs under tests/integration/_output . To run specific tests inside a suite: 1 go test -v -timeout 1800s -run CephSmokeSuite github.com/rook/rook/tests/integration -testify.m TestARookClusterInstallation_SmokeTest Info Only the golang test suites are documented to run locally. Canary and other tests have only ever been supported in the CI.","title":"Run integration tests"},{"location":"Contributing/rook-test-framework/#running-tests-on-openshift","text":"Setup OpenShift environment and export KUBECONFIG Make sure oc executable file is in the PATH. Only the CephSmokeSuite is currently supported on OpenShift. Set the following environment variables depending on the environment: 1 2 3 export TEST_ENV_NAME=openshift export TEST_STORAGE_CLASS=gp2 export TEST_BASE_DIR=/tmp Run the integration tests","title":"Running tests on OpenShift"},{"location":"Getting-Started/ceph-openshift/","text":"OpenShift \u00b6 OpenShift adds a number of security and other enhancements to Kubernetes. In particular, security context constraints allow the cluster admin to define exactly which permissions are allowed to pods running in the cluster. You will need to define those permissions that allow the Rook pods to run. The settings for Rook in OpenShift are described below, and are also included in the example yaml files : operator-openshift.yaml : Creates the security context constraints and starts the operator deployment object-openshift.yaml : Creates an object store with rgw listening on a valid port number for OpenShift TL;DR \u00b6 To create an OpenShift cluster, the commands basically include: 1 2 3 oc create -f crds.yaml -f common.yaml oc create -f operator-openshift.yaml oc create -f cluster.yaml Helm Installation \u00b6 Configuration required for Openshift is automatically created by the Helm charts, such as the SecurityContextConstraints. See the Rook Helm Charts . 
Rook Privileges \u00b6 To orchestrate the storage platform, Rook requires the following access in the cluster: Create hostPath volumes, for persistence by the Ceph mon and osd pods Run pods in privileged mode, for access to /dev and hostPath volumes Host networking for the Rook agent and clusters that require host networking Ceph OSDs require host PIDs for communication on the same node Security Context Constraints \u00b6 Before starting the Rook operator or cluster, create the security context constraints needed by the Rook pods. The following yaml is found in operator-openshift.yaml under /deploy/examples . Hint Older versions of OpenShift may require apiVersion: v1 . Important to note is that if you plan on running Rook in namespaces other than the default rook-ceph , the example scc will need to be modified to accommodate for your namespaces where the Rook pods are running. To create the scc you will need a privileged account: 1 oc login -u system:admin We will create the security context constraints with the operator in the next section. Rook Settings \u00b6 There are some Rook settings that also need to be adjusted to work in OpenShift. Operator Settings \u00b6 There is an environment variable that needs to be set in the operator spec that will allow Rook to run in OpenShift clusters. ROOK_HOSTPATH_REQUIRES_PRIVILEGED : Must be set to true . Writing to the hostPath is required for the Ceph mon and osd pods. Given the restricted permissions in OpenShift with SELinux, the pod must be running privileged in order to write to the hostPath volume. 1 2 - name : ROOK_HOSTPATH_REQUIRES_PRIVILEGED value : \"true\" Now create the security context constraints and the operator: 1 oc create -f operator-openshift.yaml Cluster Settings \u00b6 The cluster settings in cluster.yaml are largely isolated from the differences in OpenShift. There is perhaps just one to take note of: dataDirHostPath : Ensure that it points to a valid, writable path on the host systems. Object Store Settings \u00b6 In OpenShift, ports less than 1024 cannot be bound. In the object store CRD , ensure the port is modified to meet this requirement. 1 2 gateway : port : 8080 You can expose a different port such as 80 by creating a service. A sample object store can be created with these settings: 1 oc create -f object-openshift.yaml","title":"OpenShift"},{"location":"Getting-Started/ceph-openshift/#openshift","text":"OpenShift adds a number of security and other enhancements to Kubernetes. In particular, security context constraints allow the cluster admin to define exactly which permissions are allowed to pods running in the cluster. You will need to define those permissions that allow the Rook pods to run. The settings for Rook in OpenShift are described below, and are also included in the example yaml files : operator-openshift.yaml : Creates the security context constraints and starts the operator deployment object-openshift.yaml : Creates an object store with rgw listening on a valid port number for OpenShift","title":"OpenShift"},{"location":"Getting-Started/ceph-openshift/#tldr","text":"To create an OpenShift cluster, the commands basically include: 1 2 3 oc create -f crds.yaml -f common.yaml oc create -f operator-openshift.yaml oc create -f cluster.yaml","title":"TL;DR"},{"location":"Getting-Started/ceph-openshift/#helm-installation","text":"Configuration required for Openshift is automatically created by the Helm charts, such as the SecurityContextConstraints. 
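For orientation, dataDirHostPath is set in the CephCluster CR; the default used throughout these instructions is shown in this minimal excerpt (other spec fields omitted):

    apiVersion: ceph.rook.io/v1
    kind: CephCluster
    metadata:
      name: rook-ceph
      namespace: rook-ceph
    spec:
      # host path that must be cleaned up on every node after teardown
      dataDirHostPath: /var/lib/rook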
See the Rook Helm Charts .","title":"Helm Installation"},{"location":"Getting-Started/ceph-openshift/#rook-privileges","text":"To orchestrate the storage platform, Rook requires the following access in the cluster: Create hostPath volumes, for persistence by the Ceph mon and osd pods Run pods in privileged mode, for access to /dev and hostPath volumes Host networking for the Rook agent and clusters that require host networking Ceph OSDs require host PIDs for communication on the same node","title":"Rook Privileges"},{"location":"Getting-Started/ceph-openshift/#security-context-constraints","text":"Before starting the Rook operator or cluster, create the security context constraints needed by the Rook pods. The following yaml is found in operator-openshift.yaml under /deploy/examples . Hint Older versions of OpenShift may require apiVersion: v1 . Important to note is that if you plan on running Rook in namespaces other than the default rook-ceph , the example scc will need to be modified to accommodate for your namespaces where the Rook pods are running. To create the scc you will need a privileged account: 1 oc login -u system:admin We will create the security context constraints with the operator in the next section.","title":"Security Context Constraints"},{"location":"Getting-Started/ceph-openshift/#rook-settings","text":"There are some Rook settings that also need to be adjusted to work in OpenShift.","title":"Rook Settings"},{"location":"Getting-Started/ceph-openshift/#operator-settings","text":"There is an environment variable that needs to be set in the operator spec that will allow Rook to run in OpenShift clusters. ROOK_HOSTPATH_REQUIRES_PRIVILEGED : Must be set to true . Writing to the hostPath is required for the Ceph mon and osd pods. Given the restricted permissions in OpenShift with SELinux, the pod must be running privileged in order to write to the hostPath volume. 1 2 - name : ROOK_HOSTPATH_REQUIRES_PRIVILEGED value : \"true\" Now create the security context constraints and the operator: 1 oc create -f operator-openshift.yaml","title":"Operator Settings"},{"location":"Getting-Started/ceph-openshift/#cluster-settings","text":"The cluster settings in cluster.yaml are largely isolated from the differences in OpenShift. There is perhaps just one to take note of: dataDirHostPath : Ensure that it points to a valid, writable path on the host systems.","title":"Cluster Settings"},{"location":"Getting-Started/ceph-openshift/#object-store-settings","text":"In OpenShift, ports less than 1024 cannot be bound. In the object store CRD , ensure the port is modified to meet this requirement. 1 2 gateway : port : 8080 You can expose a different port such as 80 by creating a service. A sample object store can be created with these settings: 1 oc create -f object-openshift.yaml","title":"Object Store Settings"},{"location":"Getting-Started/ceph-teardown/","text":"Cleaning up a Cluster \u00b6 If you want to tear down the cluster and bring up a new one, be aware of the following resources that will need to be cleaned up: The resources created under Rook's namespace (assume rook-ceph here): The Rook operator and cluster created by operator.yaml and cluster.yaml (the cluster CRD) /var/lib/rook/rook-ceph : Path on each host in the cluster where configuration is cached by the ceph mons and osds Note that if you changed the default namespaces or paths such as dataDirHostPath in the sample yaml files, you will need to adjust these namespaces and paths throughout these instructions. 
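Before deleting the CephCluster, it can help to confirm that no volumes provisioned by Rook remain. This is an optional sanity check; the storage class names assume the defaults used in the walkthroughs:

    # Any PVCs still bound to the Rook storage classes?
    kubectl get pvc --all-namespaces | grep -E 'rook-ceph-block|csi-cephfs'
    # Any PVs still provisioned by the Ceph CSI drivers?
    kubectl get pv -o wide | grep rook-ceph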
If you see issues tearing down the cluster, see the Troubleshooting section below. If you are tearing down a cluster frequently for development purposes, it is instead recommended to use an environment such as Minikube that can easily be reset without worrying about any of these steps. Delete the Block and File artifacts \u00b6 First you will need to clean up the resources created on top of the Rook cluster. These commands will clean up the resources from the block and file walkthroughs (unmount volumes, delete volume claims, etc). If you did not complete those parts of the walkthrough, you can skip these instructions: 1 2 3 4 5 6 kubectl delete -f ../wordpress.yaml kubectl delete -f ../mysql.yaml kubectl delete -n rook-ceph cephblockpool replicapool kubectl delete storageclass rook-ceph-block kubectl delete -f csi/cephfs/kube-registry.yaml kubectl delete storageclass csi-cephfs After those block and file resources have been cleaned up, you can then delete your Rook cluster. This is important to delete before removing the Rook operator and agent or else resources may not be cleaned up properly . Delete the CephCluster CRD \u00b6 Edit the CephCluster and add the cleanupPolicy WARNING: DATA WILL BE PERMANENTLY DELETED AFTER DELETING THE CephCluster CR WITH cleanupPolicy . 1 kubectl -n rook-ceph patch cephcluster rook-ceph --type merge -p '{\"spec\":{\"cleanupPolicy\":{\"confirmation\":\"yes-really-destroy-data\"}}}' Once the cleanup policy is enabled, any new configuration changes in the CephCluster will be blocked. Nothing will happen until the deletion of the CR is requested, so this cleanupPolicy change can still be reverted if needed. Checkout more details about the cleanupPolicy here Delete the CephCluster CR. 1 kubectl -n rook-ceph delete cephcluster rook-ceph Verify that the cluster CR has been deleted before continuing to the next step. 1 kubectl -n rook-ceph get cephcluster If the cleanupPolicy was applied, then wait for the rook-ceph-cleanup jobs to be completed on all the nodes. These jobs will perform the following operations: Delete the namespace directory under dataDirHostPath , for example /var/lib/rook/rook-ceph , on all the nodes Wipe the data on the drives on all the nodes where OSDs were running in this cluster Note: The cleanup jobs might not start if the resources created on top of Rook Cluster are not deleted completely. See Delete the Operator and related Resources \u00b6 This will begin the process of the Rook Ceph operator and all other resources being cleaned up. This includes related resources such as the agent and discover daemonsets with the following commands: 1 2 3 kubectl delete -f operator.yaml kubectl delete -f common.yaml kubectl delete -f crds.yaml If the cleanupPolicy was applied and the cleanup jobs have completed on all the nodes, then the cluster tear down has been successful. If you skipped adding the cleanupPolicy then follow the manual steps mentioned below to tear down the cluster. Delete the data on hosts \u00b6 Attention The final cleanup step requires deleting files on each host in the cluster. All files under the dataDirHostPath property specified in the cluster CRD will need to be deleted. Otherwise, inconsistent state will remain when a new cluster is started. Connect to each machine and delete the namespace directory under dataDirHostPath , for example /var/lib/rook/rook-ceph . In the future this step will not be necessary when we build on the K8s local storage feature. 
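One way to run this step across a small cluster is a simple loop over the nodes. The node names and SSH access are assumptions for this sketch; adjust the path if dataDirHostPath was changed:

    for node in node1 node2 node3; do
      ssh "$node" sudo rm -rf /var/lib/rook/rook-ceph
    done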
If you modified the demo settings, additional cleanup is up to you for devices, host paths, etc. Zapping Devices \u00b6 Disks on nodes used by Rook for osds can be reset to a usable state with methods suggested below. Note that these scripts are not one-size-fits-all. Please use them with discretion to ensure you are not removing data unrelated to Rook and/or Ceph. Disks can be zapped fairly easily. A single disk can usually be cleared with some or all of the steps below. 1 2 3 4 5 6 7 8 9 10 11 12 13 DISK=\"/dev/sdX\" # Zap the disk to a fresh, usable state ( zap-all is important, b/c MBR has to be clean ) sgdisk --zap-all $DISK # Wipe a large portion of the beginning of the disk to remove more LVM metadata that may be present dd if=/dev/zero of=\"$DISK\" bs=1M count=100 oflag=direct,dsync # SSDs may be better cleaned with blkdiscard instead of dd blkdiscard $DISK # Inform the OS of partition table changes partprobe $DISK Ceph can leave LVM and device mapper data that can lock the disks, preventing the disks from being used again. These steps can help to free up old Ceph disks for re-use. Note that this only needs to be run once on each node. If you have only one Rook cluster and all Ceph disks are being wiped, run the following command. 1 2 3 4 5 6 # This command hangs on some systems: with caution, 'dmsetup remove_all --force' can be used ls /dev/mapper/ceph-* | xargs -I% -- dmsetup remove % # ceph-volume setup can leave ceph- directories in /dev and /dev/mapper ( unnecessary clutter ) rm -rf /dev/ceph-* rm -rf /dev/mapper/ceph--* If disks are still reported locked, rebooting the node often helps clear LVM-related holds on disks. If there are multiple Ceph clusters and some disks are not wiped yet, it is necessary to manually determine which disks map to which device mapper devices. Troubleshooting \u00b6 If the cleanup instructions are not executed in the order above, or you otherwise have difficulty cleaning up the cluster, here are a few things to try. The most common issue cleaning up the cluster is that the rook-ceph namespace or the cluster CRD remain indefinitely in the terminating state. A namespace cannot be removed until all of its resources are removed, so look at which resources are pending termination. Look at the pods: 1 kubectl -n rook-ceph get pod If a pod is still terminating, you will need to wait or else attempt to forcefully terminate it ( kubectl delete pod  ). Now look at the cluster CRD: 1 kubectl -n rook-ceph get cephcluster If the cluster CRD still exists even though you have executed the delete command earlier, see the next section on removing the finalizer. Removing the Cluster CRD Finalizer \u00b6 When a Cluster CRD is created, a finalizer is added automatically by the Rook operator. The finalizer will allow the operator to ensure that before the cluster CRD is deleted, all block and file mounts will be cleaned up. Without proper cleanup, pods consuming the storage will be hung indefinitely until a system reboot. The operator is responsible for removing the finalizer after the mounts have been cleaned up. 
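To check whether a finalizer is still present on the cluster CR, inspect its metadata directly; a minimal sketch assuming the default names:
# Shows any finalizers remaining on the CephCluster named rook-ceph
kubectl -n rook-ceph get cephcluster rook-ceph -o jsonpath='{.metadata.finalizers}'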
If for some reason the operator is not able to remove the finalizer (i.e., the operator is not running anymore), you can delete the finalizer manually with the following command: 1 2 3 4 for CRD in $(kubectl get crd -n rook-ceph | awk '/ceph.rook.io/ {print $1}'); do kubectl get -n rook-ceph \"$CRD\" -o name | \\ xargs -I {} kubectl patch -n rook-ceph {} --type merge -p '{\"metadata\":{\"finalizers\": []}}' done This command will patch the following CRDs on v1.3: 1 2 3 4 5 6 cephblockpools.ceph.rook.io cephclients.ceph.rook.io cephfilesystems.ceph.rook.io cephnfses.ceph.rook.io cephobjectstores.ceph.rook.io cephobjectstoreusers.ceph.rook.io Within a few seconds you should see that the cluster CRD has been deleted and will no longer block other cleanup such as deleting the rook-ceph namespace. If the namespace is still stuck in Terminating state, you can check which resources are holding up the deletion and remove the finalizers and delete those 1 2 kubectl api-resources --verbs=list --namespaced -o name \\ | xargs -n 1 kubectl get --show-kind --ignore-not-found -n rook-ceph Remove critical resource finalizers \u00b6 Rook adds a finalizer ceph.rook.io/disaster-protection to resources critical to the Ceph cluster so that the resources will not be accidentally deleted. The operator is responsible for removing the finalizers when a CephCluster is deleted. If for some reason the operator is not able to remove the finalizers (i.e., the operator is not running anymore), you can remove the finalizers manually with the following commands: 1 2 kubectl -n rook-ceph patch configmap rook-ceph-mon-endpoints --type merge -p '{\"metadata\":{\"finalizers\": []}}' kubectl -n rook-ceph patch secrets rook-ceph-mon --type merge -p '{\"metadata\":{\"finalizers\": []}}'","title":"Cleanup"},{"location":"Getting-Started/ceph-teardown/#cleaning-up-a-cluster","text":"If you want to tear down the cluster and bring up a new one, be aware of the following resources that will need to be cleaned up: The resources created under Rook's namespace (assume rook-ceph here): The Rook operator and cluster created by operator.yaml and cluster.yaml (the cluster CRD) /var/lib/rook/rook-ceph : Path on each host in the cluster where configuration is cached by the ceph mons and osds Note that if you changed the default namespaces or paths such as dataDirHostPath in the sample yaml files, you will need to adjust these namespaces and paths throughout these instructions. If you see issues tearing down the cluster, see the Troubleshooting section below. If you are tearing down a cluster frequently for development purposes, it is instead recommended to use an environment such as Minikube that can easily be reset without worrying about any of these steps.","title":"Cleaning up a Cluster"},{"location":"Getting-Started/ceph-teardown/#delete-the-block-and-file-artifacts","text":"First you will need to clean up the resources created on top of the Rook cluster. These commands will clean up the resources from the block and file walkthroughs (unmount volumes, delete volume claims, etc). If you did not complete those parts of the walkthrough, you can skip these instructions: 1 2 3 4 5 6 kubectl delete -f ../wordpress.yaml kubectl delete -f ../mysql.yaml kubectl delete -n rook-ceph cephblockpool replicapool kubectl delete storageclass rook-ceph-block kubectl delete -f csi/cephfs/kube-registry.yaml kubectl delete storageclass csi-cephfs After those block and file resources have been cleaned up, you can then delete your Rook cluster. 
This is important to delete before removing the Rook operator and agent or else resources may not be cleaned up properly .","title":"Delete the Block and File artifacts"},{"location":"Getting-Started/ceph-teardown/#delete-the-cephcluster-crd","text":"Edit the CephCluster and add the cleanupPolicy WARNING: DATA WILL BE PERMANENTLY DELETED AFTER DELETING THE CephCluster CR WITH cleanupPolicy . 1 kubectl -n rook-ceph patch cephcluster rook-ceph --type merge -p '{\"spec\":{\"cleanupPolicy\":{\"confirmation\":\"yes-really-destroy-data\"}}}' Once the cleanup policy is enabled, any new configuration changes in the CephCluster will be blocked. Nothing will happen until the deletion of the CR is requested, so this cleanupPolicy change can still be reverted if needed. Checkout more details about the cleanupPolicy here Delete the CephCluster CR. 1 kubectl -n rook-ceph delete cephcluster rook-ceph Verify that the cluster CR has been deleted before continuing to the next step. 1 kubectl -n rook-ceph get cephcluster If the cleanupPolicy was applied, then wait for the rook-ceph-cleanup jobs to be completed on all the nodes. These jobs will perform the following operations: Delete the namespace directory under dataDirHostPath , for example /var/lib/rook/rook-ceph , on all the nodes Wipe the data on the drives on all the nodes where OSDs were running in this cluster Note: The cleanup jobs might not start if the resources created on top of Rook Cluster are not deleted completely. See","title":"Delete the CephCluster CRD"},{"location":"Getting-Started/ceph-teardown/#delete-the-operator-and-related-resources","text":"This will begin the process of the Rook Ceph operator and all other resources being cleaned up. This includes related resources such as the agent and discover daemonsets with the following commands: 1 2 3 kubectl delete -f operator.yaml kubectl delete -f common.yaml kubectl delete -f crds.yaml If the cleanupPolicy was applied and the cleanup jobs have completed on all the nodes, then the cluster tear down has been successful. If you skipped adding the cleanupPolicy then follow the manual steps mentioned below to tear down the cluster.","title":"Delete the Operator and related Resources"},{"location":"Getting-Started/ceph-teardown/#delete-the-data-on-hosts","text":"Attention The final cleanup step requires deleting files on each host in the cluster. All files under the dataDirHostPath property specified in the cluster CRD will need to be deleted. Otherwise, inconsistent state will remain when a new cluster is started. Connect to each machine and delete the namespace directory under dataDirHostPath , for example /var/lib/rook/rook-ceph . In the future this step will not be necessary when we build on the K8s local storage feature. If you modified the demo settings, additional cleanup is up to you for devices, host paths, etc.","title":"Delete the data on hosts"},{"location":"Getting-Started/ceph-teardown/#zapping-devices","text":"Disks on nodes used by Rook for osds can be reset to a usable state with methods suggested below. Note that these scripts are not one-size-fits-all. Please use them with discretion to ensure you are not removing data unrelated to Rook and/or Ceph. Disks can be zapped fairly easily. A single disk can usually be cleared with some or all of the steps below. 
1 2 3 4 5 6 7 8 9 10 11 12 13 DISK=\"/dev/sdX\" # Zap the disk to a fresh, usable state ( zap-all is important, b/c MBR has to be clean ) sgdisk --zap-all $DISK # Wipe a large portion of the beginning of the disk to remove more LVM metadata that may be present dd if=/dev/zero of=\"$DISK\" bs=1M count=100 oflag=direct,dsync # SSDs may be better cleaned with blkdiscard instead of dd blkdiscard $DISK # Inform the OS of partition table changes partprobe $DISK Ceph can leave LVM and device mapper data that can lock the disks, preventing the disks from being used again. These steps can help to free up old Ceph disks for re-use. Note that this only needs to be run once on each node. If you have only one Rook cluster and all Ceph disks are being wiped, run the following command. 1 2 3 4 5 6 # This command hangs on some systems: with caution, 'dmsetup remove_all --force' can be used ls /dev/mapper/ceph-* | xargs -I% -- dmsetup remove % # ceph-volume setup can leave ceph- directories in /dev and /dev/mapper ( unnecessary clutter ) rm -rf /dev/ceph-* rm -rf /dev/mapper/ceph--* If disks are still reported locked, rebooting the node often helps clear LVM-related holds on disks. If there are multiple Ceph clusters and some disks are not wiped yet, it is necessary to manually determine which disks map to which device mapper devices.","title":"Zapping Devices"},{"location":"Getting-Started/ceph-teardown/#troubleshooting","text":"If the cleanup instructions are not executed in the order above, or you otherwise have difficulty cleaning up the cluster, here are a few things to try. The most common issue cleaning up the cluster is that the rook-ceph namespace or the cluster CRD remain indefinitely in the terminating state. A namespace cannot be removed until all of its resources are removed, so look at which resources are pending termination. Look at the pods: 1 kubectl -n rook-ceph get pod If a pod is still terminating, you will need to wait or else attempt to forcefully terminate it ( kubectl delete pod  ). Now look at the cluster CRD: 1 kubectl -n rook-ceph get cephcluster If the cluster CRD still exists even though you have executed the delete command earlier, see the next section on removing the finalizer.","title":"Troubleshooting"},{"location":"Getting-Started/ceph-teardown/#removing-the-cluster-crd-finalizer","text":"When a Cluster CRD is created, a finalizer is added automatically by the Rook operator. The finalizer will allow the operator to ensure that before the cluster CRD is deleted, all block and file mounts will be cleaned up. Without proper cleanup, pods consuming the storage will be hung indefinitely until a system reboot. The operator is responsible for removing the finalizer after the mounts have been cleaned up. 
If for some reason the operator is not able to remove the finalizer (i.e., the operator is not running anymore), you can delete the finalizer manually with the following command: 1 2 3 4 for CRD in $(kubectl get crd -n rook-ceph | awk '/ceph.rook.io/ {print $1}'); do kubectl get -n rook-ceph \"$CRD\" -o name | \\ xargs -I {} kubectl patch -n rook-ceph {} --type merge -p '{\"metadata\":{\"finalizers\": []}}' done This command will patch the following CRDs on v1.3: 1 2 3 4 5 6 cephblockpools.ceph.rook.io cephclients.ceph.rook.io cephfilesystems.ceph.rook.io cephnfses.ceph.rook.io cephobjectstores.ceph.rook.io cephobjectstoreusers.ceph.rook.io Within a few seconds you should see that the cluster CRD has been deleted and will no longer block other cleanup such as deleting the rook-ceph namespace. If the namespace is still stuck in Terminating state, you can check which resources are holding up the deletion and remove the finalizers and delete those 1 2 kubectl api-resources --verbs=list --namespaced -o name \\ | xargs -n 1 kubectl get --show-kind --ignore-not-found -n rook-ceph","title":"Removing the Cluster CRD Finalizer"},{"location":"Getting-Started/ceph-teardown/#remove-critical-resource-finalizers","text":"Rook adds a finalizer ceph.rook.io/disaster-protection to resources critical to the Ceph cluster so that the resources will not be accidentally deleted. The operator is responsible for removing the finalizers when a CephCluster is deleted. If for some reason the operator is not able to remove the finalizers (i.e., the operator is not running anymore), you can remove the finalizers manually with the following commands: 1 2 kubectl -n rook-ceph patch configmap rook-ceph-mon-endpoints --type merge -p '{\"metadata\":{\"finalizers\": []}}' kubectl -n rook-ceph patch secrets rook-ceph-mon --type merge -p '{\"metadata\":{\"finalizers\": []}}'","title":"Remove critical resource finalizers"},{"location":"Getting-Started/example-configurations/","text":"Configuration for Rook and Ceph can be configured in multiple ways to provide block devices, shared filesystem volumes or object storage in a kubernetes namespace. While several examples are provided to simplify storage setup, settings are available to optimize various production environments. See the example yaml files folder for all the rook/ceph setup example spec files. Common Resources \u00b6 The first step to deploy Rook is to create the CRDs and other common resources. The configuration for these resources will be the same for most deployments. The crds.yaml and common.yaml sets these resources up. 1 kubectl create -f crds.yaml -f common.yaml The examples all assume the operator and all Ceph daemons will be started in the same namespace. If deploying the operator in a separate namespace, see the comments throughout common.yaml . Operator \u00b6 After the common resources are created, the next step is to create the Operator deployment. Several spec file examples are provided in this directory : operator.yaml : The most common settings for production deployments kubectl create -f operator.yaml operator-openshift.yaml : Includes all of the operator settings for running a basic Rook cluster in an OpenShift environment. You will also want to review the OpenShift Prerequisites to confirm the settings. oc create -f operator-openshift.yaml Settings for the operator are configured through environment variables on the operator deployment. The individual settings are documented in operator.yaml . 
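Operator settings can also be adjusted on a running deployment instead of editing operator.yaml and re-applying it. For example, the log level documented in operator.yaml can be raised while debugging; this is a sketch that assumes the default operator namespace and deployment name:
# Triggers a rollout of the operator pod with a more verbose log level
kubectl -n rook-ceph set env deployment/rook-ceph-operator ROOK_LOG_LEVEL=DEBUG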
Cluster CRD \u00b6 Now that the operator is running, create the Ceph storage cluster with the CephCluster CR. This CR contains the most critical settings that will influence how the operator configures the storage. It is important to understand the various ways to configure the cluster. These examples represent several different ways to configure the storage. cluster.yaml : Common settings for a production storage cluster. Requires at least three worker nodes. cluster-test.yaml : Settings for a test cluster where redundancy is not configured. Requires only a single node. cluster-on-pvc.yaml : Common settings for backing the Ceph Mons and OSDs by PVs. Useful when running in cloud environments or where local PVs have been created for Ceph to consume. cluster-external.yaml : Connect to an external Ceph cluster with minimal access to monitor the health of the cluster and connect to the storage. cluster-external-management.yaml : Connect to an external Ceph cluster with the admin key of the external cluster to enable remote creation of pools and configure services such as an Object Store or a Shared Filesystem . cluster-stretched.yaml : Create a cluster in \"stretched\" mode, with five mons stretched across three zones, and the OSDs across two zones. See the Stretch documentation . See the Cluster CRD topic for more details and more examples for the settings. Setting up consumable storage \u00b6 Now we are ready to setup Block, Shared Filesystem or Object storage in the Rook cluster. These storage types are respectively created with the CephBlockPool, CephFilesystem and CephObjectStore CRs. Block Devices \u00b6 Ceph provides raw block device volumes to pods. Each example below sets up a storage class which can then be used to provision a block device in application pods. The storage class is defined with a Ceph pool which defines the level of data redundancy in Ceph: storageclass.yaml : This example illustrates replication of 3 for production scenarios and requires at least three worker nodes. Data is replicated on three different kubernetes worker nodes. Intermittent or long-lasting single node failures will not result in data unavailability or loss. storageclass-ec.yaml : Configures erasure coding for data durability rather than replication. Ceph's erasure coding is more efficient than replication so you can get high reliability without the 3x replication cost of the preceding example (but at the cost of higher computational encoding and decoding costs on the worker nodes). Erasure coding requires at least three worker nodes. See the Erasure coding documentation. storageclass-test.yaml : Replication of 1 for test scenarios. Requires only a single node. Do not use this for production applications. A single node failure can result in full data loss. The block storage classes are found in the examples directory: csi/rbd : the CSI driver examples for block devices See the CephBlockPool CRD topic for more block storage settings. Shared Filesystem \u00b6 Ceph filesystem (CephFS) allows the user to mount a shared posix-compliant folder into one or more application pods. This storage is similar to NFS shared storage or CIFS shared folders, as explained here . Shared Filesystem storage contains configurable pools for different scenarios: filesystem.yaml : Replication of 3 for production scenarios. Requires at least three worker nodes. filesystem-ec.yaml : Erasure coding for production scenarios. Requires at least three worker nodes. filesystem-test.yaml : Replication of 1 for test scenarios. 
Requires only a single node. Dynamic provisioning is possible with the CSI driver. The storage class for shared filesystems is found in the csi/cephfs directory. See the Shared Filesystem CRD topic for more details on the settings. Object Storage \u00b6 Ceph supports storing blobs of data called objects that support HTTP(s)-type get/put/post and delete semantics. This storage is similar to AWS S3 storage, for example. Object storage contains multiple pools that can be configured for different scenarios: object.yaml : Replication of 3 for production scenarios. Requires at least three worker nodes. object-openshift.yaml : Replication of 3 with rgw in a port range valid for OpenShift. Requires at least three worker nodes. object-ec.yaml : Erasure coding rather than replication for production scenarios. Requires at least three worker nodes. object-test.yaml : Replication of 1 for test scenarios. Requires only a single node. See the Object Store CRD topic for more details on the settings. Object Storage User \u00b6 object-user.yaml : Creates a simple object storage user and generates credentials for the S3 API Object Storage Buckets \u00b6 The Ceph operator also runs an object store bucket provisioner which can grant access to existing buckets or dynamically provision new buckets. object-bucket-claim-retain.yaml Creates a request for a new bucket by referencing a StorageClass which saves the bucket when the initiating OBC is deleted. object-bucket-claim-delete.yaml Creates a request for a new bucket by referencing a StorageClass which deletes the bucket when the initiating OBC is deleted. storageclass-bucket-retain.yaml Creates a new StorageClass which defines the Ceph Object Store and retains the bucket after the initiating OBC is deleted. storageclass-bucket-delete.yaml Creates a new StorageClass which defines the Ceph Object Store and deletes the bucket after the initiating OBC is deleted.","title":"Example Configurations"},{"location":"Getting-Started/example-configurations/#common-resources","text":"The first step to deploy Rook is to create the CRDs and other common resources. The configuration for these resources will be the same for most deployments. The crds.yaml and common.yaml sets these resources up. 1 kubectl create -f crds.yaml -f common.yaml The examples all assume the operator and all Ceph daemons will be started in the same namespace. If deploying the operator in a separate namespace, see the comments throughout common.yaml .","title":"Common Resources"},{"location":"Getting-Started/example-configurations/#operator","text":"After the common resources are created, the next step is to create the Operator deployment. Several spec file examples are provided in this directory : operator.yaml : The most common settings for production deployments kubectl create -f operator.yaml operator-openshift.yaml : Includes all of the operator settings for running a basic Rook cluster in an OpenShift environment. You will also want to review the OpenShift Prerequisites to confirm the settings. oc create -f operator-openshift.yaml Settings for the operator are configured through environment variables on the operator deployment. The individual settings are documented in operator.yaml .","title":"Operator"},{"location":"Getting-Started/example-configurations/#cluster-crd","text":"Now that the operator is running, create the Ceph storage cluster with the CephCluster CR. This CR contains the most critical settings that will influence how the operator configures the storage. 
It is important to understand the various ways to configure the cluster. These examples represent several different ways to configure the storage. cluster.yaml : Common settings for a production storage cluster. Requires at least three worker nodes. cluster-test.yaml : Settings for a test cluster where redundancy is not configured. Requires only a single node. cluster-on-pvc.yaml : Common settings for backing the Ceph Mons and OSDs by PVs. Useful when running in cloud environments or where local PVs have been created for Ceph to consume. cluster-external.yaml : Connect to an external Ceph cluster with minimal access to monitor the health of the cluster and connect to the storage. cluster-external-management.yaml : Connect to an external Ceph cluster with the admin key of the external cluster to enable remote creation of pools and configure services such as an Object Store or a Shared Filesystem . cluster-stretched.yaml : Create a cluster in \"stretched\" mode, with five mons stretched across three zones, and the OSDs across two zones. See the Stretch documentation . See the Cluster CRD topic for more details and more examples for the settings.","title":"Cluster CRD"},{"location":"Getting-Started/example-configurations/#setting-up-consumable-storage","text":"Now we are ready to setup Block, Shared Filesystem or Object storage in the Rook cluster. These storage types are respectively created with the CephBlockPool, CephFilesystem and CephObjectStore CRs.","title":"Setting up consumable storage"},{"location":"Getting-Started/example-configurations/#block-devices","text":"Ceph provides raw block device volumes to pods. Each example below sets up a storage class which can then be used to provision a block device in application pods. The storage class is defined with a Ceph pool which defines the level of data redundancy in Ceph: storageclass.yaml : This example illustrates replication of 3 for production scenarios and requires at least three worker nodes. Data is replicated on three different kubernetes worker nodes. Intermittent or long-lasting single node failures will not result in data unavailability or loss. storageclass-ec.yaml : Configures erasure coding for data durability rather than replication. Ceph's erasure coding is more efficient than replication so you can get high reliability without the 3x replication cost of the preceding example (but at the cost of higher computational encoding and decoding costs on the worker nodes). Erasure coding requires at least three worker nodes. See the Erasure coding documentation. storageclass-test.yaml : Replication of 1 for test scenarios. Requires only a single node. Do not use this for production applications. A single node failure can result in full data loss. The block storage classes are found in the examples directory: csi/rbd : the CSI driver examples for block devices See the CephBlockPool CRD topic for more block storage settings.","title":"Block Devices"},{"location":"Getting-Started/example-configurations/#shared-filesystem","text":"Ceph filesystem (CephFS) allows the user to mount a shared posix-compliant folder into one or more application pods. This storage is similar to NFS shared storage or CIFS shared folders, as explained here . Shared Filesystem storage contains configurable pools for different scenarios: filesystem.yaml : Replication of 3 for production scenarios. Requires at least three worker nodes. filesystem-ec.yaml : Erasure coding for production scenarios. Requires at least three worker nodes. 
filesystem-test.yaml : Replication of 1 for test scenarios. Requires only a single node. Dynamic provisioning is possible with the CSI driver. The storage class for shared filesystems is found in the csi/cephfs directory. See the Shared Filesystem CRD topic for more details on the settings.","title":"Shared Filesystem"},{"location":"Getting-Started/example-configurations/#object-storage","text":"Ceph supports storing blobs of data called objects that support HTTP(s)-type get/put/post and delete semantics. This storage is similar to AWS S3 storage, for example. Object storage contains multiple pools that can be configured for different scenarios: object.yaml : Replication of 3 for production scenarios. Requires at least three worker nodes. object-openshift.yaml : Replication of 3 with rgw in a port range valid for OpenShift. Requires at least three worker nodes. object-ec.yaml : Erasure coding rather than replication for production scenarios. Requires at least three worker nodes. object-test.yaml : Replication of 1 for test scenarios. Requires only a single node. See the Object Store CRD topic for more details on the settings.","title":"Object Storage"},{"location":"Getting-Started/example-configurations/#object-storage-user","text":"object-user.yaml : Creates a simple object storage user and generates credentials for the S3 API","title":"Object Storage User"},{"location":"Getting-Started/example-configurations/#object-storage-buckets","text":"The Ceph operator also runs an object store bucket provisioner which can grant access to existing buckets or dynamically provision new buckets. object-bucket-claim-retain.yaml Creates a request for a new bucket by referencing a StorageClass which saves the bucket when the initiating OBC is deleted. object-bucket-claim-delete.yaml Creates a request for a new bucket by referencing a StorageClass which deletes the bucket when the initiating OBC is deleted. storageclass-bucket-retain.yaml Creates a new StorageClass which defines the Ceph Object Store and retains the bucket after the initiating OBC is deleted. storageclass-bucket-delete.yaml Creates a new StorageClass which defines the Ceph Object Store and deletes the bucket after the initiating OBC is deleted.","title":"Object Storage Buckets"},{"location":"Getting-Started/glossary/","text":"Glossary \u00b6 Rook \u00b6 CephBlockPool CRD \u00b6 The CephBlockPool CRD is used by Rook to allow creation and customization of storage pools. CephBlockPoolRadosNamespace CRD \u00b6 The CephBlockPoolRadosNamespace CRD is used by Rook to allow creation of Ceph RADOS Namespaces. CephClient CRD \u00b6 CephClient CRD is used by Rook to allow creation and updating clients. CephCluster CRD \u00b6 The CephCluster CRD is used by Rook to allow creation and customization of storage clusters through the custom resource definitions (CRDs). Ceph CSI \u00b6 The Ceph CSI plugins implement an interface between a CSI-enabled Container Orchestrator (CO) and Ceph clusters. CephFilesystem CRD \u00b6 The CephFilesystem CRD is used by Rook to allow creation and customization of shared filesystems through the custom resource definitions (CRDs). CephFilesystemMirror CRD \u00b6 The CephFilesystemMirror CRD is used by Rook to allow creation and updating the Ceph fs-mirror daemon. CephFilesystemSubVolumeGroup CRD \u00b6 CephFilesystemMirror CRD is used by Rook to allow creation of Ceph Filesystem SubVolumeGroups. 
CephNFS CRD \u00b6 CephNFS CRD is used by Rook to allow exporting NFS shares of a CephFilesystem or CephObjectStore through the CephNFS custom resource definition. For further information please refer to the example here . CephObjectStore CRD \u00b6 CephObjectStore CRD is used by Rook to allow creation and customization of object stores. CephObjectStoreUser CRD \u00b6 CephObjectStoreUser CRD is used by Rook to allow creation and customization of object store users. For more information and examples refer to this documentation . CephObjectRealm CRD \u00b6 CephObjectRealm CRD is used by Rook to allow creation of a realm in a Ceph Object Multisite configuration. For more information and examples refer to this documentation . CephObjectZoneGroup CRD \u00b6 CephObjectZoneGroup CRD is used by Rook to allow creation of zone groups in a Ceph Object Multisite configuration. For more information and examples refer to this documentation . CephObjectZone CRD \u00b6 CephObjectZone CRD is used by Rook to allow creation of zones in a ceph cluster for a Ceph Object Multisite configuration. For more information and examples refer to this documentation . CephRBDMirror CRD \u00b6 CephRBDMirror CRD is used by Rook to allow creation and updating rbd-mirror daemon(s) through the custom resource definitions (CRDs). For more information and examples refer to this documentation . External Storage Cluster \u00b6 An external cluster is a Ceph configuration that is managed outside of the local K8s cluster. Host Storage Cluster \u00b6 A host storage cluster is where Rook configures Ceph to store data directly on the host devices. Krew Plugin \u00b6 The Rook Krew plugin is a tool to help troubleshoot your Rook cluster. Object Bucket Claim (OBC) \u00b6 An Object Bucket Claim (OBC) is custom resource which requests a bucket (new or existing) from a Ceph object store. For further reference please refer to OBC Custom Resource . Object Bucket (OB) \u00b6 An Object Bucket (OB) is a custom resource automatically generated when a bucket is provisioned. It is a global resource, typically not visible to non-admin users, and contains information specific to the bucket. OpenShift \u00b6 OpenShift Container Platform is a distribution of the Kubernetes container platform. PVC Storage Cluster \u00b6 In a PersistentVolumeClaim-based cluster , the Ceph persistent data is stored on volumes requested from a storage class of your choice. Stretch Storage Cluster \u00b6 A stretched cluster is a deployment model in which two datacenters with low latency are available for storage in the same K8s cluster, rather than three or more. To support this scenario, Rook has integrated support for stretch clusters . Toolbox \u00b6 The Rook toolbox is a container with common tools used for rook debugging and testing. Ceph \u00b6 Ceph is a distributed network storage and file system with distributed metadata management and POSIX semantics. See also the Ceph Glossary . Here are a few of the important terms to understand: Ceph Monitor (MON) Ceph Manager (MGR) Ceph Metadata Server (MDS) Object Storage Device (OSD) RADOS Block Device (RBD) Ceph Object Gateway (RGW) Kubernetes \u00b6 Kubernetes, also known as K8s, is an open-source system for automating deployment, scaling, and management of containerized applications. For further information see also the Kubernetes Glossary for more definitions. 
Here are a few of the important terms to understand: Affinity Container Storage Interface (CSI) for Kubernetes CustomResourceDefinition (CRDs) DaemonSet Deployment Finalizer Node affinity Node Selector PersistentVolume (PV) PersistentVolumeClaim (PVC) Selector Storage Class Taint Toleration Volume","title":"Glossary"},{"location":"Getting-Started/glossary/#glossary","text":"","title":"Glossary"},{"location":"Getting-Started/glossary/#rook","text":"","title":"Rook"},{"location":"Getting-Started/glossary/#cephblockpool-crd","text":"The CephBlockPool CRD is used by Rook to allow creation and customization of storage pools.","title":"CephBlockPool CRD"},{"location":"Getting-Started/glossary/#cephblockpoolradosnamespace-crd","text":"The CephBlockPoolRadosNamespace CRD is used by Rook to allow creation of Ceph RADOS Namespaces.","title":"CephBlockPoolRadosNamespace CRD"},{"location":"Getting-Started/glossary/#cephclient-crd","text":"CephClient CRD is used by Rook to allow creation and updating clients.","title":"CephClient CRD"},{"location":"Getting-Started/glossary/#cephcluster-crd","text":"The CephCluster CRD is used by Rook to allow creation and customization of storage clusters through the custom resource definitions (CRDs).","title":"CephCluster CRD"},{"location":"Getting-Started/glossary/#ceph-csi","text":"The Ceph CSI plugins implement an interface between a CSI-enabled Container Orchestrator (CO) and Ceph clusters.","title":"Ceph CSI"},{"location":"Getting-Started/glossary/#cephfilesystem-crd","text":"The CephFilesystem CRD is used by Rook to allow creation and customization of shared filesystems through the custom resource definitions (CRDs).","title":"CephFilesystem CRD"},{"location":"Getting-Started/glossary/#cephfilesystemmirror-crd","text":"The CephFilesystemMirror CRD is used by Rook to allow creation and updating the Ceph fs-mirror daemon.","title":"CephFilesystemMirror CRD"},{"location":"Getting-Started/glossary/#cephfilesystemsubvolumegroup-crd","text":"CephFilesystemMirror CRD is used by Rook to allow creation of Ceph Filesystem SubVolumeGroups.","title":"CephFilesystemSubVolumeGroup CRD"},{"location":"Getting-Started/glossary/#cephnfs-crd","text":"CephNFS CRD is used by Rook to allow exporting NFS shares of a CephFilesystem or CephObjectStore through the CephNFS custom resource definition. For further information please refer to the example here .","title":"CephNFS CRD"},{"location":"Getting-Started/glossary/#cephobjectstore-crd","text":"CephObjectStore CRD is used by Rook to allow creation and customization of object stores.","title":"CephObjectStore CRD"},{"location":"Getting-Started/glossary/#cephobjectstoreuser-crd","text":"CephObjectStoreUser CRD is used by Rook to allow creation and customization of object store users. For more information and examples refer to this documentation .","title":"CephObjectStoreUser CRD"},{"location":"Getting-Started/glossary/#cephobjectrealm-crd","text":"CephObjectRealm CRD is used by Rook to allow creation of a realm in a Ceph Object Multisite configuration. For more information and examples refer to this documentation .","title":"CephObjectRealm CRD"},{"location":"Getting-Started/glossary/#cephobjectzonegroup-crd","text":"CephObjectZoneGroup CRD is used by Rook to allow creation of zone groups in a Ceph Object Multisite configuration. 
For more information and examples refer to this documentation .","title":"CephObjectZoneGroup CRD"},{"location":"Getting-Started/glossary/#cephobjectzone-crd","text":"CephObjectZone CRD is used by Rook to allow creation of zones in a ceph cluster for a Ceph Object Multisite configuration. For more information and examples refer to this documentation .","title":"CephObjectZone CRD"},{"location":"Getting-Started/glossary/#cephrbdmirror-crd","text":"CephRBDMirror CRD is used by Rook to allow creation and updating rbd-mirror daemon(s) through the custom resource definitions (CRDs). For more information and examples refer to this documentation .","title":"CephRBDMirror CRD"},{"location":"Getting-Started/glossary/#external-storage-cluster","text":"An external cluster is a Ceph configuration that is managed outside of the local K8s cluster.","title":"External Storage Cluster"},{"location":"Getting-Started/glossary/#host-storage-cluster","text":"A host storage cluster is where Rook configures Ceph to store data directly on the host devices.","title":"Host Storage Cluster"},{"location":"Getting-Started/glossary/#krew-plugin","text":"The Rook Krew plugin is a tool to help troubleshoot your Rook cluster.","title":"Krew Plugin"},{"location":"Getting-Started/glossary/#object-bucket-claim-obc","text":"An Object Bucket Claim (OBC) is custom resource which requests a bucket (new or existing) from a Ceph object store. For further reference please refer to OBC Custom Resource .","title":"Object Bucket Claim (OBC)"},{"location":"Getting-Started/glossary/#object-bucket-ob","text":"An Object Bucket (OB) is a custom resource automatically generated when a bucket is provisioned. It is a global resource, typically not visible to non-admin users, and contains information specific to the bucket.","title":"Object Bucket (OB)"},{"location":"Getting-Started/glossary/#openshift","text":"OpenShift Container Platform is a distribution of the Kubernetes container platform.","title":"OpenShift"},{"location":"Getting-Started/glossary/#pvc-storage-cluster","text":"In a PersistentVolumeClaim-based cluster , the Ceph persistent data is stored on volumes requested from a storage class of your choice.","title":"PVC Storage Cluster"},{"location":"Getting-Started/glossary/#stretch-storage-cluster","text":"A stretched cluster is a deployment model in which two datacenters with low latency are available for storage in the same K8s cluster, rather than three or more. To support this scenario, Rook has integrated support for stretch clusters .","title":"Stretch Storage Cluster"},{"location":"Getting-Started/glossary/#toolbox","text":"The Rook toolbox is a container with common tools used for rook debugging and testing.","title":"Toolbox"},{"location":"Getting-Started/glossary/#ceph","text":"Ceph is a distributed network storage and file system with distributed metadata management and POSIX semantics. See also the Ceph Glossary . Here are a few of the important terms to understand: Ceph Monitor (MON) Ceph Manager (MGR) Ceph Metadata Server (MDS) Object Storage Device (OSD) RADOS Block Device (RBD) Ceph Object Gateway (RGW)","title":"Ceph"},{"location":"Getting-Started/glossary/#kubernetes","text":"Kubernetes, also known as K8s, is an open-source system for automating deployment, scaling, and management of containerized applications. For further information see also the Kubernetes Glossary for more definitions. 
Here are a few of the important terms to understand: Affinity Container Storage Interface (CSI) for Kubernetes CustomResourceDefinition (CRDs) DaemonSet Deployment Finalizer Node affinity Node Selector PersistentVolume (PV) PersistentVolumeClaim (PVC) Selector Storage Class Taint Toleration Volume","title":"Kubernetes"},{"location":"Getting-Started/intro/","text":"Rook \u00b6 Rook is an open source cloud-native storage orchestrator , providing the platform, framework, and support for Ceph storage to natively integrate with cloud-native environments. Ceph is a distributed storage system that provides file, block and object storage and is deployed in large scale production clusters. Rook automates deployment and management of Ceph to provide self-managing, self-scaling, and self-healing storage services. The Rook operator does this by building on Kubernetes resources to deploy, configure, provision, scale, upgrade, and monitor Ceph. The Ceph operator was declared stable in December 2018 in the Rook v0.9 release, providing a production storage platform for many years. Rook is hosted by the Cloud Native Computing Foundation (CNCF) as a graduated level project. Quick Start Guide \u00b6 Starting Ceph in your cluster is as simple as a few kubectl commands. See our Quickstart guide to get started with the Ceph operator! Designs \u00b6 Ceph is a highly scalable distributed storage solution for block storage, object storage, and shared filesystems with years of production deployments. See the Ceph overview . For detailed design documentation, see also the design docs . Need help? Be sure to join the Rook Slack \u00b6 If you have any questions along the way, don't hesitate to ask in our Slack channel . Sign up for the Rook Slack here .","title":"Rook"},{"location":"Getting-Started/intro/#rook","text":"Rook is an open source cloud-native storage orchestrator , providing the platform, framework, and support for Ceph storage to natively integrate with cloud-native environments. Ceph is a distributed storage system that provides file, block and object storage and is deployed in large scale production clusters. Rook automates deployment and management of Ceph to provide self-managing, self-scaling, and self-healing storage services. The Rook operator does this by building on Kubernetes resources to deploy, configure, provision, scale, upgrade, and monitor Ceph. The Ceph operator was declared stable in December 2018 in the Rook v0.9 release, providing a production storage platform for many years. Rook is hosted by the Cloud Native Computing Foundation (CNCF) as a graduated level project.","title":"Rook"},{"location":"Getting-Started/intro/#quick-start-guide","text":"Starting Ceph in your cluster is as simple as a few kubectl commands. See our Quickstart guide to get started with the Ceph operator!","title":"Quick Start Guide"},{"location":"Getting-Started/intro/#designs","text":"Ceph is a highly scalable distributed storage solution for block storage, object storage, and shared filesystems with years of production deployments. See the Ceph overview . For detailed design documentation, see also the design docs .","title":"Designs"},{"location":"Getting-Started/intro/#need-help-be-sure-to-join-the-rook-slack","text":"If you have any questions along the way, don't hesitate to ask in our Slack channel . Sign up for the Rook Slack here .","title":"Need help? Be sure to join the Rook Slack"},{"location":"Getting-Started/quickstart/","text":"Welcome to Rook! 
We hope you have a great experience installing the Rook cloud-native storage orchestrator platform to enable highly available, durable Ceph storage in Kubernetes clusters. Don't hesitate to ask questions in our Slack channel . Sign up for the Rook Slack here . This guide will walk through the basic setup of a Ceph cluster and enable K8s applications to consume block, object, and file storage. Always use a virtual machine when testing Rook. Never use a host system where local devices may mistakenly be consumed. Minimum Version \u00b6 Kubernetes v1.22 or higher is supported by Rook. CPU Architecture \u00b6 Architectures released are amd64 / x86_64 and arm64 . Prerequisites \u00b6 To check if a Kubernetes cluster is ready for Rook , see the prerequisites . To configure the Ceph storage cluster, at least one of these local storage options are required: Raw devices (no partitions or formatted filesystems) Raw partitions (no formatted filesystem) LVM Logical Volumes (no formatted filesystem) Persistent Volumes available from a storage class in block mode TL;DR \u00b6 A simple Rook cluster is created for Kubernetes with the following kubectl commands and example manifests . 1 2 3 4 $ git clone --single-branch --branch master https://github.com/rook/rook.git cd rook/deploy/examples kubectl create -f crds.yaml -f common.yaml -f operator.yaml kubectl create -f cluster.yaml After the cluster is running, applications can consume block, object, or file storage. Deploy the Rook Operator \u00b6 The first step is to deploy the Rook operator. Important The Rook Helm Chart is available to deploy the operator instead of creating the below manifests. Note Check that the example yaml files are from a tagged release of Rook. Note These steps are for a standard production Rook deployment in Kubernetes. For Openshift, testing, or more options, see the example configurations documentation . 1 2 3 4 5 cd deploy/examples kubectl create -f crds.yaml -f common.yaml -f operator.yaml # verify the rook-ceph-operator is in the ` Running ` state before proceeding kubectl -n rook-ceph get pod Before starting the operator in production, consider these settings: Some Rook features are disabled by default. See the operator.yaml for these and other advanced settings. Device discovery: Rook will watch for new devices to configure if the ROOK_ENABLE_DISCOVERY_DAEMON setting is enabled, commonly used in bare metal clusters. Node affinity and tolerations: The CSI driver by default will run on any node in the cluster. To restrict the CSI driver affinity, several settings are available. If deploying Rook into a namespace other than the default rook-ceph , see the topic on using an alternative namespace . Cluster Environments \u00b6 The Rook documentation is focused around starting Rook in a variety of environments. While creating the cluster in this guide, consider these example cluster manifests: cluster.yaml : Cluster settings for a production cluster running on bare metal. Requires at least three worker nodes. cluster-on-pvc.yaml : Cluster settings for a production cluster running in a dynamic cloud environment. cluster-test.yaml : Cluster settings for a test environment such as minikube. See the Ceph example configurations for more details. Create a Ceph Cluster \u00b6 Now that the Rook operator is running we can create the Ceph cluster. Important The Rook Cluster Helm Chart is available to deploy the operator instead of creating the below manifests. 
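Before creating the cluster, it can also be useful to wait for the operator deployment to report as available, so the CephCluster CR is not created while the operator is still starting; a sketch assuming the default namespace:
# Blocks until the operator deployment is Available or the timeout expires
kubectl -n rook-ceph wait deployment/rook-ceph-operator --for=condition=Available --timeout=300s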
Important For the cluster to survive reboots, set the dataDirHostPath property that is valid for the hosts. For more settings, see the documentation on configuring the cluster . Create the cluster: 1 kubectl create -f cluster.yaml Verify the cluster is running by viewing the pods in the rook-ceph namespace. The number of osd pods will depend on the number of nodes in the cluster and the number of devices configured. For the default cluster.yaml above, one OSD will be created for each available device found on each node. Hint If the rook-ceph-mon , rook-ceph-mgr , or rook-ceph-osd pods are not created, please refer to the Ceph common issues for more details and potential solutions. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 $ kubectl -n rook-ceph get pod NAME READY STATUS RESTARTS AGE csi-cephfsplugin-provisioner-d77bb49c6-n5tgs 5/5 Running 0 140s csi-cephfsplugin-provisioner-d77bb49c6-v9rvn 5/5 Running 0 140s csi-cephfsplugin-rthrp 3/3 Running 0 140s csi-rbdplugin-hbsm7 3/3 Running 0 140s csi-rbdplugin-provisioner-5b5cd64fd-nvk6c 6/6 Running 0 140s csi-rbdplugin-provisioner-5b5cd64fd-q7bxl 6/6 Running 0 140s rook-ceph-crashcollector-minikube-5b57b7c5d4-hfldl 1/1 Running 0 105s rook-ceph-mgr-a-64cd7cdf54-j8b5p 2/2 Running 0 77s rook-ceph-mgr-b-657d54fc89-2xxw7 2/2 Running 0 56s rook-ceph-mon-a-694bb7987d-fp9w7 1/1 Running 0 105s rook-ceph-mon-b-856fdd5cb9-5h2qk 1/1 Running 0 94s rook-ceph-mon-c-57545897fc-j576h 1/1 Running 0 85s rook-ceph-operator-85f5b946bd-s8grz 1/1 Running 0 92m rook-ceph-osd-0-6bb747b6c5-lnvb6 1/1 Running 0 23s rook-ceph-osd-1-7f67f9646d-44p7v 1/1 Running 0 24s rook-ceph-osd-2-6cd4b776ff-v4d68 1/1 Running 0 25s rook-ceph-osd-prepare-node1-vx2rz 0/2 Completed 0 60s rook-ceph-osd-prepare-node2-ab3fd 0/2 Completed 0 60s rook-ceph-osd-prepare-node3-w4xyz 0/2 Completed 0 60s To verify that the cluster is in a healthy state, connect to the Rook toolbox and run the ceph status command. All mons should be in quorum A mgr should be active At least three OSDs should be up and in If the health is not HEALTH_OK , the warnings or errors should be investigated 1 2 3 4 5 6 7 8 9 10 $ ceph status cluster: id: a0452c76-30d9-4c1a-a948-5d8405f19a7c health: HEALTH_OK services: mon: 3 daemons, quorum a,b,c (age 3m) mgr:a(active, since 2m), standbys: b osd: 3 osds: 3 up (since 1m), 3 in (since 1m) []...] Hint If the cluster is not healthy, please refer to the Ceph common issues for potential solutions. Storage \u00b6 For a walkthrough of the three types of storage exposed by Rook, see the guides for: Block : Create block storage to be consumed by a pod (RWO) Shared Filesystem : Create a filesystem to be shared across multiple pods (RWX) Object : Create an object store that is accessible with an S3 endpoint inside or outside the Kubernetes cluster Ceph Dashboard \u00b6 Ceph has a dashboard to view the status of the cluster. See the dashboard guide . Tools \u00b6 Create a toolbox pod for full access to a ceph admin client for debugging and troubleshooting the Rook cluster. See the toolbox documentation for setup and usage information. The Rook Krew plugin provides commands to view status and troubleshoot issues. See the advanced configuration document for helpful maintenance and tuning examples. Monitoring \u00b6 Each Rook cluster has built-in metrics collectors/exporters for monitoring with Prometheus. To configure monitoring, see the monitoring guide . Telemetry \u00b6 The Rook maintainers would like to receive telemetry reports for Rook clusters. 
The data is anonymous and does not include any identifying information. Enable the telemetry reporting feature with the following command in the toolbox: 1 ceph telemetry on For more details on what is reported and how your privacy is protected, see the Ceph Telemetry Documentation . Teardown \u00b6 When finished with the test cluster, see the cleanup guide .","title":"Quickstart"},{"location":"Getting-Started/quickstart/#minimum-version","text":"Kubernetes v1.22 or higher is supported by Rook.","title":"Minimum Version"},{"location":"Getting-Started/quickstart/#cpu-architecture","text":"Architectures released are amd64 / x86_64 and arm64 .","title":"CPU Architecture"},{"location":"Getting-Started/quickstart/#prerequisites","text":"To check if a Kubernetes cluster is ready for Rook , see the prerequisites . To configure the Ceph storage cluster, at least one of these local storage options are required: Raw devices (no partitions or formatted filesystems) Raw partitions (no formatted filesystem) LVM Logical Volumes (no formatted filesystem) Persistent Volumes available from a storage class in block mode","title":"Prerequisites"},{"location":"Getting-Started/quickstart/#tldr","text":"A simple Rook cluster is created for Kubernetes with the following kubectl commands and example manifests . 1 2 3 4 $ git clone --single-branch --branch master https://github.com/rook/rook.git cd rook/deploy/examples kubectl create -f crds.yaml -f common.yaml -f operator.yaml kubectl create -f cluster.yaml After the cluster is running, applications can consume block, object, or file storage.","title":"TL;DR"},{"location":"Getting-Started/quickstart/#deploy-the-rook-operator","text":"The first step is to deploy the Rook operator. Important The Rook Helm Chart is available to deploy the operator instead of creating the below manifests. Note Check that the example yaml files are from a tagged release of Rook. Note These steps are for a standard production Rook deployment in Kubernetes. For Openshift, testing, or more options, see the example configurations documentation . 1 2 3 4 5 cd deploy/examples kubectl create -f crds.yaml -f common.yaml -f operator.yaml # verify the rook-ceph-operator is in the ` Running ` state before proceeding kubectl -n rook-ceph get pod Before starting the operator in production, consider these settings: Some Rook features are disabled by default. See the operator.yaml for these and other advanced settings. Device discovery: Rook will watch for new devices to configure if the ROOK_ENABLE_DISCOVERY_DAEMON setting is enabled, commonly used in bare metal clusters. Node affinity and tolerations: The CSI driver by default will run on any node in the cluster. To restrict the CSI driver affinity, several settings are available. If deploying Rook into a namespace other than the default rook-ceph , see the topic on using an alternative namespace .","title":"Deploy the Rook Operator"},{"location":"Getting-Started/quickstart/#cluster-environments","text":"The Rook documentation is focused around starting Rook in a variety of environments. While creating the cluster in this guide, consider these example cluster manifests: cluster.yaml : Cluster settings for a production cluster running on bare metal. Requires at least three worker nodes. cluster-on-pvc.yaml : Cluster settings for a production cluster running in a dynamic cloud environment. cluster-test.yaml : Cluster settings for a test environment such as minikube. 
See the Ceph example configurations for more details.","title":"Cluster Environments"},{"location":"Getting-Started/quickstart/#create-a-ceph-cluster","text":"Now that the Rook operator is running we can create the Ceph cluster. Important The Rook Cluster Helm Chart is available to deploy the operator instead of creating the below manifests. Important For the cluster to survive reboots, set the dataDirHostPath property that is valid for the hosts. For more settings, see the documentation on configuring the cluster . Create the cluster: 1 kubectl create -f cluster.yaml Verify the cluster is running by viewing the pods in the rook-ceph namespace. The number of osd pods will depend on the number of nodes in the cluster and the number of devices configured. For the default cluster.yaml above, one OSD will be created for each available device found on each node. Hint If the rook-ceph-mon , rook-ceph-mgr , or rook-ceph-osd pods are not created, please refer to the Ceph common issues for more details and potential solutions. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 $ kubectl -n rook-ceph get pod NAME READY STATUS RESTARTS AGE csi-cephfsplugin-provisioner-d77bb49c6-n5tgs 5/5 Running 0 140s csi-cephfsplugin-provisioner-d77bb49c6-v9rvn 5/5 Running 0 140s csi-cephfsplugin-rthrp 3/3 Running 0 140s csi-rbdplugin-hbsm7 3/3 Running 0 140s csi-rbdplugin-provisioner-5b5cd64fd-nvk6c 6/6 Running 0 140s csi-rbdplugin-provisioner-5b5cd64fd-q7bxl 6/6 Running 0 140s rook-ceph-crashcollector-minikube-5b57b7c5d4-hfldl 1/1 Running 0 105s rook-ceph-mgr-a-64cd7cdf54-j8b5p 2/2 Running 0 77s rook-ceph-mgr-b-657d54fc89-2xxw7 2/2 Running 0 56s rook-ceph-mon-a-694bb7987d-fp9w7 1/1 Running 0 105s rook-ceph-mon-b-856fdd5cb9-5h2qk 1/1 Running 0 94s rook-ceph-mon-c-57545897fc-j576h 1/1 Running 0 85s rook-ceph-operator-85f5b946bd-s8grz 1/1 Running 0 92m rook-ceph-osd-0-6bb747b6c5-lnvb6 1/1 Running 0 23s rook-ceph-osd-1-7f67f9646d-44p7v 1/1 Running 0 24s rook-ceph-osd-2-6cd4b776ff-v4d68 1/1 Running 0 25s rook-ceph-osd-prepare-node1-vx2rz 0/2 Completed 0 60s rook-ceph-osd-prepare-node2-ab3fd 0/2 Completed 0 60s rook-ceph-osd-prepare-node3-w4xyz 0/2 Completed 0 60s To verify that the cluster is in a healthy state, connect to the Rook toolbox and run the ceph status command. All mons should be in quorum A mgr should be active At least three OSDs should be up and in If the health is not HEALTH_OK , the warnings or errors should be investigated 1 2 3 4 5 6 7 8 9 10 $ ceph status cluster: id: a0452c76-30d9-4c1a-a948-5d8405f19a7c health: HEALTH_OK services: mon: 3 daemons, quorum a,b,c (age 3m) mgr:a(active, since 2m), standbys: b osd: 3 osds: 3 up (since 1m), 3 in (since 1m) []...] Hint If the cluster is not healthy, please refer to the Ceph common issues for potential solutions.","title":"Create a Ceph Cluster"},{"location":"Getting-Started/quickstart/#storage","text":"For a walkthrough of the three types of storage exposed by Rook, see the guides for: Block : Create block storage to be consumed by a pod (RWO) Shared Filesystem : Create a filesystem to be shared across multiple pods (RWX) Object : Create an object store that is accessible with an S3 endpoint inside or outside the Kubernetes cluster","title":"Storage"},{"location":"Getting-Started/quickstart/#ceph-dashboard","text":"Ceph has a dashboard to view the status of the cluster. 
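If the dashboard module is enabled, one quick way to reach it from a workstation is a port-forward to the manager's dashboard service; the service name and port below assume the default SSL-enabled configuration:
# Then browse to https://localhost:8443 and log in with the dashboard admin credentials
kubectl -n rook-ceph port-forward svc/rook-ceph-mgr-dashboard 8443:8443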
See the dashboard guide .","title":"Ceph Dashboard"},{"location":"Getting-Started/quickstart/#tools","text":"Create a toolbox pod for full access to a ceph admin client for debugging and troubleshooting the Rook cluster. See the toolbox documentation for setup and usage information. The Rook Krew plugin provides commands to view status and troubleshoot issues. See the advanced configuration document for helpful maintenance and tuning examples.","title":"Tools"},{"location":"Getting-Started/quickstart/#monitoring","text":"Each Rook cluster has built-in metrics collectors/exporters for monitoring with Prometheus. To configure monitoring, see the monitoring guide .","title":"Monitoring"},{"location":"Getting-Started/quickstart/#telemetry","text":"The Rook maintainers would like to receive telemetry reports for Rook clusters. The data is anonymous and does not include any identifying information. Enable the telemetry reporting feature with the following command in the toolbox: 1 ceph telemetry on For more details on what is reported and how your privacy is protected, see the Ceph Telemetry Documentation .","title":"Telemetry"},{"location":"Getting-Started/quickstart/#teardown","text":"When finished with the test cluster, see the cleanup guide .","title":"Teardown"},{"location":"Getting-Started/release-cycle/","text":"Release Cycle \u00b6 Rook plans to release a new minor version three times a year, or about every four months. The most recent two minor Rook releases are actively maintained. Patch releases for the latest minor release are typically bi-weekly. Urgent patches may be released sooner. Patch releases for the previous minor release are commonly monthly, though will vary depending on the urgency of fixes. Definition of Maintenance \u00b6 The Rook community defines maintenance in that relevant bug fixes that are merged to the main development branch will be eligible to be back-ported to the release branch of any currently maintained version. Patches will be released as needed. It is also possible that a fix may be merged directly to the release branch if no longer applicable on the main development branch. While Rook maintainers make significant efforts to release urgent issues in a timely manner, maintenance does not indicate any SLA on response time. K8s Versions \u00b6 The minimum version supported by a Rook release is specified in the Quickstart Guide . Rook expects to support the most recent six versions of Kubernetes. While these K8s versions may not all be supported by the K8s release cycle, we understand that clusters may take time to update.","title":"Release Cycle"},{"location":"Getting-Started/release-cycle/#release-cycle","text":"Rook plans to release a new minor version three times a year, or about every four months. The most recent two minor Rook releases are actively maintained. Patch releases for the latest minor release are typically bi-weekly. Urgent patches may be released sooner. Patch releases for the previous minor release are commonly monthly, though will vary depending on the urgency of fixes.","title":"Release Cycle"},{"location":"Getting-Started/release-cycle/#definition-of-maintenance","text":"The Rook community defines maintenance in that relevant bug fixes that are merged to the main development branch will be eligible to be back-ported to the release branch of any currently maintained version. Patches will be released as needed. 
It is also possible that a fix may be merged directly to the release branch if no longer applicable on the main development branch. While Rook maintainers make significant efforts to release urgent issues in a timely manner, maintenance does not indicate any SLA on response time.","title":"Definition of Maintenance"},{"location":"Getting-Started/release-cycle/#k8s-versions","text":"The minimum version supported by a Rook release is specified in the Quickstart Guide . Rook expects to support the most recent six versions of Kubernetes. While these K8s versions may not all be supported by the K8s release cycle, we understand that clusters may take time to update.","title":"K8s Versions"},{"location":"Getting-Started/storage-architecture/","text":"Ceph is a highly scalable distributed storage solution for block storage , object storage , and shared filesystems with years of production deployments. Design \u00b6 Rook enables Ceph storage to run on Kubernetes using Kubernetes primitives. With Ceph running in the Kubernetes cluster, Kubernetes applications can mount block devices and filesystems managed by Rook, or can use the S3/Swift API for object storage. The Rook operator automates configuration of storage components and monitors the cluster to ensure the storage remains available and healthy. The Rook operator is a simple container that has all that is needed to bootstrap and monitor the storage cluster. The operator will start and monitor Ceph monitor pods , the Ceph OSD daemons to provide RADOS storage, as well as start and manage other Ceph daemons. The operator manages CRDs for pools, object stores (S3/Swift), and filesystems by initializing the pods and other resources necessary to run the services. The operator will monitor the storage daemons to ensure the cluster is healthy. Ceph mons will be started or failed over when necessary, and other adjustments are made as the cluster grows or shrinks. The operator will also watch for desired state changes specified in the Ceph custom resources (CRs) and apply the changes. Rook automatically configures the Ceph-CSI driver to mount the storage to your pods. The rook/ceph image includes all necessary tools to manage the cluster. Rook is not in the Ceph data path. Many of the Ceph concepts like placement groups and crush maps are hidden so you don't have to worry about them. Instead, Rook creates a simplified user experience for admins that is in terms of physical resources, pools, volumes, filesystems, and buckets. Advanced configuration can be applied when needed with the Ceph tools. Rook is implemented in golang. Ceph is implemented in C++ where the data path is highly optimized. We believe this combination offers the best of both worlds. Architecture \u00b6 Example applications are shown above for the three supported storage types: Block Storage is represented with a blue app, which has a ReadWriteOnce (RWO) volume mounted. The application can read and write to the RWO volume, while Ceph manages the IO. Shared Filesystem is represented by two purple apps that are sharing a ReadWriteMany (RWX) volume. Both applications can actively read or write simultaneously to the volume. Ceph will ensure the data is safely protected for multiple writers with the MDS daemon. Object storage is represented by an orange app that can read and write to a bucket with a standard S3 client. 
Below the dotted line in the above diagram, the components fall into three categories: Rook operator (blue layer): The operator automates configuration of Ceph CSI plugins and provisioners (orange layer): The Ceph-CSI driver provides the provisioning and mounting of volumes Ceph daemons (red layer): The Ceph daemons run the core storage architecture. See the Glossary to learn more about each daemon. Production clusters must have three or more nodes for a resilient storage platform. Block Storage \u00b6 In the diagram above, the flow to create an application with an RWO volume is: The (blue) app creates a PVC to request storage The PVC defines the Ceph RBD storage class (sc) for provisioning the storage K8s calls the Ceph-CSI RBD provisioner to create the Ceph RBD image. The kubelet calls the CSI RBD volume plugin to mount the volume in the app The volume is now available for reads and writes. A ReadWriteOnce volume can be mounted on one node at a time. Shared Filesystem \u00b6 In the diagram above, the flow to create a applications with a RWX volume is: The (purple) app creates a PVC to request storage The PVC defines the CephFS storage class (sc) for provisioning the storage K8s calls the Ceph-CSI CephFS provisioner to create the CephFS subvolume The kubelet calls the CSI CephFS volume plugin to mount the volume in the app The volume is now available for reads and writes. A ReadWriteMany volume can be mounted on multiple nodes for your application to use. Object Storage S3 \u00b6 In the diagram above, the flow to create an application with access to an S3 bucket is: The (orange) app creates an ObjectBucketClaim (OBC) to request a bucket The Rook operator creates a Ceph RGW bucket (via the lib-bucket-provisioner) The Rook operator creates a secret with the credentials for accessing the bucket and a configmap with bucket information The app retrieves the credentials from the secret The app can now read and write to the bucket with an S3 client A S3 compatible client can use the S3 bucket right away using the credentials ( Secret ) and bucket info ( ConfigMap ).","title":"Storage Architecture"},{"location":"Getting-Started/storage-architecture/#design","text":"Rook enables Ceph storage to run on Kubernetes using Kubernetes primitives. With Ceph running in the Kubernetes cluster, Kubernetes applications can mount block devices and filesystems managed by Rook, or can use the S3/Swift API for object storage. The Rook operator automates configuration of storage components and monitors the cluster to ensure the storage remains available and healthy. The Rook operator is a simple container that has all that is needed to bootstrap and monitor the storage cluster. The operator will start and monitor Ceph monitor pods , the Ceph OSD daemons to provide RADOS storage, as well as start and manage other Ceph daemons. The operator manages CRDs for pools, object stores (S3/Swift), and filesystems by initializing the pods and other resources necessary to run the services. The operator will monitor the storage daemons to ensure the cluster is healthy. Ceph mons will be started or failed over when necessary, and other adjustments are made as the cluster grows or shrinks. The operator will also watch for desired state changes specified in the Ceph custom resources (CRs) and apply the changes. Rook automatically configures the Ceph-CSI driver to mount the storage to your pods. The rook/ceph image includes all necessary tools to manage the cluster. Rook is not in the Ceph data path. 
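To make the Block Storage flow described above concrete, a hedged sketch of the PVC the (blue) app would create is shown here. The storage class name is an assumption: ceph-block is the cluster Helm chart default, while the example manifests in deploy/examples use rook-ceph-block.

```yaml
# Sketch: request an RWO block volume from the Ceph RBD storage class.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: app-data                 # hypothetical claim name
spec:
  accessModes:
    - ReadWriteOnce              # RBD volumes are mounted on one node at a time
  resources:
    requests:
      storage: 5Gi
  storageClassName: ceph-block   # adjust to the RBD storage class deployed in your cluster
```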
Many of the Ceph concepts like placement groups and crush maps are hidden so you don't have to worry about them. Instead, Rook creates a simplified user experience for admins that is in terms of physical resources, pools, volumes, filesystems, and buckets. Advanced configuration can be applied when needed with the Ceph tools. Rook is implemented in golang. Ceph is implemented in C++ where the data path is highly optimized. We believe this combination offers the best of both worlds.","title":"Design"},{"location":"Getting-Started/storage-architecture/#architecture","text":"Example applications are shown above for the three supported storage types: Block Storage is represented with a blue app, which has a ReadWriteOnce (RWO) volume mounted. The application can read and write to the RWO volume, while Ceph manages the IO. Shared Filesystem is represented by two purple apps that are sharing a ReadWriteMany (RWX) volume. Both applications can actively read or write simultaneously to the volume. Ceph will ensure the data is safely protected for multiple writers with the MDS daemon. Object storage is represented by an orange app that can read and write to a bucket with a standard S3 client. Below the dotted line in the above diagram, the components fall into three categories: Rook operator (blue layer): The operator automates configuration of Ceph CSI plugins and provisioners (orange layer): The Ceph-CSI driver provides the provisioning and mounting of volumes Ceph daemons (red layer): The Ceph daemons run the core storage architecture. See the Glossary to learn more about each daemon. Production clusters must have three or more nodes for a resilient storage platform.","title":"Architecture"},{"location":"Getting-Started/storage-architecture/#block-storage","text":"In the diagram above, the flow to create an application with an RWO volume is: The (blue) app creates a PVC to request storage The PVC defines the Ceph RBD storage class (sc) for provisioning the storage K8s calls the Ceph-CSI RBD provisioner to create the Ceph RBD image. The kubelet calls the CSI RBD volume plugin to mount the volume in the app The volume is now available for reads and writes. A ReadWriteOnce volume can be mounted on one node at a time.","title":"Block Storage"},{"location":"Getting-Started/storage-architecture/#shared-filesystem","text":"In the diagram above, the flow to create a applications with a RWX volume is: The (purple) app creates a PVC to request storage The PVC defines the CephFS storage class (sc) for provisioning the storage K8s calls the Ceph-CSI CephFS provisioner to create the CephFS subvolume The kubelet calls the CSI CephFS volume plugin to mount the volume in the app The volume is now available for reads and writes. 
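To make the Shared Filesystem flow just described concrete, a hedged PVC sketch follows. The storage class name is an assumption: ceph-filesystem is the cluster Helm chart default, while the example manifests use rook-cephfs.

```yaml
# Sketch: request an RWX volume backed by CephFS so multiple pods can mount it.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: shared-data                    # hypothetical claim name
spec:
  accessModes:
    - ReadWriteMany                    # CephFS volumes can be mounted by many pods at once
  resources:
    requests:
      storage: 10Gi
  storageClassName: ceph-filesystem    # adjust to the CephFS storage class in your cluster
```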
A ReadWriteMany volume can be mounted on multiple nodes for your application to use.","title":"Shared Filesystem"},{"location":"Getting-Started/storage-architecture/#object-storage-s3","text":"In the diagram above, the flow to create an application with access to an S3 bucket is: The (orange) app creates an ObjectBucketClaim (OBC) to request a bucket The Rook operator creates a Ceph RGW bucket (via the lib-bucket-provisioner) The Rook operator creates a secret with the credentials for accessing the bucket and a configmap with bucket information The app retrieves the credentials from the secret The app can now read and write to the bucket with an S3 client A S3 compatible client can use the S3 bucket right away using the credentials ( Secret ) and bucket info ( ConfigMap ).","title":"Object Storage S3"},{"location":"Getting-Started/Prerequisites/authenticated-registry/","text":"If you want to use an image from authenticated docker registry (e.g. for image cache/mirror), you'll need to add an imagePullSecret to all relevant service accounts. This way all pods created by the operator (for service account: rook-ceph-system ) or all new pods in the namespace (for service account: default ) will have the imagePullSecret added to their spec. The whole process is described in the official kubernetes documentation . Example setup for a ceph cluster \u00b6 To get you started, here's a quick rundown for the ceph example from the quickstart guide . First, we'll create the secret for our registry as described here (the secret will be created in the rook-ceph namespace, make sure to change it if your Rook Ceph Operator/Cluster is in another namespace): 1 kubectl -n rook-ceph create secret docker-registry my-registry-secret --docker-server=DOCKER_REGISTRY_SERVER --docker-username=DOCKER_USER --docker-password=DOCKER_PASSWORD --docker-email=DOCKER_EMAIL Next we'll add the following snippet to all relevant service accounts as described here : 1 2 imagePullSecrets : - name : my-registry-secret The service accounts are: rook-ceph-system (namespace: rook-ceph ): Will affect all pods created by the rook operator in the rook-ceph namespace. default (namespace: rook-ceph ): Will affect most pods in the rook-ceph namespace. rook-ceph-mgr (namespace: rook-ceph ): Will affect the MGR pods in the rook-ceph namespace. rook-ceph-osd (namespace: rook-ceph ): Will affect the OSD pods in the rook-ceph namespace. rook-ceph-rgw (namespace: rook-ceph ): Will affect the RGW pods in the rook-ceph namespace. You can do it either via e.g. kubectl -n  edit serviceaccount default or by modifying the operator.yaml and cluster.yaml before deploying them. Since it's the same procedure for all service accounts, here is just one example: 1 kubectl -n rook-ceph edit serviceaccount default 1 2 3 4 5 6 7 8 9 10 apiVersion : v1 kind : ServiceAccount metadata : name : default namespace : rook-ceph secrets : - name : default-token-12345 # Add the highlighted lines: imagePullSecrets : - name : my-registry-secret After doing this for all service accounts all pods should be able to pull the image from your registry.","title":"Authenticated Container Registries"},{"location":"Getting-Started/Prerequisites/authenticated-registry/#example-setup-for-a-ceph-cluster","text":"To get you started, here's a quick rundown for the ceph example from the quickstart guide . 
First, we'll create the secret for our registry as described here (the secret will be created in the rook-ceph namespace, make sure to change it if your Rook Ceph Operator/Cluster is in another namespace): 1 kubectl -n rook-ceph create secret docker-registry my-registry-secret --docker-server=DOCKER_REGISTRY_SERVER --docker-username=DOCKER_USER --docker-password=DOCKER_PASSWORD --docker-email=DOCKER_EMAIL Next we'll add the following snippet to all relevant service accounts as described here : 1 2 imagePullSecrets : - name : my-registry-secret The service accounts are: rook-ceph-system (namespace: rook-ceph ): Will affect all pods created by the rook operator in the rook-ceph namespace. default (namespace: rook-ceph ): Will affect most pods in the rook-ceph namespace. rook-ceph-mgr (namespace: rook-ceph ): Will affect the MGR pods in the rook-ceph namespace. rook-ceph-osd (namespace: rook-ceph ): Will affect the OSD pods in the rook-ceph namespace. rook-ceph-rgw (namespace: rook-ceph ): Will affect the RGW pods in the rook-ceph namespace. You can do it either via e.g. kubectl -n  edit serviceaccount default or by modifying the operator.yaml and cluster.yaml before deploying them. Since it's the same procedure for all service accounts, here is just one example: 1 kubectl -n rook-ceph edit serviceaccount default 1 2 3 4 5 6 7 8 9 10 apiVersion : v1 kind : ServiceAccount metadata : name : default namespace : rook-ceph secrets : - name : default-token-12345 # Add the highlighted lines: imagePullSecrets : - name : my-registry-secret After doing this for all service accounts all pods should be able to pull the image from your registry.","title":"Example setup for a ceph cluster"},{"location":"Getting-Started/Prerequisites/prerequisites/","text":"Rook can be installed on any existing Kubernetes cluster as long as it meets the minimum version and Rook is granted the required privileges (see below for more information). Minimum Version \u00b6 Kubernetes v1.22 or higher is supported. CPU Architecture \u00b6 Architectures supported are amd64 / x86_64 and arm64 . Ceph Prerequisites \u00b6 To configure the Ceph storage cluster, at least one of these local storage types is required: Raw devices (no partitions or formatted filesystems) Raw partitions (no formatted filesystem) LVM Logical Volumes (no formatted filesystem) Persistent Volumes available from a storage class in block mode Confirm whether the partitions or devices are formatted with filesystems with the following command: 1 2 3 4 5 6 7 $ lsblk -f NAME FSTYPE LABEL UUID MOUNTPOINT vda \u2514\u2500vda1 LVM2_member >eSO50t-GkUV-YKTH-WsGq-hNJY-eKNf-3i07IB \u251c\u2500ubuntu--vg-root ext4 c2366f76-6e21-4f10-a8f3-6776212e2fe4 / \u2514\u2500ubuntu--vg-swap_1 swap 9492a3dc-ad75-47cd-9596-678e8cf17ff9 [SWAP] vdb If the FSTYPE field is not empty, there is a filesystem on top of the corresponding device. In this example, vdb is available to Rook, while vda and its partitions have a filesystem and are not available. Admission Controller \u00b6 Enabling the Rook admission controller is recommended to provide an additional level of validation that Rook is configured correctly with the custom resource (CR) settings. An admission controller intercepts requests to the Kubernetes API server prior to persistence of the object, but after the request is authenticated and authorized. 
To deploy the Rook admission controllers, install the cert manager before Rook is installed: 1 kubectl apply -f https://github.com/jetstack/cert-manager/releases/download/v1.11.1/cert-manager.yaml LVM package \u00b6 Ceph OSDs have a dependency on LVM in the following scenarios: If encryption is enabled ( encryptedDevice: \"true\" in the cluster CR) A metadata device is specified LVM is not required for OSDs in these scenarios: OSDs are created on raw devices or partitions OSDs are created on PVCs using the storageClassDeviceSets If LVM is required, LVM needs to be available on the hosts where OSDs will be running. Some Linux distributions do not ship with the lvm2 package. This package is required on all storage nodes in the k8s cluster to run Ceph OSDs. Without this package even though Rook will be able to successfully create the Ceph OSDs, when a node is rebooted the OSD pods running on the restarted node will fail to start . Please install LVM using your Linux distribution's package manager. For example: CentOS : 1 sudo yum install -y lvm2 Ubuntu : 1 sudo apt-get install -y lvm2 RancherOS : Since version 1.5.0 LVM is supported Logical volumes will not be activated during the boot process. You need to add an runcmd command for that. 1 2 runcmd : - [ \"vgchange\" , \"-ay\" ] Kernel \u00b6 RBD \u00b6 Ceph requires a Linux kernel built with the RBD module. Many Linux distributions have this module, but not all. For example, the GKE Container-Optimised OS (COS) does not have RBD. Test your Kubernetes nodes by running modprobe rbd . If the rbd module is 'not found', rebuild the kernel to include the rbd module, install a newer kernel, or choose a different Linux distribution. Rook's default RBD configuration specifies only the layering feature, for broad compatibility with older kernels. If your Kubernetes nodes run a 5.4 or later kernel, additional feature flags can be enabled in the storage class. The fast-diff and object-map features are especially useful. 1 imageFeatures : layering,fast-diff,object-map,deep-flatten,exclusive-lock CephFS \u00b6 If creating RWX volumes from a Ceph shared file system (CephFS), the recommended minimum kernel version is 4.17 . If the kernel version is less than 4.17, the requested PVC sizes will not be enforced. Storage quotas will only be enforced on newer kernels. Distro Notes \u00b6 Specific configurations for some distributions. NixOS \u00b6 For NixOS, the kernel modules will be found in the non-standard path /run/current-system/kernel-modules/lib/modules/ , and they'll be symlinked inside the also non-standard path /nix . Rook containers require read access to those locations to be able to load the required modules. They have to be bind-mounted as volumes in the CephFS and RBD plugin pods. 
If installing Rook with Helm, uncomment these example settings in values.yaml : csi.csiCephFSPluginVolume csi.csiCephFSPluginVolumeMount csi.csiRBDPluginVolume csi.csiRBDPluginVolumeMount If deploying without Helm, add those same values to the settings in the rook-ceph-operator-config ConfigMap found in operator.yaml: CSI_CEPHFS_PLUGIN_VOLUME CSI_CEPHFS_PLUGIN_VOLUME_MOUNT CSI_RBD_PLUGIN_VOLUME CSI_RBD_PLUGIN_VOLUME_MOUNT","title":"Prerequisites"},{"location":"Getting-Started/Prerequisites/prerequisites/#minimum-version","text":"Kubernetes v1.22 or higher is supported.","title":"Minimum Version"},{"location":"Getting-Started/Prerequisites/prerequisites/#cpu-architecture","text":"Architectures supported are amd64 / x86_64 and arm64 .","title":"CPU Architecture"},{"location":"Getting-Started/Prerequisites/prerequisites/#ceph-prerequisites","text":"To configure the Ceph storage cluster, at least one of these local storage types is required: Raw devices (no partitions or formatted filesystems) Raw partitions (no formatted filesystem) LVM Logical Volumes (no formatted filesystem) Persistent Volumes available from a storage class in block mode Confirm whether the partitions or devices are formatted with filesystems with the following command: 1 2 3 4 5 6 7 $ lsblk -f NAME FSTYPE LABEL UUID MOUNTPOINT vda \u2514\u2500vda1 LVM2_member >eSO50t-GkUV-YKTH-WsGq-hNJY-eKNf-3i07IB \u251c\u2500ubuntu--vg-root ext4 c2366f76-6e21-4f10-a8f3-6776212e2fe4 / \u2514\u2500ubuntu--vg-swap_1 swap 9492a3dc-ad75-47cd-9596-678e8cf17ff9 [SWAP] vdb If the FSTYPE field is not empty, there is a filesystem on top of the corresponding device. In this example, vdb is available to Rook, while vda and its partitions have a filesystem and are not available.","title":"Ceph Prerequisites"},{"location":"Getting-Started/Prerequisites/prerequisites/#admission-controller","text":"Enabling the Rook admission controller is recommended to provide an additional level of validation that Rook is configured correctly with the custom resource (CR) settings. An admission controller intercepts requests to the Kubernetes API server prior to persistence of the object, but after the request is authenticated and authorized. To deploy the Rook admission controllers, install the cert manager before Rook is installed: 1 kubectl apply -f https://github.com/jetstack/cert-manager/releases/download/v1.11.1/cert-manager.yaml","title":"Admission Controller"},{"location":"Getting-Started/Prerequisites/prerequisites/#lvm-package","text":"Ceph OSDs have a dependency on LVM in the following scenarios: If encryption is enabled ( encryptedDevice: \"true\" in the cluster CR) A metadata device is specified LVM is not required for OSDs in these scenarios: OSDs are created on raw devices or partitions OSDs are created on PVCs using the storageClassDeviceSets If LVM is required, LVM needs to be available on the hosts where OSDs will be running. Some Linux distributions do not ship with the lvm2 package. This package is required on all storage nodes in the k8s cluster to run Ceph OSDs. Without this package even though Rook will be able to successfully create the Ceph OSDs, when a node is rebooted the OSD pods running on the restarted node will fail to start . Please install LVM using your Linux distribution's package manager. For example: CentOS : 1 sudo yum install -y lvm2 Ubuntu : 1 sudo apt-get install -y lvm2 RancherOS : Since version 1.5.0 LVM is supported Logical volumes will not be activated during the boot process. You need to add an runcmd command for that. 
1 2 runcmd : - [ \"vgchange\" , \"-ay\" ]","title":"LVM package"},{"location":"Getting-Started/Prerequisites/prerequisites/#kernel","text":"","title":"Kernel"},{"location":"Getting-Started/Prerequisites/prerequisites/#rbd","text":"Ceph requires a Linux kernel built with the RBD module. Many Linux distributions have this module, but not all. For example, the GKE Container-Optimised OS (COS) does not have RBD. Test your Kubernetes nodes by running modprobe rbd . If the rbd module is 'not found', rebuild the kernel to include the rbd module, install a newer kernel, or choose a different Linux distribution. Rook's default RBD configuration specifies only the layering feature, for broad compatibility with older kernels. If your Kubernetes nodes run a 5.4 or later kernel, additional feature flags can be enabled in the storage class. The fast-diff and object-map features are especially useful. 1 imageFeatures : layering,fast-diff,object-map,deep-flatten,exclusive-lock","title":"RBD"},{"location":"Getting-Started/Prerequisites/prerequisites/#cephfs","text":"If creating RWX volumes from a Ceph shared file system (CephFS), the recommended minimum kernel version is 4.17 . If the kernel version is less than 4.17, the requested PVC sizes will not be enforced. Storage quotas will only be enforced on newer kernels.","title":"CephFS"},{"location":"Getting-Started/Prerequisites/prerequisites/#distro-notes","text":"Specific configurations for some distributions.","title":"Distro Notes"},{"location":"Getting-Started/Prerequisites/prerequisites/#nixos","text":"For NixOS, the kernel modules will be found in the non-standard path /run/current-system/kernel-modules/lib/modules/ , and they'll be symlinked inside the also non-standard path /nix . Rook containers require read access to those locations to be able to load the required modules. They have to be bind-mounted as volumes in the CephFS and RBD plugin pods. If installing Rook with Helm, uncomment these example settings in values.yaml : csi.csiCephFSPluginVolume csi.csiCephFSPluginVolumeMount csi.csiRBDPluginVolume csi.csiRBDPluginVolumeMount If deploying without Helm, add those same values to the settings in the rook-ceph-operator-config ConfigMap found in operator.yaml: CSI_CEPHFS_PLUGIN_VOLUME CSI_CEPHFS_PLUGIN_VOLUME_MOUNT CSI_RBD_PLUGIN_VOLUME CSI_RBD_PLUGIN_VOLUME_MOUNT","title":"NixOS"},{"location":"Helm-Charts/ceph-cluster-chart/","text":"Creates Rook resources to configure a Ceph cluster using the Helm package manager. This chart is a simple packaging of templates that will optionally create Rook resources such as: CephCluster, CephFilesystem, and CephObjectStore CRs Storage classes to expose Ceph RBD volumes, CephFS volumes, and RGW buckets Ingress for external access to the dashboard Toolbox Prerequisites \u00b6 Kubernetes 1.22+ Helm 3.x Install the Rook Operator chart Installing \u00b6 The helm install command deploys rook on the Kubernetes cluster in the default configuration. The configuration section lists the parameters that can be configured during installation. It is recommended that the rook operator be installed into the rook-ceph namespace. The clusters can be installed into the same namespace as the operator or a separate namespace. Rook currently publishes builds of this chart to the release and master channels. Before installing, review the values.yaml to confirm if the default settings need to be updated. If the operator was installed in a namespace other than rook-ceph , the namespace must be set in the operatorNamespace variable. 
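As an illustration of the operatorNamespace variable mentioned above, a values.yaml fragment for the rook-ceph-cluster chart might look like the following. This is only a sketch; the parameter names come from the configuration table on this page, and the values shown are examples rather than recommendations.

```yaml
# Sketch of a values.yaml fragment for the rook-ceph-cluster chart.
operatorNamespace: rook-ceph   # set this if the operator runs in a different namespace
toolbox:
  enabled: true                # optionally deploy the Ceph debugging toolbox
monitoring:
  enabled: false               # requires Prometheus to be pre-installed if set to true
```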
Set the desired settings in the cephClusterSpec . The defaults are only an example and not likely to apply to your cluster. The monitoring section should be removed from the cephClusterSpec , as it is specified separately in the helm settings. The default values for cephBlockPools , cephFileSystems , and CephObjectStores will create one of each, and their corresponding storage classes. All Ceph components now have default values for the pod resources. The resources may need to be adjusted in production clusters depending on the load. The resources can also be disabled if Ceph should not be limited (e.g. test clusters). Release \u00b6 The release channel is the most recent release of Rook that is considered stable for the community. The example install assumes you have first installed the Rook Operator Helm Chart and created your customized values.yaml. 1 2 3 helm repo add rook-release https://charts.rook.io/release helm install --create-namespace --namespace rook-ceph rook-ceph-cluster \\ --set operatorNamespace=rook-ceph rook-release/rook-ceph-cluster -f values.yaml Note --namespace specifies the cephcluster namespace, which may be different from the rook operator namespace. Configuration \u00b6 The following table lists the configurable parameters of the rook-operator chart and their default values. Parameter Description Default cephBlockPools A list of CephBlockPool configurations to deploy See below cephBlockPoolsVolumeSnapshotClass Settings for the block pool snapshot class See RBD Snapshots cephClusterSpec Cluster configuration. See below cephFileSystemVolumeSnapshotClass Settings for the filesystem snapshot class See CephFS Snapshots cephFileSystems A list of CephFileSystem configurations to deploy See below cephObjectStores A list of CephObjectStore configurations to deploy See below clusterName The metadata.name of the CephCluster CR The same as the namespace configOverride Cluster ceph.conf override nil ingress.dashboard Enable an ingress for the ceph-dashboard {} kubeVersion Optional override of the target kubernetes version nil monitoring.createPrometheusRules Whether to create the Prometheus rules for Ceph alerts false monitoring.enabled Enable Prometheus integration, will also create necessary RBAC rules to allow Operator to create ServiceMonitors. Monitoring requires Prometheus to be pre-installed false monitoring.prometheusRule.annotations Annotations applied to PrometheusRule {} monitoring.prometheusRule.labels Labels applied to PrometheusRule {} monitoring.rulesNamespaceOverride The namespace in which to create the prometheus rules, if different from the rook cluster namespace. If you have multiple rook-ceph clusters in the same k8s cluster, choose the same namespace (ideally, namespace with prometheus deployed) to set rulesNamespaceOverride for all the clusters. Otherwise, you will get duplicate alerts with multiple alert definitions. nil operatorNamespace Namespace of the main rook operator \"rook-ceph\" pspEnable Create & use PSP resources. Set this to the same value as the rook-ceph chart. false toolbox.affinity Toolbox affinity {} toolbox.enabled Enable Ceph debugging pod deployment. 
See toolbox false toolbox.image Toolbox image, defaults to the image used by the Ceph cluster nil toolbox.priorityClassName Set the priority class for the toolbox if desired nil toolbox.resources Toolbox resources {\"limits\":{\"cpu\":\"500m\",\"memory\":\"1Gi\"},\"requests\":{\"cpu\":\"100m\",\"memory\":\"128Mi\"}} toolbox.tolerations Toolbox tolerations [] Ceph Cluster Spec \u00b6 The CephCluster CRD takes its spec from cephClusterSpec.* . This is not an exhaustive list of parameters. For the full list, see the Cluster CRD topic. The cluster spec example is for a converged cluster where all the Ceph daemons are running locally, as in the host-based example (cluster.yaml). For a different configuration such as a PVC-based cluster (cluster-on-pvc.yaml), external cluster (cluster-external.yaml), or stretch cluster (cluster-stretched.yaml), replace this entire cephClusterSpec with the specs from those examples. Ceph Block Pools \u00b6 The cephBlockPools array in the values file will define a list of CephBlockPool as described in the table below. Parameter Description Default name The name of the CephBlockPool ceph-blockpool spec The CephBlockPool spec, see the CephBlockPool documentation. {} storageClass.enabled Whether a storage class is deployed alongside the CephBlockPool true storageClass.isDefault Whether the storage class will be the default storage class for PVCs. See PersistentVolumeClaim documentation for details. true storageClass.name The name of the storage class ceph-block storageClass.parameters See Block Storage documentation or the helm values.yaml for suitable values see values.yaml storageClass.reclaimPolicy The default Reclaim Policy to apply to PVCs created with this storage class. Delete storageClass.allowVolumeExpansion Whether volume expansion is allowed by default. true storageClass.mountOptions Specifies the mount options for storageClass [] storageClass.allowedTopologies Specifies the allowedTopologies for storageClass [] Ceph File Systems \u00b6 The cephFileSystems array in the values file will define a list of CephFileSystem as described in the table below. Parameter Description Default name The name of the CephFileSystem ceph-filesystem spec The CephFileSystem spec, see the CephFilesystem CRD documentation. see values.yaml storageClass.enabled Whether a storage class is deployed alongside the CephFileSystem true storageClass.name The name of the storage class ceph-filesystem storageClass.pool The name of Data Pool , without the filesystem name prefix data0 storageClass.parameters See Shared Filesystem documentation or the helm values.yaml for suitable values see values.yaml storageClass.reclaimPolicy The default Reclaim Policy to apply to PVCs created with this storage class. Delete storageClass.mountOptions Specifies the mount options for storageClass [] Ceph Object Stores \u00b6 The cephObjectStores array in the values file will define a list of CephObjectStore as described in the table below. Parameter Description Default name The name of the CephObjectStore ceph-objectstore spec The CephObjectStore spec, see the CephObjectStore CRD documentation. see values.yaml storageClass.enabled Whether a storage class is deployed alongside the CephObjectStore true storageClass.name The name of the storage class ceph-bucket storageClass.parameters See Object Store storage class documentation or the helm values.yaml for suitable values see values.yaml storageClass.reclaimPolicy The default Reclaim Policy to apply to PVCs created with this storage class. 
Delete ingress.enabled Enable an ingress for the object store false ingress.annotations Ingress annotations {} ingress.host.name Ingress hostname \"\" ingress.host.path Ingress path prefix / ingress.tls Ingress tls / ingress.ingressClassName Ingress tls \"\" Existing Clusters \u00b6 If you have an existing CephCluster CR that was created without the helm chart and you want the helm chart to start managing the cluster: Extract the spec section of your existing CephCluster CR and copy to the cephClusterSpec section in values.yaml . Add the following annotations and label to your existing CephCluster CR: 1 2 3 4 5 annotations : meta.helm.sh/release-name : rook-ceph-cluster meta.helm.sh/release-namespace : rook-ceph labels : app.kubernetes.io/managed-by : Helm Run the helm install command in the Installing section to create the chart. In the future when updates to the cluster are needed, ensure the values.yaml always contains the desired CephCluster spec. Development Build \u00b6 To deploy from a local build from your development environment: 1 2 cd deploy/charts/rook-ceph-cluster helm install --create-namespace --namespace rook-ceph rook-ceph-cluster -f values.yaml . Uninstalling the Chart \u00b6 To see the currently installed Rook chart: 1 helm ls --namespace rook-ceph To uninstall/delete the rook-ceph-cluster chart: 1 helm delete --namespace rook-ceph rook-ceph-cluster The command removes all the Kubernetes components associated with the chart and deletes the release. Removing the cluster chart does not remove the Rook operator. In addition, all data on hosts in the Rook data directory ( /var/lib/rook by default) and on OSD raw devices is kept. To reuse disks, you will have to wipe them before recreating the cluster. See the teardown documentation for more information.","title":"Ceph Cluster Helm Chart"},{"location":"Helm-Charts/ceph-cluster-chart/#prerequisites","text":"Kubernetes 1.22+ Helm 3.x Install the Rook Operator chart","title":"Prerequisites"},{"location":"Helm-Charts/ceph-cluster-chart/#installing","text":"The helm install command deploys rook on the Kubernetes cluster in the default configuration. The configuration section lists the parameters that can be configured during installation. It is recommended that the rook operator be installed into the rook-ceph namespace. The clusters can be installed into the same namespace as the operator or a separate namespace. Rook currently publishes builds of this chart to the release and master channels. Before installing, review the values.yaml to confirm if the default settings need to be updated. If the operator was installed in a namespace other than rook-ceph , the namespace must be set in the operatorNamespace variable. Set the desired settings in the cephClusterSpec . The defaults are only an example and not likely to apply to your cluster. The monitoring section should be removed from the cephClusterSpec , as it is specified separately in the helm settings. The default values for cephBlockPools , cephFileSystems , and CephObjectStores will create one of each, and their corresponding storage classes. All Ceph components now have default values for the pod resources. The resources may need to be adjusted in production clusters depending on the load. The resources can also be disabled if Ceph should not be limited (e.g. test clusters).","title":"Installing"},{"location":"Helm-Charts/ceph-cluster-chart/#release","text":"The release channel is the most recent release of Rook that is considered stable for the community. 
The example install assumes you have first installed the Rook Operator Helm Chart and created your customized values.yaml. 1 2 3 helm repo add rook-release https://charts.rook.io/release helm install --create-namespace --namespace rook-ceph rook-ceph-cluster \\ --set operatorNamespace=rook-ceph rook-release/rook-ceph-cluster -f values.yaml Note --namespace specifies the cephcluster namespace, which may be different from the rook operator namespace.","title":"Release"},{"location":"Helm-Charts/ceph-cluster-chart/#configuration","text":"The following table lists the configurable parameters of the rook-operator chart and their default values. Parameter Description Default cephBlockPools A list of CephBlockPool configurations to deploy See below cephBlockPoolsVolumeSnapshotClass Settings for the block pool snapshot class See RBD Snapshots cephClusterSpec Cluster configuration. See below cephFileSystemVolumeSnapshotClass Settings for the filesystem snapshot class See CephFS Snapshots cephFileSystems A list of CephFileSystem configurations to deploy See below cephObjectStores A list of CephObjectStore configurations to deploy See below clusterName The metadata.name of the CephCluster CR The same as the namespace configOverride Cluster ceph.conf override nil ingress.dashboard Enable an ingress for the ceph-dashboard {} kubeVersion Optional override of the target kubernetes version nil monitoring.createPrometheusRules Whether to create the Prometheus rules for Ceph alerts false monitoring.enabled Enable Prometheus integration, will also create necessary RBAC rules to allow Operator to create ServiceMonitors. Monitoring requires Prometheus to be pre-installed false monitoring.prometheusRule.annotations Annotations applied to PrometheusRule {} monitoring.prometheusRule.labels Labels applied to PrometheusRule {} monitoring.rulesNamespaceOverride The namespace in which to create the prometheus rules, if different from the rook cluster namespace. If you have multiple rook-ceph clusters in the same k8s cluster, choose the same namespace (ideally, namespace with prometheus deployed) to set rulesNamespaceOverride for all the clusters. Otherwise, you will get duplicate alerts with multiple alert definitions. nil operatorNamespace Namespace of the main rook operator \"rook-ceph\" pspEnable Create & use PSP resources. Set this to the same value as the rook-ceph chart. false toolbox.affinity Toolbox affinity {} toolbox.enabled Enable Ceph debugging pod deployment. See toolbox false toolbox.image Toolbox image, defaults to the image used by the Ceph cluster nil toolbox.priorityClassName Set the priority class for the toolbox if desired nil toolbox.resources Toolbox resources {\"limits\":{\"cpu\":\"500m\",\"memory\":\"1Gi\"},\"requests\":{\"cpu\":\"100m\",\"memory\":\"128Mi\"}} toolbox.tolerations Toolbox tolerations []","title":"Configuration"},{"location":"Helm-Charts/ceph-cluster-chart/#ceph-cluster-spec","text":"The CephCluster CRD takes its spec from cephClusterSpec.* . This is not an exhaustive list of parameters. For the full list, see the Cluster CRD topic. The cluster spec example is for a converged cluster where all the Ceph daemons are running locally, as in the host-based example (cluster.yaml). 
For a different configuration such as a PVC-based cluster (cluster-on-pvc.yaml), external cluster (cluster-external.yaml), or stretch cluster (cluster-stretched.yaml), replace this entire cephClusterSpec with the specs from those examples.","title":"Ceph Cluster Spec"},{"location":"Helm-Charts/ceph-cluster-chart/#ceph-block-pools","text":"The cephBlockPools array in the values file will define a list of CephBlockPool as described in the table below. Parameter Description Default name The name of the CephBlockPool ceph-blockpool spec The CephBlockPool spec, see the CephBlockPool documentation. {} storageClass.enabled Whether a storage class is deployed alongside the CephBlockPool true storageClass.isDefault Whether the storage class will be the default storage class for PVCs. See PersistentVolumeClaim documentation for details. true storageClass.name The name of the storage class ceph-block storageClass.parameters See Block Storage documentation or the helm values.yaml for suitable values see values.yaml storageClass.reclaimPolicy The default Reclaim Policy to apply to PVCs created with this storage class. Delete storageClass.allowVolumeExpansion Whether volume expansion is allowed by default. true storageClass.mountOptions Specifies the mount options for storageClass [] storageClass.allowedTopologies Specifies the allowedTopologies for storageClass []","title":"Ceph Block Pools"},{"location":"Helm-Charts/ceph-cluster-chart/#ceph-file-systems","text":"The cephFileSystems array in the values file will define a list of CephFileSystem as described in the table below. Parameter Description Default name The name of the CephFileSystem ceph-filesystem spec The CephFileSystem spec, see the CephFilesystem CRD documentation. see values.yaml storageClass.enabled Whether a storage class is deployed alongside the CephFileSystem true storageClass.name The name of the storage class ceph-filesystem storageClass.pool The name of Data Pool , without the filesystem name prefix data0 storageClass.parameters See Shared Filesystem documentation or the helm values.yaml for suitable values see values.yaml storageClass.reclaimPolicy The default Reclaim Policy to apply to PVCs created with this storage class. Delete storageClass.mountOptions Specifies the mount options for storageClass []","title":"Ceph File Systems"},{"location":"Helm-Charts/ceph-cluster-chart/#ceph-object-stores","text":"The cephObjectStores array in the values file will define a list of CephObjectStore as described in the table below. Parameter Description Default name The name of the CephObjectStore ceph-objectstore spec The CephObjectStore spec, see the CephObjectStore CRD documentation. see values.yaml storageClass.enabled Whether a storage class is deployed alongside the CephObjectStore true storageClass.name The name of the storage class ceph-bucket storageClass.parameters See Object Store storage class documentation or the helm values.yaml for suitable values see values.yaml storageClass.reclaimPolicy The default Reclaim Policy to apply to PVCs created with this storage class. 
Delete ingress.enabled Enable an ingress for the object store false ingress.annotations Ingress annotations {} ingress.host.name Ingress hostname \"\" ingress.host.path Ingress path prefix / ingress.tls Ingress tls / ingress.ingressClassName Ingress tls \"\"","title":"Ceph Object Stores"},{"location":"Helm-Charts/ceph-cluster-chart/#existing-clusters","text":"If you have an existing CephCluster CR that was created without the helm chart and you want the helm chart to start managing the cluster: Extract the spec section of your existing CephCluster CR and copy to the cephClusterSpec section in values.yaml . Add the following annotations and label to your existing CephCluster CR: 1 2 3 4 5 annotations : meta.helm.sh/release-name : rook-ceph-cluster meta.helm.sh/release-namespace : rook-ceph labels : app.kubernetes.io/managed-by : Helm Run the helm install command in the Installing section to create the chart. In the future when updates to the cluster are needed, ensure the values.yaml always contains the desired CephCluster spec.","title":"Existing Clusters"},{"location":"Helm-Charts/ceph-cluster-chart/#development-build","text":"To deploy from a local build from your development environment: 1 2 cd deploy/charts/rook-ceph-cluster helm install --create-namespace --namespace rook-ceph rook-ceph-cluster -f values.yaml .","title":"Development Build"},{"location":"Helm-Charts/ceph-cluster-chart/#uninstalling-the-chart","text":"To see the currently installed Rook chart: 1 helm ls --namespace rook-ceph To uninstall/delete the rook-ceph-cluster chart: 1 helm delete --namespace rook-ceph rook-ceph-cluster The command removes all the Kubernetes components associated with the chart and deletes the release. Removing the cluster chart does not remove the Rook operator. In addition, all data on hosts in the Rook data directory ( /var/lib/rook by default) and on OSD raw devices is kept. To reuse disks, you will have to wipe them before recreating the cluster. See the teardown documentation for more information.","title":"Uninstalling the Chart"},{"location":"Helm-Charts/helm-charts/","text":"Rook has published the following Helm charts for the Ceph storage provider: Rook Ceph Operator : Starts the Ceph Operator, which will watch for Ceph CRs (custom resources) Rook Ceph Cluster : Creates Ceph CRs that the operator will use to configure the cluster The Helm charts are intended to simplify deployment and upgrades. Configuring the Rook resources without Helm is also fully supported by creating the manifests directly.","title":"Helm Charts Overview"},{"location":"Helm-Charts/operator-chart/","text":"Installs rook to create, configure, and manage Ceph clusters on Kubernetes. Introduction \u00b6 This chart bootstraps a rook-ceph-operator deployment on a Kubernetes cluster using the Helm package manager. Prerequisites \u00b6 Kubernetes 1.22+ Helm 3.x See the Helm support matrix for more details. Installing \u00b6 The Ceph Operator helm chart will install the basic components necessary to create a storage platform for your Kubernetes cluster. Install the Helm chart Create a Rook cluster . The helm install command deploys rook on the Kubernetes cluster in the default configuration. The configuration section lists the parameters that can be configured during installation. It is recommended that the rook operator be installed into the rook-ceph namespace (you will install your clusters into separate namespaces). Rook currently publishes builds of the Ceph operator to the release and master channels. 
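Before running the install command in the next section, it can help to see what a customized values.yaml for the operator chart might contain. This is only a sketch that reuses parameters from the configuration table below; the values are illustrative, not prescriptive.

```yaml
# Sketch of a values.yaml fragment for the rook-ceph operator chart.
crds:
  enabled: true                 # leave true unless CRDs are managed separately
enableDiscoveryDaemon: false    # set true on bare metal to watch for new devices
csi:
  enableRbdDriver: true
  enableCephfsDriver: true
  pluginPriorityClassName: system-node-critical
resources:
  limits:
    cpu: 500m
    memory: 512Mi
  requests:
    cpu: 100m
    memory: 128Mi
```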
Release \u00b6 The release channel is the most recent release of Rook that is considered stable for the community. 1 2 helm repo add rook-release https://charts.rook.io/release helm install --create-namespace --namespace rook-ceph rook-ceph rook-release/rook-ceph -f values.yaml For example settings, see the next section or values.yaml Configuration \u00b6 The following table lists the configurable parameters of the rook-operator chart and their default values. Parameter Description Default admissionController Set tolerations and nodeAffinity 1 for admission controller pod. The admission controller would be best to start on the same nodes as other ceph daemons. nil allowLoopDevices If true, loop devices are allowed to be used for osds in test clusters false annotations Pod annotations {} cephCommandsTimeoutSeconds The timeout for ceph commands in seconds \"15\" crds.enabled Whether the helm chart should create and update the CRDs. If false, the CRDs must be managed independently with deploy/examples/crds.yaml. WARNING Only set during first deployment. If later disabled the cluster may be DESTROYED. If the CRDs are deleted in this case, see the disaster recovery guide to restore them. true csi.allowUnsupportedVersion Allow starting an unsupported ceph-csi image false csi.attacher.image Kubernetes CSI Attacher image registry.k8s.io/sig-storage/csi-attacher:v4.3.0 csi.cephFSAttachRequired Whether to skip any attach operation altogether for CephFS PVCs. See more details here . If cephFSAttachRequired is set to false it skips the volume attachments and makes the creation of pods using the CephFS PVC fast. WARNING It's highly discouraged to use this for CephFS RWO volumes. Refer to this issue for more details. true csi.cephFSFSGroupPolicy Policy for modifying a volume's ownership or permissions when the CephFS PVC is being mounted. supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html \"File\" csi.cephFSKernelMountOptions Set CephFS Kernel mount options to use https://docs.ceph.com/en/latest/man/8/mount.ceph/#options . Set to \"ms_mode=secure\" when connections.encrypted is enabled in CephCluster CR nil csi.cephFSPluginUpdateStrategy CSI CephFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate RollingUpdate csi.cephFSPluginUpdateStrategyMaxUnavailable A maxUnavailable parameter of CSI cephFS plugin daemonset update strategy. 1 csi.cephcsi.image Ceph CSI image quay.io/cephcsi/cephcsi:v3.9.0 csi.cephfsGrpcMetricsPort CSI CephFS driver GRPC metrics port 9091 csi.cephfsLivenessMetricsPort CSI CephFS driver metrics port 9081 csi.cephfsPodLabels Labels to add to the CSI CephFS Deployments and DaemonSets Pods nil csi.clusterName Cluster name identifier to set as metadata on the CephFS subvolume and RBD images. 
This will be useful in cases like for example, when two container orchestrator clusters (Kubernetes/OCP) are using a single ceph cluster nil csi.csiAddons.enabled Enable CSIAddons false csi.csiAddons.image CSIAddons Sidecar image \"quay.io/csiaddons/k8s-sidecar:v0.7.0\" csi.csiAddonsPort CSI Addons server port 9070 csi.csiCephFSPluginResource CEPH CSI CephFS plugin resource requirement list see values.yaml csi.csiCephFSPluginVolume The volume of the CephCSI CephFS plugin DaemonSet nil csi.csiCephFSPluginVolumeMount The volume mounts of the CephCSI CephFS plugin DaemonSet nil csi.csiCephFSProvisionerResource CEPH CSI CephFS provisioner resource requirement list see values.yaml csi.csiNFSPluginResource CEPH CSI NFS plugin resource requirement list see values.yaml csi.csiNFSProvisionerResource CEPH CSI NFS provisioner resource requirement list see values.yaml csi.csiRBDPluginResource CEPH CSI RBD plugin resource requirement list see values.yaml csi.csiRBDPluginVolume The volume of the CephCSI RBD plugin DaemonSet nil csi.csiRBDPluginVolumeMount The volume mounts of the CephCSI RBD plugin DaemonSet nil csi.csiRBDProvisionerResource CEPH CSI RBD provisioner resource requirement list csi-omap-generator resources will be applied only if enableOMAPGenerator is set to true see values.yaml csi.enableCSIEncryption Enable Ceph CSI PVC encryption support false csi.enableCSIHostNetwork Enable host networking for CSI CephFS and RBD nodeplugins. This may be necessary in some network configurations where the SDN does not provide access to an external cluster or there is significant drop in read/write performance true csi.enableCephfsDriver Enable Ceph CSI CephFS driver true csi.enableCephfsSnapshotter Enable Snapshotter in CephFS provisioner pod true csi.enableGrpcMetrics Enable Ceph CSI GRPC Metrics false csi.enableLiveness Enable Ceph CSI Liveness sidecar deployment false csi.enableMetadata Enable adding volume metadata on the CephFS subvolumes and RBD images. Not all users might be interested in getting volume/snapshot details as metadata on CephFS subvolume and RBD images. Hence enable metadata is false by default false csi.enableNFSSnapshotter Enable Snapshotter in NFS provisioner pod true csi.enableOMAPGenerator OMAP generator generates the omap mapping between the PV name and the RBD image which helps CSI to identify the rbd images for CSI operations. CSI_ENABLE_OMAP_GENERATOR needs to be enabled when we are using rbd mirroring feature. By default OMAP generator is disabled and when enabled, it will be deployed as a sidecar with CSI provisioner pod, to enable set it to true. false csi.enablePluginSelinuxHostMount Enable Host mount for /etc/selinux directory for Ceph CSI nodeplugins false csi.enableRBDSnapshotter Enable Snapshotter in RBD provisioner pod true csi.enableRbdDriver Enable Ceph CSI RBD driver true csi.forceCephFSKernelClient Enable Ceph Kernel clients on kernel < 4.17. If your kernel does not support quotas for CephFS you may want to disable this setting. However, this will cause an issue during upgrades with the FUSE client. See the upgrade guide true csi.grpcTimeoutInSeconds Set GRPC timeout for csi containers (in seconds). It should be >= 120. If this value is not set or is invalid, it defaults to 150 150 csi.imagePullPolicy Image pull policy \"IfNotPresent\" csi.kubeletDirPath Kubelet root directory path (if the Kubelet uses a different path for the --root-dir flag) /var/lib/kubelet csi.logLevel Set logging level for cephCSI containers maintained by the cephCSI. 
Supported values from 0 to 5. 0 for general useful logs, 5 for trace level verbosity. 0 csi.nfs.enabled Enable the nfs csi driver false csi.nfsAttachRequired Whether to skip any attach operation altogether for NFS PVCs. See more details here . If cephFSAttachRequired is set to false it skips the volume attachments and makes the creation of pods using the NFS PVC fast. WARNING It's highly discouraged to use this for NFS RWO volumes. Refer to this issue for more details. true csi.nfsFSGroupPolicy Policy for modifying a volume's ownership or permissions when the NFS PVC is being mounted. supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html \"File\" csi.nfsPluginUpdateStrategy CSI NFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate RollingUpdate csi.nfsPodLabels Labels to add to the CSI NFS Deployments and DaemonSets Pods nil csi.pluginNodeAffinity The node labels for affinity of the CephCSI RBD plugin DaemonSet 1 nil csi.pluginPriorityClassName PriorityClassName to be set on csi driver plugin pods \"system-node-critical\" csi.pluginTolerations Array of tolerations in YAML format which will be added to CephCSI plugin DaemonSet nil csi.provisioner.image Kubernetes CSI provisioner image registry.k8s.io/sig-storage/csi-provisioner:v3.5.0 csi.provisionerNodeAffinity The node labels for affinity of the CSI provisioner deployment 1 nil csi.provisionerPriorityClassName PriorityClassName to be set on csi driver provisioner pods \"system-cluster-critical\" csi.provisionerReplicas Set replicas for csi provisioner deployment 2 csi.provisionerTolerations Array of tolerations in YAML format which will be added to CSI provisioner deployment nil csi.rbdAttachRequired Whether to skip any attach operation altogether for RBD PVCs. See more details here . If set to false it skips the volume attachments and makes the creation of pods using the RBD PVC fast. WARNING It's highly discouraged to use this for RWO volumes as it can cause data corruption. csi-addons operations like Reclaimspace and PVC Keyrotation will also not be supported if set to false since we'll have no VolumeAttachments to determine which node the PVC is mounted on. Refer to this issue for more details. true csi.rbdFSGroupPolicy Policy for modifying a volume's ownership or permissions when the RBD PVC is being mounted. supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html \"File\" csi.rbdGrpcMetricsPort Ceph CSI RBD driver GRPC metrics port 9090 csi.rbdLivenessMetricsPort Ceph CSI RBD driver metrics port 8080 csi.rbdPluginUpdateStrategy CSI RBD plugin daemonset update strategy, supported values are OnDelete and RollingUpdate RollingUpdate csi.rbdPluginUpdateStrategyMaxUnavailable A maxUnavailable parameter of CSI RBD plugin daemonset update strategy. 1 csi.rbdPodLabels Labels to add to the CSI RBD Deployments and DaemonSets Pods nil csi.readAffinity.crushLocationLabels Define which node labels to use as CRUSH location. This should correspond to the values set in the CRUSH map. labels listed here csi.readAffinity.enabled Enable read affinity for RBD volumes. Recommended to set to true if running kernel 5.8 or newer. 
false csi.registrar.image Kubernetes CSI registrar image registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.8.0 csi.resizer.image Kubernetes CSI resizer image registry.k8s.io/sig-storage/csi-resizer:v1.8.0 csi.serviceMonitor.enabled Enable ServiceMonitor for Ceph CSI drivers false csi.serviceMonitor.interval Service monitor scrape interval \"5s\" csi.serviceMonitor.labels ServiceMonitor additional labels {} csi.sidecarLogLevel Set logging level for Kubernetes-csi sidecar containers. Supported values from 0 to 5. 0 for general useful logs (the default), 5 for trace level verbosity. 0 csi.snapshotter.image Kubernetes CSI snapshotter image registry.k8s.io/sig-storage/csi-snapshotter:v6.2.2 csi.topology.domainLabels domainLabels define which node labels to use as domains for CSI nodeplugins to advertise their domains nil csi.topology.enabled Enable topology based provisioning false currentNamespaceOnly Whether the operator should watch cluster CRD in its own namespace or not false disableAdmissionController Whether to disable the admission controller true disableDeviceHotplug Disable automatic orchestration when new devices are discovered. false discover.nodeAffinity The node labels for affinity of discover-agent 1 nil discover.podLabels Labels to add to the discover pods nil discover.resources Add resources to discover daemon pods nil discover.toleration Toleration for the discover pods. Options: NoSchedule , PreferNoSchedule or NoExecute nil discover.tolerationKey The specific key of the taint to tolerate nil discover.tolerations Array of tolerations in YAML format which will be added to discover deployment nil discoverDaemonUdev Blacklist certain disks according to the regex provided. nil enableDiscoveryDaemon Enable discovery daemon false enableOBCWatchOperatorNamespace Whether the OBC provisioner should watch on the operator namespace or not, if not the namespace of the cluster will be used true hostpathRequiresPrivileged Runs Ceph Pods as privileged to be able to write to hostPaths in OpenShift with SELinux restrictions. false image.pullPolicy Image pull policy \"IfNotPresent\" image.repository Image \"rook/ceph\" image.tag Image tag master imagePullSecrets imagePullSecrets option allow to pull docker images from private docker registry. Option will be passed to all service accounts. nil logLevel Global log level for the operator. Options: ERROR , WARNING , INFO , DEBUG \"INFO\" monitoring.enabled Enable monitoring. Requires Prometheus to be pre-installed. Enabling will also create RBAC rules to allow Operator to create ServiceMonitors false nodeSelector Kubernetes nodeSelector to add to the Deployment. {} priorityClassName Set the priority class for the rook operator deployment if desired nil pspEnable If true, create & use PSP resources false rbacEnable If true, create & use RBAC resources true resources Pod resource requests & limits {\"limits\":{\"cpu\":\"500m\",\"memory\":\"512Mi\"},\"requests\":{\"cpu\":\"100m\",\"memory\":\"128Mi\"}} scaleDownOperator If true, scale down the rook operator. This is useful for administrative actions where the rook operator must be scaled down, while using gitops style tooling to deploy your helm charts. false tolerations List of Kubernetes tolerations to add to the Deployment. 
[] unreachableNodeTolerationSeconds Delay to use for the node.kubernetes.io/unreachable pod failure toleration to override the Kubernetes default of 5 minutes 5 useOperatorHostNetwork If true, run rook operator on the host network nil Development Build \u00b6 To deploy from a local build from your development environment: Build the Rook docker image: make Copy the image to your K8s cluster, such as with the docker save then the docker load commands Install the helm chart: 1 2 cd deploy/charts/rook-ceph helm install --create-namespace --namespace rook-ceph rook-ceph . Uninstalling the Chart \u00b6 To see the currently installed Rook chart: 1 helm ls --namespace rook-ceph To uninstall/delete the rook-ceph deployment: 1 helm delete --namespace rook-ceph rook-ceph The command removes all the Kubernetes components associated with the chart and deletes the release. After uninstalling you may want to clean up the CRDs as described on the teardown documentation . nodeAffinity and *NodeAffinity options should have the format \"role=storage,rook; storage=ceph\" or storage=;role=rook-example or storage=; ( checks only for presence of key ) \u21a9 \u21a9 \u21a9 \u21a9","title":"Ceph Operator Helm Chart"},{"location":"Helm-Charts/operator-chart/#introduction","text":"This chart bootstraps a rook-ceph-operator deployment on a Kubernetes cluster using the Helm package manager.","title":"Introduction"},{"location":"Helm-Charts/operator-chart/#prerequisites","text":"Kubernetes 1.22+ Helm 3.x See the Helm support matrix for more details.","title":"Prerequisites"},{"location":"Helm-Charts/operator-chart/#installing","text":"The Ceph Operator helm chart will install the basic components necessary to create a storage platform for your Kubernetes cluster. Install the Helm chart Create a Rook cluster . The helm install command deploys rook on the Kubernetes cluster in the default configuration. The configuration section lists the parameters that can be configured during installation. It is recommended that the rook operator be installed into the rook-ceph namespace (you will install your clusters into separate namespaces). Rook currently publishes builds of the Ceph operator to the release and master channels.","title":"Installing"},{"location":"Helm-Charts/operator-chart/#release","text":"The release channel is the most recent release of Rook that is considered stable for the community. 1 2 helm repo add rook-release https://charts.rook.io/release helm install --create-namespace --namespace rook-ceph rook-ceph rook-release/rook-ceph -f values.yaml For example settings, see the next section or values.yaml","title":"Release"},{"location":"Helm-Charts/operator-chart/#configuration","text":"The following table lists the configurable parameters of the rook-operator chart and their default values. Parameter Description Default admissionController Set tolerations and nodeAffinity 1 for admission controller pod. The admission controller would be best to start on the same nodes as other ceph daemons. nil allowLoopDevices If true, loop devices are allowed to be used for osds in test clusters false annotations Pod annotations {} cephCommandsTimeoutSeconds The timeout for ceph commands in seconds \"15\" crds.enabled Whether the helm chart should create and update the CRDs. If false, the CRDs must be managed independently with deploy/examples/crds.yaml. WARNING Only set during first deployment. If later disabled the cluster may be DESTROYED. 
If the CRDs are deleted in this case, see the disaster recovery guide to restore them. true csi.allowUnsupportedVersion Allow starting an unsupported ceph-csi image false csi.attacher.image Kubernetes CSI Attacher image registry.k8s.io/sig-storage/csi-attacher:v4.3.0 csi.cephFSAttachRequired Whether to skip any attach operation altogether for CephFS PVCs. See more details here . If cephFSAttachRequired is set to false it skips the volume attachments and makes the creation of pods using the CephFS PVC fast. WARNING It's highly discouraged to use this for CephFS RWO volumes. Refer to this issue for more details. true csi.cephFSFSGroupPolicy Policy for modifying a volume's ownership or permissions when the CephFS PVC is being mounted. supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html \"File\" csi.cephFSKernelMountOptions Set CephFS Kernel mount options to use https://docs.ceph.com/en/latest/man/8/mount.ceph/#options . Set to \"ms_mode=secure\" when connections.encrypted is enabled in CephCluster CR nil csi.cephFSPluginUpdateStrategy CSI CephFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate RollingUpdate csi.cephFSPluginUpdateStrategyMaxUnavailable A maxUnavailable parameter of CSI cephFS plugin daemonset update strategy. 1 csi.cephcsi.image Ceph CSI image quay.io/cephcsi/cephcsi:v3.9.0 csi.cephfsGrpcMetricsPort CSI CephFS driver GRPC metrics port 9091 csi.cephfsLivenessMetricsPort CSI CephFS driver metrics port 9081 csi.cephfsPodLabels Labels to add to the CSI CephFS Deployments and DaemonSets Pods nil csi.clusterName Cluster name identifier to set as metadata on the CephFS subvolume and RBD images. This will be useful in cases like for example, when two container orchestrator clusters (Kubernetes/OCP) are using a single ceph cluster nil csi.csiAddons.enabled Enable CSIAddons false csi.csiAddons.image CSIAddons Sidecar image \"quay.io/csiaddons/k8s-sidecar:v0.7.0\" csi.csiAddonsPort CSI Addons server port 9070 csi.csiCephFSPluginResource CEPH CSI CephFS plugin resource requirement list see values.yaml csi.csiCephFSPluginVolume The volume of the CephCSI CephFS plugin DaemonSet nil csi.csiCephFSPluginVolumeMount The volume mounts of the CephCSI CephFS plugin DaemonSet nil csi.csiCephFSProvisionerResource CEPH CSI CephFS provisioner resource requirement list see values.yaml csi.csiNFSPluginResource CEPH CSI NFS plugin resource requirement list see values.yaml csi.csiNFSProvisionerResource CEPH CSI NFS provisioner resource requirement list see values.yaml csi.csiRBDPluginResource CEPH CSI RBD plugin resource requirement list see values.yaml csi.csiRBDPluginVolume The volume of the CephCSI RBD plugin DaemonSet nil csi.csiRBDPluginVolumeMount The volume mounts of the CephCSI RBD plugin DaemonSet nil csi.csiRBDProvisionerResource CEPH CSI RBD provisioner resource requirement list csi-omap-generator resources will be applied only if enableOMAPGenerator is set to true see values.yaml csi.enableCSIEncryption Enable Ceph CSI PVC encryption support false csi.enableCSIHostNetwork Enable host networking for CSI CephFS and RBD nodeplugins. 
This may be necessary in some network configurations where the SDN does not provide access to an external cluster or there is significant drop in read/write performance true csi.enableCephfsDriver Enable Ceph CSI CephFS driver true csi.enableCephfsSnapshotter Enable Snapshotter in CephFS provisioner pod true csi.enableGrpcMetrics Enable Ceph CSI GRPC Metrics false csi.enableLiveness Enable Ceph CSI Liveness sidecar deployment false csi.enableMetadata Enable adding volume metadata on the CephFS subvolumes and RBD images. Not all users might be interested in getting volume/snapshot details as metadata on CephFS subvolume and RBD images. Hence enable metadata is false by default false csi.enableNFSSnapshotter Enable Snapshotter in NFS provisioner pod true csi.enableOMAPGenerator OMAP generator generates the omap mapping between the PV name and the RBD image which helps CSI to identify the rbd images for CSI operations. CSI_ENABLE_OMAP_GENERATOR needs to be enabled when we are using rbd mirroring feature. By default OMAP generator is disabled and when enabled, it will be deployed as a sidecar with CSI provisioner pod, to enable set it to true. false csi.enablePluginSelinuxHostMount Enable Host mount for /etc/selinux directory for Ceph CSI nodeplugins false csi.enableRBDSnapshotter Enable Snapshotter in RBD provisioner pod true csi.enableRbdDriver Enable Ceph CSI RBD driver true csi.forceCephFSKernelClient Enable Ceph Kernel clients on kernel < 4.17. If your kernel does not support quotas for CephFS you may want to disable this setting. However, this will cause an issue during upgrades with the FUSE client. See the upgrade guide true csi.grpcTimeoutInSeconds Set GRPC timeout for csi containers (in seconds). It should be >= 120. If this value is not set or is invalid, it defaults to 150 150 csi.imagePullPolicy Image pull policy \"IfNotPresent\" csi.kubeletDirPath Kubelet root directory path (if the Kubelet uses a different path for the --root-dir flag) /var/lib/kubelet csi.logLevel Set logging level for cephCSI containers maintained by the cephCSI. Supported values from 0 to 5. 0 for general useful logs, 5 for trace level verbosity. 0 csi.nfs.enabled Enable the nfs csi driver false csi.nfsAttachRequired Whether to skip any attach operation altogether for NFS PVCs. See more details here . If cephFSAttachRequired is set to false it skips the volume attachments and makes the creation of pods using the NFS PVC fast. WARNING It's highly discouraged to use this for NFS RWO volumes. Refer to this issue for more details. true csi.nfsFSGroupPolicy Policy for modifying a volume's ownership or permissions when the NFS PVC is being mounted. 
supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html \"File\" csi.nfsPluginUpdateStrategy CSI NFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate RollingUpdate csi.nfsPodLabels Labels to add to the CSI NFS Deployments and DaemonSets Pods nil csi.pluginNodeAffinity The node labels for affinity of the CephCSI RBD plugin DaemonSet 1 nil csi.pluginPriorityClassName PriorityClassName to be set on csi driver plugin pods \"system-node-critical\" csi.pluginTolerations Array of tolerations in YAML format which will be added to CephCSI plugin DaemonSet nil csi.provisioner.image Kubernetes CSI provisioner image registry.k8s.io/sig-storage/csi-provisioner:v3.5.0 csi.provisionerNodeAffinity The node labels for affinity of the CSI provisioner deployment 1 nil csi.provisionerPriorityClassName PriorityClassName to be set on csi driver provisioner pods \"system-cluster-critical\" csi.provisionerReplicas Set replicas for csi provisioner deployment 2 csi.provisionerTolerations Array of tolerations in YAML format which will be added to CSI provisioner deployment nil csi.rbdAttachRequired Whether to skip any attach operation altogether for RBD PVCs. See more details here . If set to false it skips the volume attachments and makes the creation of pods using the RBD PVC fast. WARNING It's highly discouraged to use this for RWO volumes as it can cause data corruption. csi-addons operations like Reclaimspace and PVC Keyrotation will also not be supported if set to false since we'll have no VolumeAttachments to determine which node the PVC is mounted on. Refer to this issue for more details. true csi.rbdFSGroupPolicy Policy for modifying a volume's ownership or permissions when the RBD PVC is being mounted. supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html \"File\" csi.rbdGrpcMetricsPort Ceph CSI RBD driver GRPC metrics port 9090 csi.rbdLivenessMetricsPort Ceph CSI RBD driver metrics port 8080 csi.rbdPluginUpdateStrategy CSI RBD plugin daemonset update strategy, supported values are OnDelete and RollingUpdate RollingUpdate csi.rbdPluginUpdateStrategyMaxUnavailable A maxUnavailable parameter of CSI RBD plugin daemonset update strategy. 1 csi.rbdPodLabels Labels to add to the CSI RBD Deployments and DaemonSets Pods nil csi.readAffinity.crushLocationLabels Define which node labels to use as CRUSH location. This should correspond to the values set in the CRUSH map. labels listed here csi.readAffinity.enabled Enable read affinity for RBD volumes. Recommended to set to true if running kernel 5.8 or newer. false csi.registrar.image Kubernetes CSI registrar image registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.8.0 csi.resizer.image Kubernetes CSI resizer image registry.k8s.io/sig-storage/csi-resizer:v1.8.0 csi.serviceMonitor.enabled Enable ServiceMonitor for Ceph CSI drivers false csi.serviceMonitor.interval Service monitor scrape interval \"5s\" csi.serviceMonitor.labels ServiceMonitor additional labels {} csi.sidecarLogLevel Set logging level for Kubernetes-csi sidecar containers. Supported values from 0 to 5. 0 for general useful logs (the default), 5 for trace level verbosity. 
0 csi.snapshotter.image Kubernetes CSI snapshotter image registry.k8s.io/sig-storage/csi-snapshotter:v6.2.2 csi.topology.domainLabels domainLabels define which node labels to use as domains for CSI nodeplugins to advertise their domains nil csi.topology.enabled Enable topology based provisioning false currentNamespaceOnly Whether the operator should watch cluster CRD in its own namespace or not false disableAdmissionController Whether to disable the admission controller true disableDeviceHotplug Disable automatic orchestration when new devices are discovered. false discover.nodeAffinity The node labels for affinity of discover-agent 1 nil discover.podLabels Labels to add to the discover pods nil discover.resources Add resources to discover daemon pods nil discover.toleration Toleration for the discover pods. Options: NoSchedule , PreferNoSchedule or NoExecute nil discover.tolerationKey The specific key of the taint to tolerate nil discover.tolerations Array of tolerations in YAML format which will be added to discover deployment nil discoverDaemonUdev Blacklist certain disks according to the regex provided. nil enableDiscoveryDaemon Enable discovery daemon false enableOBCWatchOperatorNamespace Whether the OBC provisioner should watch on the operator namespace or not, if not the namespace of the cluster will be used true hostpathRequiresPrivileged Runs Ceph Pods as privileged to be able to write to hostPaths in OpenShift with SELinux restrictions. false image.pullPolicy Image pull policy \"IfNotPresent\" image.repository Image \"rook/ceph\" image.tag Image tag master imagePullSecrets imagePullSecrets option allow to pull docker images from private docker registry. Option will be passed to all service accounts. nil logLevel Global log level for the operator. Options: ERROR , WARNING , INFO , DEBUG \"INFO\" monitoring.enabled Enable monitoring. Requires Prometheus to be pre-installed. Enabling will also create RBAC rules to allow Operator to create ServiceMonitors false nodeSelector Kubernetes nodeSelector to add to the Deployment. {} priorityClassName Set the priority class for the rook operator deployment if desired nil pspEnable If true, create & use PSP resources false rbacEnable If true, create & use RBAC resources true resources Pod resource requests & limits {\"limits\":{\"cpu\":\"500m\",\"memory\":\"512Mi\"},\"requests\":{\"cpu\":\"100m\",\"memory\":\"128Mi\"}} scaleDownOperator If true, scale down the rook operator. This is useful for administrative actions where the rook operator must be scaled down, while using gitops style tooling to deploy your helm charts. false tolerations List of Kubernetes tolerations to add to the Deployment. 
[] unreachableNodeTolerationSeconds Delay to use for the node.kubernetes.io/unreachable pod failure toleration to override the Kubernetes default of 5 minutes 5 useOperatorHostNetwork If true, run rook operator on the host network nil","title":"Configuration"},{"location":"Helm-Charts/operator-chart/#development-build","text":"To deploy from a local build from your development environment: Build the Rook docker image: make Copy the image to your K8s cluster, such as with the docker save then the docker load commands Install the helm chart: 1 2 cd deploy/charts/rook-ceph helm install --create-namespace --namespace rook-ceph rook-ceph .","title":"Development Build"},{"location":"Helm-Charts/operator-chart/#uninstalling-the-chart","text":"To see the currently installed Rook chart: 1 helm ls --namespace rook-ceph To uninstall/delete the rook-ceph deployment: 1 helm delete --namespace rook-ceph rook-ceph The command removes all the Kubernetes components associated with the chart and deletes the release. After uninstalling you may want to clean up the CRDs as described on the teardown documentation . nodeAffinity and *NodeAffinity options should have the format \"role=storage,rook; storage=ceph\" or storage=;role=rook-example or storage=; ( checks only for presence of key ) \u21a9 \u21a9 \u21a9 \u21a9","title":"Uninstalling the Chart"},{"location":"Storage-Configuration/ceph-teardown/","text":"Cleaning up a Cluster \u00b6 If you want to tear down the cluster and bring up a new one, be aware of the following resources that will need to be cleaned up: The resources created under Rook's namespace (assume rook-ceph here): The Rook operator and cluster created by operator.yaml and cluster.yaml (the cluster CRD) /var/lib/rook/rook-ceph : Path on each host in the cluster where configuration is cached by the ceph mons and osds Note that if you changed the default namespaces or paths such as dataDirHostPath in the sample yaml files, you will need to adjust these namespaces and paths throughout these instructions. If you see issues tearing down the cluster, see the Troubleshooting section below. If you are tearing down a cluster frequently for development purposes, it is instead recommended to use an environment such as Minikube that can easily be reset without worrying about any of these steps. Delete the Block and File artifacts \u00b6 First you will need to clean up the resources created on top of the Rook cluster. These commands will clean up the resources from the block and file walkthroughs (unmount volumes, delete volume claims, etc). If you did not complete those parts of the walkthrough, you can skip these instructions: 1 2 3 4 5 6 kubectl delete -f ../wordpress.yaml kubectl delete -f ../mysql.yaml kubectl delete -n rook-ceph cephblockpool replicapool kubectl delete storageclass rook-ceph-block kubectl delete -f csi/cephfs/kube-registry.yaml kubectl delete storageclass csi-cephfs After those block and file resources have been cleaned up, you can then delete your Rook cluster. This is important to delete before removing the Rook operator and agent or else resources may not be cleaned up properly . Delete the CephCluster CRD \u00b6 Edit the CephCluster and add the cleanupPolicy WARNING: DATA WILL BE PERMANENTLY DELETED AFTER DELETING THE CephCluster CR WITH cleanupPolicy . 
1 kubectl -n rook-ceph patch cephcluster rook-ceph --type merge -p '{\"spec\":{\"cleanupPolicy\":{\"confirmation\":\"yes-really-destroy-data\"}}}' Once the cleanup policy is enabled, any new configuration changes in the CephCluster will be blocked. Nothing will happen until the deletion of the CR is requested, so this cleanupPolicy change can still be reverted if needed. Checkout more details about the cleanupPolicy here Delete the CephCluster CR. 1 kubectl -n rook-ceph delete cephcluster rook-ceph Verify that the cluster CR has been deleted before continuing to the next step. 1 kubectl -n rook-ceph get cephcluster If the cleanupPolicy was applied, then wait for the rook-ceph-cleanup jobs to be completed on all the nodes. These jobs will perform the following operations: Delete the namespace directory under dataDirHostPath , for example /var/lib/rook/rook-ceph , on all the nodes Wipe the data on the drives on all the nodes where OSDs were running in this cluster Note: The cleanup jobs might not start if the resources created on top of Rook Cluster are not deleted completely. See Delete the Operator and related Resources \u00b6 This will begin the process of the Rook Ceph operator and all other resources being cleaned up. This includes related resources such as the agent and discover daemonsets with the following commands: 1 2 3 kubectl delete -f operator.yaml kubectl delete -f common.yaml kubectl delete -f crds.yaml If the cleanupPolicy was applied and the cleanup jobs have completed on all the nodes, then the cluster tear down has been successful. If you skipped adding the cleanupPolicy then follow the manual steps mentioned below to tear down the cluster. Delete the data on hosts \u00b6 Attention The final cleanup step requires deleting files on each host in the cluster. All files under the dataDirHostPath property specified in the cluster CRD will need to be deleted. Otherwise, inconsistent state will remain when a new cluster is started. Connect to each machine and delete the namespace directory under dataDirHostPath , for example /var/lib/rook/rook-ceph . In the future this step will not be necessary when we build on the K8s local storage feature. If you modified the demo settings, additional cleanup is up to you for devices, host paths, etc. Zapping Devices \u00b6 Disks on nodes used by Rook for osds can be reset to a usable state with methods suggested below. Note that these scripts are not one-size-fits-all. Please use them with discretion to ensure you are not removing data unrelated to Rook and/or Ceph. Disks can be zapped fairly easily. A single disk can usually be cleared with some or all of the steps below. 1 2 3 4 5 6 7 8 9 10 11 12 13 DISK=\"/dev/sdX\" # Zap the disk to a fresh, usable state ( zap-all is important, b/c MBR has to be clean ) sgdisk --zap-all $DISK # Wipe a large portion of the beginning of the disk to remove more LVM metadata that may be present dd if=/dev/zero of=\"$DISK\" bs=1M count=100 oflag=direct,dsync # SSDs may be better cleaned with blkdiscard instead of dd blkdiscard $DISK # Inform the OS of partition table changes partprobe $DISK Ceph can leave LVM and device mapper data that can lock the disks, preventing the disks from being used again. These steps can help to free up old Ceph disks for re-use. Note that this only needs to be run once on each node. If you have only one Rook cluster and all Ceph disks are being wiped, run the following command. 
1 2 3 4 5 6 # This command hangs on some systems: with caution, 'dmsetup remove_all --force' can be used ls /dev/mapper/ceph-* | xargs -I% -- dmsetup remove % # ceph-volume setup can leave ceph- directories in /dev and /dev/mapper ( unnecessary clutter ) rm -rf /dev/ceph-* rm -rf /dev/mapper/ceph--* If disks are still reported locked, rebooting the node often helps clear LVM-related holds on disks. If there are multiple Ceph clusters and some disks are not wiped yet, it is necessary to manually determine which disks map to which device mapper devices. Troubleshooting \u00b6 If the cleanup instructions are not executed in the order above, or you otherwise have difficulty cleaning up the cluster, here are a few things to try. The most common issue cleaning up the cluster is that the rook-ceph namespace or the cluster CRD remain indefinitely in the terminating state. A namespace cannot be removed until all of its resources are removed, so look at which resources are pending termination. Look at the pods: 1 kubectl -n rook-ceph get pod If a pod is still terminating, you will need to wait or else attempt to forcefully terminate it ( kubectl delete pod  ). Now look at the cluster CRD: 1 kubectl -n rook-ceph get cephcluster If the cluster CRD still exists even though you have executed the delete command earlier, see the next section on removing the finalizer. Removing the Cluster CRD Finalizer \u00b6 When a Cluster CRD is created, a finalizer is added automatically by the Rook operator. The finalizer will allow the operator to ensure that before the cluster CRD is deleted, all block and file mounts will be cleaned up. Without proper cleanup, pods consuming the storage will be hung indefinitely until a system reboot. The operator is responsible for removing the finalizer after the mounts have been cleaned up. If for some reason the operator is not able to remove the finalizer (i.e., the operator is not running anymore), you can delete the finalizer manually with the following command: 1 2 3 4 for CRD in $(kubectl get crd -n rook-ceph | awk '/ceph.rook.io/ {print $1}'); do kubectl get -n rook-ceph \"$CRD\" -o name | \\ xargs -I {} kubectl patch -n rook-ceph {} --type merge -p '{\"metadata\":{\"finalizers\": []}}' done This command will patch the following CRDs on v1.3: 1 2 3 4 5 6 cephblockpools.ceph.rook.io cephclients.ceph.rook.io cephfilesystems.ceph.rook.io cephnfses.ceph.rook.io cephobjectstores.ceph.rook.io cephobjectstoreusers.ceph.rook.io Within a few seconds you should see that the cluster CRD has been deleted and will no longer block other cleanup such as deleting the rook-ceph namespace. If the namespace is still stuck in Terminating state, you can check which resources are holding up the deletion and remove the finalizers and delete those 1 2 kubectl api-resources --verbs=list --namespaced -o name \\ | xargs -n 1 kubectl get --show-kind --ignore-not-found -n rook-ceph Remove critical resource finalizers \u00b6 Rook adds a finalizer ceph.rook.io/disaster-protection to resources critical to the Ceph cluster so that the resources will not be accidentally deleted. The operator is responsible for removing the finalizers when a CephCluster is deleted. 
If for some reason the operator is not able to remove the finalizers (i.e., the operator is not running anymore), you can remove the finalizers manually with the following commands: 1 2 kubectl -n rook-ceph patch configmap rook-ceph-mon-endpoints --type merge -p '{\"metadata\":{\"finalizers\": []}}' kubectl -n rook-ceph patch secrets rook-ceph-mon --type merge -p '{\"metadata\":{\"finalizers\": []}}'","title":"Cleanup"},{"location":"Storage-Configuration/ceph-teardown/#cleaning-up-a-cluster","text":"If you want to tear down the cluster and bring up a new one, be aware of the following resources that will need to be cleaned up: The resources created under Rook's namespace (assume rook-ceph here): The Rook operator and cluster created by operator.yaml and cluster.yaml (the cluster CRD) /var/lib/rook/rook-ceph : Path on each host in the cluster where configuration is cached by the ceph mons and osds Note that if you changed the default namespaces or paths such as dataDirHostPath in the sample yaml files, you will need to adjust these namespaces and paths throughout these instructions. If you see issues tearing down the cluster, see the Troubleshooting section below. If you are tearing down a cluster frequently for development purposes, it is instead recommended to use an environment such as Minikube that can easily be reset without worrying about any of these steps.","title":"Cleaning up a Cluster"},{"location":"Storage-Configuration/ceph-teardown/#delete-the-block-and-file-artifacts","text":"First you will need to clean up the resources created on top of the Rook cluster. These commands will clean up the resources from the block and file walkthroughs (unmount volumes, delete volume claims, etc). If you did not complete those parts of the walkthrough, you can skip these instructions: 1 2 3 4 5 6 kubectl delete -f ../wordpress.yaml kubectl delete -f ../mysql.yaml kubectl delete -n rook-ceph cephblockpool replicapool kubectl delete storageclass rook-ceph-block kubectl delete -f csi/cephfs/kube-registry.yaml kubectl delete storageclass csi-cephfs After those block and file resources have been cleaned up, you can then delete your Rook cluster. This is important to delete before removing the Rook operator and agent or else resources may not be cleaned up properly .","title":"Delete the Block and File artifacts"},{"location":"Storage-Configuration/ceph-teardown/#delete-the-cephcluster-crd","text":"Edit the CephCluster and add the cleanupPolicy WARNING: DATA WILL BE PERMANENTLY DELETED AFTER DELETING THE CephCluster CR WITH cleanupPolicy . 1 kubectl -n rook-ceph patch cephcluster rook-ceph --type merge -p '{\"spec\":{\"cleanupPolicy\":{\"confirmation\":\"yes-really-destroy-data\"}}}' Once the cleanup policy is enabled, any new configuration changes in the CephCluster will be blocked. Nothing will happen until the deletion of the CR is requested, so this cleanupPolicy change can still be reverted if needed. Checkout more details about the cleanupPolicy here Delete the CephCluster CR. 1 kubectl -n rook-ceph delete cephcluster rook-ceph Verify that the cluster CR has been deleted before continuing to the next step. 1 kubectl -n rook-ceph get cephcluster If the cleanupPolicy was applied, then wait for the rook-ceph-cleanup jobs to be completed on all the nodes. 
These jobs will perform the following operations: Delete the namespace directory under dataDirHostPath , for example /var/lib/rook/rook-ceph , on all the nodes Wipe the data on the drives on all the nodes where OSDs were running in this cluster Note: The cleanup jobs might not start if the resources created on top of Rook Cluster are not deleted completely. See","title":"Delete the CephCluster CRD"},{"location":"Storage-Configuration/ceph-teardown/#delete-the-operator-and-related-resources","text":"This will begin the process of the Rook Ceph operator and all other resources being cleaned up. This includes related resources such as the agent and discover daemonsets with the following commands: 1 2 3 kubectl delete -f operator.yaml kubectl delete -f common.yaml kubectl delete -f crds.yaml If the cleanupPolicy was applied and the cleanup jobs have completed on all the nodes, then the cluster tear down has been successful. If you skipped adding the cleanupPolicy then follow the manual steps mentioned below to tear down the cluster.","title":"Delete the Operator and related Resources"},{"location":"Storage-Configuration/ceph-teardown/#delete-the-data-on-hosts","text":"Attention The final cleanup step requires deleting files on each host in the cluster. All files under the dataDirHostPath property specified in the cluster CRD will need to be deleted. Otherwise, inconsistent state will remain when a new cluster is started. Connect to each machine and delete the namespace directory under dataDirHostPath , for example /var/lib/rook/rook-ceph . In the future this step will not be necessary when we build on the K8s local storage feature. If you modified the demo settings, additional cleanup is up to you for devices, host paths, etc.","title":"Delete the data on hosts"},{"location":"Storage-Configuration/ceph-teardown/#zapping-devices","text":"Disks on nodes used by Rook for osds can be reset to a usable state with methods suggested below. Note that these scripts are not one-size-fits-all. Please use them with discretion to ensure you are not removing data unrelated to Rook and/or Ceph. Disks can be zapped fairly easily. A single disk can usually be cleared with some or all of the steps below. 1 2 3 4 5 6 7 8 9 10 11 12 13 DISK=\"/dev/sdX\" # Zap the disk to a fresh, usable state ( zap-all is important, b/c MBR has to be clean ) sgdisk --zap-all $DISK # Wipe a large portion of the beginning of the disk to remove more LVM metadata that may be present dd if=/dev/zero of=\"$DISK\" bs=1M count=100 oflag=direct,dsync # SSDs may be better cleaned with blkdiscard instead of dd blkdiscard $DISK # Inform the OS of partition table changes partprobe $DISK Ceph can leave LVM and device mapper data that can lock the disks, preventing the disks from being used again. These steps can help to free up old Ceph disks for re-use. Note that this only needs to be run once on each node. If you have only one Rook cluster and all Ceph disks are being wiped, run the following command. 1 2 3 4 5 6 # This command hangs on some systems: with caution, 'dmsetup remove_all --force' can be used ls /dev/mapper/ceph-* | xargs -I% -- dmsetup remove % # ceph-volume setup can leave ceph- directories in /dev and /dev/mapper ( unnecessary clutter ) rm -rf /dev/ceph-* rm -rf /dev/mapper/ceph--* If disks are still reported locked, rebooting the node often helps clear LVM-related holds on disks. 
If there are multiple Ceph clusters and some disks are not wiped yet, it is necessary to manually determine which disks map to which device mapper devices.","title":"Zapping Devices"},{"location":"Storage-Configuration/ceph-teardown/#troubleshooting","text":"If the cleanup instructions are not executed in the order above, or you otherwise have difficulty cleaning up the cluster, here are a few things to try. The most common issue cleaning up the cluster is that the rook-ceph namespace or the cluster CRD remain indefinitely in the terminating state. A namespace cannot be removed until all of its resources are removed, so look at which resources are pending termination. Look at the pods: 1 kubectl -n rook-ceph get pod If a pod is still terminating, you will need to wait or else attempt to forcefully terminate it ( kubectl delete pod  ). Now look at the cluster CRD: 1 kubectl -n rook-ceph get cephcluster If the cluster CRD still exists even though you have executed the delete command earlier, see the next section on removing the finalizer.","title":"Troubleshooting"},{"location":"Storage-Configuration/ceph-teardown/#removing-the-cluster-crd-finalizer","text":"When a Cluster CRD is created, a finalizer is added automatically by the Rook operator. The finalizer will allow the operator to ensure that before the cluster CRD is deleted, all block and file mounts will be cleaned up. Without proper cleanup, pods consuming the storage will be hung indefinitely until a system reboot. The operator is responsible for removing the finalizer after the mounts have been cleaned up. If for some reason the operator is not able to remove the finalizer (i.e., the operator is not running anymore), you can delete the finalizer manually with the following command: 1 2 3 4 for CRD in $(kubectl get crd -n rook-ceph | awk '/ceph.rook.io/ {print $1}'); do kubectl get -n rook-ceph \"$CRD\" -o name | \\ xargs -I {} kubectl patch -n rook-ceph {} --type merge -p '{\"metadata\":{\"finalizers\": []}}' done This command will patch the following CRDs on v1.3: 1 2 3 4 5 6 cephblockpools.ceph.rook.io cephclients.ceph.rook.io cephfilesystems.ceph.rook.io cephnfses.ceph.rook.io cephobjectstores.ceph.rook.io cephobjectstoreusers.ceph.rook.io Within a few seconds you should see that the cluster CRD has been deleted and will no longer block other cleanup such as deleting the rook-ceph namespace. If the namespace is still stuck in Terminating state, you can check which resources are holding up the deletion and remove the finalizers and delete those 1 2 kubectl api-resources --verbs=list --namespaced -o name \\ | xargs -n 1 kubectl get --show-kind --ignore-not-found -n rook-ceph","title":"Removing the Cluster CRD Finalizer"},{"location":"Storage-Configuration/ceph-teardown/#remove-critical-resource-finalizers","text":"Rook adds a finalizer ceph.rook.io/disaster-protection to resources critical to the Ceph cluster so that the resources will not be accidentally deleted. The operator is responsible for removing the finalizers when a CephCluster is deleted. 
If for some reason the operator is not able to remove the finalizers (i.e., the operator is not running anymore), you can remove the finalizers manually with the following commands: 1 2 kubectl -n rook-ceph patch configmap rook-ceph-mon-endpoints --type merge -p '{\"metadata\":{\"finalizers\": []}}' kubectl -n rook-ceph patch secrets rook-ceph-mon --type merge -p '{\"metadata\":{\"finalizers\": []}}'","title":"Remove critical resource finalizers"},{"location":"Storage-Configuration/Advanced/ceph-configuration/","text":"These examples show how to perform advanced configuration tasks on your Rook storage cluster. Prerequisites \u00b6 Most of the examples make use of the ceph client command. A quick way to use the Ceph client suite is from a Rook Toolbox container . The Kubernetes based examples assume Rook OSD pods are in the rook-ceph namespace. If you run them in a different namespace, modify kubectl -n rook-ceph [...] to fit your situation. Using alternate namespaces \u00b6 If you wish to deploy the Rook Operator and/or Ceph clusters to namespaces other than the default rook-ceph , the manifests are commented to allow for easy sed replacements. Change ROOK_CLUSTER_NAMESPACE to tailor the manifests for additional Ceph clusters. You can choose to also change ROOK_OPERATOR_NAMESPACE to create a new Rook Operator for each Ceph cluster (don't forget to set ROOK_CURRENT_NAMESPACE_ONLY ), or you can leave it at the same value for every Ceph cluster if you only wish to have one Operator manage all Ceph clusters. This will help you manage namespaces more easily, but you should still make sure the resources are configured to your liking. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 cd deploy/examples export ROOK_OPERATOR_NAMESPACE=\"rook-ceph\" export ROOK_CLUSTER_NAMESPACE=\"rook-ceph\" sed -i.bak \\ -e \"s/\\(.*\\):.*# namespace:operator/\\1: $ROOK_OPERATOR_NAMESPACE # namespace:operator/g\" \\ -e \"s/\\(.*\\):.*# namespace:cluster/\\1: $ROOK_CLUSTER_NAMESPACE # namespace:cluster/g\" \\ -e \"s/\\(.*serviceaccount\\):.*:\\(.*\\) # serviceaccount:namespace:operator/\\1:$ROOK_OPERATOR_NAMESPACE:\\2 # serviceaccount:namespace:operator/g\" \\ -e \"s/\\(.*serviceaccount\\):.*:\\(.*\\) # serviceaccount:namespace:cluster/\\1:$ROOK_CLUSTER_NAMESPACE:\\2 # serviceaccount:namespace:cluster/g\" \\ -e \"s/\\(.*\\): [-_A-Za-z0-9]*\\.\\(.*\\) # driver:namespace:operator/\\1: $ROOK_OPERATOR_NAMESPACE.\\2 # driver:namespace:operator/g\" \\ -e \"s/\\(.*\\): [-_A-Za-z0-9]*\\.\\(.*\\) # driver:namespace:cluster/\\1: $ROOK_CLUSTER_NAMESPACE.\\2 # driver:namespace:cluster/g\" \\ common.yaml operator.yaml cluster.yaml # add other files or change these as desired for your config # You need to use ` apply ` for all Ceph clusters after the first if you have only one Operator kubectl apply -f common.yaml -f operator.yaml -f cluster.yaml # add other files as desired for yourconfig Deploying a second cluster \u00b6 If you wish to create a new CephCluster in a different namespace than rook-ceph while using a single operator to manage both clusters execute the following: 1 2 3 cd deploy/examples NAMESPACE=rook-ceph-secondary envsubst < common-second-cluster.yaml | kubectl create -f - This will create all the necessary RBACs as well as the new namespace. The script assumes that common.yaml was already created. When you create the second CephCluster CR, use the same NAMESPACE and the operator will configure the second cluster. 
Log Collection \u00b6 All Rook logs can be collected in a Kubernetes environment with the following command: 1 2 3 4 5 6 7 8 9 for p in $(kubectl -n rook-ceph get pods -o jsonpath='{.items[*].metadata.name}') do for c in $(kubectl -n rook-ceph get pod ${p} -o jsonpath='{.spec.containers[*].name}') do echo \"BEGIN logs from pod: ${p} ${c}\" kubectl -n rook-ceph logs -c ${c} ${p} echo \"END logs from pod: ${p} ${c}\" done done This gets the logs for every container in every Rook pod and then compresses them into a .gz archive for easy sharing. Note that instead of gzip , you could instead pipe to less or to a single text file. OSD Information \u00b6 Keeping track of OSDs and their underlying storage devices can be difficult. The following scripts will clear things up quickly. Kubernetes \u00b6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 # Get OSD Pods # This uses the example/default cluster name \"rook\" OSD_PODS=$(kubectl get pods --all-namespaces -l \\ app=rook-ceph-osd,rook_cluster=rook-ceph -o jsonpath='{.items[*].metadata.name}') # Find node and drive associations from OSD pods for pod in $(echo ${OSD_PODS}) do echo \"Pod: ${pod}\" echo \"Node: $(kubectl -n rook-ceph get pod ${pod} -o jsonpath='{.spec.nodeName}')\" kubectl -n rook-ceph exec ${pod} -- sh -c '\\ for i in /var/lib/ceph/osd/ceph-*; do [ -f ${i}/ready ] || continue echo -ne \"-$(basename ${i}) \" echo $(lsblk -n -o NAME,SIZE ${i}/block 2> /dev/null || \\ findmnt -n -v -o SOURCE,SIZE -T ${i}) $(cat ${i}/type) done | sort -V echo' done The output should look something like this. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 Pod: osd-m2fz2 Node: node1.zbrbdl -osd0 sda3 557.3G bluestore -osd1 sdf3 110.2G bluestore -osd2 sdd3 277.8G bluestore -osd3 sdb3 557.3G bluestore -osd4 sde3 464.2G bluestore -osd5 sdc3 557.3G bluestore Pod: osd-nxxnq Node: node3.zbrbdl -osd6 sda3 110.7G bluestore -osd17 sdd3 1.8T bluestore -osd18 sdb3 231.8G bluestore -osd19 sdc3 231.8G bluestore Pod: osd-tww1h Node: node2.zbrbdl -osd7 sdc3 464.2G bluestore -osd8 sdj3 557.3G bluestore -osd9 sdf3 66.7G bluestore -osd10 sdd3 464.2G bluestore -osd11 sdb3 147.4G bluestore -osd12 sdi3 557.3G bluestore -osd13 sdk3 557.3G bluestore -osd14 sde3 66.7G bluestore -osd15 sda3 110.2G bluestore -osd16 sdh3 135.1G bluestore Separate Storage Groups \u00b6 Attention It is deprecated to manually need to set this , the deviceClass property can be used on Pool structures in CephBlockPool , CephFilesystem and CephObjectStore CRD objects. By default Rook/Ceph puts all storage under one replication rule in the CRUSH Map which provides the maximum amount of storage capacity for a cluster. If you would like to use different storage endpoints for different purposes, you'll have to create separate storage groups. In the following example we will separate SSD drives from spindle-based drives, a common practice for those looking to target certain workloads onto faster (database) or slower (file archive) storage. Configuring Pools \u00b6 Placement Group Sizing \u00b6 Note Since Ceph Nautilus (v14.x), you can use the Ceph MGR pg_autoscaler module to auto scale the PGs as needed. It is highly advisable to configure default pg_num value on per-pool basis, If you want to enable this feature, please refer to Default PG and PGP counts . 
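For instance, a rough sketch of turning the autoscaler on for a single pool from the toolbox (the pool name replicapool is illustrative, and on recent Ceph releases the pg_autoscaler module is already enabled by default):
ceph mgr module enable pg_autoscaler
ceph osd pool set replicapool pg_autoscale_mode on
ceph osd pool autoscale-status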
The general rules for deciding how many PGs your pool(s) should contain is: Fewer than 5 OSDs set pg_num to 128 Between 5 and 10 OSDs set pg_num to 512 Between 10 and 50 OSDs set pg_num to 1024 If you have more than 50 OSDs, you need to understand the tradeoffs and how to calculate the pg_num value by yourself. For calculating pg_num yourself please make use of the pgcalc tool . Setting PG Count \u00b6 Be sure to read the placement group sizing section before changing the number of PGs. 1 2 # Set the number of PGs in the rbd pool to 512 ceph osd pool set rbd pg_num 512 Custom ceph.conf Settings \u00b6 Warning The advised method for controlling Ceph configuration is to manually use the Ceph CLI or the Ceph dashboard because this offers the most flexibility. It is highly recommended that this only be used when absolutely necessary and that the config be reset to an empty string if/when the configurations are no longer necessary. Configurations in the config file will make the Ceph cluster less configurable from the CLI and dashboard and may make future tuning or debugging difficult. Setting configs via Ceph's CLI requires that at least one mon be available for the configs to be set, and setting configs via dashboard requires at least one mgr to be available. Ceph also has a number of very advanced settings that cannot be modified easily via the CLI or dashboard. In order to set configurations before monitors are available or to set advanced configuration settings, the rook-config-override ConfigMap exists, and the config field can be set with the contents of a ceph.conf file. The contents will be propagated to all mon, mgr, OSD, MDS, and RGW daemons as an /etc/ceph/ceph.conf file. Warning Rook performs no validation on the config, so the validity of the settings is the user's responsibility. If the rook-config-override ConfigMap is created before the cluster is started, the Ceph daemons will automatically pick up the settings. If you add the settings to the ConfigMap after the cluster has been initialized, each daemon will need to be restarted where you want the settings applied: mons: ensure all three mons are online and healthy before restarting each mon pod, one at a time. mgrs: the pods are stateless and can be restarted as needed, but note that this will disrupt the Ceph dashboard during restart. OSDs: restart your the pods by deleting them, one at a time, and running ceph -s between each restart to ensure the cluster goes back to \"active/clean\" state. RGW: the pods are stateless and can be restarted as needed. MDS: the pods are stateless and can be restarted as needed. After the pod restart, the new settings should be in effect. Note that if the ConfigMap in the Ceph cluster's namespace is created before the cluster is created, the daemons will pick up the settings at first launch. To automate the restart of the Ceph daemon pods, you will need to trigger an update to the pod specs. The simplest way to trigger the update is to add annotations or labels to the CephCluster CR for the daemons you want to restart. The operator will then proceed with a rolling update, similar to any other update to the cluster. Example \u00b6 In this example we will set the default pool size to two, and tell OSD daemons not to change the weight of OSDs on startup. Warning Modify Ceph settings carefully. You are leaving the sandbox tested by Rook. Changing the settings could result in unhealthy daemons or even data loss if used incorrectly. 
When the Rook Operator creates a cluster, a placeholder ConfigMap is created that will allow you to override Ceph configuration settings. When the daemon pods are started, the settings specified in this ConfigMap will be merged with the default settings generated by Rook. The default override settings are blank. Cutting out the extraneous properties, we would see the following defaults after creating a cluster: 1 kubectl -n rook-ceph get ConfigMap rook-config-override -o yaml 1 2 3 4 5 6 7 kind : ConfigMap apiVersion : v1 metadata : name : rook-config-override namespace : rook-ceph data : config : \"\" To apply your desired configuration, you will need to update this ConfigMap. The next time the daemon pod(s) start, they will use the updated configs. 1 kubectl -n rook-ceph edit configmap rook-config-override Modify the settings and save. Each line you add should be indented from the config property as such: 1 2 3 4 5 6 7 8 9 10 apiVersion : v1 kind : ConfigMap metadata : name : rook-config-override namespace : rook-ceph data : config : | [global] osd crush update on start = false osd pool default size = 2 Custom CSI ceph.conf Settings \u00b6 Warning It is highly recommended to use the default setting that comes with CephCSI and this can only be used when absolutely necessary. The ceph.conf should be reset back to default values if/when the configurations are no longer necessary. If the csi-ceph-conf-override ConfigMap is created before the cluster is started, the CephCSI pods will automatically pick up the settings. If you add the settings to the ConfigMap after the cluster has been initialized, you can restart the Rook operator pod and wait for Rook to recreate CSI pods to take immediate effect. After the CSI pods are restarted, the new settings should be in effect. Example CSI ceph.conf Settings \u00b6 In this Example we will set the rbd_validate_pool to false to skip rbd pool validation. Warning Modify Ceph settings carefully to avoid modifying the default configuration. Changing the settings could result in unexpected results if used incorrectly. 1 kubectl create -f csi-ceph-conf-override.yaml Restart the Rook operator pod and wait for CSI pods to be recreated. OSD CRUSH Settings \u00b6 A useful view of the CRUSH Map is generated with the following command: 1 ceph osd tree In this section we will be tweaking some of the values seen in the output. OSD Weight \u00b6 The CRUSH weight controls the ratio of data that should be distributed to each OSD. This also means a higher or lower amount of disk I/O operations for an OSD with higher/lower weight, respectively. By default OSDs get a weight relative to their storage capacity, which maximizes overall cluster capacity by filling all drives at the same rate, even if drive sizes vary. This should work for most use-cases, but the following situations could warrant weight changes: Your cluster has some relatively slow OSDs or nodes. Lowering their weight can reduce the impact of this bottleneck. You're using bluestore drives provisioned with Rook v0.3.1 or older. In this case you may notice OSD weights did not get set relative to their storage capacity. Changing the weight can fix this and maximize cluster capacity. This example sets the weight of osd.0 which is 600GiB 1 ceph osd crush reweight osd.0 .600 OSD Primary Affinity \u00b6 When pools are set with a size setting greater than one, data is replicated between nodes and OSDs. For every chunk of data a Primary OSD is selected to be used for reading that data to be sent to clients. 
You can control how likely it is for an OSD to become a Primary using the Primary Affinity setting. This is similar to the OSD weight setting, except it only affects reads on the storage device, not capacity or writes. In this example we will ensure that osd.0 is only selected as Primary if all other OSDs holding data replicas are unavailable: 1 ceph osd primary-affinity osd.0 0 OSD Dedicated Network \u00b6 It is possible to configure ceph to leverage a dedicated network for the OSDs to communicate across. A useful overview is the CEPH Networks section of the Ceph documentation. If you declare a cluster network, OSDs will route heartbeat, object replication and recovery traffic over the cluster network. This may improve performance compared to using a single network, especially when slower network technologies are used, with the tradeoff of additional expense and subtle failure modes. Two changes are necessary to the configuration to enable this capability: Use hostNetwork in the cluster configuration \u00b6 Enable the hostNetwork setting in the Ceph Cluster CRD configuration . For example, 1 2 network : provider : host Important Changing this setting is not supported in a running Rook cluster. Host networking should be configured when the cluster is first created. Define the subnets to use for public and private OSD networks \u00b6 Edit the rook-config-override configmap to define the custom network configuration: 1 kubectl -n rook-ceph edit configmap rook-config-override In the editor, add a custom configuration to instruct ceph which subnet is the public network and which subnet is the private network. For example: 1 2 3 4 5 6 7 8 apiVersion : v1 data : config : | [global] public network = 10.0.7.0/24 cluster network = 10.0.10.0/24 public addr = \"\" cluster addr = \"\" After applying the updated rook-config-override configmap, it will be necessary to restart the OSDs by deleting the OSD pods in order to apply the change. Restart the OSD pods by deleting them, one at a time, and running ceph -s between each restart to ensure the cluster goes back to \"active/clean\" state. Phantom OSD Removal \u00b6 If you have OSDs which are not showing any disks, you can remove those \"Phantom OSDs\" by following the instructions below. To check for \"Phantom OSDs\", you can run (example output): 1 2 3 4 5 6 $ ceph osd tree ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF -1 57.38062 root default -13 7.17258 host node1.example.com 2 hdd 3.61859 osd.2 up 1.00000 1.00000 -7 0 host node2.example.com down 0 1.00000 The host node2.example.com in the output has no disks, so it is most likely a \"Phantom OSD\". Now to remove it, use the ID in the first column of the output and replace  with it. In the example output above the ID would be -7 . The commands are: 1 2 3 4 ceph osd out  ceph osd crush remove osd. ceph auth del osd. ceph osd rm  To recheck that the Phantom OSD was removed, re-run the following command and check if the OSD with the ID doesn't show up anymore: 1 ceph osd tree Auto Expansion of OSDs \u00b6 Prerequisites for Auto Expansion of OSDs \u00b6 1) A PVC-based cluster deployed in a dynamic provisioning environment with a storageClassDeviceSet . 2) Create the Rook Toolbox . Note Prometheus Operator and Prometheus instances (see ../Monitoring/ceph-monitoring.md#prometheus-instances ) are Prerequisites that are created by the auto-grow-storage script.
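Before running the script, it can help to confirm that the toolbox pod called for in the prerequisites is up; a quick hedged check (app=rook-ceph-tools is the label used by the example toolbox manifest, adjust if you customized it):
kubectl -n rook-ceph get pod -l app=rook-ceph-tools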
To scale OSDs Vertically \u00b6 Run the following script to auto-grow the size of OSDs on a PVC-based Rook cluster whenever the OSDs have reached the storage near-full threshold. 1 tests/scripts/auto-grow-storage.sh size --max maxSize --growth-rate percent growth-rate percentage represents the percent increase you want in the OSD capacity and maxSize represent the maximum disk size. For example, if you need to increase the size of OSD by 30% and max disk size is 1Ti 1 ./auto-grow-storage.sh size --max 1Ti --growth-rate 30 To scale OSDs Horizontally \u00b6 Run the following script to auto-grow the number of OSDs on a PVC-based Rook cluster whenever the OSDs have reached the storage near-full threshold. 1 tests/scripts/auto-grow-storage.sh count --max maxCount --count rate Count of OSD represents the number of OSDs you need to add and maxCount represents the number of disks a storage cluster will support. For example, if you need to increase the number of OSDs by 3 and maxCount is 10 1 ./auto-grow-storage.sh count --max 10 --count 3","title":"Ceph Configuration"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#prerequisites","text":"Most of the examples make use of the ceph client command. A quick way to use the Ceph client suite is from a Rook Toolbox container . The Kubernetes based examples assume Rook OSD pods are in the rook-ceph namespace. If you run them in a different namespace, modify kubectl -n rook-ceph [...] to fit your situation.","title":"Prerequisites"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#using-alternate-namespaces","text":"If you wish to deploy the Rook Operator and/or Ceph clusters to namespaces other than the default rook-ceph , the manifests are commented to allow for easy sed replacements. Change ROOK_CLUSTER_NAMESPACE to tailor the manifests for additional Ceph clusters. You can choose to also change ROOK_OPERATOR_NAMESPACE to create a new Rook Operator for each Ceph cluster (don't forget to set ROOK_CURRENT_NAMESPACE_ONLY ), or you can leave it at the same value for every Ceph cluster if you only wish to have one Operator manage all Ceph clusters. This will help you manage namespaces more easily, but you should still make sure the resources are configured to your liking. 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 cd deploy/examples export ROOK_OPERATOR_NAMESPACE=\"rook-ceph\" export ROOK_CLUSTER_NAMESPACE=\"rook-ceph\" sed -i.bak \\ -e \"s/\\(.*\\):.*# namespace:operator/\\1: $ROOK_OPERATOR_NAMESPACE # namespace:operator/g\" \\ -e \"s/\\(.*\\):.*# namespace:cluster/\\1: $ROOK_CLUSTER_NAMESPACE # namespace:cluster/g\" \\ -e \"s/\\(.*serviceaccount\\):.*:\\(.*\\) # serviceaccount:namespace:operator/\\1:$ROOK_OPERATOR_NAMESPACE:\\2 # serviceaccount:namespace:operator/g\" \\ -e \"s/\\(.*serviceaccount\\):.*:\\(.*\\) # serviceaccount:namespace:cluster/\\1:$ROOK_CLUSTER_NAMESPACE:\\2 # serviceaccount:namespace:cluster/g\" \\ -e \"s/\\(.*\\): [-_A-Za-z0-9]*\\.\\(.*\\) # driver:namespace:operator/\\1: $ROOK_OPERATOR_NAMESPACE.\\2 # driver:namespace:operator/g\" \\ -e \"s/\\(.*\\): [-_A-Za-z0-9]*\\.\\(.*\\) # driver:namespace:cluster/\\1: $ROOK_CLUSTER_NAMESPACE.\\2 # driver:namespace:cluster/g\" \\ common.yaml operator.yaml cluster.yaml # add other files or change these as desired for your config # You need to use ` apply ` for all Ceph clusters after the first if you have only one Operator kubectl apply -f common.yaml -f operator.yaml -f cluster.yaml # add other files as desired for yourconfig","title":"Using alternate namespaces"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#deploying-a-second-cluster","text":"If you wish to create a new CephCluster in a different namespace than rook-ceph while using a single operator to manage both clusters execute the following: 1 2 3 cd deploy/examples NAMESPACE=rook-ceph-secondary envsubst < common-second-cluster.yaml | kubectl create -f - This will create all the necessary RBACs as well as the new namespace. The script assumes that common.yaml was already created. When you create the second CephCluster CR, use the same NAMESPACE and the operator will configure the second cluster.","title":"Deploying a second cluster"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#log-collection","text":"All Rook logs can be collected in a Kubernetes environment with the following command: 1 2 3 4 5 6 7 8 9 for p in $(kubectl -n rook-ceph get pods -o jsonpath='{.items[*].metadata.name}') do for c in $(kubectl -n rook-ceph get pod ${p} -o jsonpath='{.spec.containers[*].name}') do echo \"BEGIN logs from pod: ${p} ${c}\" kubectl -n rook-ceph logs -c ${c} ${p} echo \"END logs from pod: ${p} ${c}\" done done This gets the logs for every container in every Rook pod and then compresses them into a .gz archive for easy sharing. Note that instead of gzip , you could instead pipe to less or to a single text file.","title":"Log Collection"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#osd-information","text":"Keeping track of OSDs and their underlying storage devices can be difficult. 
The following scripts will clear things up quickly.","title":"OSD Information"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#kubernetes","text":"1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 # Get OSD Pods # This uses the example/default cluster name \"rook\" OSD_PODS=$(kubectl get pods --all-namespaces -l \\ app=rook-ceph-osd,rook_cluster=rook-ceph -o jsonpath='{.items[*].metadata.name}') # Find node and drive associations from OSD pods for pod in $(echo ${OSD_PODS}) do echo \"Pod: ${pod}\" echo \"Node: $(kubectl -n rook-ceph get pod ${pod} -o jsonpath='{.spec.nodeName}')\" kubectl -n rook-ceph exec ${pod} -- sh -c '\\ for i in /var/lib/ceph/osd/ceph-*; do [ -f ${i}/ready ] || continue echo -ne \"-$(basename ${i}) \" echo $(lsblk -n -o NAME,SIZE ${i}/block 2> /dev/null || \\ findmnt -n -v -o SOURCE,SIZE -T ${i}) $(cat ${i}/type) done | sort -V echo' done The output should look something like this. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 Pod: osd-m2fz2 Node: node1.zbrbdl -osd0 sda3 557.3G bluestore -osd1 sdf3 110.2G bluestore -osd2 sdd3 277.8G bluestore -osd3 sdb3 557.3G bluestore -osd4 sde3 464.2G bluestore -osd5 sdc3 557.3G bluestore Pod: osd-nxxnq Node: node3.zbrbdl -osd6 sda3 110.7G bluestore -osd17 sdd3 1.8T bluestore -osd18 sdb3 231.8G bluestore -osd19 sdc3 231.8G bluestore Pod: osd-tww1h Node: node2.zbrbdl -osd7 sdc3 464.2G bluestore -osd8 sdj3 557.3G bluestore -osd9 sdf3 66.7G bluestore -osd10 sdd3 464.2G bluestore -osd11 sdb3 147.4G bluestore -osd12 sdi3 557.3G bluestore -osd13 sdk3 557.3G bluestore -osd14 sde3 66.7G bluestore -osd15 sda3 110.2G bluestore -osd16 sdh3 135.1G bluestore","title":"Kubernetes"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#separate-storage-groups","text":"Attention It is deprecated to manually need to set this , the deviceClass property can be used on Pool structures in CephBlockPool , CephFilesystem and CephObjectStore CRD objects. By default Rook/Ceph puts all storage under one replication rule in the CRUSH Map which provides the maximum amount of storage capacity for a cluster. If you would like to use different storage endpoints for different purposes, you'll have to create separate storage groups. In the following example we will separate SSD drives from spindle-based drives, a common practice for those looking to target certain workloads onto faster (database) or slower (file archive) storage.","title":"Separate Storage Groups"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#configuring-pools","text":"","title":"Configuring Pools"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#placement-group-sizing","text":"Note Since Ceph Nautilus (v14.x), you can use the Ceph MGR pg_autoscaler module to auto scale the PGs as needed. It is highly advisable to configure default pg_num value on per-pool basis, If you want to enable this feature, please refer to Default PG and PGP counts . The general rules for deciding how many PGs your pool(s) should contain is: Fewer than 5 OSDs set pg_num to 128 Between 5 and 10 OSDs set pg_num to 512 Between 10 and 50 OSDs set pg_num to 1024 If you have more than 50 OSDs, you need to understand the tradeoffs and how to calculate the pg_num value by yourself. 
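For clusters beyond 50 OSDs, a commonly cited rule of thumb (an approximation, not a guarantee for your workload) is total PGs = (number of OSDs * target PGs per OSD) / replica count, rounded up to the nearest power of two. A worked sketch with assumed numbers, 60 OSDs, replica size 3 and a target of 100 PGs per OSD:

# (60 OSDs * 100 target PGs per OSD) / 3 replicas = 2000 -> round up to 2048
echo $(( 60 * 100 / 3 ))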
For calculating pg_num yourself, please make use of the pgcalc tool .","title":"Placement Group Sizing"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#setting-pg-count","text":"Be sure to read the placement group sizing section before changing the number of PGs. 1 2 # Set the number of PGs in the rbd pool to 512 ceph osd pool set rbd pg_num 512","title":"Setting PG Count"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#custom-cephconf-settings","text":"Warning The advised method for controlling Ceph configuration is to manually use the Ceph CLI or the Ceph dashboard because this offers the most flexibility. It is highly recommended that this override only be used when absolutely necessary and that the config be reset to an empty string if/when the configurations are no longer necessary. Configurations in the config file will make the Ceph cluster less configurable from the CLI and dashboard and may make future tuning or debugging difficult. Setting configs via Ceph's CLI requires that at least one mon be available for the configs to be set, and setting configs via dashboard requires at least one mgr to be available. Ceph also has a number of very advanced settings that cannot be modified easily via the CLI or dashboard. In order to set configurations before monitors are available or to set advanced configuration settings, the rook-config-override ConfigMap exists, and the config field can be set with the contents of a ceph.conf file. The contents will be propagated to all mon, mgr, OSD, MDS, and RGW daemons as an /etc/ceph/ceph.conf file. Warning Rook performs no validation on the config, so the validity of the settings is the user's responsibility. If the rook-config-override ConfigMap is created before the cluster is started, the Ceph daemons will automatically pick up the settings. If you add the settings to the ConfigMap after the cluster has been initialized, each daemon will need to be restarted where you want the settings applied: mons: ensure all three mons are online and healthy before restarting each mon pod, one at a time. mgrs: the pods are stateless and can be restarted as needed, but note that this will disrupt the Ceph dashboard during restart. OSDs: restart the OSD pods by deleting them, one at a time, and running ceph -s between each restart to ensure the cluster goes back to \"active/clean\" state. RGW: the pods are stateless and can be restarted as needed. MDS: the pods are stateless and can be restarted as needed. After the pod restart, the new settings should be in effect. Note that if the ConfigMap in the Ceph cluster's namespace is created before the cluster is created, the daemons will pick up the settings at first launch. To automate the restart of the Ceph daemon pods, you will need to trigger an update to the pod specs. The simplest way to trigger the update is to add annotations or labels to the CephCluster CR for the daemons you want to restart. The operator will then proceed with a rolling update, similar to any other update to the cluster.","title":"Custom ceph.conf Settings"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#example","text":"In this example we will set the default pool size to two, and tell OSD daemons not to change the weight of OSDs on startup. Warning Modify Ceph settings carefully. You are leaving the sandbox tested by Rook. Changing the settings could result in unhealthy daemons or even data loss if used incorrectly. 
When the Rook Operator creates a cluster, a placeholder ConfigMap is created that will allow you to override Ceph configuration settings. When the daemon pods are started, the settings specified in this ConfigMap will be merged with the default settings generated by Rook. The default override settings are blank. Cutting out the extraneous properties, we would see the following defaults after creating a cluster: 1 kubectl -n rook-ceph get ConfigMap rook-config-override -o yaml 1 2 3 4 5 6 7 kind : ConfigMap apiVersion : v1 metadata : name : rook-config-override namespace : rook-ceph data : config : \"\" To apply your desired configuration, you will need to update this ConfigMap. The next time the daemon pod(s) start, they will use the updated configs. 1 kubectl -n rook-ceph edit configmap rook-config-override Modify the settings and save. Each line you add should be indented from the config property as such: 1 2 3 4 5 6 7 8 9 10 apiVersion : v1 kind : ConfigMap metadata : name : rook-config-override namespace : rook-ceph data : config : | [global] osd crush update on start = false osd pool default size = 2","title":"Example"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#custom-csi-cephconf-settings","text":"Warning It is highly recommended to use the default setting that comes with CephCSI and this can only be used when absolutely necessary. The ceph.conf should be reset back to default values if/when the configurations are no longer necessary. If the csi-ceph-conf-override ConfigMap is created before the cluster is started, the CephCSI pods will automatically pick up the settings. If you add the settings to the ConfigMap after the cluster has been initialized, you can restart the Rook operator pod and wait for Rook to recreate CSI pods to take immediate effect. After the CSI pods are restarted, the new settings should be in effect.","title":"Custom CSI ceph.conf Settings"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#example-csi-cephconf-settings","text":"In this Example we will set the rbd_validate_pool to false to skip rbd pool validation. Warning Modify Ceph settings carefully to avoid modifying the default configuration. Changing the settings could result in unexpected results if used incorrectly. 1 kubectl create -f csi-ceph-conf-override.yaml Restart the Rook operator pod and wait for CSI pods to be recreated.","title":"Example CSI ceph.conf Settings"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#osd-crush-settings","text":"A useful view of the CRUSH Map is generated with the following command: 1 ceph osd tree In this section we will be tweaking some of the values seen in the output.","title":"OSD CRUSH Settings"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#osd-weight","text":"The CRUSH weight controls the ratio of data that should be distributed to each OSD. This also means a higher or lower amount of disk I/O operations for an OSD with higher/lower weight, respectively. By default OSDs get a weight relative to their storage capacity, which maximizes overall cluster capacity by filling all drives at the same rate, even if drive sizes vary. This should work for most use-cases, but the following situations could warrant weight changes: Your cluster has some relatively slow OSDs or nodes. Lowering their weight can reduce the impact of this bottleneck. You're using bluestore drives provisioned with Rook v0.3.1 or older. In this case you may notice OSD weights did not get set relative to their storage capacity. 
Changing the weight can fix this and maximize cluster capacity. This example sets the weight of osd.0 which is 600GiB 1 ceph osd crush reweight osd.0 .600","title":"OSD Weight"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#osd-primary-affinity","text":"When pools are set with a size setting greater than one, data is replicated between nodes and OSDs. For every chunk of data a Primary OSD is selected to be used for reading that data to be sent to clients. You can control how likely it is for an OSD to become a Primary using the Primary Affinity setting. This is similar to the OSD weight setting, except it only affects reads on the storage device, not capacity or writes. In this example we will ensure that osd.0 is only selected as Primary if all other OSDs holding data replicas are unavailable: 1 ceph osd primary-affinity osd.0 0","title":"OSD Primary Affinity"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#osd-dedicated-network","text":"It is possible to configure ceph to leverage a dedicated network for the OSDs to communicate across. A useful overview is the CEPH Networks section of the Ceph documentation. If you declare a cluster network, OSDs will route heartbeat, object replication and recovery traffic over the cluster network. This may improve performance compared to using a single network, especially when slower network technologies are used, with the tradeoff of additional expense and subtle failure modes. Two changes are necessary to the configuration to enable this capability:","title":"OSD Dedicated Network"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#use-hostnetwork-in-the-cluster-configuration","text":"Enable the hostNetwork setting in the Ceph Cluster CRD configuration . For example, 1 2 network : provider : host Important Changing this setting is not supported in a running Rook cluster. Host networking should be configured when the cluster is first created.","title":"Use hostNetwork in the cluster configuration"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#define-the-subnets-to-use-for-public-and-private-osd-networks","text":"Edit the rook-config-override configmap to define the custom network configuration: 1 kubectl -n rook-ceph edit configmap rook-config-override In the editor, add a custom configuration to instruct ceph which subnet is the public network and which subnet is the private network. For example: 1 2 3 4 5 6 7 8 apiVersion : v1 data : config : | [global] public network = 10.0.7.0/24 cluster network = 10.0.10.0/24 public addr = \"\" cluster addr = \"\" After applying the updated rook-config-override configmap, it will be necessary to restart the OSDs by deleting the OSD pods in order to apply the change. Restart the OSD pods by deleting them, one at a time, and running ceph -s between each restart to ensure the cluster goes back to \"active/clean\" state.","title":"Define the subnets to use for public and private OSD networks"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#phantom-osd-removal","text":"If you have OSDs in which are not showing any disks, you can remove those \"Phantom OSDs\" by following the instructions below. 
To check for \"Phantom OSDs\", you can run (example output): 1 2 3 4 5 6 $ ceph osd tree ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF -1 57.38062 root default -13 7.17258 host node1.example.com 2 hdd 3.61859 osd.2 up 1.00000 1.00000 -7 0 host node2.example.com down 0 1.00000 The host node2.example.com in the output has no disks, so it is most likely a \"Phantom OSD\". Now to remove it, use the ID in the first column of the output and replace  with it. In the example output above the ID would be -7 . The commands are: 1 2 3 4 ceph osd out  ceph osd crush remove osd. ceph auth del osd. ceph osd rm  To recheck that the Phantom OSD was removed, re-run the following command and check if the OSD with the ID doesn't show up anymore: 1 ceph osd tree","title":"Phantom OSD Removal"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#auto-expansion-of-osds","text":"","title":"Auto Expansion of OSDs"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#prerequisites-for-auto-expansion-of-osds","text":"1) A PVC-based cluster deployed in dynamic provisioning environment with a storageClassDeviceSet . 2) Create the Rook Toolbox . Note Prometheus Operator and [Prometheus ../Monitoring/ceph-monitoring.mdnitoring.md#prometheus-instances) are Prerequisites that are created by the auto-grow-storage script.","title":"Prerequisites for Auto Expansion of OSDs"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#to-scale-osds-vertically","text":"Run the following script to auto-grow the size of OSDs on a PVC-based Rook cluster whenever the OSDs have reached the storage near-full threshold. 1 tests/scripts/auto-grow-storage.sh size --max maxSize --growth-rate percent growth-rate percentage represents the percent increase you want in the OSD capacity and maxSize represent the maximum disk size. For example, if you need to increase the size of OSD by 30% and max disk size is 1Ti 1 ./auto-grow-storage.sh size --max 1Ti --growth-rate 30","title":"To scale OSDs Vertically"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#to-scale-osds-horizontally","text":"Run the following script to auto-grow the number of OSDs on a PVC-based Rook cluster whenever the OSDs have reached the storage near-full threshold. 1 tests/scripts/auto-grow-storage.sh count --max maxCount --count rate Count of OSD represents the number of OSDs you need to add and maxCount represents the number of disks a storage cluster will support. For example, if you need to increase the number of OSDs by 3 and maxCount is 10 1 ./auto-grow-storage.sh count --max 10 --count 3","title":"To scale OSDs Horizontally"},{"location":"Storage-Configuration/Advanced/ceph-mon-health/","text":"Failure in a distributed system is to be expected. Ceph was designed from the ground up to deal with the failures of a distributed system. At the next layer, Rook was designed from the ground up to automate recovery of Ceph components that traditionally required admin intervention. Monitor health is the most critical piece of the equation that Rook actively monitors. If they are not in a good state, the operator will take action to restore their health and keep your cluster protected from disaster. The Ceph monitors (mons) are the brains of the distributed cluster. They control all of the metadata that is necessary to store and retrieve your data as well as keep it safe. If the monitors are not in a healthy state you will risk losing all the data in your system. 
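Before digging into the mechanisms below, it can be useful to see the current state of the mons at a glance; for example (assuming the rook-ceph namespace and a running toolbox):

kubectl -n rook-ceph get pods -l app=rook-ceph-mon   # one Running pod per mon (a, b, c, ...)
ceph mon stat                                        # run from the toolbox; lists the mons in quorum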
Monitor Identity \u00b6 Each monitor in a Ceph cluster has a static identity. Every component in the cluster is aware of the identity, and that identity must be immutable. The identity of a mon is its IP address. To have an immutable IP address in Kubernetes, Rook creates a K8s service for each monitor. The clusterIP of the service will act as the stable identity. When a monitor pod starts, it will bind to its podIP and it will expect communication to be via its service IP address. Monitor Quorum \u00b6 Multiple mons work together to provide redundancy by each keeping a copy of the metadata. A variation of the distributed algorithm Paxos is used to establish consensus about the state of the cluster. Paxos requires a super-majority of mons to be running in order to establish quorum and perform operations in the cluster. If the majority of mons are not running, quorum is lost and nothing can be done in the cluster. How many mons? \u00b6 Most commonly a cluster will have three mons. This would mean that one mon could go down and allow the cluster to remain healthy. You would still have 2/3 mons running to give you consensus in the cluster for any operation. For highest availability, an odd number of mons is required. Fifty percent of mons will not be sufficient to maintain quorum. If you had two mons and one of them went down, you would have 1/2 of quorum. Since that is not a super-majority, the cluster would have to wait until the second mon is up again. Rook allows an even number of mons for higher durability. See the disaster recovery guide if quorum is lost and to recover mon quorum from a single mon. The number of mons to create in a cluster depends on your tolerance for losing a node. If you have 1 mon zero nodes can be lost to maintain quorum. With 3 mons one node can be lost, and with 5 mons two nodes can be lost. Because the Rook operator will automatically start a new monitor if one dies, you typically only need three mons. The more mons you have, the more overhead there will be to make a change to the cluster, which could become a performance issue in a large cluster. Mitigating Monitor Failure \u00b6 Whatever the reason that a mon may fail (power failure, software crash, software hang, etc), there are several layers of mitigation in place to help recover the mon. It is always better to bring an existing mon back up than to failover to bring up a new mon. The Rook operator creates a mon with a Deployment to ensure that the mon pod will always be restarted if it fails. If a mon pod stops for any reason, Kubernetes will automatically start the pod up again. In order for a mon to support a pod/node restart, the mon metadata is persisted to disk, either under the dataDirHostPath specified in the CephCluster CR, or in the volume defined by the volumeClaimTemplate in the CephCluster CR. This will allow the mon to start back up with its existing metadata and continue where it left off even if the pod had to be re-created. Without this persistence, the mon cannot restart. Failing over a Monitor \u00b6 If a mon is unhealthy and the K8s pod restart or liveness probe are not sufficient to bring a mon back up, the operator will make the decision to terminate the unhealthy monitor deployment and bring up a new monitor with a new identity. This is an operation that must be done while mon quorum is maintained by other mons in the cluster. The operator checks for mon health every 45 seconds. If a monitor is down, the operator will wait 10 minutes before failing over the unhealthy mon. 
These two intervals can be configured as parameters to the CephCluster CR (see below). If the intervals are too short, it could be unhealthy if the mons are failed over too aggressively. If the intervals are too long, the cluster could be at risk of losing quorum if a new monitor is not brought up before another mon fails. 1 2 3 4 5 6 healthCheck : daemonHealth : mon : disabled : false interval : 45s timeout : 10m If you want to force a mon to failover for testing or other purposes, you can scale down the mon deployment to 0, then wait for the timeout. Note that the operator may scale up the mon again automatically if the operator is restarted or if a full reconcile is triggered, such as when the CephCluster CR is updated. If the mon pod is in pending state and couldn't be assigned to a node (say, due to node drain), then the operator will wait for the timeout again before the mon failover. So the timeout waiting for the mon failover will be doubled in this case. To disable monitor automatic failover, the timeout can be set to 0 , if the monitor goes out of quorum Rook will never fail it over onto another node. This is especially useful for planned maintenance. Example Failover \u00b6 Rook will create mons with pod names such as mon-a, mon-b, and mon-c. Let's say mon-b had an issue and the pod failed. 1 2 3 4 5 $ kubectl -n rook-ceph get pod -l app = rook-ceph-mon NAME READY STATUS RESTARTS AGE rook-ceph-mon-a-74dc96545-ch5ns 1/1 Running 0 9m rook-ceph-mon-b-6b9d895c4c-bcl2h 1/1 Error 2 9m rook-ceph-mon-c-7d6df6d65c-5cjwl 1/1 Running 0 8m After a failover, you will see the unhealthy mon removed and a new mon added such as mon-d. A fully healthy mon quorum is now running again. 1 2 3 4 5 $ kubectl -n rook-ceph get pod -l app = rook-ceph-mon NAME READY STATUS RESTARTS AGE rook-ceph-mon-a-74dc96545-ch5ns 1/1 Running 0 19m rook-ceph-mon-c-7d6df6d65c-5cjwl 1/1 Running 0 18m rook-ceph-mon-d-9e7ea7e76d-4bhxm 1/1 Running 0 20s From the toolbox we can verify the status of the health mon quorum: 1 2 3 4 5 6 7 8 9 10 $ ceph -s cluster: id: 35179270-8a39-4e08-a352-a10c52bb04ff health: HEALTH_OK services: mon: 3 daemons, quorum a,b,d (age 2m) mgr: a(active, since 12m) osd: 3 osds: 3 up (since 10m), 3 in (since 10m) [...]","title":"Monitor Health"},{"location":"Storage-Configuration/Advanced/ceph-mon-health/#monitor-identity","text":"Each monitor in a Ceph cluster has a static identity. Every component in the cluster is aware of the identity, and that identity must be immutable. The identity of a mon is its IP address. To have an immutable IP address in Kubernetes, Rook creates a K8s service for each monitor. The clusterIP of the service will act as the stable identity. When a monitor pod starts, it will bind to its podIP and it will expect communication to be via its service IP address.","title":"Monitor Identity"},{"location":"Storage-Configuration/Advanced/ceph-mon-health/#monitor-quorum","text":"Multiple mons work together to provide redundancy by each keeping a copy of the metadata. A variation of the distributed algorithm Paxos is used to establish consensus about the state of the cluster. Paxos requires a super-majority of mons to be running in order to establish quorum and perform operations in the cluster. If the majority of mons are not running, quorum is lost and nothing can be done in the cluster.","title":"Monitor Quorum"},{"location":"Storage-Configuration/Advanced/ceph-mon-health/#how-many-mons","text":"Most commonly a cluster will have three mons. 
This would mean that one mon could go down and allow the cluster to remain healthy. You would still have 2/3 mons running to give you consensus in the cluster for any operation. For highest availability, an odd number of mons is required. Fifty percent of mons will not be sufficient to maintain quorum. If you had two mons and one of them went down, you would have 1/2 of quorum. Since that is not a super-majority, the cluster would have to wait until the second mon is up again. Rook allows an even number of mons for higher durability. See the disaster recovery guide if quorum is lost and to recover mon quorum from a single mon. The number of mons to create in a cluster depends on your tolerance for losing a node. If you have 1 mon zero nodes can be lost to maintain quorum. With 3 mons one node can be lost, and with 5 mons two nodes can be lost. Because the Rook operator will automatically start a new monitor if one dies, you typically only need three mons. The more mons you have, the more overhead there will be to make a change to the cluster, which could become a performance issue in a large cluster.","title":"How many mons?"},{"location":"Storage-Configuration/Advanced/ceph-mon-health/#mitigating-monitor-failure","text":"Whatever the reason that a mon may fail (power failure, software crash, software hang, etc), there are several layers of mitigation in place to help recover the mon. It is always better to bring an existing mon back up than to failover to bring up a new mon. The Rook operator creates a mon with a Deployment to ensure that the mon pod will always be restarted if it fails. If a mon pod stops for any reason, Kubernetes will automatically start the pod up again. In order for a mon to support a pod/node restart, the mon metadata is persisted to disk, either under the dataDirHostPath specified in the CephCluster CR, or in the volume defined by the volumeClaimTemplate in the CephCluster CR. This will allow the mon to start back up with its existing metadata and continue where it left off even if the pod had to be re-created. Without this persistence, the mon cannot restart.","title":"Mitigating Monitor Failure"},{"location":"Storage-Configuration/Advanced/ceph-mon-health/#failing-over-a-monitor","text":"If a mon is unhealthy and the K8s pod restart or liveness probe are not sufficient to bring a mon back up, the operator will make the decision to terminate the unhealthy monitor deployment and bring up a new monitor with a new identity. This is an operation that must be done while mon quorum is maintained by other mons in the cluster. The operator checks for mon health every 45 seconds. If a monitor is down, the operator will wait 10 minutes before failing over the unhealthy mon. These two intervals can be configured as parameters to the CephCluster CR (see below). If the intervals are too short, it could be unhealthy if the mons are failed over too aggressively. If the intervals are too long, the cluster could be at risk of losing quorum if a new monitor is not brought up before another mon fails. 1 2 3 4 5 6 healthCheck : daemonHealth : mon : disabled : false interval : 45s timeout : 10m If you want to force a mon to failover for testing or other purposes, you can scale down the mon deployment to 0, then wait for the timeout. Note that the operator may scale up the mon again automatically if the operator is restarted or if a full reconcile is triggered, such as when the CephCluster CR is updated. 
If the mon pod is in pending state and couldn't be assigned to a node (say, due to node drain), then the operator will wait for the timeout again before the mon failover. So the timeout waiting for the mon failover will be doubled in this case. To disable monitor automatic failover, the timeout can be set to 0 , if the monitor goes out of quorum Rook will never fail it over onto another node. This is especially useful for planned maintenance.","title":"Failing over a Monitor"},{"location":"Storage-Configuration/Advanced/ceph-mon-health/#example-failover","text":"Rook will create mons with pod names such as mon-a, mon-b, and mon-c. Let's say mon-b had an issue and the pod failed. 1 2 3 4 5 $ kubectl -n rook-ceph get pod -l app = rook-ceph-mon NAME READY STATUS RESTARTS AGE rook-ceph-mon-a-74dc96545-ch5ns 1/1 Running 0 9m rook-ceph-mon-b-6b9d895c4c-bcl2h 1/1 Error 2 9m rook-ceph-mon-c-7d6df6d65c-5cjwl 1/1 Running 0 8m After a failover, you will see the unhealthy mon removed and a new mon added such as mon-d. A fully healthy mon quorum is now running again. 1 2 3 4 5 $ kubectl -n rook-ceph get pod -l app = rook-ceph-mon NAME READY STATUS RESTARTS AGE rook-ceph-mon-a-74dc96545-ch5ns 1/1 Running 0 19m rook-ceph-mon-c-7d6df6d65c-5cjwl 1/1 Running 0 18m rook-ceph-mon-d-9e7ea7e76d-4bhxm 1/1 Running 0 20s From the toolbox we can verify the status of the health mon quorum: 1 2 3 4 5 6 7 8 9 10 $ ceph -s cluster: id: 35179270-8a39-4e08-a352-a10c52bb04ff health: HEALTH_OK services: mon: 3 daemons, quorum a,b,d (age 2m) mgr: a(active, since 12m) osd: 3 osds: 3 up (since 10m), 3 in (since 10m) [...]","title":"Example Failover"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/","text":"Ceph Object Storage Daemons (OSDs) are the heart and soul of the Ceph storage platform. Each OSD manages a local device and together they provide the distributed storage. Rook will automate creation and management of OSDs to hide the complexity based on the desired state in the CephCluster CR as much as possible. This guide will walk through some of the scenarios to configure OSDs where more configuration may be required. OSD Health \u00b6 The rook-ceph-tools pod provides a simple environment to run Ceph tools. The ceph commands mentioned in this document should be run from the toolbox. Once the is created, connect to the pod to execute the ceph commands to analyze the health of the cluster, in particular the OSDs and placement groups (PGs). Some common commands to analyze OSDs include: 1 2 3 4 5 ceph status ceph osd tree ceph osd status ceph osd df ceph osd utilization 1 kubectl -n rook-ceph exec -it $(kubectl -n rook-ceph get pod -l \"app=rook-ceph-tools\" -o jsonpath='{.items[0].metadata.name}') bash Add an OSD \u00b6 The QuickStart Guide will provide the basic steps to create a cluster and start some OSDs. For more details on the OSD settings also see the Cluster CRD documentation. If you are not seeing OSDs created, see the Ceph Troubleshooting Guide . To add more OSDs, Rook will automatically watch for new nodes and devices being added to your cluster. If they match the filters or other settings in the storage section of the cluster CR, the operator will create new OSDs. Add an OSD on a PVC \u00b6 In more dynamic environments where storage can be dynamically provisioned with a raw block storage provider, the OSDs can be backed by PVCs. See the storageClassDeviceSets documentation in the Cluster CRD topic. 
To add more OSDs, you can either increase the count of the OSDs in an existing device set or you can add more device sets to the cluster CR. The operator will then automatically create new OSDs according to the updated cluster CR. Remove an OSD \u00b6 To remove an OSD due to a failed disk or other re-configuration, consider the following to ensure the health of the data through the removal process: Confirm you will have enough space on your cluster after removing your OSDs to properly handle the deletion Confirm the remaining OSDs and their placement groups (PGs) are healthy in order to handle the rebalancing of the data Do not remove too many OSDs at once Wait for rebalancing between removing multiple OSDs If all the PGs are active+clean and there are no warnings about being low on space, this means the data is fully replicated and it is safe to proceed. If an OSD is failing, the PGs will not be perfectly clean and you will need to proceed anyway. Host-based cluster \u00b6 Update your CephCluster CR. Depending on your CR settings, you may need to remove the device from the list or update the device filter. If you are using useAllDevices: true , no change to the CR is necessary. Important On host-based clusters, you may need to stop the Rook Operator while performing OSD removal steps in order to prevent Rook from detecting the old OSD and trying to re-create it before the disk is wiped or removed. To stop the Rook Operator, run: 1 kubectl -n rook-ceph scale deployment rook-ceph-operator --replicas=0 You must perform steps below to (1) purge the OSD and either (2.a) delete the underlying data or (2.b)replace the disk before starting the Rook Operator again. Once you have done that, you can start the Rook operator again with: 1 kubectl -n rook-ceph scale deployment rook-ceph-operator --replicas=1 PVC-based cluster \u00b6 To reduce the storage in your cluster or remove a failed OSD on a PVC: Shrink the number of OSDs in the storageClassDeviceSets in the CephCluster CR. If you have multiple device sets, you may need to change the index of 0 in this example path. kubectl -n rook-ceph patch CephCluster rook-ceph --type=json -p '[{\"op\": \"replace\", \"path\": \"/spec/storage/storageClassDeviceSets/0/count\", \"value\":}]' Reduce the count of the OSDs to the desired number. Rook will not take any action to automatically remove the extra OSD(s). Identify the PVC that belongs to the OSD that is failed or otherwise being removed. kubectl -n rook-ceph get pvc -l ceph.rook.io/DeviceSet= Identify the OSD you desire to remove. The OSD assigned to the PVC can be found in the labels on the PVC kubectl -n rook-ceph get pod -l ceph.rook.io/pvc= -o yaml | grep ceph-osd-id For example, this might return: ceph-osd-id: \"0\" Remember the OSD ID for purging the OSD below If you later increase the count in the device set, note that the operator will create PVCs with the highest index that is not currently in use by existing OSD PVCs. Confirm the OSD is down \u00b6 If you want to remove an unhealthy OSD, the osd pod may be in an error state such as CrashLoopBackoff or the ceph commands in the toolbox may show which OSD is down . If you want to remove a healthy OSD, you should run the following commands: 1 2 3 $ kubectl -n rook-ceph scale deployment rook-ceph-osd- --replicas = 0 # Inside the toolbox $ ceph osd down osd. 
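Before proceeding to the purge, it can also help to confirm that Ceph itself considers the OSD safe to remove; a hedged example from the toolbox, assuming the OSD ID is 0:

ceph osd ok-to-stop osd.0        # can the OSD be stopped without reducing data availability?
ceph osd safe-to-destroy osd.0   # is all of its data fully replicated elsewhere?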
Purge the OSD with Krew \u00b6 Note The rook-ceph Krew plugin must be installed 1 2 3 4 5 6 7 8 9 kubectl rook-ceph rook purge-osd 0 --force # 2022-09-14 08:58:28.888431 I | rookcmd: starting Rook v1.10.0-alpha.0.164.gcb73f728c with arguments 'rook ceph osd remove --osd-ids=0 --force-osd-removal=true' # 2022-09-14 08:58:28.889217 I | rookcmd: flag values: --force-osd-removal=true, --help=false, --log-level=INFO, --operator-image=, --osd-ids=0, --preserve-pvc=false, --service-account= # 2022-09-14 08:58:28.889582 I | op-mon: parsing mon endpoints: b=10.106.118.240:6789 # 2022-09-14 08:58:28.898898 I | cephclient: writing config file /var/lib/rook/rook-ceph/rook-ceph.config # 2022-09-14 08:58:28.899567 I | cephclient: generated admin config in /var/lib/rook/rook-ceph # 2022-09-14 08:58:29.421345 I | cephosd: validating status of osd.0 --- Purge the OSD with a Job \u00b6 OSD removal can be automated with the example found in the rook-ceph-purge-osd job . In the osd-purge.yaml, change the  to the ID(s) of the OSDs you want to remove. Run the job: kubectl create -f osd-purge.yaml When the job is completed, review the logs to ensure success: kubectl -n rook-ceph logs -l app=rook-ceph-purge-osd When finished, you can delete the job: kubectl delete -f osd-purge.yaml If you want to remove OSDs by hand, continue with the following sections. However, we recommend you use the above-mentioned steps to avoid operation errors. Purge the OSD manually \u00b6 If the OSD purge job fails or you need fine-grained control of the removal, here are the individual commands that can be run from the toolbox. Detach the OSD PVC from Rook kubectl -n rook-ceph label pvc  ceph.rook.io/DeviceSetPVCId- Mark the OSD as out if not already marked as such by Ceph. This signals Ceph to start moving (backfilling) the data that was on that OSD to another OSD. ceph osd out osd. (for example if the OSD ID is 23 this would be ceph osd out osd.23 ) Wait for the data to finish backfilling to other OSDs. ceph status will indicate the backfilling is done when all of the PGs are active+clean . If desired, it's safe to remove the disk after that. Remove the OSD from the Ceph cluster ceph osd purge  --yes-i-really-mean-it Verify the OSD is removed from the node in the CRUSH map ceph osd tree The operator can automatically remove OSD deployments that are considered \"safe-to-destroy\" by Ceph. After the steps above, the OSD will be considered safe to remove since the data has all been moved to other OSDs. But this will only be done automatically by the operator if you have this setting in the cluster CR: 1 removeOSDsIfOutAndSafeToRemove : true Otherwise, you will need to delete the deployment directly: 1 kubectl delete deployment -n rook-ceph rook-ceph-osd- In PVC-based cluster, remove the orphaned PVC, if necessary. Delete the underlying data \u00b6 If you want to clean the device where the OSD was running, see in the instructions to wipe a disk on the Cleaning up a Cluster topic. Replace an OSD \u00b6 To replace a disk that has failed: Run the steps in the previous section to Remove an OSD . Replace the physical device and verify the new device is attached. Check if your cluster CR will find the new device. If you are using useAllDevices: true you can skip this step. If your cluster CR lists individual devices or uses a device filter you may need to update the CR. The operator ideally will automatically create the new OSD within a few minutes of adding the new device or updating the CR. 
If you don't see a new OSD automatically created, restart the operator (by deleting the operator pod) to trigger the OSD creation. Verify if the OSD is created on the node by running ceph osd tree from the toolbox. Note The OSD might have a different ID than the previous OSD that was replaced.","title":"Ceph OSD Management"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#osd-health","text":"The rook-ceph-tools pod provides a simple environment to run Ceph tools. The ceph commands mentioned in this document should be run from the toolbox. Once the is created, connect to the pod to execute the ceph commands to analyze the health of the cluster, in particular the OSDs and placement groups (PGs). Some common commands to analyze OSDs include: 1 2 3 4 5 ceph status ceph osd tree ceph osd status ceph osd df ceph osd utilization 1 kubectl -n rook-ceph exec -it $(kubectl -n rook-ceph get pod -l \"app=rook-ceph-tools\" -o jsonpath='{.items[0].metadata.name}') bash","title":"OSD Health"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#add-an-osd","text":"The QuickStart Guide will provide the basic steps to create a cluster and start some OSDs. For more details on the OSD settings also see the Cluster CRD documentation. If you are not seeing OSDs created, see the Ceph Troubleshooting Guide . To add more OSDs, Rook will automatically watch for new nodes and devices being added to your cluster. If they match the filters or other settings in the storage section of the cluster CR, the operator will create new OSDs.","title":"Add an OSD"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#add-an-osd-on-a-pvc","text":"In more dynamic environments where storage can be dynamically provisioned with a raw block storage provider, the OSDs can be backed by PVCs. See the storageClassDeviceSets documentation in the Cluster CRD topic. To add more OSDs, you can either increase the count of the OSDs in an existing device set or you can add more device sets to the cluster CR. The operator will then automatically create new OSDs according to the updated cluster CR.","title":"Add an OSD on a PVC"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#remove-an-osd","text":"To remove an OSD due to a failed disk or other re-configuration, consider the following to ensure the health of the data through the removal process: Confirm you will have enough space on your cluster after removing your OSDs to properly handle the deletion Confirm the remaining OSDs and their placement groups (PGs) are healthy in order to handle the rebalancing of the data Do not remove too many OSDs at once Wait for rebalancing between removing multiple OSDs If all the PGs are active+clean and there are no warnings about being low on space, this means the data is fully replicated and it is safe to proceed. If an OSD is failing, the PGs will not be perfectly clean and you will need to proceed anyway.","title":"Remove an OSD"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#host-based-cluster","text":"Update your CephCluster CR. Depending on your CR settings, you may need to remove the device from the list or update the device filter. If you are using useAllDevices: true , no change to the CR is necessary. Important On host-based clusters, you may need to stop the Rook Operator while performing OSD removal steps in order to prevent Rook from detecting the old OSD and trying to re-create it before the disk is wiped or removed. 
To stop the Rook Operator, run: 1 kubectl -n rook-ceph scale deployment rook-ceph-operator --replicas=0 You must perform steps below to (1) purge the OSD and either (2.a) delete the underlying data or (2.b)replace the disk before starting the Rook Operator again. Once you have done that, you can start the Rook operator again with: 1 kubectl -n rook-ceph scale deployment rook-ceph-operator --replicas=1","title":"Host-based cluster"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#pvc-based-cluster","text":"To reduce the storage in your cluster or remove a failed OSD on a PVC: Shrink the number of OSDs in the storageClassDeviceSets in the CephCluster CR. If you have multiple device sets, you may need to change the index of 0 in this example path. kubectl -n rook-ceph patch CephCluster rook-ceph --type=json -p '[{\"op\": \"replace\", \"path\": \"/spec/storage/storageClassDeviceSets/0/count\", \"value\":}]' Reduce the count of the OSDs to the desired number. Rook will not take any action to automatically remove the extra OSD(s). Identify the PVC that belongs to the OSD that is failed or otherwise being removed. kubectl -n rook-ceph get pvc -l ceph.rook.io/DeviceSet= Identify the OSD you desire to remove. The OSD assigned to the PVC can be found in the labels on the PVC kubectl -n rook-ceph get pod -l ceph.rook.io/pvc= -o yaml | grep ceph-osd-id For example, this might return: ceph-osd-id: \"0\" Remember the OSD ID for purging the OSD below If you later increase the count in the device set, note that the operator will create PVCs with the highest index that is not currently in use by existing OSD PVCs.","title":"PVC-based cluster"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#confirm-the-osd-is-down","text":"If you want to remove an unhealthy OSD, the osd pod may be in an error state such as CrashLoopBackoff or the ceph commands in the toolbox may show which OSD is down . If you want to remove a healthy OSD, you should run the following commands: 1 2 3 $ kubectl -n rook-ceph scale deployment rook-ceph-osd- --replicas = 0 # Inside the toolbox $ ceph osd down osd.","title":"Confirm the OSD is down"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#purge-the-osd-with-krew","text":"Note The rook-ceph Krew plugin must be installed 1 2 3 4 5 6 7 8 9 kubectl rook-ceph rook purge-osd 0 --force # 2022-09-14 08:58:28.888431 I | rookcmd: starting Rook v1.10.0-alpha.0.164.gcb73f728c with arguments 'rook ceph osd remove --osd-ids=0 --force-osd-removal=true' # 2022-09-14 08:58:28.889217 I | rookcmd: flag values: --force-osd-removal=true, --help=false, --log-level=INFO, --operator-image=, --osd-ids=0, --preserve-pvc=false, --service-account= # 2022-09-14 08:58:28.889582 I | op-mon: parsing mon endpoints: b=10.106.118.240:6789 # 2022-09-14 08:58:28.898898 I | cephclient: writing config file /var/lib/rook/rook-ceph/rook-ceph.config # 2022-09-14 08:58:28.899567 I | cephclient: generated admin config in /var/lib/rook/rook-ceph # 2022-09-14 08:58:29.421345 I | cephosd: validating status of osd.0 ---","title":"Purge the OSD with Krew"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#purge-the-osd-with-a-job","text":"OSD removal can be automated with the example found in the rook-ceph-purge-osd job . In the osd-purge.yaml, change the  to the ID(s) of the OSDs you want to remove. 
Run the job: kubectl create -f osd-purge.yaml When the job is completed, review the logs to ensure success: kubectl -n rook-ceph logs -l app=rook-ceph-purge-osd When finished, you can delete the job: kubectl delete -f osd-purge.yaml If you want to remove OSDs by hand, continue with the following sections. However, we recommend you use the above-mentioned steps to avoid operation errors.","title":"Purge the OSD with a Job"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#purge-the-osd-manually","text":"If the OSD purge job fails or you need fine-grained control of the removal, here are the individual commands that can be run from the toolbox. Detach the OSD PVC from Rook kubectl -n rook-ceph label pvc  ceph.rook.io/DeviceSetPVCId- Mark the OSD as out if not already marked as such by Ceph. This signals Ceph to start moving (backfilling) the data that was on that OSD to another OSD. ceph osd out osd. (for example if the OSD ID is 23 this would be ceph osd out osd.23 ) Wait for the data to finish backfilling to other OSDs. ceph status will indicate the backfilling is done when all of the PGs are active+clean . If desired, it's safe to remove the disk after that. Remove the OSD from the Ceph cluster ceph osd purge  --yes-i-really-mean-it Verify the OSD is removed from the node in the CRUSH map ceph osd tree The operator can automatically remove OSD deployments that are considered \"safe-to-destroy\" by Ceph. After the steps above, the OSD will be considered safe to remove since the data has all been moved to other OSDs. But this will only be done automatically by the operator if you have this setting in the cluster CR: 1 removeOSDsIfOutAndSafeToRemove : true Otherwise, you will need to delete the deployment directly: 1 kubectl delete deployment -n rook-ceph rook-ceph-osd- In PVC-based cluster, remove the orphaned PVC, if necessary.","title":"Purge the OSD manually"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#delete-the-underlying-data","text":"If you want to clean the device where the OSD was running, see in the instructions to wipe a disk on the Cleaning up a Cluster topic.","title":"Delete the underlying data"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#replace-an-osd","text":"To replace a disk that has failed: Run the steps in the previous section to Remove an OSD . Replace the physical device and verify the new device is attached. Check if your cluster CR will find the new device. If you are using useAllDevices: true you can skip this step. If your cluster CR lists individual devices or uses a device filter you may need to update the CR. The operator ideally will automatically create the new OSD within a few minutes of adding the new device or updating the CR. If you don't see a new OSD automatically created, restart the operator (by deleting the operator pod) to trigger the OSD creation. Verify if the OSD is created on the node by running ceph osd tree from the toolbox. Note The OSD might have a different ID than the previous OSD that was replaced.","title":"Replace an OSD"},{"location":"Storage-Configuration/Advanced/configuration/","text":"For most any Ceph cluster, the user will want to--and may need to--change some Ceph configurations. These changes often may be warranted in order to alter performance to meet SLAs or to update default data resiliency settings. Warning Modify Ceph settings carefully, and review the Ceph configuration documentation before making any changes. 
Changing the settings could result in unhealthy daemons or even data loss if used incorrectly. Required configurations \u00b6 Rook and Ceph both strive to make configuration as easy as possible, but there are some configuration options which users are well advised to consider for any production cluster. Default PG and PGP counts \u00b6 The number of PGs and PGPs can be configured on a per-pool basis, but it is advised to set default values that are appropriate for your Ceph cluster. Appropriate values depend on the number of OSDs the user expects to have backing each pool. These can be configured by declaring pg_num and pgp_num parameters under the CephBlockPool resource. For determining the right value for pg_num please refer to placement group sizing . In this example configuration, 128 PGs are applied to the pool: 1 2 3 4 5 6 7 8 9 10 11 12 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : ceph-block-pool-test namespace : rook-ceph spec : deviceClass : hdd replicated : size : 3 parameters : pg_num : '128' # create the pool with a pre-configured placement group number pgp_num : '128' # this should at least match `pg_num` so that all PGs are used Ceph OSD and Pool config docs provide detailed information about how to tune these parameters. Nautilus introduced the PG auto-scaler mgr module capable of automatically managing PG and PGP values for pools. Please see Ceph New in Nautilus: PG merging and autotuning for more information about this module. The pg_autoscaler module is enabled by default. To disable this module, in the CephCluster CR : 1 2 3 4 5 spec : mgr : modules : - name : pg_autoscaler enabled : false When the module is enabled (the default), the autoscaler will be enabled for all new pools. If you do not desire to have the autoscaler enabled for all new pools, disable the module as shown above and use the Rook toolbox to enable autoscaling on the individual pools where you want it. Specifying configuration options \u00b6 Toolbox + Ceph CLI \u00b6 The most recommended way of configuring Ceph is to set Ceph's configuration directly. The first method for doing so is to use Ceph's CLI from the Rook toolbox pod. Using the toolbox pod is detailed here . From the toolbox, the user can change Ceph configurations, enable manager modules, create users and pools, and much more. Ceph Dashboard \u00b6 The Ceph Dashboard, examined in more detail here , is another way of setting some of Ceph's configuration directly. Configuration by the Ceph dashboard is recommended with the same priority as configuration via the Ceph CLI (above). Advanced configuration via ceph.conf override ConfigMap \u00b6 Setting configs via Ceph's CLI requires that at least one mon be available for the configs to be set, and setting configs via dashboard requires at least one mgr to be available. Ceph may also have a small number of very advanced settings that aren't able to be modified easily via CLI or dashboard. The least recommended method for configuring Ceph is intended as a last-resort fallback in situations like these. 
This is covered in detail here .","title":"Configuration"},{"location":"Storage-Configuration/Advanced/configuration/#required-configurations","text":"Rook and Ceph both strive to make configuration as easy as possible, but there are some configuration options which users are well advised to consider for any production cluster.","title":"Required configurations"},{"location":"Storage-Configuration/Advanced/configuration/#default-pg-and-pgp-counts","text":"The number of PGs and PGPs can be configured on a per-pool basis, but it is advised to set default values that are appropriate for your Ceph cluster. Appropriate values depend on the number of OSDs the user expects to have backing each pool. These can be configured by declaring pg_num and pgp_num parameters under CephBlockPool resource. For determining the right value for pg_num please refer placement group sizing In this example configuration, 128 PGs are applied to the pool: 1 2 3 4 5 6 7 8 9 10 11 12 13 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : ceph-block-pool-test namespace : rook-ceph spec : deviceClass : hdd replicated : size : 3 spec : parameters : pg_num : '128' # create the pool with a pre-configured placement group number pgp_num : '128' # this should at least match `pg_num` so that all PGs are used Ceph OSD and Pool config docs provide detailed information about how to tune these parameters. Nautilus introduced the PG auto-scaler mgr module capable of automatically managing PG and PGP values for pools. Please see Ceph New in Nautilus: PG merging and autotuning for more information about this module. The pg_autoscaler module is enabled by default. To disable this module, in the CephCluster CR : 1 2 3 4 5 spec : mgr : modules : - name : pg_autoscaler enabled : false With that setting, the autoscaler will be enabled for all new pools. If you do not desire to have the autoscaler enabled for all new pools, you will need to use the Rook toolbox to enable the module and enable the autoscaling on individual pools.","title":"Default PG and PGP counts"},{"location":"Storage-Configuration/Advanced/configuration/#specifying-configuration-options","text":"","title":"Specifying configuration options"},{"location":"Storage-Configuration/Advanced/configuration/#toolbox-ceph-cli","text":"The most recommended way of configuring Ceph is to set Ceph's configuration directly. The first method for doing so is to use Ceph's CLI from the Rook toolbox pod. Using the toolbox pod is detailed here . From the toolbox, the user can change Ceph configurations, enable manager modules, create users and pools, and much more.","title":"Toolbox + Ceph CLI"},{"location":"Storage-Configuration/Advanced/configuration/#ceph-dashboard","text":"The Ceph Dashboard, examined in more detail here , is another way of setting some of Ceph's configuration directly. Configuration by the Ceph dashboard is recommended with the same priority as configuration via the Ceph CLI (above).","title":"Ceph Dashboard"},{"location":"Storage-Configuration/Advanced/configuration/#advanced-configuration-via-cephconf-override-configmap","text":"Setting configs via Ceph's CLI requires that at least one mon be available for the configs to be set, and setting configs via dashboard requires at least one mgr to be available. Ceph may also have a small number of very advanced settings that aren't able to be modified easily via CLI or dashboard. The least recommended method for configuring Ceph is intended as a last-resort fallback in situations like these. 
This is covered in detail here .","title":"Advanced configuration via ceph.conf override ConfigMap"},{"location":"Storage-Configuration/Advanced/key-management-system/","text":"Rook has the ability to encrypt OSDs of clusters running on PVC via the flag ( encrypted: true ) in your storageClassDeviceSets template . Rook also has the ability to rotate encryption keys of OSDs using a cron job per OSD. By default, the Key Encryption Keys (also known as Data Encryption Keys) are stored in a Kubernetes Secret. However, if a Key Management System exists Rook is capable of using it. The security section contains settings related to encryption of the cluster. security : kms : Key Management System settings connectionDetails : the list of parameters representing kms connection details tokenSecretName : the name of the Kubernetes Secret containing the kms authentication token keyRotation : Key Rotation settings enabled : whether key rotation is enabled or not, default is false schedule : the schedule, written in cron format , with which key rotation CronJob is created, default value is \"@weekly\" . Note Currently key rotation is only supported for the default type, where the Key Encryption Keys are stored in a Kubernetes Secret. Supported KMS providers: Vault Authentication methods Token-based authentication Kubernetes-based authentication General Vault configuration TLS configuration IBM Key Protect Configuration Key Management Interoperability Protocol Configuration Vault \u00b6 Rook supports storing OSD encryption keys in HashiCorp Vault KMS . Authentication methods \u00b6 Rook support two authentication methods: token-based : a token is provided by the user and is stored in a Kubernetes Secret. It's used to authenticate the KMS by the Rook operator. This has several pitfalls such as: when the token expires it must be renewed, so the secret holding it must be updated no token automatic rotation Kubernetes Service Account uses Vault Kubernetes native authentication mechanism and alleviate some of the limitations from the token authentication such as token automatic renewal. This method is generally recommended over the token-based authentication. Token-based authentication \u00b6 When using the token-based authentication, a Kubernetes Secret must be created to hold the token. This is governed by the tokenSecretName parameter. Note: Rook supports all the Vault environment variables . The Kubernetes Secret rook-vault-token should contain: 1 2 3 4 5 6 7 apiVersion : v1 kind : Secret metadata : name : rook-vault-token namespace : rook-ceph data : token :  # base64 of a token to connect to Vault, for example: cy5GWXpsbzAyY2duVGVoRjhkWG5Bb3EyWjkK You can create a token in Vault by running the following command: 1 vault token create -policy=rook Refer to the official vault document for more details on how to create a token . For which policy to apply see the next section. 
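Instead of writing the Secret manifest by hand and base64-encoding the token, the same Secret can be created directly from the literal value. A minimal sketch; the token shown is a placeholder:

```console
kubectl -n rook-ceph create secret generic rook-vault-token \
  --from-literal=token=<token returned by 'vault token create -policy=rook'>
```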
In order for Rook to connect to Vault, you must configure the following in your CephCluster template: 1 2 3 4 5 6 7 8 9 10 11 security : kms : # name of the k8s config map containing all the kms connection details connectionDetails : KMS_PROVIDER : vault VAULT_ADDR : https://vault.default.svc.cluster.local:8200 VAULT_BACKEND_PATH : rook VAULT_SECRET_ENGINE : kv VAULT_AUTH_METHOD : token # name of the k8s secret containing the kms authentication token tokenSecretName : rook-vault-token Kubernetes-based authentication \u00b6 In order to use the Kubernetes Service Account authentication method, the following must be run to properly configure Vault: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 ROOK_NAMESPACE=rook-ceph ROOK_VAULT_SA=rook-vault-auth ROOK_SYSTEM_SA=rook-ceph-system ROOK_OSD_SA=rook-ceph-osd VAULT_POLICY_NAME=rook # create service account for vault to validate API token kubectl -n \"$ROOK_NAMESPACE\" create serviceaccount \"$ROOK_VAULT_SA\" # create the RBAC for this SA kubectl -n \"$ROOK_NAMESPACE\" create clusterrolebinding vault-tokenreview-binding --clusterrole=system:auth-delegator --serviceaccount=\"$ROOK_NAMESPACE\":\"$ROOK_VAULT_SA\" # get the service account common.yaml created earlier VAULT_SA_SECRET_NAME=$(kubectl -n \"$ROOK_NAMESPACE\" get sa \"$ROOK_VAULT_SA\" -o jsonpath=\"{.secrets[*]['name']}\") # Set SA_JWT_TOKEN value to the service account JWT used to access the TokenReview API SA_JWT_TOKEN=$(kubectl -n \"$ROOK_NAMESPACE\" get secret \"$VAULT_SA_SECRET_NAME\" -o jsonpath=\"{.data.token}\" | base64 --decode) # Set SA_CA_CRT to the PEM encoded CA cert used to talk to Kubernetes API SA_CA_CRT=$(kubectl -n \"$ROOK_NAMESPACE\" get secret \"$VAULT_SA_SECRET_NAME\" -o jsonpath=\"{.data['ca\\.crt']}\" | base64 --decode) # get kubernetes endpoint K8S_HOST=$(kubectl config view --minify --flatten -o jsonpath=\"{.clusters[0].cluster.server}\") # enable kubernetes auth vault auth enable kubernetes # To fetch the service account issuer kubectl proxy & proxy_pid=$! # configure the kubernetes auth vault write auth/kubernetes/config \\ token_reviewer_jwt=\"$SA_JWT_TOKEN\" \\ kubernetes_host=\"$K8S_HOST\" \\ kubernetes_ca_cert=\"$SA_CA_CRT\" \\ issuer=\"$(curl --silent http://127.0.0.1:8001/.well-known/openid-configuration | jq -r .issuer)\" kill $proxy_pid # configure a role for rook vault write auth/kubernetes/role/\"$ROOK_NAMESPACE\" \\ bound_service_account_names=\"$ROOK_SYSTEM_SA\",\"$ROOK_OSD_SA\" \\ bound_service_account_namespaces=\"$ROOK_NAMESPACE\" \\ policies=\"$VAULT_POLICY_NAME\" \\ ttl=1440h Once done, your CephCluster CR should look like: 1 2 3 4 5 6 7 8 9 security : kms : connectionDetails : KMS_PROVIDER : vault VAULT_ADDR : https://vault.default.svc.cluster.local:8200 VAULT_BACKEND_PATH : rook VAULT_SECRET_ENGINE : kv VAULT_AUTH_METHOD : kubernetes VAULT_AUTH_KUBERNETES_ROLE : rook-ceph Note The VAULT_ADDR value above assumes that Vault is accessible within the cluster itself on the default port (8200). If running elsewhere, please update the URL accordingly. 
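To sanity-check the Vault side of the setup above, the auth method, its configuration, and the role created for Rook can be read back with the Vault CLI. A short sketch using the names from the script:

```console
vault auth list                              # kubernetes/ should be listed
vault read auth/kubernetes/config            # host, CA cert and issuer configured above
vault read auth/kubernetes/role/rook-ceph    # bound service accounts, namespace and policy
```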
General Vault configuration \u00b6 As part of the token, here is an example of a policy that can be used: 1 2 3 4 5 6 path \"rook/*\" { capabilities = [ \"create\", \"read\", \"update\", \"delete\", \"list\" ] } path \"sys/mounts\" { capabilities = [ \"read\" ] } You can write the policy like so and then create a token: 1 2 3 4 5 6 7 8 9 10 11 $ vault policy write rook /tmp/rook.hcl $ vault token create -policy = rook Key Value --- ----- token s.FYzlo02cgnTehF8dXnAoq2Z9 token_accessor oMo7sAXQKbYtxU4HtO8k3pko token_duration 768h token_renewable true token_policies [\"default\" \"rook\"] identity_policies [] policies [\"default\" \"rook\"] In the above example, Vault's secret backend path name is rook . It must be enabled with the following: 1 vault secrets enable -path=rook kv If a different path is used, the VAULT_BACKEND_PATH key in connectionDetails must be changed. TLS configuration \u00b6 This is an advanced but recommended configuration for production deployments, in this case the vault-connection-details will look like: 1 2 3 4 5 6 7 8 9 10 11 security : kms : # name of the k8s config map containing all the kms connection details connectionDetails : KMS_PROVIDER : vault VAULT_ADDR : https://vault.default.svc.cluster.local:8200 VAULT_CACERT :  VAULT_CLIENT_CERT :  VAULT_CLIENT_KEY :  # name of the k8s secret containing the kms authentication token tokenSecretName : rook-vault-token Each secret keys are expected to be: VAULT_CACERT: cert VAULT_CLIENT_CERT: cert VAULT_CLIENT_KEY: key For instance VAULT_CACERT Secret named vault-tls-ca-certificate will look like: 1 2 3 4 5 6 7 apiVersion : v1 kind : Secret metadata : name : vault-tls-ca-certificate namespace : rook-ceph data : cert :  Note: if you are using self-signed certificates (not known/approved by a proper CA) you must pass VAULT_SKIP_VERIFY: true . Communications will remain encrypted but the validity of the certificate will not be verified. IBM Key Protect \u00b6 Rook supports storing OSD encryption keys in IBM Key Protect . The current implementation stores OSD encryption keys as Standard Keys using the Bring Your Own Key (BYOK) method. This means that the Key Protect instance policy must have Standard Imported Key enabled. Configuration \u00b6 First, you need to provision the Key Protect service on the IBM Cloud. Once completed, retrieve the instance ID . Make a record of it; we need it in the CRD. On the IBM Cloud, the user must create a Service ID, then assign an Access Policy to this service. Ultimately, a Service API Key needs to be generated. All the steps are summarized in the official documentation . The Service ID must be granted access to the Key Protect Service. Once the Service API Key is generated, store it in a Kubernetes Secret. 1 2 3 4 5 6 7 apiVersion : v1 kind : Secret metadata : name : ibm-kp-svc-api-key namespace : rook-ceph data : IBM_KP_SERVICE_API_KEY :  In order for Rook to connect to IBM Key Protect, you must configure the following in your CephCluster template: 1 2 3 4 5 6 7 8 security : kms : # name of the k8s config map containing all the kms connection details connectionDetails : KMS_PROVIDER : ibmkeyprotect IBM_KP_SERVICE_INSTANCE_ID :  # name of the k8s secret containing the service API Key tokenSecretName : ibm-kp-svc-api-key More options are supported such as: IBM_BASE_URL : the base URL of the Key Protect instance, depending on your region . Defaults to https://us-south.kms.cloud.ibm.com . IBM_TOKEN_URL : the URL of the Key Protect instance to retrieve the token. 
Defaults to https://iam.cloud.ibm.com/oidc/token . Only needed for private instances. Key Management Interoperability Protocol \u00b6 Rook supports storing OSD encryption keys in Key Management Interoperability Protocol (KMIP) supported KMS. The current implementation stores OSD encryption keys using the Register operation. Key is fetched and deleted using Get and Destroy operations respectively. Configuration \u00b6 The Secret with credentials for the KMIP KMS is expected to contain the following. 1 2 3 4 5 6 7 8 9 apiVersion : v1 kind : Secret metadata : name : kmip-credentials namespace : rook-ceph stringData : CA_CERT :  CLIENT_CERT :  CLIENT_KEY :  In order for Rook to connect to KMIP, you must configure the following in your CephCluster template: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 security : kms : # name of the k8s config map containing all the kms connection details connectionDetails : KMS_PROVIDER : kmip KMIP_ENDPOINT :  # (optional) The endpoint server name. Useful when the KMIP endpoint does not have a DNS entry. TLS_SERVER_NAME :  # (optional) Network read timeout, in seconds. The default value is 10. READ_TIMEOUT :  # (optional) Network write timeout, in seconds. The default value is 10. WRITE_TIMEOUT :  # name of the k8s secret containing the credentials. tokenSecretName : kmip-credentials","title":"Key Management System"},{"location":"Storage-Configuration/Advanced/key-management-system/#vault","text":"Rook supports storing OSD encryption keys in HashiCorp Vault KMS .","title":"Vault"},{"location":"Storage-Configuration/Advanced/key-management-system/#authentication-methods","text":"Rook support two authentication methods: token-based : a token is provided by the user and is stored in a Kubernetes Secret. It's used to authenticate the KMS by the Rook operator. This has several pitfalls such as: when the token expires it must be renewed, so the secret holding it must be updated no token automatic rotation Kubernetes Service Account uses Vault Kubernetes native authentication mechanism and alleviate some of the limitations from the token authentication such as token automatic renewal. This method is generally recommended over the token-based authentication.","title":"Authentication methods"},{"location":"Storage-Configuration/Advanced/key-management-system/#token-based-authentication","text":"When using the token-based authentication, a Kubernetes Secret must be created to hold the token. This is governed by the tokenSecretName parameter. Note: Rook supports all the Vault environment variables . The Kubernetes Secret rook-vault-token should contain: 1 2 3 4 5 6 7 apiVersion : v1 kind : Secret metadata : name : rook-vault-token namespace : rook-ceph data : token :  # base64 of a token to connect to Vault, for example: cy5GWXpsbzAyY2duVGVoRjhkWG5Bb3EyWjkK You can create a token in Vault by running the following command: 1 vault token create -policy=rook Refer to the official vault document for more details on how to create a token . For which policy to apply see the next section. 
In order for Rook to connect to Vault, you must configure the following in your CephCluster template: 1 2 3 4 5 6 7 8 9 10 11 security : kms : # name of the k8s config map containing all the kms connection details connectionDetails : KMS_PROVIDER : vault VAULT_ADDR : https://vault.default.svc.cluster.local:8200 VAULT_BACKEND_PATH : rook VAULT_SECRET_ENGINE : kv VAULT_AUTH_METHOD : token # name of the k8s secret containing the kms authentication token tokenSecretName : rook-vault-token","title":"Token-based authentication"},{"location":"Storage-Configuration/Advanced/key-management-system/#kubernetes-based-authentication","text":"In order to use the Kubernetes Service Account authentication method, the following must be run to properly configure Vault: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 ROOK_NAMESPACE=rook-ceph ROOK_VAULT_SA=rook-vault-auth ROOK_SYSTEM_SA=rook-ceph-system ROOK_OSD_SA=rook-ceph-osd VAULT_POLICY_NAME=rook # create service account for vault to validate API token kubectl -n \"$ROOK_NAMESPACE\" create serviceaccount \"$ROOK_VAULT_SA\" # create the RBAC for this SA kubectl -n \"$ROOK_NAMESPACE\" create clusterrolebinding vault-tokenreview-binding --clusterrole=system:auth-delegator --serviceaccount=\"$ROOK_NAMESPACE\":\"$ROOK_VAULT_SA\" # get the service account common.yaml created earlier VAULT_SA_SECRET_NAME=$(kubectl -n \"$ROOK_NAMESPACE\" get sa \"$ROOK_VAULT_SA\" -o jsonpath=\"{.secrets[*]['name']}\") # Set SA_JWT_TOKEN value to the service account JWT used to access the TokenReview API SA_JWT_TOKEN=$(kubectl -n \"$ROOK_NAMESPACE\" get secret \"$VAULT_SA_SECRET_NAME\" -o jsonpath=\"{.data.token}\" | base64 --decode) # Set SA_CA_CRT to the PEM encoded CA cert used to talk to Kubernetes API SA_CA_CRT=$(kubectl -n \"$ROOK_NAMESPACE\" get secret \"$VAULT_SA_SECRET_NAME\" -o jsonpath=\"{.data['ca\\.crt']}\" | base64 --decode) # get kubernetes endpoint K8S_HOST=$(kubectl config view --minify --flatten -o jsonpath=\"{.clusters[0].cluster.server}\") # enable kubernetes auth vault auth enable kubernetes # To fetch the service account issuer kubectl proxy & proxy_pid=$! # configure the kubernetes auth vault write auth/kubernetes/config \\ token_reviewer_jwt=\"$SA_JWT_TOKEN\" \\ kubernetes_host=\"$K8S_HOST\" \\ kubernetes_ca_cert=\"$SA_CA_CRT\" \\ issuer=\"$(curl --silent http://127.0.0.1:8001/.well-known/openid-configuration | jq -r .issuer)\" kill $proxy_pid # configure a role for rook vault write auth/kubernetes/role/\"$ROOK_NAMESPACE\" \\ bound_service_account_names=\"$ROOK_SYSTEM_SA\",\"$ROOK_OSD_SA\" \\ bound_service_account_namespaces=\"$ROOK_NAMESPACE\" \\ policies=\"$VAULT_POLICY_NAME\" \\ ttl=1440h Once done, your CephCluster CR should look like: 1 2 3 4 5 6 7 8 9 security : kms : connectionDetails : KMS_PROVIDER : vault VAULT_ADDR : https://vault.default.svc.cluster.local:8200 VAULT_BACKEND_PATH : rook VAULT_SECRET_ENGINE : kv VAULT_AUTH_METHOD : kubernetes VAULT_AUTH_KUBERNETES_ROLE : rook-ceph Note The VAULT_ADDR value above assumes that Vault is accessible within the cluster itself on the default port (8200). 
If running elsewhere, please update the URL accordingly.","title":"Kubernetes-based authentication"},{"location":"Storage-Configuration/Advanced/key-management-system/#general-vault-configuration","text":"As part of the token, here is an example of a policy that can be used: 1 2 3 4 5 6 path \"rook/*\" { capabilities = [ \"create\", \"read\", \"update\", \"delete\", \"list\" ] } path \"sys/mounts\" { capabilities = [ \"read\" ] } You can write the policy like so and then create a token: 1 2 3 4 5 6 7 8 9 10 11 $ vault policy write rook /tmp/rook.hcl $ vault token create -policy = rook Key Value --- ----- token s.FYzlo02cgnTehF8dXnAoq2Z9 token_accessor oMo7sAXQKbYtxU4HtO8k3pko token_duration 768h token_renewable true token_policies [\"default\" \"rook\"] identity_policies [] policies [\"default\" \"rook\"] In the above example, Vault's secret backend path name is rook . It must be enabled with the following: 1 vault secrets enable -path=rook kv If a different path is used, the VAULT_BACKEND_PATH key in connectionDetails must be changed.","title":"General Vault configuration"},{"location":"Storage-Configuration/Advanced/key-management-system/#tls-configuration","text":"This is an advanced but recommended configuration for production deployments, in this case the vault-connection-details will look like: 1 2 3 4 5 6 7 8 9 10 11 security : kms : # name of the k8s config map containing all the kms connection details connectionDetails : KMS_PROVIDER : vault VAULT_ADDR : https://vault.default.svc.cluster.local:8200 VAULT_CACERT :  VAULT_CLIENT_CERT :  VAULT_CLIENT_KEY :  # name of the k8s secret containing the kms authentication token tokenSecretName : rook-vault-token Each secret keys are expected to be: VAULT_CACERT: cert VAULT_CLIENT_CERT: cert VAULT_CLIENT_KEY: key For instance VAULT_CACERT Secret named vault-tls-ca-certificate will look like: 1 2 3 4 5 6 7 apiVersion : v1 kind : Secret metadata : name : vault-tls-ca-certificate namespace : rook-ceph data : cert :  Note: if you are using self-signed certificates (not known/approved by a proper CA) you must pass VAULT_SKIP_VERIFY: true . Communications will remain encrypted but the validity of the certificate will not be verified.","title":"TLS configuration"},{"location":"Storage-Configuration/Advanced/key-management-system/#ibm-key-protect","text":"Rook supports storing OSD encryption keys in IBM Key Protect . The current implementation stores OSD encryption keys as Standard Keys using the Bring Your Own Key (BYOK) method. This means that the Key Protect instance policy must have Standard Imported Key enabled.","title":"IBM Key Protect"},{"location":"Storage-Configuration/Advanced/key-management-system/#configuration","text":"First, you need to provision the Key Protect service on the IBM Cloud. Once completed, retrieve the instance ID . Make a record of it; we need it in the CRD. On the IBM Cloud, the user must create a Service ID, then assign an Access Policy to this service. Ultimately, a Service API Key needs to be generated. All the steps are summarized in the official documentation . The Service ID must be granted access to the Key Protect Service. Once the Service API Key is generated, store it in a Kubernetes Secret. 
1 2 3 4 5 6 7 apiVersion : v1 kind : Secret metadata : name : ibm-kp-svc-api-key namespace : rook-ceph data : IBM_KP_SERVICE_API_KEY :  In order for Rook to connect to IBM Key Protect, you must configure the following in your CephCluster template: 1 2 3 4 5 6 7 8 security : kms : # name of the k8s config map containing all the kms connection details connectionDetails : KMS_PROVIDER : ibmkeyprotect IBM_KP_SERVICE_INSTANCE_ID :  # name of the k8s secret containing the service API Key tokenSecretName : ibm-kp-svc-api-key More options are supported such as: IBM_BASE_URL : the base URL of the Key Protect instance, depending on your region . Defaults to https://us-south.kms.cloud.ibm.com . IBM_TOKEN_URL : the URL of the Key Protect instance to retrieve the token. Defaults to https://iam.cloud.ibm.com/oidc/token . Only needed for private instances.","title":"Configuration"},{"location":"Storage-Configuration/Advanced/key-management-system/#key-management-interoperability-protocol","text":"Rook supports storing OSD encryption keys in Key Management Interoperability Protocol (KMIP) supported KMS. The current implementation stores OSD encryption keys using the Register operation. Key is fetched and deleted using Get and Destroy operations respectively.","title":"Key Management Interoperability Protocol"},{"location":"Storage-Configuration/Advanced/key-management-system/#configuration_1","text":"The Secret with credentials for the KMIP KMS is expected to contain the following. 1 2 3 4 5 6 7 8 9 apiVersion : v1 kind : Secret metadata : name : kmip-credentials namespace : rook-ceph stringData : CA_CERT :  CLIENT_CERT :  CLIENT_KEY :  In order for Rook to connect to KMIP, you must configure the following in your CephCluster template: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 security : kms : # name of the k8s config map containing all the kms connection details connectionDetails : KMS_PROVIDER : kmip KMIP_ENDPOINT :  # (optional) The endpoint server name. Useful when the KMIP endpoint does not have a DNS entry. TLS_SERVER_NAME :  # (optional) Network read timeout, in seconds. The default value is 10. READ_TIMEOUT :  # (optional) Network write timeout, in seconds. The default value is 10. WRITE_TIMEOUT :  # name of the k8s secret containing the credentials. tokenSecretName : kmip-credentials","title":"Configuration"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/","text":"Block storage allows a single pod to mount storage. This guide shows how to create a simple, multi-tier web application on Kubernetes using persistent volumes enabled by Rook. Prerequisites \u00b6 This guide assumes a Rook cluster as explained in the Quickstart . Provision Storage \u00b6 Before Rook can provision storage, a StorageClass and CephBlockPool CR need to be created. This will allow Kubernetes to interoperate with Rook when provisioning persistent volumes. Note This sample requires at least 1 OSD per node , with each OSD located on 3 different nodes . Each OSD must be located on a different node, because the failureDomain is set to host and the replicated.size is set to 3 . 
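One way to check the prerequisite above (one OSD per node, spread across three nodes) is to list the OSD pods together with the nodes they run on; a minimal sketch:

```console
kubectl -n rook-ceph get pods -l app=rook-ceph-osd -o wide
```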
Save this StorageClass definition as storageclass.yaml : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : failureDomain : host replicated : size : 3 --- apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-ceph-block # Change \"rook-ceph\" provisioner prefix to match the operator namespace if needed provisioner : rook-ceph.rbd.csi.ceph.com parameters : # clusterID is the namespace where the rook cluster is running clusterID : rook-ceph # Ceph pool into which the RBD image shall be created pool : replicapool # (optional) mapOptions is a comma-separated list of map options. # For krbd options refer # https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options # For nbd options refer # https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options # mapOptions: lock_on_read,queue_depth=1024 # (optional) unmapOptions is a comma-separated list of unmap options. # For krbd options refer # https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options # For nbd options refer # https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options # unmapOptions: force # RBD image format. Defaults to \"2\". imageFormat : \"2\" # RBD image features # Available for imageFormat: \"2\". Older releases of CSI RBD # support only the `layering` feature. The Linux kernel (KRBD) supports the # full complement of features as of 5.4 # `layering` alone corresponds to Ceph's bitfield value of \"2\" ; # `layering` + `fast-diff` + `object-map` + `deep-flatten` + `exclusive-lock` together # correspond to Ceph's OR'd bitfield value of \"63\". Here we use # a symbolic, comma-separated format: # For 5.4 or later kernels: #imageFeatures: layering,fast-diff,object-map,deep-flatten,exclusive-lock # For 5.3 or earlier kernels: imageFeatures : layering # The secrets contain Ceph admin credentials. csi.storage.k8s.io/provisioner-secret-name : rook-csi-rbd-provisioner csi.storage.k8s.io/provisioner-secret-namespace : rook-ceph csi.storage.k8s.io/controller-expand-secret-name : rook-csi-rbd-provisioner csi.storage.k8s.io/controller-expand-secret-namespace : rook-ceph csi.storage.k8s.io/node-stage-secret-name : rook-csi-rbd-node csi.storage.k8s.io/node-stage-secret-namespace : rook-ceph # Specify the filesystem type of the volume. If not specified, csi-provisioner # will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock # in hyperconverged settings where the volume is mounted on the same node as the osds. csi.storage.k8s.io/fstype : ext4 # Delete the rbd volume when a PVC is deleted reclaimPolicy : Delete # Optional, if you want to add dynamic resize for PVC. # For now only ext3, ext4, xfs resize support provided, like in Kubernetes itself. allowVolumeExpansion : true If you've deployed the Rook operator in a namespace other than \"rook-ceph\", change the prefix in the provisioner to match the namespace you used. For example, if the Rook operator is running in the namespace \"my-namespace\" the provisioner value should be \"my-namespace.rbd.csi.ceph.com\". Create the storage class. 
1 kubectl create -f deploy/examples/csi/rbd/storageclass.yaml Note As specified by Kubernetes , when using the Retain reclaim policy, any Ceph RBD image that is backed by a PersistentVolume will continue to exist even after the PersistentVolume has been deleted. These Ceph RBD images will need to be cleaned up manually using rbd rm . Consume the storage: Wordpress sample \u00b6 We create a sample app to consume the block storage provisioned by Rook with the classic wordpress and mysql apps. Both of these apps will make use of block volumes provisioned by Rook. Start mysql and wordpress from the deploy/examples folder: 1 2 kubectl create -f mysql.yaml kubectl create -f wordpress.yaml Both of these apps create a block volume and mount it to their respective pod. You can see the Kubernetes volume claims by running the following: 1 kubectl get pvc Example Output: kubectl get pvc 1 2 3 NAME STATUS VOLUME CAPACITY ACCESSMODES AGE mysql-pv-claim Bound pvc-95402dbc-efc0-11e6-bc9a-0cc47a3459ee 20Gi RWO 1m wp-pv-claim Bound pvc-39e43169-efc1-11e6-bc9a-0cc47a3459ee 20Gi RWO 1m Once the wordpress and mysql pods are in the Running state, get the cluster IP of the wordpress app and enter it in your browser: 1 kubectl get svc wordpress Example Output: kubectl get svc wordpress 1 2 NAME CLUSTER-IP EXTERNAL-IP PORT(S) AGE wordpress 10.3.0.155  80:30841/TCP 2m You should see the wordpress app running. If you are using Minikube, the Wordpress URL can be retrieved with this one-line command: 1 echo http://$(minikube ip):$(kubectl get service wordpress -o jsonpath='{.spec.ports[0].nodePort}') Note When running in a vagrant environment, there will be no external IP address to reach wordpress with. You will only be able to reach wordpress via the CLUSTER-IP from inside the Kubernetes cluster. Consume the storage: Toolbox \u00b6 With the pool that was created above, we can also create a block image and mount it directly in a pod. See the Direct Block Tools topic for more details. Teardown \u00b6 To clean up all the artifacts created by the block demo: 1 2 3 4 kubectl delete -f wordpress.yaml kubectl delete -f mysql.yaml kubectl delete -n rook-ceph cephblockpools.ceph.rook.io replicapool kubectl delete storageclass rook-ceph-block Advanced Example: Erasure Coded Block Storage \u00b6 If you want to use erasure coded pool with RBD, your OSDs must use bluestore as their storeType . Additionally the nodes that are going to mount the erasure coded RBD block storage must have Linux kernel >= 4.11 . Attention This example requires at least 3 bluestore OSDs , with each OSD located on a different node . The OSDs must be located on different nodes, because the failureDomain is set to host and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). To be able to use an erasure coded pool you need to create two pools (as seen below in the definitions): one erasure coded and one replicated. Attention This example requires at least 3 bluestore OSDs , with each OSD located on a different node . The OSDs must be located on different nodes, because the failureDomain is set to host and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). Erasure Coded CSI Driver \u00b6 The erasure coded pool must be set as the dataPool parameter in storageclass-ec.yaml It is used for the data of the RBD images. 
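For orientation, storageclass-ec.yaml roughly combines a replicated metadata pool, an erasure coded data pool, and a StorageClass whose dataPool points at the erasure coded pool. This is a hedged sketch with illustrative names; the csi.storage.k8s.io secret parameters shown in the earlier StorageClass example also apply but are omitted here for brevity:

```yaml
apiVersion: ceph.rook.io/v1
kind: CephBlockPool
metadata:
  name: replicated-metadata-pool
  namespace: rook-ceph
spec:
  failureDomain: host
  replicated:
    size: 3
---
apiVersion: ceph.rook.io/v1
kind: CephBlockPool
metadata:
  name: ec-data-pool
  namespace: rook-ceph
spec:
  failureDomain: host
  erasureCoded:
    dataChunks: 2
    codingChunks: 1
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: rook-ceph-block-ec
provisioner: rook-ceph.rbd.csi.ceph.com
parameters:
  clusterID: rook-ceph
  pool: replicated-metadata-pool   # replicated pool holds the RBD image metadata
  dataPool: ec-data-pool           # erasure coded pool holds the data
  imageFormat: "2"
  imageFeatures: layering
  csi.storage.k8s.io/fstype: ext4
reclaimPolicy: Delete
```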
Node Loss \u00b6 If a node goes down where a pod is running where a RBD RWO volume is mounted, the volume cannot automatically be mounted on another node. The node must be guaranteed to be offline before the volume can be mounted on another node. Note These instructions are for clusters with Kubernetes version 1.26 or greater. For K8s 1.25 or older, see the manual steps in the CSI troubleshooting guide to recover from the node loss. Configure CSI-Addons \u00b6 Deploy the csi-addons manifests: 1 2 3 kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/crds.yaml kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/rbac.yaml kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/setup-controller.yaml Enable the csi-addons sidecar in the Rook operator configuration. 1 kubectl patch cm rook-ceph-operator-config -n -p $'data:\\n \"CSI_ENABLE_CSIADDONS\": \"true\"' Handling Node Loss \u00b6 When a node is confirmed to be down, add the following taints to the node: 1 2 kubectl taint nodes  node.kubernetes.io/out-of-service=nodeshutdown:NoExecute kubectl taint nodes  node.kubernetes.io/out-of-service=nodeshutdown:NoSchedule After the taint is added to the node, Rook will automatically blocklist the node to prevent connections to Ceph from the RBD volume on that node. To verify a node is blocklisted: 1 2 3 kubectl get networkfences.csiaddons.openshift.io NAME DRIVER CIDRS FENCESTATE AGE RESULT minikube-m02 rook-ceph.rbd.csi.ceph.com [\"192.168.39.187:0/32\"] Fenced 20s Succeeded The node is blocklisted if the state is Fenced and the result is Succeeded as seen above. Node Recovery \u00b6 If the node comes back online, the network fence can be removed from the node by removing the node taints: 1 2 kubectl taint nodes  node.kubernetes.io/out-of-service=nodeshutdown:NoExecute- kubectl taint nodes  node.kubernetes.io/out-of-service=nodeshutdown:NoSchedule-","title":"Block Storage Overview"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#prerequisites","text":"This guide assumes a Rook cluster as explained in the Quickstart .","title":"Prerequisites"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#provision-storage","text":"Before Rook can provision storage, a StorageClass and CephBlockPool CR need to be created. This will allow Kubernetes to interoperate with Rook when provisioning persistent volumes. Note This sample requires at least 1 OSD per node , with each OSD located on 3 different nodes . Each OSD must be located on a different node, because the failureDomain is set to host and the replicated.size is set to 3 . 
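Once the StorageClass defined below has been created, applications request storage with an ordinary PersistentVolumeClaim. A minimal sketch; the claim name and size are illustrative:

```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: rbd-pvc
spec:
  storageClassName: rook-ceph-block
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
```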
Save this StorageClass definition as storageclass.yaml : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : failureDomain : host replicated : size : 3 --- apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-ceph-block # Change \"rook-ceph\" provisioner prefix to match the operator namespace if needed provisioner : rook-ceph.rbd.csi.ceph.com parameters : # clusterID is the namespace where the rook cluster is running clusterID : rook-ceph # Ceph pool into which the RBD image shall be created pool : replicapool # (optional) mapOptions is a comma-separated list of map options. # For krbd options refer # https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options # For nbd options refer # https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options # mapOptions: lock_on_read,queue_depth=1024 # (optional) unmapOptions is a comma-separated list of unmap options. # For krbd options refer # https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options # For nbd options refer # https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options # unmapOptions: force # RBD image format. Defaults to \"2\". imageFormat : \"2\" # RBD image features # Available for imageFormat: \"2\". Older releases of CSI RBD # support only the `layering` feature. The Linux kernel (KRBD) supports the # full complement of features as of 5.4 # `layering` alone corresponds to Ceph's bitfield value of \"2\" ; # `layering` + `fast-diff` + `object-map` + `deep-flatten` + `exclusive-lock` together # correspond to Ceph's OR'd bitfield value of \"63\". Here we use # a symbolic, comma-separated format: # For 5.4 or later kernels: #imageFeatures: layering,fast-diff,object-map,deep-flatten,exclusive-lock # For 5.3 or earlier kernels: imageFeatures : layering # The secrets contain Ceph admin credentials. csi.storage.k8s.io/provisioner-secret-name : rook-csi-rbd-provisioner csi.storage.k8s.io/provisioner-secret-namespace : rook-ceph csi.storage.k8s.io/controller-expand-secret-name : rook-csi-rbd-provisioner csi.storage.k8s.io/controller-expand-secret-namespace : rook-ceph csi.storage.k8s.io/node-stage-secret-name : rook-csi-rbd-node csi.storage.k8s.io/node-stage-secret-namespace : rook-ceph # Specify the filesystem type of the volume. If not specified, csi-provisioner # will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock # in hyperconverged settings where the volume is mounted on the same node as the osds. csi.storage.k8s.io/fstype : ext4 # Delete the rbd volume when a PVC is deleted reclaimPolicy : Delete # Optional, if you want to add dynamic resize for PVC. # For now only ext3, ext4, xfs resize support provided, like in Kubernetes itself. allowVolumeExpansion : true If you've deployed the Rook operator in a namespace other than \"rook-ceph\", change the prefix in the provisioner to match the namespace you used. For example, if the Rook operator is running in the namespace \"my-namespace\" the provisioner value should be \"my-namespace.rbd.csi.ceph.com\". Create the storage class. 
1 kubectl create -f deploy/examples/csi/rbd/storageclass.yaml Note As specified by Kubernetes , when using the Retain reclaim policy, any Ceph RBD image that is backed by a PersistentVolume will continue to exist even after the PersistentVolume has been deleted. These Ceph RBD images will need to be cleaned up manually using rbd rm .","title":"Provision Storage"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#consume-the-storage-wordpress-sample","text":"We create a sample app to consume the block storage provisioned by Rook with the classic wordpress and mysql apps. Both of these apps will make use of block volumes provisioned by Rook. Start mysql and wordpress from the deploy/examples folder: 1 2 kubectl create -f mysql.yaml kubectl create -f wordpress.yaml Both of these apps create a block volume and mount it to their respective pod. You can see the Kubernetes volume claims by running the following: 1 kubectl get pvc Example Output: kubectl get pvc 1 2 3 NAME STATUS VOLUME CAPACITY ACCESSMODES AGE mysql-pv-claim Bound pvc-95402dbc-efc0-11e6-bc9a-0cc47a3459ee 20Gi RWO 1m wp-pv-claim Bound pvc-39e43169-efc1-11e6-bc9a-0cc47a3459ee 20Gi RWO 1m Once the wordpress and mysql pods are in the Running state, get the cluster IP of the wordpress app and enter it in your browser: 1 kubectl get svc wordpress Example Output: kubectl get svc wordpress 1 2 NAME CLUSTER-IP EXTERNAL-IP PORT(S) AGE wordpress 10.3.0.155  80:30841/TCP 2m You should see the wordpress app running. If you are using Minikube, the Wordpress URL can be retrieved with this one-line command: 1 echo http://$(minikube ip):$(kubectl get service wordpress -o jsonpath='{.spec.ports[0].nodePort}') Note When running in a vagrant environment, there will be no external IP address to reach wordpress with. You will only be able to reach wordpress via the CLUSTER-IP from inside the Kubernetes cluster.","title":"Consume the storage: Wordpress sample"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#consume-the-storage-toolbox","text":"With the pool that was created above, we can also create a block image and mount it directly in a pod. See the Direct Block Tools topic for more details.","title":"Consume the storage: Toolbox"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#teardown","text":"To clean up all the artifacts created by the block demo: 1 2 3 4 kubectl delete -f wordpress.yaml kubectl delete -f mysql.yaml kubectl delete -n rook-ceph cephblockpools.ceph.rook.io replicapool kubectl delete storageclass rook-ceph-block","title":"Teardown"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#advanced-example-erasure-coded-block-storage","text":"If you want to use erasure coded pool with RBD, your OSDs must use bluestore as their storeType . Additionally the nodes that are going to mount the erasure coded RBD block storage must have Linux kernel >= 4.11 . Attention This example requires at least 3 bluestore OSDs , with each OSD located on a different node . The OSDs must be located on different nodes, because the failureDomain is set to host and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). To be able to use an erasure coded pool you need to create two pools (as seen below in the definitions): one erasure coded and one replicated. Attention This example requires at least 3 bluestore OSDs , with each OSD located on a different node . 
The OSDs must be located on different nodes, because the failureDomain is set to host and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ).","title":"Advanced Example: Erasure Coded Block Storage"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#erasure-coded-csi-driver","text":"The erasure coded pool must be set as the dataPool parameter in storageclass-ec.yaml It is used for the data of the RBD images.","title":"Erasure Coded CSI Driver"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#node-loss","text":"If a node goes down where a pod is running where a RBD RWO volume is mounted, the volume cannot automatically be mounted on another node. The node must be guaranteed to be offline before the volume can be mounted on another node. Note These instructions are for clusters with Kubernetes version 1.26 or greater. For K8s 1.25 or older, see the manual steps in the CSI troubleshooting guide to recover from the node loss.","title":"Node Loss"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#configure-csi-addons","text":"Deploy the csi-addons manifests: 1 2 3 kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/crds.yaml kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/rbac.yaml kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/setup-controller.yaml Enable the csi-addons sidecar in the Rook operator configuration. 1 kubectl patch cm rook-ceph-operator-config -n -p $'data:\\n \"CSI_ENABLE_CSIADDONS\": \"true\"'","title":"Configure CSI-Addons"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#handling-node-loss","text":"When a node is confirmed to be down, add the following taints to the node: 1 2 kubectl taint nodes  node.kubernetes.io/out-of-service=nodeshutdown:NoExecute kubectl taint nodes  node.kubernetes.io/out-of-service=nodeshutdown:NoSchedule After the taint is added to the node, Rook will automatically blocklist the node to prevent connections to Ceph from the RBD volume on that node. To verify a node is blocklisted: 1 2 3 kubectl get networkfences.csiaddons.openshift.io NAME DRIVER CIDRS FENCESTATE AGE RESULT minikube-m02 rook-ceph.rbd.csi.ceph.com [\"192.168.39.187:0/32\"] Fenced 20s Succeeded The node is blocklisted if the state is Fenced and the result is Succeeded as seen above.","title":"Handling Node Loss"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#node-recovery","text":"If the node comes back online, the network fence can be removed from the node by removing the node taints: 1 2 kubectl taint nodes  node.kubernetes.io/out-of-service=nodeshutdown:NoExecute- kubectl taint nodes  node.kubernetes.io/out-of-service=nodeshutdown:NoSchedule-","title":"Node Recovery"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-async-disaster-recovery-failover-failback/","text":"Planned Migration and Disaster Recovery \u00b6 Rook comes with the volume replication support, which allows users to perform disaster recovery and planned migration of clusters. The following document will help to track the procedure for failover and failback in case of a Disaster recovery or Planned migration use cases. Note The document assumes that RBD Mirroring is set up between the peer clusters. 
For information on rbd mirroring and how to set it up using rook, please refer to the rbd-mirroring guide . Planned Migration \u00b6 Info Use cases: Datacenter maintenance, technology refresh, disaster avoidance, etc. Relocation \u00b6 The Relocation operation is the process of switching production to a backup facility(normally your recovery site) or vice versa. For relocation, access to the image on the primary site should be stopped. The image should now be made primary on the secondary cluster so that the access can be resumed there. Note Periodic or one-time backup of the application should be available for restore on the secondary site (cluster-2). Follow the below steps for planned migration of workload from the primary cluster to the secondary cluster: Scale down all the application pods which are using the mirrored PVC on the Primary Cluster. Take a backup of PVC and PV object from the primary cluster. This can be done using some backup tools like velero . Update VolumeReplication CR to set replicationState to secondary at the Primary Site. When the operator sees this change, it will pass the information down to the driver via GRPC request to mark the dataSource as secondary . If you are manually recreating the PVC and PV on the secondary cluster, remove the claimRef section in the PV objects. (See this for details) Recreate the storageclass, PVC, and PV objects on the secondary site. As you are creating the static binding between PVC and PV, a new PV won\u2019t be created here, the PVC will get bind to the existing PV. Create the VolumeReplicationClass on the secondary site. Create VolumeReplications for all the PVC\u2019s for which mirroring is enabled replicationState should be primary for all the PVC\u2019s on the secondary site. Check VolumeReplication CR status to verify if the image is marked primary on the secondary site. Once the Image is marked as primary , the PVC is now ready to be used. Now, we can scale up the applications to use the PVC. Warning In Async Disaster recovery use case, we don't get the complete data. We will only get the crash-consistent data based on the snapshot interval time. Disaster Recovery \u00b6 Info Use cases: Natural disasters, Power failures, System failures, and crashes, etc. Note To effectively resume operations after a failover/relocation, backup of the kubernetes artifacts like deployment, PVC, PV, etc need to be created beforehand by the admin; so that the application can be restored on the peer cluster. For more information, see backup and restore . Failover (abrupt shutdown) \u00b6 In case of Disaster recovery, create VolumeReplication CR at the Secondary Site. Since the connection to the Primary Site is lost, the operator automatically sends a GRPC request down to the driver to forcefully mark the dataSource as primary on the Secondary Site. If you are manually creating the PVC and PV on the secondary cluster, remove the claimRef section in the PV objects. (See this for details) Create the storageclass, PVC, and PV objects on the secondary site. As you are creating the static binding between PVC and PV, a new PV won\u2019t be created here, the PVC will get bind to the existing PV. Create the VolumeReplicationClass and VolumeReplication CR on the secondary site. Check VolumeReplication CR status to verify if the image is marked primary on the secondary site. Once the Image is marked as primary , the PVC is now ready to be used. Now, we can scale up the applications to use the PVC. 
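The replicationState changes called for in the relocation and failback procedures can be applied by patching the VolumeReplication CR. A hedged sketch; the CR name is illustrative:

```console
# demote the volume on the site being vacated
kubectl patch volumereplication pvc-volumereplication --type merge \
  -p '{"spec":{"replicationState":"secondary"}}'

# promote it on the peer site once the volume reports ready to use
kubectl patch volumereplication pvc-volumereplication --type merge \
  -p '{"spec":{"replicationState":"primary"}}'
```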
Failback (post-disaster recovery) \u00b6 Once the failed cluster is recovered on the primary site and you want to failback from secondary site, follow the below steps: Scale down the running applications (if any) on the primary site. Ensure that all persistent volumes in use by the workload are no longer in use on the primary cluster. Update VolumeReplication CR replicationState from primary to secondary on the primary site. Scale down the applications on the secondary site. Update VolumeReplication CR replicationState state from primary to secondary in secondary site. On the primary site, verify the VolumeReplication status is marked as volume ready to use. Once the volume is marked to ready to use, change the replicationState state from secondary to primary in primary site. Scale up the applications again on the primary site.","title":"RBD Asynchronous DR Failover and Failback"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-async-disaster-recovery-failover-failback/#planned-migration-and-disaster-recovery","text":"Rook comes with the volume replication support, which allows users to perform disaster recovery and planned migration of clusters. The following document will help to track the procedure for failover and failback in case of a Disaster recovery or Planned migration use cases. Note The document assumes that RBD Mirroring is set up between the peer clusters. For information on rbd mirroring and how to set it up using rook, please refer to the rbd-mirroring guide .","title":"Planned Migration and Disaster Recovery"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-async-disaster-recovery-failover-failback/#planned-migration","text":"Info Use cases: Datacenter maintenance, technology refresh, disaster avoidance, etc.","title":"Planned Migration"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-async-disaster-recovery-failover-failback/#relocation","text":"The Relocation operation is the process of switching production to a backup facility(normally your recovery site) or vice versa. For relocation, access to the image on the primary site should be stopped. The image should now be made primary on the secondary cluster so that the access can be resumed there. Note Periodic or one-time backup of the application should be available for restore on the secondary site (cluster-2). Follow the below steps for planned migration of workload from the primary cluster to the secondary cluster: Scale down all the application pods which are using the mirrored PVC on the Primary Cluster. Take a backup of PVC and PV object from the primary cluster. This can be done using some backup tools like velero . Update VolumeReplication CR to set replicationState to secondary at the Primary Site. When the operator sees this change, it will pass the information down to the driver via GRPC request to mark the dataSource as secondary . If you are manually recreating the PVC and PV on the secondary cluster, remove the claimRef section in the PV objects. (See this for details) Recreate the storageclass, PVC, and PV objects on the secondary site. As you are creating the static binding between PVC and PV, a new PV won\u2019t be created here, the PVC will get bind to the existing PV. Create the VolumeReplicationClass on the secondary site. Create VolumeReplications for all the PVC\u2019s for which mirroring is enabled replicationState should be primary for all the PVC\u2019s on the secondary site. 
Check VolumeReplication CR status to verify if the image is marked primary on the secondary site. Once the Image is marked as primary , the PVC is now ready to be used. Now, we can scale up the applications to use the PVC. Warning In Async Disaster recovery use case, we don't get the complete data. We will only get the crash-consistent data based on the snapshot interval time.","title":"Relocation"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-async-disaster-recovery-failover-failback/#disaster-recovery","text":"Info Use cases: Natural disasters, Power failures, System failures, and crashes, etc. Note To effectively resume operations after a failover/relocation, backup of the kubernetes artifacts like deployment, PVC, PV, etc need to be created beforehand by the admin; so that the application can be restored on the peer cluster. For more information, see backup and restore .","title":"Disaster Recovery"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-async-disaster-recovery-failover-failback/#failover-abrupt-shutdown","text":"In case of Disaster recovery, create VolumeReplication CR at the Secondary Site. Since the connection to the Primary Site is lost, the operator automatically sends a GRPC request down to the driver to forcefully mark the dataSource as primary on the Secondary Site. If you are manually creating the PVC and PV on the secondary cluster, remove the claimRef section in the PV objects. (See this for details) Create the storageclass, PVC, and PV objects on the secondary site. As you are creating the static binding between PVC and PV, a new PV won\u2019t be created here, the PVC will get bind to the existing PV. Create the VolumeReplicationClass and VolumeReplication CR on the secondary site. Check VolumeReplication CR status to verify if the image is marked primary on the secondary site. Once the Image is marked as primary , the PVC is now ready to be used. Now, we can scale up the applications to use the PVC.","title":"Failover (abrupt shutdown)"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-async-disaster-recovery-failover-failback/#failback-post-disaster-recovery","text":"Once the failed cluster is recovered on the primary site and you want to failback from secondary site, follow the below steps: Scale down the running applications (if any) on the primary site. Ensure that all persistent volumes in use by the workload are no longer in use on the primary cluster. Update VolumeReplication CR replicationState from primary to secondary on the primary site. Scale down the applications on the secondary site. Update VolumeReplication CR replicationState state from primary to secondary in secondary site. On the primary site, verify the VolumeReplication status is marked as volume ready to use. Once the volume is marked to ready to use, change the replicationState state from secondary to primary in primary site. Scale up the applications again on the primary site.","title":"Failback (post-disaster recovery)"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/","text":"Disaster Recovery \u00b6 Disaster recovery (DR) is an organization's ability to react to and recover from an incident that negatively affects business operations. This plan comprises strategies for minimizing the consequences of a disaster, so an organization can continue to operate \u2013 or quickly resume the key operations. Thus, disaster recovery is one of the aspects of business continuity . One of the solutions, to achieve the same, is RBD mirroring . 
RBD Mirroring \u00b6 RBD mirroring is an asynchronous replication of RBD images between multiple Ceph clusters. This capability is available in two modes: Journal-based: Every write to the RBD image is first recorded to the associated journal before modifying the actual image. The remote cluster will read from this associated journal and replay the updates to its local image. Snapshot-based: This mode uses periodically scheduled or manually created RBD image mirror-snapshots to replicate crash-consistent RBD images between clusters. Note This document sheds light on rbd mirroring and how to set it up using rook. See also the topic on Failover and Failback Create RBD Pools \u00b6 In this section, we create specific RBD pools that are RBD mirroring enabled for use with the DR use case. Execute the following steps on each peer cluster to create mirror enabled pools: Create a RBD pool that is enabled for mirroring by adding the section spec.mirroring in the CephBlockPool CR: 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : mirrored-pool namespace : rook-ceph spec : replicated : size : 1 mirroring : enabled : true mode : image 1 kubectl create -f pool-mirrored.yaml Repeat the steps on the peer cluster. Note Pool name across the cluster peers must be the same for RBD replication to function. See the CephBlockPool documentation for more details. Note It is also feasible to edit existing pools and enable them for replication. Bootstrap Peers \u00b6 In order for the rbd-mirror daemon to discover its peer cluster, the peer must be registered and a user account must be created. The following steps enable bootstrapping peers to discover and authenticate to each other: For Bootstrapping a peer cluster its bootstrap secret is required. To determine the name of the secret that contains the bootstrap secret execute the following command on the remote cluster (cluster-2) 1 [cluster-2]$ kubectl get cephblockpool.ceph.rook.io/mirrored-pool -n rook-ceph -ojsonpath = '{.status.info.rbdMirrorBootstrapPeerSecretName}' Here, pool-peer-token-mirrored-pool is the desired bootstrap secret name. The secret pool-peer-token-mirrored-pool contains all the information related to the token and needs to be injected to the peer, to fetch the decoded secret: 1 2 [cluster-2]$ kubectl get secret -n rook-ceph pool-peer-token-mirrored-pool -o jsonpath = '{.data.token}' | base64 -d eyJmc2lkIjoiNGQ1YmNiNDAtNDY3YS00OWVkLThjMGEtOWVhOGJkNDY2OTE3IiwiY2xpZW50X2lkIjoicmJkLW1pcnJvci1wZWVyIiwia2V5IjoiQVFDZ3hmZGdxN013R0JBQWZzcUtCaGpZVjJUZDRxVzJYQm5kemc9PSIsIm1vbl9ob3N0IjoiW3YyOjE5Mi4xNjguMzkuMzY6MzMwMCx2MToxOTIuMTY4LjM5LjM2OjY3ODldIn0= With this Decoded value, create a secret on the primary site (cluster-1): 1 [cluster-1]$ kubectl -n rook-ceph create secret generic rbd-primary-site-secret --from-literal = token = eyJmc2lkIjoiNGQ1YmNiNDAtNDY3YS00OWVkLThjMGEtOWVhOGJkNDY2OTE3IiwiY2xpZW50X2lkIjoicmJkLW1pcnJvci1wZWVyIiwia2V5IjoiQVFDZ3hmZGdxN013R0JBQWZzcUtCaGpZVjJUZDRxVzJYQm5kemc9PSIsIm1vbl9ob3N0IjoiW3YyOjE5Mi4xNjguMzkuMzY6MzMwMCx2MToxOTIuMTY4LjM5LjM2OjY3ODldIn0 = --from-literal = pool = mirrored-pool This completes the bootstrap process for cluster-1 to be peered with cluster-2. Repeat the process switching cluster-2 in place of cluster-1, to complete the bootstrap process across both peer clusters. For more details, refer to the official rbd mirror documentation on how to create a bootstrap peer . Configure the RBDMirror Daemon \u00b6 Replication is handled by the rbd-mirror daemon. 
The rbd-mirror daemon is responsible for pulling image updates from the remote, peer cluster, and applying them to image within the local cluster. Creation of the rbd-mirror daemon(s) is done through the custom resource definitions (CRDs), as follows: Create mirror.yaml, to deploy the rbd-mirror daemon 1 2 3 4 5 6 7 8 apiVersion : ceph.rook.io/v1 kind : CephRBDMirror metadata : name : my-rbd-mirror namespace : rook-ceph spec : # the number of rbd-mirror daemons to deploy count : 1 Create the RBD mirror daemon 1 [cluster-1]$ kubectl create -f mirror.yaml -n rook-ceph Validate if rbd-mirror daemon pod is now up 1 2 [cluster-1]$ kubectl get pods -n rook-ceph rook-ceph-rbd-mirror-a-6985b47c8c-dpv4k 1/1 Running 0 10s Verify that daemon health is OK 1 2 kubectl get cephblockpools.ceph.rook.io mirrored-pool -n rook-ceph -o jsonpath='{.status.mirroringStatus.summary}' {\"daemon_health\":\"OK\",\"health\":\"OK\",\"image_health\":\"OK\",\"states\":{\"replaying\":1}} Repeat the above steps on the peer cluster. See the CephRBDMirror CRD for more details on the mirroring settings. Add mirroring peer information to RBD pools \u00b6 Each pool can have its own peer. To add the peer information, patch the already created mirroring enabled pool to update the CephBlockPool CRD. 1 [cluster-1]$ kubectl -n rook-ceph patch cephblockpool mirrored-pool --type merge -p '{\"spec\":{\"mirroring\":{\"peers\": {\"secretNames\": [\"rbd-primary-site-secret\"]}}}}' Create VolumeReplication CRDs \u00b6 Volume Replication Operator follows controller pattern and provides extended APIs for storage disaster recovery. The extended APIs are provided via Custom Resource Definition(CRD). Create the VolumeReplication CRDs on all the peer clusters. 1 2 kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.5.0/config/crd/bases/replication.storage.openshift.io_volumereplicationclasses.yaml kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.5.0/config/crd/bases/replication.storage.openshift.io_volumereplications.yaml Enable CSI Replication Sidecars \u00b6 To achieve RBD Mirroring, csi-omap-generator and csi-addons containers need to be deployed in the RBD provisioner pods, which are not enabled by default. Omap Generator : Omap generator is a sidecar container that when deployed with the CSI provisioner pod, generates the internal CSI omaps between the PV and the RBD image. This is required as static PVs are transferred across peer clusters in the DR use case, and hence is needed to preserve PVC to storage mappings. Volume Replication Operator : Volume Replication Operator is a kubernetes operator that provides common and reusable APIs for storage disaster recovery. The volume replication operation is supported by the CSIAddons It is based on csi-addons/spec specification and can be used by any storage provider. Execute the following steps on each peer cluster to enable the OMap generator and CSIADDONS sidecars: Edit the rook-ceph-operator-config configmap and add the following configurations 1 kubectl edit cm rook-ceph-operator-config -n rook-ceph Add the following properties if not present: 1 2 3 data : CSI_ENABLE_OMAP_GENERATOR : \"true\" CSI_ENABLE_CSIADDONS : \"true\" After updating the configmap with those settings, two new sidecars should now start automatically in the CSI provisioner pod. Repeat the steps on the peer cluster. 
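Once the operator has reconciled the configmap change above, the additional sidecars can be confirmed by listing the containers of the RBD provisioner pod. A hedged sketch; the label selector and container names may vary slightly between Rook versions:

```console
kubectl -n rook-ceph get pod -l app=csi-rbdplugin-provisioner \
  -o jsonpath='{.items[0].spec.containers[*].name}'
# expect csi-omap-generator and csi-addons to appear among the containers
```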
Volume Replication Custom Resources \u00b6 VolumeReplication CRDs provide support for two custom resources: VolumeReplicationClass : VolumeReplicationClass is a cluster scoped resource that contains driver related configuration parameters. It holds the storage admin information required for the volume replication operator. VolumeReplication : VolumeReplication is a namespaced resource that contains references to storage object to be replicated and VolumeReplicationClass corresponding to the driver providing replication. Enable mirroring on a PVC \u00b6 Below guide assumes that we have a PVC (rbd-pvc) in BOUND state; created using StorageClass with Retain reclaimPolicy. 1 2 3 [cluster-1]$ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE rbd-pvc Bound pvc-65dc0aac-5e15-4474-90f4-7a3532c621ec 1Gi RWO csi-rbd-sc 44s Create a Volume Replication Class CR \u00b6 In this case, we create a Volume Replication Class on cluster-1 1 [cluster-1]$ kubectl apply -f deploy/examples/volume-replication-class.yaml Note The schedulingInterval can be specified in formats of minutes, hours or days using suffix m , h and d respectively. The optional schedulingStartTime can be specified using the ISO 8601 time format. Create a VolumeReplication CR \u00b6 Once VolumeReplicationClass is created, create a Volume Replication for the PVC which we intend to replicate to secondary cluster. 1 [cluster-1]$ kubectl apply -f deploy/examples/volume-replication.yaml Note VolumeReplication is a namespace scoped object. Thus, it should be created in the same namespace as of PVC. Checking Replication Status \u00b6 replicationState is the state of the volume being referenced. Possible values are primary, secondary, and resync. primary denotes that the volume is primary. secondary denotes that the volume is secondary. resync denotes that the volume needs to be resynced. To check VolumeReplication CR status: 1 [cluster-1]$ kubectl get volumereplication pvc-volumereplication -oyaml 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 ... spec : dataSource : apiGroup : \"\" kind : PersistentVolumeClaim name : rbd-pvc replicationState : primary volumeReplicationClass : rbd-volumereplicationclass status : conditions : - lastTransitionTime : \"2021-05-04T07:39:00Z\" message : \"\" observedGeneration : 1 reason : Promoted status : \"True\" type : Completed - lastTransitionTime : \"2021-05-04T07:39:00Z\" message : \"\" observedGeneration : 1 reason : Healthy status : \"False\" type : Degraded - lastTransitionTime : \"2021-05-04T07:39:00Z\" message : \"\" observedGeneration : 1 reason : NotResyncing status : \"False\" type : Resyncing lastCompletionTime : \"2021-05-04T07:39:00Z\" lastStartTime : \"2021-05-04T07:38:59Z\" message : volume is marked primary observedGeneration : 1 state : Primary Backup & Restore \u00b6 Note To effectively resume operations after a failover/relocation, backup of the kubernetes artifacts like deployment, PVC, PV, etc need to be created beforehand by the admin; so that the application can be restored on the peer cluster. Here, we take a backup of PVC and PV object on one site, so that they can be restored later to the peer cluster. 
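The two example manifests referenced above are not reproduced on this page. The following is a minimal sketch of what deploy/examples/volume-replication-class.yaml and deploy/examples/volume-replication.yaml might contain; the parameter names (mirroringMode, schedulingInterval, the replication secret references) are assumptions and should be verified against the shipped examples:

```yaml
apiVersion: replication.storage.openshift.io/v1alpha1
kind: VolumeReplicationClass
metadata:
  name: rbd-volumereplicationclass
spec:
  provisioner: rook-ceph.rbd.csi.ceph.com
  parameters:
    # assumed parameter names; check deploy/examples/volume-replication-class.yaml
    mirroringMode: snapshot
    schedulingInterval: "12m"
    replication.storage.openshift.io/replication-secret-name: rook-csi-rbd-provisioner
    replication.storage.openshift.io/replication-secret-namespace: rook-ceph
---
apiVersion: replication.storage.openshift.io/v1alpha1
kind: VolumeReplication
metadata:
  name: pvc-volumereplication
  # must be created in the same namespace as the PVC it references
  namespace: default
spec:
  volumeReplicationClass: rbd-volumereplicationclass
  replicationState: primary
  dataSource:
    kind: PersistentVolumeClaim
    name: rbd-pvc
```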
Take backup on cluster-1 \u00b6 Take backup of the PVC rbd-pvc 1 [cluster-1]$ kubectl get pvc rbd-pvc -oyaml > pvc-backup.yaml Take a backup of the PV, corresponding to the PVC 1 [cluster-1]$ kubectl get pv/pvc-65dc0aac-5e15-4474-90f4-7a3532c621ec -oyaml > pv_backup.yaml Note We can also take backup using external tools like Velero . See velero documentation for more information. Restore the backup on cluster-2 \u00b6 Create storageclass on the secondary cluster 1 [cluster-2]$ kubectl create -f deploy/examples/csi/rbd/storageclass.yaml Create VolumeReplicationClass on the secondary cluster 1 2 [cluster-1]$ kubectl apply -f deploy/examples/volume-replication-class.yaml volumereplicationclass.replication.storage.openshift.io/rbd-volumereplicationclass created If Persistent Volumes and Claims are created manually on the secondary cluster, remove the claimRef on the backed up PV objects in yaml files; so that the PV can get bound to the new claim on the secondary cluster. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 ... spec : accessModes : - ReadWriteOnce capacity : storage : 1Gi claimRef : apiVersion : v1 kind : PersistentVolumeClaim name : rbd-pvc namespace : default resourceVersion : \"64252\" uid : 65dc0aac-5e15-4474-90f4-7a3532c621ec csi : ... Apply the Persistent Volume backup from the primary cluster 1 [cluster-2]$ kubectl create -f pv-backup.yaml Apply the Persistent Volume claim from the restored backup 1 [cluster-2]$ kubectl create -f pvc-backup.yaml 1 2 3 [cluster-2]$ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE rbd-pvc Bound pvc-65dc0aac-5e15-4474-90f4-7a3532c621ec 1Gi RWO rook-ceph-block 44s","title":"RBD Mirroring"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#disaster-recovery","text":"Disaster recovery (DR) is an organization's ability to react to and recover from an incident that negatively affects business operations. This plan comprises strategies for minimizing the consequences of a disaster, so an organization can continue to operate \u2013 or quickly resume the key operations. Thus, disaster recovery is one of the aspects of business continuity . One of the solutions, to achieve the same, is RBD mirroring .","title":"Disaster Recovery"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#rbd-mirroring","text":"RBD mirroring is an asynchronous replication of RBD images between multiple Ceph clusters. This capability is available in two modes: Journal-based: Every write to the RBD image is first recorded to the associated journal before modifying the actual image. The remote cluster will read from this associated journal and replay the updates to its local image. Snapshot-based: This mode uses periodically scheduled or manually created RBD image mirror-snapshots to replicate crash-consistent RBD images between clusters. Note This document sheds light on rbd mirroring and how to set it up using rook. See also the topic on Failover and Failback","title":"RBD Mirroring"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#create-rbd-pools","text":"In this section, we create specific RBD pools that are RBD mirroring enabled for use with the DR use case. 
Execute the following steps on each peer cluster to create mirror enabled pools: Create a RBD pool that is enabled for mirroring by adding the section spec.mirroring in the CephBlockPool CR: 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : mirrored-pool namespace : rook-ceph spec : replicated : size : 1 mirroring : enabled : true mode : image 1 kubectl create -f pool-mirrored.yaml Repeat the steps on the peer cluster. Note Pool name across the cluster peers must be the same for RBD replication to function. See the CephBlockPool documentation for more details. Note It is also feasible to edit existing pools and enable them for replication.","title":"Create RBD Pools"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#bootstrap-peers","text":"In order for the rbd-mirror daemon to discover its peer cluster, the peer must be registered and a user account must be created. The following steps enable bootstrapping peers to discover and authenticate to each other: For Bootstrapping a peer cluster its bootstrap secret is required. To determine the name of the secret that contains the bootstrap secret execute the following command on the remote cluster (cluster-2) 1 [cluster-2]$ kubectl get cephblockpool.ceph.rook.io/mirrored-pool -n rook-ceph -ojsonpath = '{.status.info.rbdMirrorBootstrapPeerSecretName}' Here, pool-peer-token-mirrored-pool is the desired bootstrap secret name. The secret pool-peer-token-mirrored-pool contains all the information related to the token and needs to be injected to the peer, to fetch the decoded secret: 1 2 [cluster-2]$ kubectl get secret -n rook-ceph pool-peer-token-mirrored-pool -o jsonpath = '{.data.token}' | base64 -d eyJmc2lkIjoiNGQ1YmNiNDAtNDY3YS00OWVkLThjMGEtOWVhOGJkNDY2OTE3IiwiY2xpZW50X2lkIjoicmJkLW1pcnJvci1wZWVyIiwia2V5IjoiQVFDZ3hmZGdxN013R0JBQWZzcUtCaGpZVjJUZDRxVzJYQm5kemc9PSIsIm1vbl9ob3N0IjoiW3YyOjE5Mi4xNjguMzkuMzY6MzMwMCx2MToxOTIuMTY4LjM5LjM2OjY3ODldIn0= With this Decoded value, create a secret on the primary site (cluster-1): 1 [cluster-1]$ kubectl -n rook-ceph create secret generic rbd-primary-site-secret --from-literal = token = eyJmc2lkIjoiNGQ1YmNiNDAtNDY3YS00OWVkLThjMGEtOWVhOGJkNDY2OTE3IiwiY2xpZW50X2lkIjoicmJkLW1pcnJvci1wZWVyIiwia2V5IjoiQVFDZ3hmZGdxN013R0JBQWZzcUtCaGpZVjJUZDRxVzJYQm5kemc9PSIsIm1vbl9ob3N0IjoiW3YyOjE5Mi4xNjguMzkuMzY6MzMwMCx2MToxOTIuMTY4LjM5LjM2OjY3ODldIn0 = --from-literal = pool = mirrored-pool This completes the bootstrap process for cluster-1 to be peered with cluster-2. Repeat the process switching cluster-2 in place of cluster-1, to complete the bootstrap process across both peer clusters. For more details, refer to the official rbd mirror documentation on how to create a bootstrap peer .","title":"Bootstrap Peers"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#configure-the-rbdmirror-daemon","text":"Replication is handled by the rbd-mirror daemon. The rbd-mirror daemon is responsible for pulling image updates from the remote, peer cluster, and applying them to image within the local cluster. 
Creation of the rbd-mirror daemon(s) is done through the custom resource definitions (CRDs), as follows: Create mirror.yaml, to deploy the rbd-mirror daemon 1 2 3 4 5 6 7 8 apiVersion : ceph.rook.io/v1 kind : CephRBDMirror metadata : name : my-rbd-mirror namespace : rook-ceph spec : # the number of rbd-mirror daemons to deploy count : 1 Create the RBD mirror daemon 1 [cluster-1]$ kubectl create -f mirror.yaml -n rook-ceph Validate if rbd-mirror daemon pod is now up 1 2 [cluster-1]$ kubectl get pods -n rook-ceph rook-ceph-rbd-mirror-a-6985b47c8c-dpv4k 1/1 Running 0 10s Verify that daemon health is OK 1 2 kubectl get cephblockpools.ceph.rook.io mirrored-pool -n rook-ceph -o jsonpath='{.status.mirroringStatus.summary}' {\"daemon_health\":\"OK\",\"health\":\"OK\",\"image_health\":\"OK\",\"states\":{\"replaying\":1}} Repeat the above steps on the peer cluster. See the CephRBDMirror CRD for more details on the mirroring settings.","title":"Configure the RBDMirror Daemon"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#add-mirroring-peer-information-to-rbd-pools","text":"Each pool can have its own peer. To add the peer information, patch the already created mirroring enabled pool to update the CephBlockPool CRD. 1 [cluster-1]$ kubectl -n rook-ceph patch cephblockpool mirrored-pool --type merge -p '{\"spec\":{\"mirroring\":{\"peers\": {\"secretNames\": [\"rbd-primary-site-secret\"]}}}}'","title":"Add mirroring peer information to RBD pools"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#create-volumereplication-crds","text":"Volume Replication Operator follows controller pattern and provides extended APIs for storage disaster recovery. The extended APIs are provided via Custom Resource Definition(CRD). Create the VolumeReplication CRDs on all the peer clusters. 1 2 kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.5.0/config/crd/bases/replication.storage.openshift.io_volumereplicationclasses.yaml kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.5.0/config/crd/bases/replication.storage.openshift.io_volumereplications.yaml","title":"Create VolumeReplication CRDs"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#enable-csi-replication-sidecars","text":"To achieve RBD Mirroring, csi-omap-generator and csi-addons containers need to be deployed in the RBD provisioner pods, which are not enabled by default. Omap Generator : Omap generator is a sidecar container that when deployed with the CSI provisioner pod, generates the internal CSI omaps between the PV and the RBD image. This is required as static PVs are transferred across peer clusters in the DR use case, and hence is needed to preserve PVC to storage mappings. Volume Replication Operator : Volume Replication Operator is a kubernetes operator that provides common and reusable APIs for storage disaster recovery. The volume replication operation is supported by the CSIAddons It is based on csi-addons/spec specification and can be used by any storage provider. 
Execute the following steps on each peer cluster to enable the OMap generator and CSIADDONS sidecars: Edit the rook-ceph-operator-config configmap and add the following configurations 1 kubectl edit cm rook-ceph-operator-config -n rook-ceph Add the following properties if not present: 1 2 3 data : CSI_ENABLE_OMAP_GENERATOR : \"true\" CSI_ENABLE_CSIADDONS : \"true\" After updating the configmap with those settings, two new sidecars should now start automatically in the CSI provisioner pod. Repeat the steps on the peer cluster.","title":"Enable CSI Replication Sidecars"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#volume-replication-custom-resources","text":"VolumeReplication CRDs provide support for two custom resources: VolumeReplicationClass : VolumeReplicationClass is a cluster scoped resource that contains driver related configuration parameters. It holds the storage admin information required for the volume replication operator. VolumeReplication : VolumeReplication is a namespaced resource that contains references to storage object to be replicated and VolumeReplicationClass corresponding to the driver providing replication.","title":"Volume Replication Custom Resources"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#enable-mirroring-on-a-pvc","text":"Below guide assumes that we have a PVC (rbd-pvc) in BOUND state; created using StorageClass with Retain reclaimPolicy. 1 2 3 [cluster-1]$ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE rbd-pvc Bound pvc-65dc0aac-5e15-4474-90f4-7a3532c621ec 1Gi RWO csi-rbd-sc 44s","title":"Enable mirroring on a PVC"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#create-a-volume-replication-class-cr","text":"In this case, we create a Volume Replication Class on cluster-1 1 [cluster-1]$ kubectl apply -f deploy/examples/volume-replication-class.yaml Note The schedulingInterval can be specified in formats of minutes, hours or days using suffix m , h and d respectively. The optional schedulingStartTime can be specified using the ISO 8601 time format.","title":"Create a Volume Replication Class CR"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#create-a-volumereplication-cr","text":"Once VolumeReplicationClass is created, create a Volume Replication for the PVC which we intend to replicate to secondary cluster. 1 [cluster-1]$ kubectl apply -f deploy/examples/volume-replication.yaml Note VolumeReplication is a namespace scoped object. Thus, it should be created in the same namespace as of PVC.","title":"Create a VolumeReplication CR"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#checking-replication-status","text":"replicationState is the state of the volume being referenced. Possible values are primary, secondary, and resync. primary denotes that the volume is primary. secondary denotes that the volume is secondary. resync denotes that the volume needs to be resynced. To check VolumeReplication CR status: 1 [cluster-1]$ kubectl get volumereplication pvc-volumereplication -oyaml 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 ... 
spec : dataSource : apiGroup : \"\" kind : PersistentVolumeClaim name : rbd-pvc replicationState : primary volumeReplicationClass : rbd-volumereplicationclass status : conditions : - lastTransitionTime : \"2021-05-04T07:39:00Z\" message : \"\" observedGeneration : 1 reason : Promoted status : \"True\" type : Completed - lastTransitionTime : \"2021-05-04T07:39:00Z\" message : \"\" observedGeneration : 1 reason : Healthy status : \"False\" type : Degraded - lastTransitionTime : \"2021-05-04T07:39:00Z\" message : \"\" observedGeneration : 1 reason : NotResyncing status : \"False\" type : Resyncing lastCompletionTime : \"2021-05-04T07:39:00Z\" lastStartTime : \"2021-05-04T07:38:59Z\" message : volume is marked primary observedGeneration : 1 state : Primary","title":"Checking Replication Status"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#backup-restore","text":"Note To effectively resume operations after a failover/relocation, backup of the kubernetes artifacts like deployment, PVC, PV, etc need to be created beforehand by the admin; so that the application can be restored on the peer cluster. Here, we take a backup of PVC and PV object on one site, so that they can be restored later to the peer cluster.","title":"Backup & Restore"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#take-backup-on-cluster-1","text":"Take backup of the PVC rbd-pvc 1 [cluster-1]$ kubectl get pvc rbd-pvc -oyaml > pvc-backup.yaml Take a backup of the PV, corresponding to the PVC 1 [cluster-1]$ kubectl get pv/pvc-65dc0aac-5e15-4474-90f4-7a3532c621ec -oyaml > pv_backup.yaml Note We can also take backup using external tools like Velero . See velero documentation for more information.","title":"Take backup on cluster-1"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#restore-the-backup-on-cluster-2","text":"Create storageclass on the secondary cluster 1 [cluster-2]$ kubectl create -f deploy/examples/csi/rbd/storageclass.yaml Create VolumeReplicationClass on the secondary cluster 1 2 [cluster-1]$ kubectl apply -f deploy/examples/volume-replication-class.yaml volumereplicationclass.replication.storage.openshift.io/rbd-volumereplicationclass created If Persistent Volumes and Claims are created manually on the secondary cluster, remove the claimRef on the backed up PV objects in yaml files; so that the PV can get bound to the new claim on the secondary cluster. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 ... spec : accessModes : - ReadWriteOnce capacity : storage : 1Gi claimRef : apiVersion : v1 kind : PersistentVolumeClaim name : rbd-pvc namespace : default resourceVersion : \"64252\" uid : 65dc0aac-5e15-4474-90f4-7a3532c621ec csi : ... Apply the Persistent Volume backup from the primary cluster 1 [cluster-2]$ kubectl create -f pv-backup.yaml Apply the Persistent Volume claim from the restored backup 1 [cluster-2]$ kubectl create -f pvc-backup.yaml 1 2 3 [cluster-2]$ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE rbd-pvc Bound pvc-65dc0aac-5e15-4474-90f4-7a3532c621ec 1Gi RWO rook-ceph-block 44s","title":"Restore the backup on cluster-2"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/","text":"There are three CSI drivers integrated with Rook that will enable different scenarios: RBD: This block storage driver is optimized for RWO pod access where only one pod may access the storage. More information . CephFS: This file storage driver allows for RWX with one or more pods accessing the same storage. More information . 
NFS (experimental): This file storage driver allows creating NFS exports that can be mounted to pods, or the exports can be mounted directly via an NFS client from inside or outside the Kubernetes cluster. More information The Ceph Filesystem (CephFS) and RADOS Block Device (RBD) drivers are enabled automatically with the Rook operator. The NFS driver is disabled by default. All drivers will be started in the same namespace as the operator when the first CephCluster CR is created. Supported Versions \u00b6 The supported Ceph CSI version is 3.3.0 or greater with Rook. Refer to ceph csi releases for more information. Static Provisioning \u00b6 Both drivers also support the creation of static PV and static PVC from existing RBD image/CephFS volume. Refer to static PVC for more information. Configure CSI Drivers in non-default namespace \u00b6 If you've deployed the Rook operator in a namespace other than \"rook-ceph\", change the prefix in the provisioner to match the namespace you used. For example, if the Rook operator is running in the namespace \"my-namespace\" the provisioner value should be \"my-namespace.rbd.csi.ceph.com\". The same provisioner name needs to be set in both the storageclass and snapshotclass. Liveness Sidecar \u00b6 All CSI pods are deployed with a sidecar container that provides a prometheus metric for tracking if the CSI plugin is alive and running. These metrics are meant to be collected by prometheus but can be accesses through a GET request to a specific node ip. for example curl -X get http://[pod ip]:[liveness-port][liveness-path] 2>/dev/null | grep csi the expected output should be 1 2 3 4 $ curl -X GET http://10.109.65.142:9080/metrics 2 >/dev/null | grep csi # HELP csi_liveness Liveness Probe # TYPE csi_liveness gauge csi_liveness 1 Check the monitoring doc to see how to integrate CSI liveness and grpc metrics into ceph monitoring. Dynamically Expand Volume \u00b6 Prerequisite \u00b6 For filesystem resize to be supported for your Kubernetes cluster, the kubernetes version running in your cluster should be >= v1.15 and for block volume resize support the Kubernetes version should be >= v1.16. Also, ExpandCSIVolumes feature gate has to be enabled for the volume resize functionality to work. To expand the PVC the controlling StorageClass must have allowVolumeExpansion set to true . csi.storage.k8s.io/controller-expand-secret-name and csi.storage.k8s.io/controller-expand-secret-namespace values set in storageclass. Now expand the PVC by editing the PVC pvc.spec.resource.requests.storage to a higher values than the current size. Once PVC is expanded on backend and same is reflected size is reflected on application mountpoint, the status capacity pvc.status.capacity.storage of PVC will be updated to new size. RBD Mirroring \u00b6 To support RBD Mirroring, the CSI-Addons sidecar will be started in the RBD provisioner pod. The CSI-Addons supports the VolumeReplication operation. The volume replication controller provides common and reusable APIs for storage disaster recovery. It is based on csi-addons/spec specification and can be used by any storage provider. It follows the controller pattern and provides extended APIs for storage disaster recovery. The extended APIs are provided via Custom Resource Definitions (CRDs). Prerequisites \u00b6 Kubernetes version 1.21 or greater is required. 
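To tie the expansion steps above to a concrete command, the storage request can be raised with a patch instead of an interactive edit; a sketch, assuming a PVC named rbd-pvc that should grow to 2Gi:

```console
kubectl patch pvc rbd-pvc --type merge \
  -p '{"spec":{"resources":{"requests":{"storage":"2Gi"}}}}'
```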
Enable CSIAddons Sidecar \u00b6 To enable the CSIAddons sidecar and deploy the controller, Please follow the steps below Ephemeral volume support \u00b6 The generic ephemeral volume feature adds support for specifying PVCs in the volumes field to indicate a user would like to create a Volume as part of the pod spec. This feature requires the GenericEphemeralVolume feature gate to be enabled. For example: 1 2 3 4 5 6 7 8 9 10 11 12 13 kind : Pod apiVersion : v1 ... volumes : - name : mypvc ephemeral : volumeClaimTemplate : spec : accessModes : [ \"ReadWriteOnce\" ] storageClassName : \"rook-ceph-block\" resources : requests : storage : 1Gi A volume claim template is defined inside the pod spec which refers to a volume provisioned and used by the pod with its lifecycle. The volumes are provisioned when pod get spawned and destroyed at time of pod delete. Refer to ephemeral-doc for more info. Also, See the example manifests for an RBD ephemeral volume and a CephFS ephemeral volume . CSI-Addons Controller \u00b6 The CSI-Addons Controller handles the requests from users to initiate an operation. Users create a CR that the controller inspects, and forwards a request to one or more CSI-Addons side-cars for execution. Deploying the controller \u00b6 Users can deploy the controller by running the following commands: 1 2 3 kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/crds.yaml kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/rbac.yaml kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/setup-controller.yaml This creates the required crds and configure permissions. Enable the CSI-Addons Sidecar \u00b6 To use the features provided by the CSI-Addons, the csi-addons containers need to be deployed in the RBD provisioner and nodeplugin pods, which are not enabled by default. Execute the following command in the cluster to enable the CSI-Addons sidecar: Update the rook-ceph-operator-config configmap and patch the following configurations 1 kubectl patch cm rook-ceph-operator-config -nrook-ceph -p $'data:\\n \"CSI_ENABLE_CSIADDONS\": \"true\"' After enabling CSI_ENABLE_CSIADDONS in the configmap, a new sidecar container with name csi-addons should now start automatically in the RBD CSI provisioner and nodeplugin pods. Note Make sure the version of ceph-csi used is v3.5.0+ . CSI-ADDONS Operation \u00b6 CSI-Addons supports the following operations: Reclaim Space Creating a ReclaimSpaceJob Creating a ReclaimSpaceCronJob Annotating PersistentVolumeClaims Annotating Namespace Network Fencing Creating a NetworkFence Volume Replication Creating VolumeReplicationClass Creating VolumeReplication CR Enable RBD Encryption Support \u00b6 Ceph-CSI supports encrypting individual RBD PersistentVolumeClaim with LUKS encryption. More details can be found here with full list of supported encryption configurations. A sample configmap can be found here . Note Rook also supports OSD encryption (see encryptedDevice option here ). Using both RBD PVC encryption and OSD encryption together will lead to double encryption and may reduce read/write performance. Unlike OSD encryption, existing ceph clusters can also enable Ceph-CSI RBD PVC encryption support and multiple kinds of encryption KMS can be used on the same ceph cluster using different storageclasses. 
Following steps demonstrate how to enable support for encryption: Create the rook-ceph-csi-kms-config configmap with required encryption configuration in the same namespace where the Rook operator is deployed. An example is shown below: 1 2 3 4 5 6 7 8 9 10 11 12 13 apiVersion : v1 kind : ConfigMap metadata : name : rook-ceph-csi-kms-config namespace : rook-ceph data : config.json : |- { \"user-secret-metadata\": { \"encryptionKMSType\": \"metadata\", \"secretName\": \"storage-encryption-secret\" } } Update the rook-ceph-operator-config configmap and patch the following configurations 1 kubectl patch cm rook-ceph-operator-config -nrook-ceph -p $'data:\\n \"CSI_ENABLE_ENCRYPTION\": \"true\"' Create necessary resources (secrets, configmaps etc) as required by the encryption type. In this case, create storage-encryption-secret secret in the namespace of pvc as shown: 1 2 3 4 5 6 7 apiVersion : v1 kind : Secret metadata : name : storage-encryption-secret namespace : rook-ceph stringData : encryptionPassphrase : test-encryption Create a new storageclass with additional parameters encrypted: \"true\" and encryptionKMSID: \"\" . An example is show below: 1 2 3 4 5 6 7 8 9 apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-ceph-block-encrypted parameters : # additional parameters required for encryption encrypted : \"true\" encryptionKMSID : \"user-secret-metadata\" # ... PVCs created using the new storageclass will be encrypted. Enable Read affinity for RBD volumes \u00b6 Ceph CSI supports mapping RBD volumes with krbd options to allow serving reads from an OSD in proximity to the client, according to OSD locations defined in the CRUSH map and topology labels on nodes. Refer to the krbd-options for more details. Execute the following steps: Patch the rook-ceph-operator-config configmap using the following command. 1 kubectl patch cm rook-ceph-operator-config -nrook-ceph -p $'data:\\n \"CSI_ENABLE_READ_AFFINITY\": \"true\"' Add topology labels to the Kubernetes nodes. The same labels may be used as mentioned in the OSD topology topic. (optional) Rook will pass the labels mentioned in osd-topology as the default set of labels. This can overridden to supply custom labels by updating the CSI_CRUSH_LOCATION_LABELS value in the rook-ceph-operator-config configmap. Ceph CSI will extract the CRUSH location from the topology labels found on the node and pass it though krbd options during mapping RBD volumes. Note This requires kernel version 5.8 or higher.","title":"Ceph CSI Drivers"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#supported-versions","text":"The supported Ceph CSI version is 3.3.0 or greater with Rook. Refer to ceph csi releases for more information.","title":"Supported Versions"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#static-provisioning","text":"Both drivers also support the creation of static PV and static PVC from existing RBD image/CephFS volume. Refer to static PVC for more information.","title":"Static Provisioning"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#configure-csi-drivers-in-non-default-namespace","text":"If you've deployed the Rook operator in a namespace other than \"rook-ceph\", change the prefix in the provisioner to match the namespace you used. For example, if the Rook operator is running in the namespace \"my-namespace\" the provisioner value should be \"my-namespace.rbd.csi.ceph.com\". 
The same provisioner name needs to be set in both the storageclass and snapshotclass.","title":"Configure CSI Drivers in non-default namespace"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#liveness-sidecar","text":"All CSI pods are deployed with a sidecar container that provides a prometheus metric for tracking if the CSI plugin is alive and running. These metrics are meant to be collected by prometheus but can be accesses through a GET request to a specific node ip. for example curl -X get http://[pod ip]:[liveness-port][liveness-path] 2>/dev/null | grep csi the expected output should be 1 2 3 4 $ curl -X GET http://10.109.65.142:9080/metrics 2 >/dev/null | grep csi # HELP csi_liveness Liveness Probe # TYPE csi_liveness gauge csi_liveness 1 Check the monitoring doc to see how to integrate CSI liveness and grpc metrics into ceph monitoring.","title":"Liveness Sidecar"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#dynamically-expand-volume","text":"","title":"Dynamically Expand Volume"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#prerequisite","text":"For filesystem resize to be supported for your Kubernetes cluster, the kubernetes version running in your cluster should be >= v1.15 and for block volume resize support the Kubernetes version should be >= v1.16. Also, ExpandCSIVolumes feature gate has to be enabled for the volume resize functionality to work. To expand the PVC the controlling StorageClass must have allowVolumeExpansion set to true . csi.storage.k8s.io/controller-expand-secret-name and csi.storage.k8s.io/controller-expand-secret-namespace values set in storageclass. Now expand the PVC by editing the PVC pvc.spec.resource.requests.storage to a higher values than the current size. Once PVC is expanded on backend and same is reflected size is reflected on application mountpoint, the status capacity pvc.status.capacity.storage of PVC will be updated to new size.","title":"Prerequisite"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#rbd-mirroring","text":"To support RBD Mirroring, the CSI-Addons sidecar will be started in the RBD provisioner pod. The CSI-Addons supports the VolumeReplication operation. The volume replication controller provides common and reusable APIs for storage disaster recovery. It is based on csi-addons/spec specification and can be used by any storage provider. It follows the controller pattern and provides extended APIs for storage disaster recovery. The extended APIs are provided via Custom Resource Definitions (CRDs).","title":"RBD Mirroring"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#prerequisites","text":"Kubernetes version 1.21 or greater is required.","title":"Prerequisites"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#enable-csiaddons-sidecar","text":"To enable the CSIAddons sidecar and deploy the controller, Please follow the steps below","title":"Enable CSIAddons Sidecar"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#ephemeral-volume-support","text":"The generic ephemeral volume feature adds support for specifying PVCs in the volumes field to indicate a user would like to create a Volume as part of the pod spec. This feature requires the GenericEphemeralVolume feature gate to be enabled. For example: 1 2 3 4 5 6 7 8 9 10 11 12 13 kind : Pod apiVersion : v1 ... 
volumes : - name : mypvc ephemeral : volumeClaimTemplate : spec : accessModes : [ \"ReadWriteOnce\" ] storageClassName : \"rook-ceph-block\" resources : requests : storage : 1Gi A volume claim template is defined inside the pod spec which refers to a volume provisioned and used by the pod with its lifecycle. The volumes are provisioned when pod get spawned and destroyed at time of pod delete. Refer to ephemeral-doc for more info. Also, See the example manifests for an RBD ephemeral volume and a CephFS ephemeral volume .","title":"Ephemeral volume support"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#csi-addons-controller","text":"The CSI-Addons Controller handles the requests from users to initiate an operation. Users create a CR that the controller inspects, and forwards a request to one or more CSI-Addons side-cars for execution.","title":"CSI-Addons Controller"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#deploying-the-controller","text":"Users can deploy the controller by running the following commands: 1 2 3 kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/crds.yaml kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/rbac.yaml kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/setup-controller.yaml This creates the required crds and configure permissions.","title":"Deploying the controller"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#enable-the-csi-addons-sidecar","text":"To use the features provided by the CSI-Addons, the csi-addons containers need to be deployed in the RBD provisioner and nodeplugin pods, which are not enabled by default. Execute the following command in the cluster to enable the CSI-Addons sidecar: Update the rook-ceph-operator-config configmap and patch the following configurations 1 kubectl patch cm rook-ceph-operator-config -nrook-ceph -p $'data:\\n \"CSI_ENABLE_CSIADDONS\": \"true\"' After enabling CSI_ENABLE_CSIADDONS in the configmap, a new sidecar container with name csi-addons should now start automatically in the RBD CSI provisioner and nodeplugin pods. Note Make sure the version of ceph-csi used is v3.5.0+ .","title":"Enable the CSI-Addons Sidecar"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#csi-addons-operation","text":"CSI-Addons supports the following operations: Reclaim Space Creating a ReclaimSpaceJob Creating a ReclaimSpaceCronJob Annotating PersistentVolumeClaims Annotating Namespace Network Fencing Creating a NetworkFence Volume Replication Creating VolumeReplicationClass Creating VolumeReplication CR","title":"CSI-ADDONS Operation"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#enable-rbd-encryption-support","text":"Ceph-CSI supports encrypting individual RBD PersistentVolumeClaim with LUKS encryption. More details can be found here with full list of supported encryption configurations. A sample configmap can be found here . Note Rook also supports OSD encryption (see encryptedDevice option here ). Using both RBD PVC encryption and OSD encryption together will lead to double encryption and may reduce read/write performance. Unlike OSD encryption, existing ceph clusters can also enable Ceph-CSI RBD PVC encryption support and multiple kinds of encryption KMS can be used on the same ceph cluster using different storageclasses. 
Following steps demonstrate how to enable support for encryption: Create the rook-ceph-csi-kms-config configmap with required encryption configuration in the same namespace where the Rook operator is deployed. An example is shown below: 1 2 3 4 5 6 7 8 9 10 11 12 13 apiVersion : v1 kind : ConfigMap metadata : name : rook-ceph-csi-kms-config namespace : rook-ceph data : config.json : |- { \"user-secret-metadata\": { \"encryptionKMSType\": \"metadata\", \"secretName\": \"storage-encryption-secret\" } } Update the rook-ceph-operator-config configmap and patch the following configurations 1 kubectl patch cm rook-ceph-operator-config -nrook-ceph -p $'data:\\n \"CSI_ENABLE_ENCRYPTION\": \"true\"' Create necessary resources (secrets, configmaps etc) as required by the encryption type. In this case, create storage-encryption-secret secret in the namespace of pvc as shown: 1 2 3 4 5 6 7 apiVersion : v1 kind : Secret metadata : name : storage-encryption-secret namespace : rook-ceph stringData : encryptionPassphrase : test-encryption Create a new storageclass with additional parameters encrypted: \"true\" and encryptionKMSID: \"\" . An example is show below: 1 2 3 4 5 6 7 8 9 apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-ceph-block-encrypted parameters : # additional parameters required for encryption encrypted : \"true\" encryptionKMSID : \"user-secret-metadata\" # ... PVCs created using the new storageclass will be encrypted.","title":"Enable RBD Encryption Support"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#enable-read-affinity-for-rbd-volumes","text":"Ceph CSI supports mapping RBD volumes with krbd options to allow serving reads from an OSD in proximity to the client, according to OSD locations defined in the CRUSH map and topology labels on nodes. Refer to the krbd-options for more details. Execute the following steps: Patch the rook-ceph-operator-config configmap using the following command. 1 kubectl patch cm rook-ceph-operator-config -nrook-ceph -p $'data:\\n \"CSI_ENABLE_READ_AFFINITY\": \"true\"' Add topology labels to the Kubernetes nodes. The same labels may be used as mentioned in the OSD topology topic. (optional) Rook will pass the labels mentioned in osd-topology as the default set of labels. This can overridden to supply custom labels by updating the CSI_CRUSH_LOCATION_LABELS value in the rook-ceph-operator-config configmap. Ceph CSI will extract the CRUSH location from the topology labels found on the node and pass it though krbd options during mapping RBD volumes. Note This requires kernel version 5.8 or higher.","title":"Enable Read affinity for RBD volumes"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/","text":"Prerequisites \u00b6 Rook officially supports v1 snapshots for Kubernetes v1.20+. Install the snapshot controller and snapshot v1 CRD as required. More info can be found here . Note If only Alpha snapshots are available, enable snapshotter in rook-ceph-operator-config or helm chart values.yaml , change the external-snapshotter image to registry.k8s.io/sig-storage/csi-snapshotter:v1.2.2 and refer to the alpha snapshots documentation VolumeSnapshot betav1 is deprecated in Kubernetes 1.20+ and removed in 1.24.0. If you still require betav1 snapshots, change the external-snapshotter image to registry.k8s.io/sig-storage/csi-snapshotter:v5.0.1 and refer to the betav1 snapshots documentation We also need a VolumeSnapshotClass for volume snapshot to work. 
The purpose of a VolumeSnapshotClass is defined in the kubernetes documentation . In short, as the documentation describes it: Info Just like StorageClass provides a way for administrators to describe the \"classes\" of storage they offer when provisioning a volume, VolumeSnapshotClass provides a way to describe the \"classes\" of storage when provisioning a volume snapshot. Upgrade Snapshot API \u00b6 If your Kubernetes version is updated to a newer version of the snapshot API, follow the upgrade guide here to upgrade from v1alpha1 to v1beta1, or v1beta1 to v1. RBD Snapshots \u00b6 VolumeSnapshotClass \u00b6 In VolumeSnapshotClass , the csi.storage.k8s.io/snapshotter-secret-name parameter should reference the name of the secret created for the rbdplugin and pool to reflect the Ceph pool name. Update the value of the clusterID field to match the namespace that Rook is running in. When Ceph CSI is deployed by Rook, the operator will automatically maintain a configmap whose contents will match this key. By default this is \"rook-ceph\". 1 kubectl create -f deploy/examples/csi/rbd/snapshotclass.yaml Volumesnapshot \u00b6 In snapshot , volumeSnapshotClassName should be the name of the VolumeSnapshotClass previously created. The persistentVolumeClaimName should be the name of the PVC which is already created by the RBD CSI driver. 1 kubectl create -f deploy/examples/csi/rbd/snapshot.yaml Verify RBD Snapshot Creation \u00b6 1 2 3 $ kubectl get volumesnapshotclass NAME DRIVER DELETIONPOLICY AGE csi-rbdplugin-snapclass rook-ceph.rbd.csi.ceph.com Delete 3h55m 1 2 3 $ kubectl get volumesnapshot NAME READYTOUSE SOURCEPVC SOURCESNAPSHOTCONTENT RESTORESIZE SNAPSHOTCLASS SNAPSHOTCONTENT CREATIONTIME AGE rbd-pvc-snapshot true rbd-pvc 1Gi csi-rbdplugin-snapclass snapcontent-79090db0-7c66-4b18-bf4a-634772c7cac7 3h50m 3h51m The snapshot will be ready to restore to a new PVC when the READYTOUSE field of the volumesnapshot is set to true. Restore the snapshot to a new PVC \u00b6 In pvc-restore , dataSource should be the name of the VolumeSnapshot previously created. The dataSource kind should be the VolumeSnapshot . Create a new PVC from the snapshot 1 kubectl create -f deploy/examples/csi/rbd/pvc-restore.yaml Verify RBD Clone PVC Creation \u00b6 1 2 3 4 $ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE rbd-pvc Bound pvc-84294e34-577a-11e9-b34f-525400581048 1Gi RWO rook-ceph-block 34m rbd-pvc-restore Bound pvc-575537bf-577f-11e9-b34f-525400581048 1Gi RWO rook-ceph-block 8s RBD snapshot resource Cleanup \u00b6 To clean your cluster of the resources created by this example, run the following: 1 2 3 kubectl delete -f deploy/examples/csi/rbd/pvc-restore.yaml kubectl delete -f deploy/examples/csi/rbd/snapshot.yaml kubectl delete -f deploy/examples/csi/rbd/snapshotclass.yaml CephFS Snapshots \u00b6 VolumeSnapshotClass \u00b6 In VolumeSnapshotClass , the csi.storage.k8s.io/snapshotter-secret-name parameter should reference the name of the secret created for the cephfsplugin. In the volumesnapshotclass, update the value of the clusterID field to match the namespace that Rook is running in. When Ceph CSI is deployed by Rook, the operator will automatically maintain a configmap whose contents will match this key. By default this is \"rook-ceph\". 1 kubectl create -f deploy/examples/csi/cephfs/snapshotclass.yaml VolumeSnapshot \u00b6 In snapshot , volumeSnapshotClassName should be the name of the VolumeSnapshotClass previously created. 
The persistentVolumeClaimName should be the name of the PVC which is already created by the CephFS CSI driver. 1 kubectl create -f deploy/examples/csi/cephfs/snapshot.yaml Verify CephFS Snapshot Creation \u00b6 1 2 3 $ kubectl get volumesnapshotclass NAME DRIVER DELETIONPOLICY AGE csi-cephfslugin-snapclass rook-ceph.cephfs.csi.ceph.com Delete 3h55m 1 2 3 $ kubectl get volumesnapshot NAME READYTOUSE SOURCEPVC SOURCESNAPSHOTCONTENT RESTORESIZE SNAPSHOTCLASS SNAPSHOTCONTENT CREATIONTIME AGE cephfs-pvc-snapshot true cephfs-pvc 1Gi csi-cephfsplugin-snapclass snapcontent-34476204-a14a-4d59-bfbc-2bbba695652c 3h50m 3h51m The snapshot will be ready to restore to a new PVC when READYTOUSE field of the volumesnapshot is set to true. Restore the snapshot to a new PVC \u00b6 In pvc-restore , dataSource should be the name of the VolumeSnapshot previously created. The dataSource kind should be the VolumeSnapshot . Create a new PVC from the snapshot 1 kubectl create -f deploy/examples/csi/cephfs/pvc-restore.yaml Verify CephFS Restore PVC Creation \u00b6 1 2 3 4 $ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE cephfs-pvc Bound pvc-74734901-577a-11e9-b34f-525400581048 1Gi RWX rook-cephfs 55m cephfs-pvc-restore Bound pvc-95308c75-6c93-4928-a551-6b5137192209 1Gi RWX rook-cephfs 34s CephFS snapshot resource Cleanup \u00b6 To clean your cluster of the resources created by this example, run the following: 1 2 3 kubectl delete -f deploy/examples/csi/cephfs/pvc-restore.yaml kubectl delete -f deploy/examples/csi/cephfs/snapshot.yaml kubectl delete -f deploy/examples/csi/cephfs/snapshotclass.yaml Limitations \u00b6 There is a limit of 400 snapshots per cephFS filesystem. The PVC cannot be deleted if it has snapshots. make sure all the snapshots on the PVC are deleted before you delete the PVC.","title":"Snapshots"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#prerequisites","text":"Rook officially supports v1 snapshots for Kubernetes v1.20+. Install the snapshot controller and snapshot v1 CRD as required. More info can be found here . Note If only Alpha snapshots are available, enable snapshotter in rook-ceph-operator-config or helm chart values.yaml , change the external-snapshotter image to registry.k8s.io/sig-storage/csi-snapshotter:v1.2.2 and refer to the alpha snapshots documentation VolumeSnapshot betav1 is deprecated in Kubernetes 1.20+ and removed in 1.24.0. If you still require betav1 snapshots, change the external-snapshotter image to registry.k8s.io/sig-storage/csi-snapshotter:v5.0.1 and refer to the betav1 snapshots documentation We also need a VolumeSnapshotClass for volume snapshot to work. The purpose of a VolumeSnapshotClass is defined in the kubernetes documentation . 
In short, as the documentation describes it: Info Just like StorageClass provides a way for administrators to describe the \"classes\" of storage they offer when provisioning a volume, VolumeSnapshotClass provides a way to describe the \"classes\" of storage when provisioning a volume snapshot.","title":"Prerequisites"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#upgrade-snapshot-api","text":"If your Kubernetes version is updated to a newer version of the snapshot API, follow the upgrade guide here to upgrade from v1alpha1 to v1beta1, or v1beta1 to v1.","title":"Upgrade Snapshot API"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#rbd-snapshots","text":"","title":"RBD Snapshots"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#volumesnapshotclass","text":"In VolumeSnapshotClass , the csi.storage.k8s.io/snapshotter-secret-name parameter should reference the name of the secret created for the rbdplugin and pool to reflect the Ceph pool name. Update the value of the clusterID field to match the namespace that Rook is running in. When Ceph CSI is deployed by Rook, the operator will automatically maintain a configmap whose contents will match this key. By default this is \"rook-ceph\". 1 kubectl create -f deploy/examples/csi/rbd/snapshotclass.yaml","title":"VolumeSnapshotClass"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#volumesnapshot","text":"In snapshot , volumeSnapshotClassName should be the name of the VolumeSnapshotClass previously created. The persistentVolumeClaimName should be the name of the PVC which is already created by the RBD CSI driver. 1 kubectl create -f deploy/examples/csi/rbd/snapshot.yaml","title":"Volumesnapshot"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#verify-rbd-snapshot-creation","text":"1 2 3 $ kubectl get volumesnapshotclass NAME DRIVER DELETIONPOLICY AGE csi-rbdplugin-snapclass rook-ceph.rbd.csi.ceph.com Delete 3h55m 1 2 3 $ kubectl get volumesnapshot NAME READYTOUSE SOURCEPVC SOURCESNAPSHOTCONTENT RESTORESIZE SNAPSHOTCLASS SNAPSHOTCONTENT CREATIONTIME AGE rbd-pvc-snapshot true rbd-pvc 1Gi csi-rbdplugin-snapclass snapcontent-79090db0-7c66-4b18-bf4a-634772c7cac7 3h50m 3h51m The snapshot will be ready to restore to a new PVC when the READYTOUSE field of the volumesnapshot is set to true.","title":"Verify RBD Snapshot Creation"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#restore-the-snapshot-to-a-new-pvc","text":"In pvc-restore , dataSource should be the name of the VolumeSnapshot previously created. The dataSource kind should be the VolumeSnapshot . 
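The pvc-restore manifest itself is not shown on this page; the following is a minimal sketch of what deploy/examples/csi/rbd/pvc-restore.yaml might look like, assuming the names used in the verification output above (rbd-pvc-snapshot, rook-ceph-block, a 1Gi request):

```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: rbd-pvc-restore
spec:
  storageClassName: rook-ceph-block
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
  dataSource:
    # restore from the VolumeSnapshot created earlier
    name: rbd-pvc-snapshot
    kind: VolumeSnapshot
    apiGroup: snapshot.storage.k8s.io
```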
Create a new PVC from the snapshot 1 kubectl create -f deploy/examples/csi/rbd/pvc-restore.yaml","title":"Restore the snapshot to a new PVC"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#verify-rbd-clone-pvc-creation","text":"1 2 3 4 $ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE rbd-pvc Bound pvc-84294e34-577a-11e9-b34f-525400581048 1Gi RWO rook-ceph-block 34m rbd-pvc-restore Bound pvc-575537bf-577f-11e9-b34f-525400581048 1Gi RWO rook-ceph-block 8s","title":"Verify RBD Clone PVC Creation"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#rbd-snapshot-resource-cleanup","text":"To clean your cluster of the resources created by this example, run the following: 1 2 3 kubectl delete -f deploy/examples/csi/rbd/pvc-restore.yaml kubectl delete -f deploy/examples/csi/rbd/snapshot.yaml kubectl delete -f deploy/examples/csi/rbd/snapshotclass.yaml","title":"RBD snapshot resource Cleanup"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#cephfs-snapshots","text":"","title":"CephFS Snapshots"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#volumesnapshotclass_1","text":"In VolumeSnapshotClass , the csi.storage.k8s.io/snapshotter-secret-name parameter should reference the name of the secret created for the cephfsplugin. In the volumesnapshotclass, update the value of the clusterID field to match the namespace that Rook is running in. When Ceph CSI is deployed by Rook, the operator will automatically maintain a configmap whose contents will match this key. By default this is \"rook-ceph\". 1 kubectl create -f deploy/examples/csi/cephfs/snapshotclass.yaml","title":"VolumeSnapshotClass"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#volumesnapshot_1","text":"In snapshot , volumeSnapshotClassName should be the name of the VolumeSnapshotClass previously created. The persistentVolumeClaimName should be the name of the PVC which is already created by the CephFS CSI driver. 1 kubectl create -f deploy/examples/csi/cephfs/snapshot.yaml","title":"VolumeSnapshot"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#verify-cephfs-snapshot-creation","text":"1 2 3 $ kubectl get volumesnapshotclass NAME DRIVER DELETIONPOLICY AGE csi-cephfslugin-snapclass rook-ceph.cephfs.csi.ceph.com Delete 3h55m 1 2 3 $ kubectl get volumesnapshot NAME READYTOUSE SOURCEPVC SOURCESNAPSHOTCONTENT RESTORESIZE SNAPSHOTCLASS SNAPSHOTCONTENT CREATIONTIME AGE cephfs-pvc-snapshot true cephfs-pvc 1Gi csi-cephfsplugin-snapclass snapcontent-34476204-a14a-4d59-bfbc-2bbba695652c 3h50m 3h51m The snapshot will be ready to restore to a new PVC when READYTOUSE field of the volumesnapshot is set to true.","title":"Verify CephFS Snapshot Creation"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#restore-the-snapshot-to-a-new-pvc_1","text":"In pvc-restore , dataSource should be the name of the VolumeSnapshot previously created. The dataSource kind should be the VolumeSnapshot . 
Create a new PVC from the snapshot 1 kubectl create -f deploy/examples/csi/cephfs/pvc-restore.yaml","title":"Restore the snapshot to a new PVC"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#verify-cephfs-restore-pvc-creation","text":"1 2 3 4 $ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE cephfs-pvc Bound pvc-74734901-577a-11e9-b34f-525400581048 1Gi RWX rook-cephfs 55m cephfs-pvc-restore Bound pvc-95308c75-6c93-4928-a551-6b5137192209 1Gi RWX rook-cephfs 34s","title":"Verify CephFS Restore PVC Creation"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#cephfs-snapshot-resource-cleanup","text":"To clean your cluster of the resources created by this example, run the following: 1 2 3 kubectl delete -f deploy/examples/csi/cephfs/pvc-restore.yaml kubectl delete -f deploy/examples/csi/cephfs/snapshot.yaml kubectl delete -f deploy/examples/csi/cephfs/snapshotclass.yaml","title":"CephFS snapshot resource Cleanup"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#limitations","text":"There is a limit of 400 snapshots per cephFS filesystem. The PVC cannot be deleted if it has snapshots. make sure all the snapshots on the PVC are deleted before you delete the PVC.","title":"Limitations"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/","text":"The CSI Volume Cloning feature adds support for specifying existing PVCs in the dataSource field to indicate a user would like to clone a Volume. A Clone is defined as a duplicate of an existing Kubernetes Volume that can be consumed as any standard Volume would be. The only difference is that upon provisioning, rather than creating a \"new\" empty Volume, the back end device creates an exact duplicate of the specified Volume. Refer to clone-doc for more info. RBD Volume Cloning \u00b6 Volume Clone Prerequisites \u00b6 Requires Kubernetes v1.16+ which supports volume clone. Ceph-csi diver v3.0.0+ which supports volume clone. Volume Cloning \u00b6 In pvc-clone , dataSource should be the name of the PVC which is already created by RBD CSI driver. The dataSource kind should be the PersistentVolumeClaim and also storageclass should be same as the source PVC . Create a new PVC Clone from the PVC 1 kubectl create -f deploy/examples/csi/rbd/pvc-clone.yaml Verify RBD volume Clone PVC Creation \u00b6 1 kubectl get pvc 1 2 3 NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE rbd-pvc Bound pvc-74734901-577a-11e9-b34f-525400581048 1Gi >RWO rook-ceph-block 34m rbd-pvc-clone Bound pvc-70473135-577f-11e9-b34f-525400581048 1Gi RWO rook-ceph-block 8s RBD clone resource Cleanup \u00b6 To clean your cluster of the resources created by this example, run the following: 1 kubectl delete -f deploy/examples/csi/rbd/pvc-clone.yaml CephFS Volume Cloning \u00b6 Volume Clone Prerequisites \u00b6 Requires Kubernetes v1.16+ which supports volume clone. Ceph-csi diver v3.1.0+ which supports volume clone. Volume Cloning \u00b6 In pvc-clone , dataSource should be the name of the PVC which is already created by CephFS CSI driver. The dataSource kind should be the PersistentVolumeClaim and also storageclass should be same as the source PVC . 
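A minimal sketch of what deploy/examples/csi/cephfs/pvc-clone.yaml might contain, assuming the source PVC cephfs-pvc and the rook-cephfs storageclass shown in the verification output below:

```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: cephfs-pvc-clone
spec:
  # the storageclass must match that of the source PVC
  storageClassName: rook-cephfs
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Gi
  dataSource:
    # clone from the existing PVC created by the CephFS CSI driver
    name: cephfs-pvc
    kind: PersistentVolumeClaim
```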
Create a new PVC Clone from the PVC 1 kubectl create -f deploy/examples/csi/cephfs/pvc-clone.yaml Verify CephFS volume Clone PVC Creation \u00b6 1 kubectl get pvc 1 2 3 NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE cephfs-pvc Bound pvc-1ea51547-a88b-4ab0-8b4a-812caeaf025d 1Gi RWX rook-cephfs 39m cephfs-pvc-clone Bound pvc-b575bc35-d521-4c41-b4f9-1d733cd28fdf 1Gi RWX rook-cephfs 8s CephFS clone resource Cleanup \u00b6 To clean your cluster of the resources created by this example, run the following: 1 kubectl delete -f deploy/examples/csi/cephfs/pvc-clone.yaml","title":"Volume clone"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/#rbd-volume-cloning","text":"","title":"RBD Volume Cloning"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/#volume-clone-prerequisites","text":"Requires Kubernetes v1.16+ which supports volume clone. Ceph-csi diver v3.0.0+ which supports volume clone.","title":"Volume Clone Prerequisites"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/#volume-cloning","text":"In pvc-clone , dataSource should be the name of the PVC which is already created by RBD CSI driver. The dataSource kind should be the PersistentVolumeClaim and also storageclass should be same as the source PVC . Create a new PVC Clone from the PVC 1 kubectl create -f deploy/examples/csi/rbd/pvc-clone.yaml","title":"Volume Cloning"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/#verify-rbd-volume-clone-pvc-creation","text":"1 kubectl get pvc 1 2 3 NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE rbd-pvc Bound pvc-74734901-577a-11e9-b34f-525400581048 1Gi >RWO rook-ceph-block 34m rbd-pvc-clone Bound pvc-70473135-577f-11e9-b34f-525400581048 1Gi RWO rook-ceph-block 8s","title":"Verify RBD volume Clone PVC Creation"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/#rbd-clone-resource-cleanup","text":"To clean your cluster of the resources created by this example, run the following: 1 kubectl delete -f deploy/examples/csi/rbd/pvc-clone.yaml","title":"RBD clone resource Cleanup"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/#cephfs-volume-cloning","text":"","title":"CephFS Volume Cloning"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/#volume-clone-prerequisites_1","text":"Requires Kubernetes v1.16+ which supports volume clone. Ceph-csi diver v3.1.0+ which supports volume clone.","title":"Volume Clone Prerequisites"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/#volume-cloning_1","text":"In pvc-clone , dataSource should be the name of the PVC which is already created by CephFS CSI driver. The dataSource kind should be the PersistentVolumeClaim and also storageclass should be same as the source PVC . 
Create a new PVC Clone from the PVC 1 kubectl create -f deploy/examples/csi/cephfs/pvc-clone.yaml","title":"Volume Cloning"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/#verify-cephfs-volume-clone-pvc-creation","text":"1 kubectl get pvc 1 2 3 NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE cephfs-pvc Bound pvc-1ea51547-a88b-4ab0-8b4a-812caeaf025d 1Gi RWX rook-cephfs 39m cephfs-pvc-clone Bound pvc-b575bc35-d521-4c41-b4f9-1d733cd28fdf 1Gi RWX rook-cephfs 8s","title":"Verify CephFS volume Clone PVC Creation"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/#cephfs-clone-resource-cleanup","text":"To clean your cluster of the resources created by this example, run the following: 1 kubectl delete -f deploy/examples/csi/cephfs/pvc-clone.yaml","title":"CephFS clone resource Cleanup"},{"location":"Storage-Configuration/Ceph-CSI/custom-images/","text":"By default, Rook will deploy the latest stable version of the Ceph CSI driver. Commonly, there is no need to change this default version that is deployed. For scenarios that require deploying a custom image (e.g. downstream releases), the defaults can be overridden with the following settings. The CSI configuration variables are found in the rook-ceph-operator-config ConfigMap. These settings can also be specified as environment variables on the operator deployment, though the configmap values will override the env vars if both are specified. 1 kubectl -n $ROOK_OPERATOR_NAMESPACE edit configmap rook-ceph-operator-config The default upstream images are included below, which you can change to your desired images. 1 2 3 4 5 6 7 ROOK_CSI_CEPH_IMAGE : \"quay.io/cephcsi/cephcsi:v3.9.0\" ROOK_CSI_REGISTRAR_IMAGE : \"registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.8.0\" ROOK_CSI_PROVISIONER_IMAGE : \"registry.k8s.io/sig-storage/csi-provisioner:v3.5.0\" ROOK_CSI_ATTACHER_IMAGE : \"registry.k8s.io/sig-storage/csi-attacher:v4.3.0\" ROOK_CSI_RESIZER_IMAGE : \"registry.k8s.io/sig-storage/csi-resizer:v1.8.0\" ROOK_CSI_SNAPSHOTTER_IMAGE : \"registry.k8s.io/sig-storage/csi-snapshotter:v6.2.2\" ROOK_CSIADDONS_IMAGE : \"quay.io/csiaddons/k8s-sidecar:v0.7.0\" Use private repository \u00b6 If image version is not passed along with the image name in any of the variables above, Rook will add the corresponding default version to that image. Example: if ROOK_CSI_CEPH_IMAGE: \"quay.io/private-repo/cephcsi\" is passed, Rook will add internal default version and consume it as \"quay.io/private-repo/cephcsi:v3.9.0\" . Use default images \u00b6 If you would like Rook to use the default upstream images, then you may simply remove all variables matching ROOK_CSI_*_IMAGE from the above ConfigMap and/or the operator deployment. Verifying updates \u00b6 You can use the below command to see the CSI images currently being used in the cluster. Note that not all images (like volumereplication-operator ) may be present in every cluster depending on which CSI features are enabled. 
1 kubectl --namespace rook-ceph get pod -o jsonpath='{range .items[*]}{range .spec.containers[*]}{.image}{\"\\n\"}' -l 'app in (csi-rbdplugin,csi-rbdplugin-provisioner,csi-cephfsplugin,csi-cephfsplugin-provisioner)' | sort | uniq The default images can also be found with each release in the images list","title":"Custom Images"},{"location":"Storage-Configuration/Ceph-CSI/custom-images/#use-private-repository","text":"If image version is not passed along with the image name in any of the variables above, Rook will add the corresponding default version to that image. Example: if ROOK_CSI_CEPH_IMAGE: \"quay.io/private-repo/cephcsi\" is passed, Rook will add internal default version and consume it as \"quay.io/private-repo/cephcsi:v3.9.0\" .","title":"Use private repository"},{"location":"Storage-Configuration/Ceph-CSI/custom-images/#use-default-images","text":"If you would like Rook to use the default upstream images, then you may simply remove all variables matching ROOK_CSI_*_IMAGE from the above ConfigMap and/or the operator deployment.","title":"Use default images"},{"location":"Storage-Configuration/Ceph-CSI/custom-images/#verifying-updates","text":"You can use the below command to see the CSI images currently being used in the cluster. Note that not all images (like volumereplication-operator ) may be present in every cluster depending on which CSI features are enabled. 1 kubectl --namespace rook-ceph get pod -o jsonpath='{range .items[*]}{range .spec.containers[*]}{.image}{\"\\n\"}' -l 'app in (csi-rbdplugin,csi-rbdplugin-provisioner,csi-cephfsplugin,csi-cephfsplugin-provisioner)' | sort | uniq The default images can also be found with each release in the images list","title":"Verifying updates"},{"location":"Storage-Configuration/Monitoring/ceph-dashboard/","text":"The dashboard is a very helpful tool to give you an overview of the status of your Ceph cluster, including overall health, status of the mon quorum, status of the mgr, osd, and other Ceph daemons, view pools and PG status, show logs for the daemons, and more. Rook makes it simple to enable the dashboard. Enable the Ceph Dashboard \u00b6 The dashboard can be enabled with settings in the CephCluster CRD. The CephCluster CRD must have the dashboard enabled setting set to true . This is the default setting in the example manifests. 1 2 3 4 [ ... ] spec : dashboard : enabled : true The Rook operator will enable the ceph-mgr dashboard module. A service object will be created to expose that port inside the Kubernetes cluster. Rook will enable port 8443 for https access. This example shows that port 8443 was configured. 1 2 3 4 $ kubectl -n rook-ceph get service NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-mgr ClusterIP 10.108.111.192  9283/TCP 3h rook-ceph-mgr-dashboard ClusterIP 10.110.113.240  8443/TCP 3h The first service is for reporting the Prometheus metrics , while the latter service is for the dashboard. If you are on a node in the cluster, you will be able to connect to the dashboard by using either the DNS name of the service at https://rook-ceph-mgr-dashboard-https:8443 or by connecting to the cluster IP, in this example at https://10.110.113.240:8443 . Login Credentials \u00b6 After you connect to the dashboard you will need to login for secure access. Rook creates a default user named admin and generates a secret called rook-ceph-dashboard-password in the namespace where the Rook Ceph cluster is running. 
To retrieve the generated password, you can run the following: 1 kubectl -n rook-ceph get secret rook-ceph-dashboard-password -o jsonpath=\"{['data']['password']}\" | base64 --decode && echo Configure the Dashboard \u00b6 The following dashboard configuration settings are supported: 1 2 3 4 5 spec : dashboard : urlPrefix : /ceph-dashboard port : 8443 ssl : true urlPrefix If you are accessing the dashboard via a reverse proxy, you may wish to serve it under a URL prefix. To get the dashboard to use hyperlinks that include your prefix, you can set the urlPrefix setting. port The port that the dashboard is served on may be changed from the default using the port setting. The corresponding K8s service exposing the port will automatically be updated. ssl The dashboard may be served without SSL (useful for when you deploy the dashboard behind a proxy already served using SSL) by setting the ssl option to be false. Visualization of 'Physical Disks' section in the dashboard \u00b6 Information about physical disks is available only in Rook host clusters . The Rook manager module is required by the dashboard to obtain the information about physical disks, but it is disabled by default. Before it is enabled, the dashboard 'Physical Disks' section will show an error message. To prepare the Rook manager module to be used in the dashboard, modify your Ceph Cluster CRD: 1 2 3 4 mgr : modules : - name : rook enabled : true And apply the changes: 1 $ kubectl apply -f cluster.yaml Once the Rook manager module is enabled as the orchestrator backend, there are two settings required for showing disk information: ROOK_ENABLE_DISCOVERY_DAEMON : Set to true to provide the dashboard the information about physical disks. The default is false . ROOK_DISCOVER_DEVICES_INTERVAL : The interval for changes to be refreshed in the set of physical disks in the cluster. The default is 60 minutes. Modify the operator.yaml, and apply the changes: 1 $ kubectl apply -f operator.yaml Viewing the Dashboard External to the Cluster \u00b6 Commonly you will want to view the dashboard from outside the cluster. For example, on a development machine with the cluster running inside minikube you will want to access the dashboard from the host. There are several ways to expose a service that will depend on the environment you are running in. You can use an Ingress Controller or other methods for exposing services such as NodePort, LoadBalancer, or ExternalIPs. Node Port \u00b6 The simplest way to expose the service in minikube or similar environment is using the NodePort to open a port on the VM that can be accessed by the host. To create a service with the NodePort, save this yaml as dashboard-external-https.yaml . 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 apiVersion : v1 kind : Service metadata : name : rook-ceph-mgr-dashboard-external-https namespace : rook-ceph labels : app : rook-ceph-mgr rook_cluster : rook-ceph spec : ports : - name : dashboard port : 8443 protocol : TCP targetPort : 8443 selector : app : rook-ceph-mgr rook_cluster : rook-ceph sessionAffinity : None type : NodePort Now create the service: 1 kubectl create -f dashboard-external-https.yaml You will see the new service rook-ceph-mgr-dashboard-external-https created: 1 2 3 4 5 $ kubectl -n rook-ceph get service NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-mgr ClusterIP 10.108.111.192  9283/TCP 4h rook-ceph-mgr-dashboard ClusterIP 10.110.113.240  8443/TCP 4h rook-ceph-mgr-dashboard-external-https NodePort 10.101.209.6  8443:31176/TCP 4h In this example, port 31176 will be opened to expose port 8443 from the ceph-mgr pod. Find the ip address of the VM. If using minikube, you can run minikube ip to find the ip address. Now you can enter the URL in your browser such as https://192.168.99.110:31176 and the dashboard will appear. Load Balancer \u00b6 If you have a cluster on a cloud provider that supports load balancers, you can create a service that is provisioned with a public hostname. The yaml is the same as dashboard-external-https.yaml except for the following property: 1 2 3 spec : [ ... ] type : LoadBalancer Now create the service: 1 kubectl create -f dashboard-loadbalancer.yaml You will see the new service rook-ceph-mgr-dashboard-loadbalancer created: 1 2 3 4 5 $ kubectl -n rook-ceph get service NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-mgr ClusterIP 172.30.11.40  9283/TCP 4h rook-ceph-mgr-dashboard ClusterIP 172.30.203.185  8443/TCP 4h rook-ceph-mgr-dashboard-loadbalancer LoadBalancer 172.30.27.242 a7f23e8e2839511e9b7a5122b08f2038-1251669398.us-east-1.elb.amazonaws.com 8443:32747/TCP 4h Now you can enter the URL in your browser such as https://a7f23e8e2839511e9b7a5122b08f2038-1251669398.us-east-1.elb.amazonaws.com:8443 and the dashboard will appear. Ingress Controller \u00b6 If you have a cluster with an nginx Ingress Controller and a Certificate Manager (e.g. cert-manager ) then you can create an Ingress like the one below. This example achieves four things: Exposes the dashboard on the Internet (using a reverse proxy) Issues a valid TLS Certificate for the specified domain name (using ACME ) Tells the reverse proxy that the dashboard itself uses HTTPS Tells the reverse proxy that the dashboard itself does not have a valid certificate (it is self-signed) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 apiVersion : networking.k8s.io/v1 kind : Ingress metadata : name : rook-ceph-mgr-dashboard namespace : rook-ceph annotations : kubernetes.io/tls-acme : \"true\" nginx.ingress.kubernetes.io/backend-protocol : \"HTTPS\" nginx.ingress.kubernetes.io/server-snippet : | proxy_ssl_verify off; spec : ingressClassName : \"nginx\" tls : - hosts : - rook-ceph.example.com secretName : rook-ceph.example.com rules : - host : rook-ceph.example.com http : paths : - path : / pathType : Prefix backend : service : name : rook-ceph-mgr-dashboard port : name : https-dashboard Customise the Ingress resource to match your cluster. Replace the example domain name rook-ceph.example.com with a domain name that will resolve to your Ingress Controller (creating the DNS entry if required). 
Now create the Ingress: 1 kubectl create -f dashboard-ingress-https.yaml You will see the new Ingress rook-ceph-mgr-dashboard created: 1 2 3 $ kubectl -n rook-ceph get ingress NAME HOSTS ADDRESS PORTS AGE rook-ceph-mgr-dashboard rook-ceph.example.com 80, 443 5m And the new Secret for the TLS certificate: 1 2 3 kubectl -n rook-ceph get secret rook-ceph.example.com NAME TYPE DATA AGE rook-ceph.example.com kubernetes.io/tls 2 4m You can now browse to https://rook-ceph.example.com/ to log into the dashboard.","title":"Ceph Dashboard"},{"location":"Storage-Configuration/Monitoring/ceph-dashboard/#enable-the-ceph-dashboard","text":"The dashboard can be enabled with settings in the CephCluster CRD. The CephCluster CRD must have the dashboard enabled setting set to true . This is the default setting in the example manifests. 1 2 3 4 [ ... ] spec : dashboard : enabled : true The Rook operator will enable the ceph-mgr dashboard module. A service object will be created to expose that port inside the Kubernetes cluster. Rook will enable port 8443 for https access. This example shows that port 8443 was configured. 1 2 3 4 $ kubectl -n rook-ceph get service NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-mgr ClusterIP 10.108.111.192  9283/TCP 3h rook-ceph-mgr-dashboard ClusterIP 10.110.113.240  8443/TCP 3h The first service is for reporting the Prometheus metrics , while the latter service is for the dashboard. If you are on a node in the cluster, you will be able to connect to the dashboard by using either the DNS name of the service at https://rook-ceph-mgr-dashboard-https:8443 or by connecting to the cluster IP, in this example at https://10.110.113.240:8443 .","title":"Enable the Ceph Dashboard"},{"location":"Storage-Configuration/Monitoring/ceph-dashboard/#login-credentials","text":"After you connect to the dashboard you will need to login for secure access. Rook creates a default user named admin and generates a secret called rook-ceph-dashboard-password in the namespace where the Rook Ceph cluster is running. To retrieve the generated password, you can run the following: 1 kubectl -n rook-ceph get secret rook-ceph-dashboard-password -o jsonpath=\"{['data']['password']}\" | base64 --decode && echo","title":"Login Credentials"},{"location":"Storage-Configuration/Monitoring/ceph-dashboard/#configure-the-dashboard","text":"The following dashboard configuration settings are supported: 1 2 3 4 5 spec : dashboard : urlPrefix : /ceph-dashboard port : 8443 ssl : true urlPrefix If you are accessing the dashboard via a reverse proxy, you may wish to serve it under a URL prefix. To get the dashboard to use hyperlinks that include your prefix, you can set the urlPrefix setting. port The port that the dashboard is served on may be changed from the default using the port setting. The corresponding K8s service exposing the port will automatically be updated. ssl The dashboard may be served without SSL (useful for when you deploy the dashboard behind a proxy already served using SSL) by setting the ssl option to be false.","title":"Configure the Dashboard"},{"location":"Storage-Configuration/Monitoring/ceph-dashboard/#visualization-of-physical-disks-section-in-the-dashboard","text":"Information about physical disks is available only in Rook host clusters . The Rook manager module is required by the dashboard to obtain the information about physical disks, but it is disabled by default. Before it is enabled, the dashboard 'Physical Disks' section will show an error message. 
To prepare the Rook manager module to be used in the dashboard, modify your Ceph Cluster CRD: 1 2 3 4 mgr : modules : - name : rook enabled : true And apply the changes: 1 $ kubectl apply -f cluster.yaml Once the Rook manager module is enabled as the orchestrator backend, there are two settings required for showing disk information: ROOK_ENABLE_DISCOVERY_DAEMON : Set to true to provide the dashboard the information about physical disks. The default is false . ROOK_DISCOVER_DEVICES_INTERVAL : The interval for changes to be refreshed in the set of physical disks in the cluster. The default is 60 minutes. Modify the operator.yaml, and apply the changes: 1 $ kubectl apply -f operator.yaml","title":"Visualization of 'Physical Disks' section in the dashboard"},{"location":"Storage-Configuration/Monitoring/ceph-dashboard/#viewing-the-dashboard-external-to-the-cluster","text":"Commonly you will want to view the dashboard from outside the cluster. For example, on a development machine with the cluster running inside minikube you will want to access the dashboard from the host. There are several ways to expose a service that will depend on the environment you are running in. You can use an Ingress Controller or other methods for exposing services such as NodePort, LoadBalancer, or ExternalIPs.","title":"Viewing the Dashboard External to the Cluster"},{"location":"Storage-Configuration/Monitoring/ceph-dashboard/#node-port","text":"The simplest way to expose the service in minikube or similar environment is using the NodePort to open a port on the VM that can be accessed by the host. To create a service with the NodePort, save this yaml as dashboard-external-https.yaml . 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 apiVersion : v1 kind : Service metadata : name : rook-ceph-mgr-dashboard-external-https namespace : rook-ceph labels : app : rook-ceph-mgr rook_cluster : rook-ceph spec : ports : - name : dashboard port : 8443 protocol : TCP targetPort : 8443 selector : app : rook-ceph-mgr rook_cluster : rook-ceph sessionAffinity : None type : NodePort Now create the service: 1 kubectl create -f dashboard-external-https.yaml You will see the new service rook-ceph-mgr-dashboard-external-https created: 1 2 3 4 5 $ kubectl -n rook-ceph get service NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-mgr ClusterIP 10.108.111.192  9283/TCP 4h rook-ceph-mgr-dashboard ClusterIP 10.110.113.240  8443/TCP 4h rook-ceph-mgr-dashboard-external-https NodePort 10.101.209.6  8443:31176/TCP 4h In this example, port 31176 will be opened to expose port 8443 from the ceph-mgr pod. Find the ip address of the VM. If using minikube, you can run minikube ip to find the ip address. Now you can enter the URL in your browser such as https://192.168.99.110:31176 and the dashboard will appear.","title":"Node Port"},{"location":"Storage-Configuration/Monitoring/ceph-dashboard/#load-balancer","text":"If you have a cluster on a cloud provider that supports load balancers, you can create a service that is provisioned with a public hostname. The yaml is the same as dashboard-external-https.yaml except for the following property: 1 2 3 spec : [ ... 
] type : LoadBalancer Now create the service: 1 kubectl create -f dashboard-loadbalancer.yaml You will see the new service rook-ceph-mgr-dashboard-loadbalancer created: 1 2 3 4 5 $ kubectl -n rook-ceph get service NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-mgr ClusterIP 172.30.11.40  9283/TCP 4h rook-ceph-mgr-dashboard ClusterIP 172.30.203.185  8443/TCP 4h rook-ceph-mgr-dashboard-loadbalancer LoadBalancer 172.30.27.242 a7f23e8e2839511e9b7a5122b08f2038-1251669398.us-east-1.elb.amazonaws.com 8443:32747/TCP 4h Now you can enter the URL in your browser such as https://a7f23e8e2839511e9b7a5122b08f2038-1251669398.us-east-1.elb.amazonaws.com:8443 and the dashboard will appear.","title":"Load Balancer"},{"location":"Storage-Configuration/Monitoring/ceph-dashboard/#ingress-controller","text":"If you have a cluster with an nginx Ingress Controller and a Certificate Manager (e.g. cert-manager ) then you can create an Ingress like the one below. This example achieves four things: Exposes the dashboard on the Internet (using a reverse proxy) Issues a valid TLS Certificate for the specified domain name (using ACME ) Tells the reverse proxy that the dashboard itself uses HTTPS Tells the reverse proxy that the dashboard itself does not have a valid certificate (it is self-signed) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 apiVersion : networking.k8s.io/v1 kind : Ingress metadata : name : rook-ceph-mgr-dashboard namespace : rook-ceph annotations : kubernetes.io/tls-acme : \"true\" nginx.ingress.kubernetes.io/backend-protocol : \"HTTPS\" nginx.ingress.kubernetes.io/server-snippet : | proxy_ssl_verify off; spec : ingressClassName : \"nginx\" tls : - hosts : - rook-ceph.example.com secretName : rook-ceph.example.com rules : - host : rook-ceph.example.com http : paths : - path : / pathType : Prefix backend : service : name : rook-ceph-mgr-dashboard port : name : https-dashboard Customise the Ingress resource to match your cluster. Replace the example domain name rook-ceph.example.com with a domain name that will resolve to your Ingress Controller (creating the DNS entry if required). Now create the Ingress: 1 kubectl create -f dashboard-ingress-https.yaml You will see the new Ingress rook-ceph-mgr-dashboard created: 1 2 3 $ kubectl -n rook-ceph get ingress NAME HOSTS ADDRESS PORTS AGE rook-ceph-mgr-dashboard rook-ceph.example.com 80, 443 5m And the new Secret for the TLS certificate: 1 2 3 kubectl -n rook-ceph get secret rook-ceph.example.com NAME TYPE DATA AGE rook-ceph.example.com kubernetes.io/tls 2 4m You can now browse to https://rook-ceph.example.com/ to log into the dashboard.","title":"Ingress Controller"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/","text":"Each Rook Ceph cluster has some built in metrics collectors/exporters for monitoring with Prometheus . If you do not have Prometheus running, follow the steps below to enable monitoring of Rook. If your cluster already contains a Prometheus instance, it will automatically discover Rook's scrape endpoint using the standard prometheus.io/scrape and prometheus.io/port annotations. Attention This assumes that the Prometheus instances is searching all your Kubernetes namespaces for Pods with these annotations. If prometheus is already installed in a cluster, it may not be configured to watch for third-party service monitors such as for Rook. 
Normally you should be able to add the prometheus annotations prometheus.io/scrape=true and prometheus.io/port={port} and prometheus would automatically configure the scrape points and start gathering metrics. If prometheus isn't configured to do this, see the prometheus operator docs . Prometheus Operator \u00b6 First the Prometheus operator needs to be started in the cluster so it can watch for our requests to start monitoring Rook and respond by deploying the correct Prometheus pods and configuration. A full explanation can be found in the Prometheus operator repository on GitHub , but the quick instructions can be found here: 1 kubectl apply -f https://raw.githubusercontent.com/coreos/prometheus-operator/v0.40.0/bundle.yaml This will start the Prometheus operator, but before moving on, wait until the operator is in the Running state: 1 kubectl get pod Once the Prometheus operator is in the Running state, proceed to the next section to create a Prometheus instance. Prometheus Instances \u00b6 With the Prometheus operator running, we can create service monitors that will watch the Rook cluster. There are two sources for metrics collection: Prometheus manager module: It is responsible for exposing all metrics other than ceph daemons performance counters. Ceph exporter: It is responsible for exposing only ceph daemons performance counters as prometheus metrics. From the root of your locally cloned Rook repo, go the monitoring directory: 1 2 $ git clone --single-branch --branch master https://github.com/rook/rook.git cd rook/deploy/examples/monitoring Create the service monitor as well as the Prometheus server pod and service: 1 2 3 4 kubectl create -f service-monitor.yaml kubectl create -f exporter-service-monitor.yaml kubectl create -f prometheus.yaml kubectl create -f prometheus-service.yaml Ensure that the Prometheus server pod gets created and advances to the Running state before moving on: 1 kubectl -n rook-ceph get pod prometheus-rook-prometheus-0 Note It is not recommended to consume storage from the Ceph cluster for Prometheus. If the Ceph cluster fails, Prometheus would become unresponsive and thus not alert you of the failure. Prometheus Web Console \u00b6 Once the Prometheus server is running, you can open a web browser and go to the URL that is output from this command: 1 echo \"http://$(kubectl -n rook-ceph -o jsonpath={.status.hostIP} get pod prometheus-rook-prometheus-0):30900\" You should now see the Prometheus monitoring website. Click on Graph in the top navigation bar. In the dropdown that says insert metric at cursor , select any metric you would like to see, for example ceph_cluster_total_used_bytes Click on the Execute button. Below the Execute button, ensure the Graph tab is selected and you should now see a graph of your chosen metric over time. Prometheus Consoles \u00b6 You can find Prometheus Consoles for and from Ceph here: GitHub ceph/cephmetrics - dashboards/current directory . A guide to how you can write your own Prometheus consoles can be found on the official Prometheus site here: Prometheus.io Documentation - Console Templates . 
Prometheus Alerts \u00b6 To enable the Ceph Prometheus alerts via the helm charts, set the following properties in values.yaml: rook-ceph chart: monitoring.enabled: true rook-ceph-cluster chart: monitoring.enabled: true monitoring.createPrometheusRules: true Alternatively, to enable the Ceph Prometheus alerts with example manifests follow these steps: Create the RBAC and prometheus rules: 1 2 kubectl create -f deploy/examples/monitoring/rbac.yaml kubectl create -f deploy/examples/monitoring/localrules.yaml Make following changes to your CephCluster object (e.g., cluster.yaml ). 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph [ ... ] spec : [ ... ] monitoring : enabled : true [ ... ] Deploy or update the CephCluster object. 1 kubectl apply -f cluster.yaml Note This expects the Prometheus Operator and a Prometheus instance to be pre-installed by the admin. Customize Alerts \u00b6 The Prometheus alerts can be customized with a post-processor using tools such as Kustomize . For example, first extract the helm chart: 1 helm template -f values.yaml rook-release/rook-ceph-cluster > cluster-chart.yaml Now create the desired customization configuration files. This simple example will show how to update the severity of a rule, add a label to a rule, and change the for time value. Create a file named kustomization.yaml: 1 2 3 4 5 6 7 8 9 patches : - path : modifications.yaml target : group : monitoring.coreos.com kind : PrometheusRule name : prometheus-ceph-rules version : v1 resources : - cluster-chart.yaml Create a file named modifications.yaml 1 2 3 4 5 6 7 8 - op : add path : /spec/groups/0/rules/0/labels value : my-label : foo severity : none - op : add path : /spec/groups/0/rules/0/for value : 15m Finally, run kustomize to update the desired prometheus rules: 1 2 kustomize build . > updated-chart.yaml kubectl create -f updated-chart.yaml Grafana Dashboards \u00b6 The dashboards have been created by @galexrt . For feedback on the dashboards please reach out to him on the Rook.io Slack . Note The dashboards are only compatible with Grafana 7.2.0 or higher. Also note that the dashboards are updated from time to time, to fix issues and improve them. The following Grafana dashboards are available: Ceph - Cluster Ceph - OSD (Single) Ceph - Pools Updates and Upgrades \u00b6 When updating Rook, there may be updates to RBAC for monitoring. It is easy to apply the changes with each update or upgrade. This should be done at the same time you update Rook common resources like common.yaml . 1 kubectl apply -f deploy/examples/monitoring/rbac.yaml Hint This is updated automatically if you are upgrading via the helm chart Teardown \u00b6 To clean up all the artifacts created by the monitoring walk-through, copy/paste the entire block below (note that errors about resources \"not found\" can be ignored): 1 2 3 4 kubectl delete -f service-monitor.yaml kubectl delete -f prometheus.yaml kubectl delete -f prometheus-service.yaml kubectl delete -f https://raw.githubusercontent.com/coreos/prometheus-operator/v0.40.0/bundle.yaml Then the rest of the instructions in the Prometheus Operator docs can be followed to finish cleaning up. Special Cases \u00b6 Tectonic Bare Metal \u00b6 Tectonic strongly discourages the tectonic-system Prometheus instance to be used outside their intentions, so you need to create a new Prometheus Operator yourself. After this you only need to create the service monitor as stated above. 
CSI Liveness \u00b6 To integrate CSI liveness and grpc into ceph monitoring, we will need to deploy a service and service monitor. 1 kubectl create -f csi-metrics-service-monitor.yaml This will create the service monitor to have Prometheus monitor CSI. Collecting RBD per-image IO statistics \u00b6 RBD per-image IO statistics collection is disabled by default. This can be enabled by setting enableRBDStats: true in the CephBlockPool spec. Prometheus does not need to be restarted after enabling it. Using custom label selectors in Prometheus \u00b6 If Prometheus needs to select specific resources, we can do so by injecting labels into these objects and using them as a label selector. 1 2 3 4 5 6 7 8 9 10 11 12 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph [ ... ] spec : [ ... ] labels : monitoring : prometheus : k8s [ ... ] Horizontal Pod Scaling using Kubernetes Event-driven Autoscaling (KEDA) \u00b6 Using metrics exported from the Prometheus service, horizontal pod scaling can use custom metrics other than CPU and memory consumption. It can be done with the help of the Prometheus Scaler provided by KEDA . See the KEDA deployment guide for details. The following is an example to autoscale RGW: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 apiVersion : keda.sh/v1alpha1 kind : ScaledObject metadata : name : rgw-scale namespace : rook-ceph spec : scaleTargetRef : kind : Deployment name : rook-ceph-rgw-my-store-a # deployment for the autoscaling minReplicaCount : 1 maxReplicaCount : 5 triggers : - type : prometheus metadata : serverAddress : http://rook-prometheus.rook-ceph.svc:9090 metricName : collecting_ceph_rgw_put query : | sum(rate(ceph_rgw_put[2m])) # prometheus query used for autoscaling threshold : \"90\" Warning During reconciliation of a CephObjectStore , the Rook Operator will reset the replica count for RGW which was set by the horizontal pod autoscaler. The horizontal pod autoscaler will change the replica count again once it re-evaluates the rule. This can result in a performance hiccup of several seconds after a reconciliation. This is briefly discussed here: https://github.com/rook/rook/issues/10001","title":"Prometheus Monitoring"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#prometheus-operator","text":"First the Prometheus operator needs to be started in the cluster so it can watch for our requests to start monitoring Rook and respond by deploying the correct Prometheus pods and configuration. A full explanation can be found in the Prometheus operator repository on GitHub , but the quick instructions can be found here: 1 kubectl apply -f https://raw.githubusercontent.com/coreos/prometheus-operator/v0.40.0/bundle.yaml This will start the Prometheus operator, but before moving on, wait until the operator is in the Running state: 1 kubectl get pod Once the Prometheus operator is in the Running state, proceed to the next section to create a Prometheus instance.","title":"Prometheus Operator"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#prometheus-instances","text":"With the Prometheus operator running, we can create service monitors that will watch the Rook cluster. There are two sources for metrics collection: Prometheus manager module: It is responsible for exposing all metrics other than ceph daemons performance counters. Ceph exporter: It is responsible for exposing only ceph daemons performance counters as prometheus metrics. 
From the root of your locally cloned Rook repo, go the monitoring directory: 1 2 $ git clone --single-branch --branch master https://github.com/rook/rook.git cd rook/deploy/examples/monitoring Create the service monitor as well as the Prometheus server pod and service: 1 2 3 4 kubectl create -f service-monitor.yaml kubectl create -f exporter-service-monitor.yaml kubectl create -f prometheus.yaml kubectl create -f prometheus-service.yaml Ensure that the Prometheus server pod gets created and advances to the Running state before moving on: 1 kubectl -n rook-ceph get pod prometheus-rook-prometheus-0 Note It is not recommended to consume storage from the Ceph cluster for Prometheus. If the Ceph cluster fails, Prometheus would become unresponsive and thus not alert you of the failure.","title":"Prometheus Instances"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#prometheus-web-console","text":"Once the Prometheus server is running, you can open a web browser and go to the URL that is output from this command: 1 echo \"http://$(kubectl -n rook-ceph -o jsonpath={.status.hostIP} get pod prometheus-rook-prometheus-0):30900\" You should now see the Prometheus monitoring website. Click on Graph in the top navigation bar. In the dropdown that says insert metric at cursor , select any metric you would like to see, for example ceph_cluster_total_used_bytes Click on the Execute button. Below the Execute button, ensure the Graph tab is selected and you should now see a graph of your chosen metric over time.","title":"Prometheus Web Console"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#prometheus-consoles","text":"You can find Prometheus Consoles for and from Ceph here: GitHub ceph/cephmetrics - dashboards/current directory . A guide to how you can write your own Prometheus consoles can be found on the official Prometheus site here: Prometheus.io Documentation - Console Templates .","title":"Prometheus Consoles"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#prometheus-alerts","text":"To enable the Ceph Prometheus alerts via the helm charts, set the following properties in values.yaml: rook-ceph chart: monitoring.enabled: true rook-ceph-cluster chart: monitoring.enabled: true monitoring.createPrometheusRules: true Alternatively, to enable the Ceph Prometheus alerts with example manifests follow these steps: Create the RBAC and prometheus rules: 1 2 kubectl create -f deploy/examples/monitoring/rbac.yaml kubectl create -f deploy/examples/monitoring/localrules.yaml Make following changes to your CephCluster object (e.g., cluster.yaml ). 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph [ ... ] spec : [ ... ] monitoring : enabled : true [ ... ] Deploy or update the CephCluster object. 1 kubectl apply -f cluster.yaml Note This expects the Prometheus Operator and a Prometheus instance to be pre-installed by the admin.","title":"Prometheus Alerts"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#customize-alerts","text":"The Prometheus alerts can be customized with a post-processor using tools such as Kustomize . For example, first extract the helm chart: 1 helm template -f values.yaml rook-release/rook-ceph-cluster > cluster-chart.yaml Now create the desired customization configuration files. This simple example will show how to update the severity of a rule, add a label to a rule, and change the for time value. 
Create a file named kustomization.yaml: 1 2 3 4 5 6 7 8 9 patches : - path : modifications.yaml target : group : monitoring.coreos.com kind : PrometheusRule name : prometheus-ceph-rules version : v1 resources : - cluster-chart.yaml Create a file named modifications.yaml 1 2 3 4 5 6 7 8 - op : add path : /spec/groups/0/rules/0/labels value : my-label : foo severity : none - op : add path : /spec/groups/0/rules/0/for value : 15m Finally, run kustomize to update the desired prometheus rules: 1 2 kustomize build . > updated-chart.yaml kubectl create -f updated-chart.yaml","title":"Customize Alerts"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#grafana-dashboards","text":"The dashboards have been created by @galexrt . For feedback on the dashboards please reach out to him on the Rook.io Slack . Note The dashboards are only compatible with Grafana 7.2.0 or higher. Also note that the dashboards are updated from time to time, to fix issues and improve them. The following Grafana dashboards are available: Ceph - Cluster Ceph - OSD (Single) Ceph - Pools","title":"Grafana Dashboards"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#updates-and-upgrades","text":"When updating Rook, there may be updates to RBAC for monitoring. It is easy to apply the changes with each update or upgrade. This should be done at the same time you update Rook common resources like common.yaml . 1 kubectl apply -f deploy/examples/monitoring/rbac.yaml Hint This is updated automatically if you are upgrading via the helm chart","title":"Updates and Upgrades"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#teardown","text":"To clean up all the artifacts created by the monitoring walk-through, copy/paste the entire block below (note that errors about resources \"not found\" can be ignored): 1 2 3 4 kubectl delete -f service-monitor.yaml kubectl delete -f prometheus.yaml kubectl delete -f prometheus-service.yaml kubectl delete -f https://raw.githubusercontent.com/coreos/prometheus-operator/v0.40.0/bundle.yaml Then the rest of the instructions in the Prometheus Operator docs can be followed to finish cleaning up.","title":"Teardown"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#special-cases","text":"","title":"Special Cases"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#tectonic-bare-metal","text":"Tectonic strongly discourages the tectonic-system Prometheus instance to be used outside their intentions, so you need to create a new Prometheus Operator yourself. After this you only need to create the service monitor as stated above.","title":"Tectonic Bare Metal"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#csi-liveness","text":"To integrate CSI liveness and grpc into ceph monitoring we will need to deploy a service and service monitor. 1 kubectl create -f csi-metrics-service-monitor.yaml This will create the service monitor to have prometheus monitor CSI","title":"CSI Liveness"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#collecting-rbd-per-image-io-statistics","text":"RBD per-image IO statistics collection is disabled by default. This can be enabled by setting enableRBDStats: true in the CephBlockPool spec. 
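As a hedged illustration, the enableRBDStats setting sits directly under the pool spec; the pool name and replication values below are assumptions taken from common Rook examples, not requirements.

```yaml
# Sketch of a CephBlockPool with per-image RBD IO statistics enabled.
apiVersion: ceph.rook.io/v1
kind: CephBlockPool
metadata:
  name: replicapool            # assumed example pool name
  namespace: rook-ceph
spec:
  failureDomain: host
  replicated:
    size: 3
  enableRBDStats: true         # exposes per-image IO metrics through the Prometheus exporter
```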
Prometheus does not need to be restarted after enabling it.","title":"Collecting RBD per-image IO statistics"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#using-custom-label-selectors-in-prometheus","text":"If Prometheus needs to select specific resources, we can do so by injecting labels into these objects and using them as a label selector. 1 2 3 4 5 6 7 8 9 10 11 12 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph [ ... ] spec : [ ... ] labels : monitoring : prometheus : k8s [ ... ]","title":"Using custom label selectors in Prometheus"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#horizontal-pod-scaling-using-kubernetes-event-driven-autoscaling-keda","text":"Using metrics exported from the Prometheus service, horizontal pod scaling can use custom metrics other than CPU and memory consumption. It can be done with the help of the Prometheus Scaler provided by KEDA . See the KEDA deployment guide for details. The following is an example to autoscale RGW: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 apiVersion : keda.sh/v1alpha1 kind : ScaledObject metadata : name : rgw-scale namespace : rook-ceph spec : scaleTargetRef : kind : Deployment name : rook-ceph-rgw-my-store-a # deployment for the autoscaling minReplicaCount : 1 maxReplicaCount : 5 triggers : - type : prometheus metadata : serverAddress : http://rook-prometheus.rook-ceph.svc:9090 metricName : collecting_ceph_rgw_put query : | sum(rate(ceph_rgw_put[2m])) # prometheus query used for autoscaling threshold : \"90\" Warning During reconciliation of a CephObjectStore , the Rook Operator will reset the replica count for RGW which was set by the horizontal pod autoscaler. The horizontal pod autoscaler will change the replica count again once it re-evaluates the rule. This can result in a performance hiccup of several seconds after a reconciliation. This is briefly discussed here: https://github.com/rook/rook/issues/10001","title":"Horizontal Pod Scaling using Kubernetes Event-driven Autoscaling (KEDA)"},{"location":"Storage-Configuration/NFS/nfs-advanced/","text":"All CephNFS daemons are configured using shared RADOS objects stored in a Ceph pool named .nfs . Users can modify the configuration object for each CephNFS cluster if they wish to customize the configuration. Changing configuration of the .nfs pool \u00b6 By default, Rook creates the .nfs pool with Ceph's default configuration. If you wish to change the configuration of this pool (for example to change its failure domain or replication factor), you can create a CephBlockPool with the spec.name field set to .nfs . This pool must be replicated and cannot be erasure coded. deploy/examples/nfs.yaml contains a sample for reference. Adding custom NFS-Ganesha config file changes \u00b6 Ceph uses NFS-Ganesha servers. The config file format for these objects is documented in the NFS-Ganesha project . Use Ceph's rados tool from the toolbox to interact with the configuration object. The below command will get you started by dumping the contents of the config object to stdout. The output will look something like the example shown if you have already created two exports as documented above. It is best not to modify any of the export objects created by Ceph so as not to cause errors with Ceph's export management. 1 2 3 $ rados --pool  --namespace  get conf-nfs. - % url \"rados:////export-1\" % url \"rados:////export-2\" rados ls and rados put are other commands you will want in order to work with the other shared configuration objects. 
Of note, it is possible to pre-populate the NFS configuration and export objects prior to creating CephNFS server clusters. Creating NFS export over RGW \u00b6 Warning RGW NFS export is experimental for the moment. It is not recommended for scenarios that modify existing content. For creating an NFS export over an RGW (CephObjectStore) storage backend, the below command can be used. This creates an export for the /testrgw pseudo path on an existing bucket bkt4exp as an example. You could use the /testrgw pseudo path for the NFS mount operation afterwards. 1 ceph nfs export create rgw my-nfs /testrgw bkt4exp","title":"Advanced configuration"},{"location":"Storage-Configuration/NFS/nfs-advanced/#changing-configuration-of-the-nfs-pool","text":"By default, Rook creates the .nfs pool with Ceph's default configuration. If you wish to change the configuration of this pool (for example to change its failure domain or replication factor), you can create a CephBlockPool with the spec.name field set to .nfs . This pool must be replicated and cannot be erasure coded. deploy/examples/nfs.yaml contains a sample for reference.","title":"Changing configuration of the .nfs pool"},{"location":"Storage-Configuration/NFS/nfs-advanced/#adding-custom-nfs-ganesha-config-file-changes","text":"Ceph uses NFS-Ganesha servers. The config file format for these objects is documented in the NFS-Ganesha project . Use Ceph's rados tool from the toolbox to interact with the configuration object. The below command will get you started by dumping the contents of the config object to stdout. The output will look something like the example shown if you have already created two exports as documented above. It is best not to modify any of the export objects created by Ceph so as not to cause errors with Ceph's export management. 1 2 3 $ rados --pool  --namespace  get conf-nfs. - % url \"rados:////export-1\" % url \"rados:////export-2\" rados ls and rados put are other commands you will want in order to work with the other shared configuration objects. Of note, it is possible to pre-populate the NFS configuration and export objects prior to creating CephNFS server clusters.","title":"Adding custom NFS-Ganesha config file changes"},{"location":"Storage-Configuration/NFS/nfs-advanced/#creating-nfs-export-over-rgw","text":"Warning RGW NFS export is experimental for the moment. It is not recommended for scenarios that modify existing content. For creating an NFS export over an RGW (CephObjectStore) storage backend, the below command can be used. This creates an export for the /testrgw pseudo path on an existing bucket bkt4exp as an example. You could use the /testrgw pseudo path for the NFS mount operation afterwards. 1 ceph nfs export create rgw my-nfs /testrgw bkt4exp","title":"Creating NFS export over RGW"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/","text":"Attention This feature is experimental and will not support upgrades to future versions. For this section, we will refer to Rook's deployment examples in the deploy/examples directory. Enabling the CSI drivers \u00b6 The Ceph CSI NFS provisioner and driver require additional RBAC to operate. Apply the deploy/examples/csi/nfs/rbac.yaml manifest to deploy the additional resources. Rook will only deploy the Ceph CSI NFS provisioner and driver components when the ROOK_CSI_ENABLE_NFS config is set to \"true\" in the rook-ceph-operator-config configmap. Change the value in your manifest, or patch the resource as below. 
1 kubectl --namespace rook-ceph patch configmap rook-ceph-operator-config --type merge --patch '{\"data\":{\"ROOK_CSI_ENABLE_NFS\": \"true\"}}' Note The rook-ceph operator Helm chart will deploy the required RBAC and enable the driver components if csi.nfs.enabled is set to true . Creating NFS exports via PVC \u00b6 Prerequisites \u00b6 In order to create NFS exports via the CSI driver, you must first create a CephFilesystem to serve as the underlying storage for the exports, and you must create a CephNFS to run an NFS server that will expose the exports. RGWs cannot be used for the CSI driver. From the examples, filesystem.yaml creates a CephFilesystem called myfs , and nfs.yaml creates an NFS server called my-nfs . You may need to enable or disable the Ceph orchestrator. Follow the same steps documented above based on your Ceph version and desires. You must also create a storage class. Ceph CSI is designed to support any arbitrary Ceph cluster, but we are focused here only on Ceph clusters deployed by Rook. Let's take a look at a portion of the example storage class found at deploy/examples/csi/nfs/storageclass.yaml and break down how the values are determined. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-nfs provisioner : rook-ceph.nfs.csi.ceph.com # [1] parameters : nfsCluster : my-nfs # [2] server : rook-ceph-nfs-my-nfs-a # [3] clusterID : rook-ceph # [4] fsName : myfs # [5] pool : myfs-replicated # [6] # [7] (entire csi.storage.k8s.io/* section immediately below) csi.storage.k8s.io/provisioner-secret-name : rook-csi-cephfs-provisioner csi.storage.k8s.io/provisioner-secret-namespace : rook-ceph csi.storage.k8s.io/controller-expand-secret-name : rook-csi-cephfs-provisioner csi.storage.k8s.io/controller-expand-secret-namespace : rook-ceph csi.storage.k8s.io/node-stage-secret-name : rook-csi-cephfs-node csi.storage.k8s.io/node-stage-secret-namespace : rook-ceph # ... some fields omitted ... provisioner : rook-ceph .nfs.csi.ceph.com because rook-ceph is the namespace where the CephCluster is installed nfsCluster : my-nfs because this is the name of the CephNFS server : rook-ceph-nfs- my-nfs -a because Rook creates this Kubernetes Service for the CephNFS named my-nfs clusterID : rook-ceph because this is the namespace where the CephCluster is installed fsName : myfs because this is the name of the CephFilesystem used to back the NFS exports pool : myfs - replicated because myfs is the name of the CephFilesystem defined in fsName and because replicated is the name of a data pool defined in the CephFilesystem csi.storage.k8s.io/* : note that these values are shared with the Ceph CSI CephFS provisioner Creating a PVC \u00b6 See deploy/examples/csi/nfs/pvc.yaml for an example of how to create a PVC that will create an NFS export. The export will be created and a PV created for the PVC immediately, even without a Pod to mount the PVC. Attaching an export to a pod \u00b6 See deploy/examples/csi/nfs/pod.yaml for an example of how a PVC can be connected to an application pod. Connecting to an export directly \u00b6 After a PVC is created successfully, the share parameter set on the resulting PV contains the share path which can be used as the export path when mounting the export manually . In the example below /0001-0009-rook-ceph-0000000000000001-55c910f9-a1af-11ed-9772-1a471870b2f5 is the export path. 
1 2 $ kubectl get pv pvc-b559f225-de79-451b-a327-3dbec1f95a1c -o jsonpath = '{.spec.csi.volumeAttributes}' /0001-0009-rook-ceph-0000000000000001-55c910f9-a1af-11ed-9772-1a471870b2f5 Taking snapshots of NFS exports \u00b6 NFS export PVCs can be snapshotted and later restored to new PVCs. Creating snapshots \u00b6 First, create a VolumeSnapshotClass as in the example here . The csi.storage.k8s.io/snapshotter-secret-name parameter should reference the name of the secret created for the cephfsplugin here . 1 kubectl create -f deploy/examples/csi/nfs/snapshotclass.yaml In snapshot , volumeSnapshotClassName should be the name of the VolumeSnapshotClass previously created. The persistentVolumeClaimName should be the name of the PVC which is already created by the NFS CSI driver. 1 kubectl create -f deploy/examples/csi/nfs/snapshot.yaml Verifying snapshots \u00b6 1 2 3 $ kubectl get volumesnapshotclass NAME DRIVER DELETIONPOLICY AGE csi-nfslugin-snapclass rook-ceph.nfs.csi.ceph.com Delete 3h55m 1 2 3 $ kubectl get volumesnapshot NAME READYTOUSE SOURCEPVC SOURCESNAPSHOTCONTENT RESTORESIZE SNAPSHOTCLASS SNAPSHOTCONTENT CREATIONTIME AGE nfs-pvc-snapshot true nfs-pvc 1Gi csi-nfsplugin-snapclass snapcontent-34476204-a14a-4d59-bfbc-2bbba695652c 3h50m 3h51m The snapshot will be ready to restore to a new PVC when READYTOUSE field of the volumesnapshot is set to true. Restoring snapshot to a new PVC \u00b6 In pvc-restore , dataSource name should be the name of the VolumeSnapshot previously created. The dataSource kind should be \"VolumeSnapshot\". Create a new PVC from the snapshot. 1 kubectl create -f deploy/examples/csi/nfs/pvc-restore.yaml Verifying restored PVC Creation \u00b6 1 2 3 4 $ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE nfs-pvc Bound pvc-74734901-577a-11e9-b34f-525400581048 1Gi RWX rook-nfs 55m nfs-pvc-restore Bound pvc-95308c75-6c93-4928-a551-6b5137192209 1Gi RWX rook-nfs 34s Cleaning up snapshot resource \u00b6 To clean your cluster of the resources created by this example, run the following: 1 2 3 kubectl delete -f deploy/examples/csi/nfs/pvc-restore.yaml kubectl delete -f deploy/examples/csi/nfs/snapshot.yaml kubectl delete -f deploy/examples/csi/nfs/snapshotclass.yaml Cloning NFS exports \u00b6 Creating clones \u00b6 In pvc-clone , dataSource should be the name of the PVC which is already created by NFS CSI driver. The dataSource kind should be \"PersistentVolumeClaim\" and also storageclass should be same as the source PVC. Create a new PVC Clone from the PVC as in the example here . 1 kubectl create -f deploy/examples/csi/nfs/pvc-clone.yaml Verifying a cloned PVC \u00b6 1 kubectl get pvc 1 2 3 NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE nfs-pvc Bound pvc-1ea51547-a88b-4ab0-8b4a-812caeaf025d 1Gi RWX rook-nfs 39m nfs-pvc-clone Bound pvc-b575bc35-d521-4c41-b4f9-1d733cd28fdf 1Gi RWX rook-nfs 8s Cleaning up clone resources \u00b6 To clean your cluster of the resources created by this example, run the following: 1 kubectl delete -f deploy/examples/csi/nfs/pvc-clone.yaml","title":"CSI provisioner and driver"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#enabling-the-csi-drivers","text":"The Ceph CSI NFS provisioner and driver require additional RBAC to operate. Apply the deploy/examples/csi/nfs/rbac.yaml manifest to deploy the additional resources. Rook will only deploy the Ceph CSI NFS provisioner and driver components when the ROOK_CSI_ENABLE_NFS config is set to \"true\" in the rook-ceph-operator-config configmap. 
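If you manage the operator settings declaratively rather than patching, the relevant ConfigMap entry looks roughly like the sketch below (only the ROOK_CSI_ENABLE_NFS key is shown; keep any other data keys you already set).

```yaml
# Sketch of the operator ConfigMap entry that enables the Ceph CSI NFS provisioner and driver.
apiVersion: v1
kind: ConfigMap
metadata:
  name: rook-ceph-operator-config
  namespace: rook-ceph
data:
  ROOK_CSI_ENABLE_NFS: "true"
```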
Change the value in your manifest, or patch the resource as below. 1 kubectl --namespace rook-ceph patch configmap rook-ceph-operator-config --type merge --patch '{\"data\":{\"ROOK_CSI_ENABLE_NFS\": \"true\"}}' Note The rook-ceph operator Helm chart will deploy the required RBAC and enable the driver components if csi.nfs.enabled is set to true .","title":"Enabling the CSI drivers"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#creating-nfs-exports-via-pvc","text":"","title":"Creating NFS exports via PVC"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#prerequisites","text":"In order to create NFS exports via the CSI driver, you must first create a CephFilesystem to serve as the underlying storage for the exports, and you must create a CephNFS to run an NFS server that will expose the exports. RGWs cannot be used for the CSI driver. From the examples, filesystem.yaml creates a CephFilesystem called myfs , and nfs.yaml creates an NFS server called my-nfs . You may need to enable or disable the Ceph orchestrator. Follow the same steps documented above based on your Ceph version and desires. You must also create a storage class. Ceph CSI is designed to support any arbitrary Ceph cluster, but we are focused here only on Ceph clusters deployed by Rook. Let's take a look at a portion of the example storage class found at deploy/examples/csi/nfs/storageclass.yaml and break down how the values are determined. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-nfs provisioner : rook-ceph.nfs.csi.ceph.com # [1] parameters : nfsCluster : my-nfs # [2] server : rook-ceph-nfs-my-nfs-a # [3] clusterID : rook-ceph # [4] fsName : myfs # [5] pool : myfs-replicated # [6] # [7] (entire csi.storage.k8s.io/* section immediately below) csi.storage.k8s.io/provisioner-secret-name : rook-csi-cephfs-provisioner csi.storage.k8s.io/provisioner-secret-namespace : rook-ceph csi.storage.k8s.io/controller-expand-secret-name : rook-csi-cephfs-provisioner csi.storage.k8s.io/controller-expand-secret-namespace : rook-ceph csi.storage.k8s.io/node-stage-secret-name : rook-csi-cephfs-node csi.storage.k8s.io/node-stage-secret-namespace : rook-ceph # ... some fields omitted ... provisioner : rook-ceph .nfs.csi.ceph.com because rook-ceph is the namespace where the CephCluster is installed nfsCluster : my-nfs because this is the name of the CephNFS server : rook-ceph-nfs- my-nfs -a because Rook creates this Kubernetes Service for the CephNFS named my-nfs clusterID : rook-ceph because this is the namespace where the CephCluster is installed fsName : myfs because this is the name of the CephFilesystem used to back the NFS exports pool : myfs - replicated because myfs is the name of the CephFilesystem defined in fsName and because replicated is the name of a data pool defined in the CephFilesystem csi.storage.k8s.io/* : note that these values are shared with the Ceph CSI CephFS provisioner","title":"Prerequisites"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#creating-a-pvc","text":"See deploy/examples/csi/nfs/pvc.yaml for an example of how to create a PVC that will create an NFS export. 
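A minimal sketch of such a PVC is shown below; it assumes the rook-nfs storage class defined earlier and mirrors the names used in the sample output on this page, with deploy/examples/csi/nfs/pvc.yaml remaining the maintained example.

```yaml
# Hedged sketch of a PVC that provisions a new NFS export via the CSI driver.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: nfs-pvc
spec:
  storageClassName: rook-nfs      # the NFS storage class created above
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Gi
```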
The export will be created and a PV created for the PVC immediately, even without a Pod to mount the PVC.","title":"Creating a PVC"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#attaching-an-export-to-a-pod","text":"See deploy/examples/csi/nfs/pod.yaml for an example of how a PVC can be connected to an application pod.","title":"Attaching an export to a pod"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#connecting-to-an-export-directly","text":"After a PVC is created successfully, the share parameter set on the resulting PV contains the share path which can be used as the export path when mounting the export manually . In the example below /0001-0009-rook-ceph-0000000000000001-55c910f9-a1af-11ed-9772-1a471870b2f5 is the export path. 1 2 $ kubectl get pv pvc-b559f225-de79-451b-a327-3dbec1f95a1c -o jsonpath = '{.spec.csi.volumeAttributes}' /0001-0009-rook-ceph-0000000000000001-55c910f9-a1af-11ed-9772-1a471870b2f5","title":"Connecting to an export directly"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#taking-snapshots-of-nfs-exports","text":"NFS export PVCs can be snapshotted and later restored to new PVCs.","title":"Taking snapshots of NFS exports"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#creating-snapshots","text":"First, create a VolumeSnapshotClass as in the example here . The csi.storage.k8s.io/snapshotter-secret-name parameter should reference the name of the secret created for the cephfsplugin here . 1 kubectl create -f deploy/examples/csi/nfs/snapshotclass.yaml In snapshot , volumeSnapshotClassName should be the name of the VolumeSnapshotClass previously created. The persistentVolumeClaimName should be the name of the PVC which is already created by the NFS CSI driver. 1 kubectl create -f deploy/examples/csi/nfs/snapshot.yaml","title":"Creating snapshots"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#verifying-snapshots","text":"1 2 3 $ kubectl get volumesnapshotclass NAME DRIVER DELETIONPOLICY AGE csi-nfslugin-snapclass rook-ceph.nfs.csi.ceph.com Delete 3h55m 1 2 3 $ kubectl get volumesnapshot NAME READYTOUSE SOURCEPVC SOURCESNAPSHOTCONTENT RESTORESIZE SNAPSHOTCLASS SNAPSHOTCONTENT CREATIONTIME AGE nfs-pvc-snapshot true nfs-pvc 1Gi csi-nfsplugin-snapclass snapcontent-34476204-a14a-4d59-bfbc-2bbba695652c 3h50m 3h51m The snapshot will be ready to restore to a new PVC when READYTOUSE field of the volumesnapshot is set to true.","title":"Verifying snapshots"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#restoring-snapshot-to-a-new-pvc","text":"In pvc-restore , dataSource name should be the name of the VolumeSnapshot previously created. The dataSource kind should be \"VolumeSnapshot\". Create a new PVC from the snapshot. 
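For illustration, such a restore PVC might look like the sketch below; the nfs-pvc-snapshot and rook-nfs names are assumptions carried over from the surrounding examples, and deploy/examples/csi/nfs/pvc-restore.yaml is the maintained example.

```yaml
# Hedged sketch of restoring a VolumeSnapshot to a new PVC.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: nfs-pvc-restore
spec:
  storageClassName: rook-nfs
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Gi
  dataSource:
    apiGroup: snapshot.storage.k8s.io
    kind: VolumeSnapshot
    name: nfs-pvc-snapshot        # the VolumeSnapshot created earlier
```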
1 kubectl create -f deploy/examples/csi/nfs/pvc-restore.yaml","title":"Restoring snapshot to a new PVC"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#verifying-restored-pvc-creation","text":"1 2 3 4 $ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE nfs-pvc Bound pvc-74734901-577a-11e9-b34f-525400581048 1Gi RWX rook-nfs 55m nfs-pvc-restore Bound pvc-95308c75-6c93-4928-a551-6b5137192209 1Gi RWX rook-nfs 34s","title":"Verifying restored PVC Creation"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#cleaning-up-snapshot-resource","text":"To clean your cluster of the resources created by this example, run the following: 1 2 3 kubectl delete -f deploy/examples/csi/nfs/pvc-restore.yaml kubectl delete -f deploy/examples/csi/nfs/snapshot.yaml kubectl delete -f deploy/examples/csi/nfs/snapshotclass.yaml","title":"Cleaning up snapshot resource"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#cloning-nfs-exports","text":"","title":"Cloning NFS exports"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#creating-clones","text":"In pvc-clone , dataSource should be the name of the PVC which is already created by NFS CSI driver. The dataSource kind should be \"PersistentVolumeClaim\" and also storageclass should be same as the source PVC. Create a new PVC Clone from the PVC as in the example here . 1 kubectl create -f deploy/examples/csi/nfs/pvc-clone.yaml","title":"Creating clones"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#verifying-a-cloned-pvc","text":"1 kubectl get pvc 1 2 3 NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE nfs-pvc Bound pvc-1ea51547-a88b-4ab0-8b4a-812caeaf025d 1Gi RWX rook-nfs 39m nfs-pvc-clone Bound pvc-b575bc35-d521-4c41-b4f9-1d733cd28fdf 1Gi RWX rook-nfs 8s","title":"Verifying a cloned PVC"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#cleaning-up-clone-resources","text":"To clean your cluster of the resources created by this example, run the following: 1 kubectl delete -f deploy/examples/csi/nfs/pvc-clone.yaml","title":"Cleaning up clone resources"},{"location":"Storage-Configuration/NFS/nfs-security/","text":"Rook provides security for CephNFS server clusters through two high-level features: user ID mapping and user authentication . Attention All features in this document are experimental and may not support upgrades to future versions. Attention Some configurations of these features may break the ability to mount NFS storage to pods via PVCs . The NFS CSI driver may not be able to mount exports for pods when ID mapping is configured. User ID mapping \u00b6 User ID mapping allows the NFS server to map connected NFS client IDs to a different user domain, allowing NFS clients to be associated with a particular user in your organization. For example, users stored in LDAP can be associated with NFS users and vice versa. ID mapping via SSSD \u00b6 SSSD is the System Security Services Daemon. It can be used to provide user ID mapping from a number of sources including LDAP, Active Directory, and FreeIPA. Currently, only LDAP has been tested. Attention The Ceph container image must have the sssd-client package installed to support SSSD. This package is included in quay.io/ceph/ceph in v17.2.4 and newer. For older Ceph versions you may build your own Ceph image which adds RUN yum install sssd-client && yum clean all . SSSD configuration \u00b6 SSSD requires a configuration file in order to configure its connection to the user ID mapping system (e.g., LDAP). 
The file follows the sssd.conf format documented in its man pages . Methods of providing the configuration file are documented in the NFS CRD security section . Recommendations: - The SSSD sidecar only requires the namespace switch (a.k.a. \"nsswitch\" or \"nss\"). We recommend enabling only the nss service to lower CPU usage. - NFS-Ganesha does not require user enumeration. We recommend leaving this option unset or setting enumerate = false to speed up lookups and reduce RAM usage. - NFS exports created via documented methods do not require listing all members of groups. We recommend setting ignore_group_members = true to speed up LDAP lookups. Only customized exports that set manage_gids need to consider this option. A sample sssd.conf file is shown below. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 [sssd] # Only the nss service is required for the SSSD sidecar. services = nss domains = default config_file_version = 2 [nss] filter_users = root [domain/default] id_provider = ldap ldap_uri = ldap://server-address.example.net ldap_search_base = dc=example,dc=net ldap_default_bind_dn = cn=admin,dc=example,dc=net ldap_default_authtok_type = password ldap_default_authtok = my-password ldap_user_search_base = ou=users,dc=example,dc=net ldap_group_search_base = ou=groups,dc=example,dc=net ldap_access_filter = memberOf=cn=rook,ou=groups,dc=example,dc=net # recommended options for speeding up LDAP lookups: enumerate = false ignore_group_members = true The SSSD configuration file may be omitted from the CephNFS spec if desired. In this case, Rook will not set /etc/sssd/sssd.conf in any way. This allows you to manage the sssd.conf file yourself however you wish. For example, you may build it into your custom Ceph container image, or use the Vault agent injector to securely add the file via annotations on the CephNFS spec (passed to the NFS server pods). User authentication \u00b6 User authentication allows NFS clients and the Rook CephNFS servers to authenticate with each other to ensure security. Authentication through Kerberos \u00b6 Kerberos is the authentication mechanism natively supported by NFS-Ganesha. With NFSv4, individual users are authenticated and not merely client machines. Kerberos configuration \u00b6 Kerberos authentication requires configuration files in order for the NFS-Ganesha server to authenticate to the Kerberos server (KDC). The requirements are two-parted: 1. one or more kerberos configuration files that configures the connection to the Kerberos server. This file follows the krb5.conf format documented in its man pages . 2. a keytab file that provides credentials for the service principal that NFS-Ganesha will use to authenticate with the Kerberos server. 3. a kerberos domain name which will be used to map kerberos credentials to uid/gid domain name that NFS-Ganesha will use to authenticate with the Methods of providing the configuration files are documented in the NFS CRD security section . Recommendations: - Rook configures Kerberos to log to stderr. We suggest removing logging sections from config files to avoid consuming unnecessary disk space from logging to files. A sample Kerberos config file is shown below. 1 2 3 4 5 6 7 8 9 10 11 12 [libdefaults] default_realm = EXAMPLE.NET [realms] EXAMPLE.NET = { kdc = kdc.example.net:88 admin_server = kdc.example.net:749 } [domain_realm] .example.net = EXAMPLE.NET example.net = EXAMPLE.NET The Kerberos config files ( configFiles ) may be omitted from the Ceph NFS spec if desired. 
In this case, Rook will not add any config files to /etc/krb5.conf.rook/ , but it will still configure Kerberos to load any config files it finds there. This allows you to manage these files yourself however you wish. Similarly, the keytab file ( keytabFile ) may be omitted from the CephNFS spec if desired. In this case, Rook will not set /etc/krb5.keytab in any way. This allows you to manage the krb5.keytab file yourself however you wish. As an example for either of the above cases, you may build files into your custom Ceph container image or use the Vault agent injector to securely add files via annotations on the CephNFS spec (passed to the NFS server pods). NFS service principals \u00b6 The Kerberos service principal used by Rook's CephNFS servers to authenticate with the Kerberos server is built up from 3 components: 1. the configured from spec.security.kerberos.principalName that acts as the service name 2. the hostname of the server on which NFS-Ganesha is running which is in turn built up from the namespace and name of the CephNFS resource, joined by a hyphen. e.g., rooknamespace-nfsname 3. the realm as configured by the kerberos config file(s) from spec.security.kerberos.configFiles The full service principal name is constructed as /-@ . For ease of scaling up or down CephNFS clusters, this principal is used for all servers in the CephNFS cluster. Users must add this service principal to their Kerberos server configuration. Example For a CephNFS named \"fileshare\" in the \"business-unit\" Kubernetes namespace that has a principalName of \"sales-apac\" and where the Kerberos realm is \"EXAMPLE.NET\", the full principal name will be sales-apac/business-unit-fileshare@EXAMPLE.NET . Advanced spec.security.kerberos.principalName corresponds directly to NFS-Ganesha's NFS_KRB5:PrincipalName config. See the NFS-Ganesha wiki for more details. Kerberos domain name \u00b6 The kerberos domain name is used to setup the domain name in /etc/idmapd.conf. This domain name is used by idmap to map the kerberos credential to the user uid/gid. Without this configured, NFS-Ganesha will be unable to map the Kerberos principal to an uid/gid and will instead use the configured anonuid/anongid (default: -2) when accessing the local filesystem.","title":"Security"},{"location":"Storage-Configuration/NFS/nfs-security/#user-id-mapping","text":"User ID mapping allows the NFS server to map connected NFS client IDs to a different user domain, allowing NFS clients to be associated with a particular user in your organization. For example, users stored in LDAP can be associated with NFS users and vice versa.","title":"User ID mapping"},{"location":"Storage-Configuration/NFS/nfs-security/#id-mapping-via-sssd","text":"SSSD is the System Security Services Daemon. It can be used to provide user ID mapping from a number of sources including LDAP, Active Directory, and FreeIPA. Currently, only LDAP has been tested. Attention The Ceph container image must have the sssd-client package installed to support SSSD. This package is included in quay.io/ceph/ceph in v17.2.4 and newer. For older Ceph versions you may build your own Ceph image which adds RUN yum install sssd-client && yum clean all .","title":"ID mapping via SSSD"},{"location":"Storage-Configuration/NFS/nfs-security/#sssd-configuration","text":"SSSD requires a configuration file in order to configure its connection to the user ID mapping system (e.g., LDAP). The file follows the sssd.conf format documented in its man pages . 
Methods of providing the configuration file are documented in the NFS CRD security section . Recommendations: - The SSSD sidecar only requires the namespace switch (a.k.a. \"nsswitch\" or \"nss\"). We recommend enabling only the nss service to lower CPU usage. - NFS-Ganesha does not require user enumeration. We recommend leaving this option unset or setting enumerate = false to speed up lookups and reduce RAM usage. - NFS exports created via documented methods do not require listing all members of groups. We recommend setting ignore_group_members = true to speed up LDAP lookups. Only customized exports that set manage_gids need to consider this option. A sample sssd.conf file is shown below. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 [sssd] # Only the nss service is required for the SSSD sidecar. services = nss domains = default config_file_version = 2 [nss] filter_users = root [domain/default] id_provider = ldap ldap_uri = ldap://server-address.example.net ldap_search_base = dc=example,dc=net ldap_default_bind_dn = cn=admin,dc=example,dc=net ldap_default_authtok_type = password ldap_default_authtok = my-password ldap_user_search_base = ou=users,dc=example,dc=net ldap_group_search_base = ou=groups,dc=example,dc=net ldap_access_filter = memberOf=cn=rook,ou=groups,dc=example,dc=net # recommended options for speeding up LDAP lookups: enumerate = false ignore_group_members = true The SSSD configuration file may be omitted from the CephNFS spec if desired. In this case, Rook will not set /etc/sssd/sssd.conf in any way. This allows you to manage the sssd.conf file yourself however you wish. For example, you may build it into your custom Ceph container image, or use the Vault agent injector to securely add the file via annotations on the CephNFS spec (passed to the NFS server pods).","title":"SSSD configuration"},{"location":"Storage-Configuration/NFS/nfs-security/#user-authentication","text":"User authentication allows NFS clients and the Rook CephNFS servers to authenticate with each other to ensure security.","title":"User authentication"},{"location":"Storage-Configuration/NFS/nfs-security/#authentication-through-kerberos","text":"Kerberos is the authentication mechanism natively supported by NFS-Ganesha. With NFSv4, individual users are authenticated and not merely client machines.","title":"Authentication through Kerberos"},{"location":"Storage-Configuration/NFS/nfs-security/#kerberos-configuration","text":"Kerberos authentication requires configuration files in order for the NFS-Ganesha server to authenticate to the Kerberos server (KDC). The requirements are two-parted: 1. one or more kerberos configuration files that configures the connection to the Kerberos server. This file follows the krb5.conf format documented in its man pages . 2. a keytab file that provides credentials for the service principal that NFS-Ganesha will use to authenticate with the Kerberos server. 3. a kerberos domain name which will be used to map kerberos credentials to uid/gid domain name that NFS-Ganesha will use to authenticate with the Methods of providing the configuration files are documented in the NFS CRD security section . Recommendations: - Rook configures Kerberos to log to stderr. We suggest removing logging sections from config files to avoid consuming unnecessary disk space from logging to files. A sample Kerberos config file is shown below. 
1 2 3 4 5 6 7 8 9 10 11 12 [libdefaults] default_realm = EXAMPLE.NET [realms] EXAMPLE.NET = { kdc = kdc.example.net:88 admin_server = kdc.example.net:749 } [domain_realm] .example.net = EXAMPLE.NET example.net = EXAMPLE.NET The Kerberos config files ( configFiles ) may be omitted from the Ceph NFS spec if desired. In this case, Rook will not add any config files to /etc/krb5.conf.rook/ , but it will still configure Kerberos to load any config files it finds there. This allows you to manage these files yourself however you wish. Similarly, the keytab file ( keytabFile ) may be omitted from the CephNFS spec if desired. In this case, Rook will not set /etc/krb5.keytab in any way. This allows you to manage the krb5.keytab file yourself however you wish. As an example for either of the above cases, you may build files into your custom Ceph container image or use the Vault agent injector to securely add files via annotations on the CephNFS spec (passed to the NFS server pods).","title":"Kerberos configuration"},{"location":"Storage-Configuration/NFS/nfs-security/#nfs-service-principals","text":"The Kerberos service principal used by Rook's CephNFS servers to authenticate with the Kerberos server is built up from 3 components: 1. the configured from spec.security.kerberos.principalName that acts as the service name 2. the hostname of the server on which NFS-Ganesha is running which is in turn built up from the namespace and name of the CephNFS resource, joined by a hyphen. e.g., rooknamespace-nfsname 3. the realm as configured by the kerberos config file(s) from spec.security.kerberos.configFiles The full service principal name is constructed as /-@ . For ease of scaling up or down CephNFS clusters, this principal is used for all servers in the CephNFS cluster. Users must add this service principal to their Kerberos server configuration. Example For a CephNFS named \"fileshare\" in the \"business-unit\" Kubernetes namespace that has a principalName of \"sales-apac\" and where the Kerberos realm is \"EXAMPLE.NET\", the full principal name will be sales-apac/business-unit-fileshare@EXAMPLE.NET . Advanced spec.security.kerberos.principalName corresponds directly to NFS-Ganesha's NFS_KRB5:PrincipalName config. See the NFS-Ganesha wiki for more details.","title":"NFS service principals"},{"location":"Storage-Configuration/NFS/nfs-security/#kerberos-domain-name","text":"The kerberos domain name is used to setup the domain name in /etc/idmapd.conf. This domain name is used by idmap to map the kerberos credential to the user uid/gid. Without this configured, NFS-Ganesha will be unable to map the Kerberos principal to an uid/gid and will instead use the configured anonuid/anongid (default: -2) when accessing the local filesystem.","title":"Kerberos domain name"},{"location":"Storage-Configuration/NFS/nfs/","text":"NFS storage can be mounted with read/write permission from multiple pods. NFS storage may be especially useful for leveraging an existing Rook cluster to provide NFS storage for legacy applications that assume an NFS client connection. Such applications may not have been migrated to Kubernetes or might not yet support PVCs. Rook NFS storage can provide access to the same network filesystem storage from within the Kubernetes cluster via PVC while simultaneously providing access via direct client connection from within or outside of the Kubernetes cluster. Warning Simultaneous access to NFS storage from Pods and from from external clients complicates NFS user ID mapping significantly. 
Client IDs mapped from external clients will not be the same as the IDs associated with the NFS CSI driver, which mount exports for Kubernetes pods. Warning Due to a number of Ceph issues and changes, Rook officially only supports Ceph v16.2.7 or higher for CephNFS. If you are using an earlier version, upgrade your Ceph version following the advice given in Rook's v1.9 NFS docs . Note CephNFSes support NFSv4.1+ access only. Serving earlier protocols inhibits responsiveness after a server restart. Prerequisites \u00b6 This guide assumes you have created a Rook cluster as explained in the main quickstart guide as well as a Ceph filesystem which will act as the backing storage for NFS. Many samples reference the CephNFS and CephFilesystem example manifests here and here . Creating an NFS cluster \u00b6 Create the NFS cluster by specifying the desired settings documented for the NFS CRD . Creating Exports \u00b6 When a CephNFS is first created, all NFS daemons within the CephNFS cluster will share a configuration with no exports defined. When creating an export, it is necessary to specify the CephFilesystem which will act as the backing storage for the NFS export. RADOS Gateways (RGWs), provided by CephObjectStores , can also be used as backing storage for NFS exports if desired. Using the Ceph Dashboard \u00b6 Exports can be created via the Ceph dashboard as well. To enable and use the Ceph dashboard in Rook, see here . Using the Ceph CLI \u00b6 The Ceph CLI can be used from the Rook toolbox pod to create and manage NFS exports. To do so, first ensure the necessary Ceph mgr modules are enabled, if necessary, and that the Ceph orchestrator backend is set to Rook. Enable the Ceph orchestrator if necessary \u00b6 Required for Ceph v16.2.7 and below Optional for Ceph v16.2.8 and above Must be disabled for Ceph v17.2.1 due to a Ceph regression 1 2 3 ceph mgr module enable rook ceph mgr module enable nfs ceph orch set backend rook Ceph's NFS CLI can create NFS exports that are backed by CephFS (a CephFilesystem) or Ceph Object Gateway (a CephObjectStore). cluster_id or cluster-name in the Ceph NFS docs normally refers to the name of the NFS cluster, which is the CephNFS name in the Rook context. For creating an NFS export for the CephNFS and CephFilesystem example manifests, the below command can be used. This creates an export for the /test pseudo path. 1 ceph nfs export create cephfs my-nfs /test myfs The below command will list the current NFS exports for the example CephNFS cluster, which will give the output shown for the current example. 1 2 3 4 $ ceph nfs export ls my-nfs [ \"/test\" ] The simple /test export's info can be listed as well. Notice from the example that only NFS protocol v4 via TCP is supported. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 $ ceph nfs export info my-nfs /test { \"export_id\": 1, \"path\": \"/\", \"cluster_id\": \"my-nfs\", \"pseudo\": \"/test\", \"access_type\": \"RW\", \"squash\": \"none\", \"security_label\": true, \"protocols\": [ 4 ], \"transports\": [ \"TCP\" ], \"fsal\": { \"name\": \"CEPH\", \"user_id\": \"nfs.my-nfs.1\", \"fs_name\": \"myfs\" }, \"clients\": [] } If you are done managing NFS exports and don't need the Ceph orchestrator module enabled for anything else, it may be preferable to disable the Rook and NFS mgr modules to free up a small amount of RAM in the Ceph mgr Pod. 1 2 ceph orch set backend \"\" ceph mgr module disable rook Mounting exports \u00b6 Each CephNFS server has a unique Kubernetes Service. 
This is because NFS clients can't readily handle NFS failover. CephNFS services are named with the pattern rook-ceph-nfs--  is a unique letter ID (e.g., a, b, c, etc.) for a given NFS server. For example, rook-ceph-nfs-my-nfs-a . For each NFS client, choose an NFS service to use for the connection. With NFS v4, you can mount an export by its path using a mount command like below. You can mount all exports at once by omitting the export path and leaving the directory as just / . 1 mount -t nfs4 -o proto=tcp :/  Exposing the NFS server outside of the Kubernetes cluster \u00b6 Use a LoadBalancer Service to expose an NFS server (and its exports) outside of the Kubernetes cluster. The Service's endpoint can be used as the NFS service address when mounting the export manually . We provide an example Service here: deploy/examples/nfs-load-balancer.yaml . NFS Security \u00b6 Security options for NFS are documented here . Ceph CSI NFS provisioner and NFS CSI driver \u00b6 The NFS CSI provisioner and driver are documented here Advanced configuration \u00b6 Advanced NFS configuration is documented here Known issues \u00b6 Known issues are documented on the NFS CRD page .","title":"NFS Storage Overview"},{"location":"Storage-Configuration/NFS/nfs/#prerequisites","text":"This guide assumes you have created a Rook cluster as explained in the main quickstart guide as well as a Ceph filesystem which will act as the backing storage for NFS. Many samples reference the CephNFS and CephFilesystem example manifests here and here .","title":"Prerequisites"},{"location":"Storage-Configuration/NFS/nfs/#creating-an-nfs-cluster","text":"Create the NFS cluster by specifying the desired settings documented for the NFS CRD .","title":"Creating an NFS cluster"},{"location":"Storage-Configuration/NFS/nfs/#creating-exports","text":"When a CephNFS is first created, all NFS daemons within the CephNFS cluster will share a configuration with no exports defined. When creating an export, it is necessary to specify the CephFilesystem which will act as the backing storage for the NFS export. RADOS Gateways (RGWs), provided by CephObjectStores , can also be used as backing storage for NFS exports if desired.","title":"Creating Exports"},{"location":"Storage-Configuration/NFS/nfs/#using-the-ceph-dashboard","text":"Exports can be created via the Ceph dashboard as well. To enable and use the Ceph dashboard in Rook, see here .","title":"Using the Ceph Dashboard"},{"location":"Storage-Configuration/NFS/nfs/#using-the-ceph-cli","text":"The Ceph CLI can be used from the Rook toolbox pod to create and manage NFS exports. To do so, first ensure the necessary Ceph mgr modules are enabled, if necessary, and that the Ceph orchestrator backend is set to Rook.","title":"Using the Ceph CLI"},{"location":"Storage-Configuration/NFS/nfs/#enable-the-ceph-orchestrator-if-necessary","text":"Required for Ceph v16.2.7 and below Optional for Ceph v16.2.8 and above Must be disabled for Ceph v17.2.1 due to a Ceph regression 1 2 3 ceph mgr module enable rook ceph mgr module enable nfs ceph orch set backend rook Ceph's NFS CLI can create NFS exports that are backed by CephFS (a CephFilesystem) or Ceph Object Gateway (a CephObjectStore). cluster_id or cluster-name in the Ceph NFS docs normally refers to the name of the NFS cluster, which is the CephNFS name in the Rook context. For creating an NFS export for the CephNFS and CephFilesystem example manifests, the below command can be used. This creates an export for the /test pseudo path. 
1 ceph nfs export create cephfs my-nfs /test myfs The below command will list the current NFS exports for the example CephNFS cluster, which will give the output shown for the current example. 1 2 3 4 $ ceph nfs export ls my-nfs [ \"/test\" ] The simple /test export's info can be listed as well. Notice from the example that only NFS protocol v4 via TCP is supported. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 $ ceph nfs export info my-nfs /test { \"export_id\": 1, \"path\": \"/\", \"cluster_id\": \"my-nfs\", \"pseudo\": \"/test\", \"access_type\": \"RW\", \"squash\": \"none\", \"security_label\": true, \"protocols\": [ 4 ], \"transports\": [ \"TCP\" ], \"fsal\": { \"name\": \"CEPH\", \"user_id\": \"nfs.my-nfs.1\", \"fs_name\": \"myfs\" }, \"clients\": [] } If you are done managing NFS exports and don't need the Ceph orchestrator module enabled for anything else, it may be preferable to disable the Rook and NFS mgr modules to free up a small amount of RAM in the Ceph mgr Pod. 1 2 ceph orch set backend \"\" ceph mgr module disable rook","title":"Enable the Ceph orchestrator if necessary"},{"location":"Storage-Configuration/NFS/nfs/#mounting-exports","text":"Each CephNFS server has a unique Kubernetes Service. This is because NFS clients can't readily handle NFS failover. CephNFS services are named with the pattern rook-ceph-nfs--  is a unique letter ID (e.g., a, b, c, etc.) for a given NFS server. For example, rook-ceph-nfs-my-nfs-a . For each NFS client, choose an NFS service to use for the connection. With NFS v4, you can mount an export by its path using a mount command like below. You can mount all exports at once by omitting the export path and leaving the directory as just / . 1 mount -t nfs4 -o proto=tcp :/ ","title":"Mounting exports"},{"location":"Storage-Configuration/NFS/nfs/#exposing-the-nfs-server-outside-of-the-kubernetes-cluster","text":"Use a LoadBalancer Service to expose an NFS server (and its exports) outside of the Kubernetes cluster. The Service's endpoint can be used as the NFS service address when mounting the export manually . We provide an example Service here: deploy/examples/nfs-load-balancer.yaml .","title":"Exposing the NFS server outside of the Kubernetes cluster"},{"location":"Storage-Configuration/NFS/nfs/#nfs-security","text":"Security options for NFS are documented here .","title":"NFS Security"},{"location":"Storage-Configuration/NFS/nfs/#ceph-csi-nfs-provisioner-and-nfs-csi-driver","text":"The NFS CSI provisioner and driver are documented here","title":"Ceph CSI NFS provisioner and NFS CSI driver"},{"location":"Storage-Configuration/NFS/nfs/#advanced-configuration","text":"Advanced NFS configuration is documented here","title":"Advanced configuration"},{"location":"Storage-Configuration/NFS/nfs/#known-issues","text":"Known issues are documented on the NFS CRD page .","title":"Known issues"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-claim/","text":"Rook supports the creation of new buckets and access to existing buckets via two custom resources: an Object Bucket Claim (OBC) is custom resource which requests a bucket (new or existing) and is described by a Custom Resource Definition (CRD) shown below. an Object Bucket (OB) is a custom resource automatically generated when a bucket is provisioned. It is a global resource, typically not visible to non-admin users, and contains information specific to the bucket. It is described by an OB CRD, also shown below. 
An OBC references a storage class which is created by an administrator. The storage class defines whether the bucket requested is a new bucket or an existing bucket. It also defines the bucket retention policy. Users request a new or existing bucket by creating an OBC which is shown below. The ceph provisioner detects the OBC and creates a new bucket or grants access to an existing bucket, depending the storage class referenced in the OBC. It also generates a Secret which provides credentials to access the bucket, and a ConfigMap which contains the bucket's endpoint. Application pods consume the information in the Secret and ConfigMap to access the bucket. Please note that to make provisioner watch the cluster namespace only you need to set ROOK_OBC_WATCH_OPERATOR_NAMESPACE to true in the operator manifest, otherwise it watches all namespaces. Example \u00b6 OBC Custom Resource \u00b6 1 2 3 4 5 6 7 8 9 10 11 12 apiVersion : objectbucket.io/v1alpha1 kind : ObjectBucketClaim metadata : name : ceph-bucket [1] namespace : rook-ceph [2] spec : bucketName : [ 3 ] generateBucketName : photo-booth [4] storageClassName : rook-ceph-bucket [5] additionalConfig : [ 6 ] maxObjects : \"1000\" maxSize : \"2G\" name of the ObjectBucketClaim . This name becomes the name of the Secret and ConfigMap. namespace (optional) of the ObjectBucketClaim , which is also the namespace of the ConfigMap and Secret. bucketName name of the bucket . Not recommended for new buckets since names must be unique within an entire object store. generateBucketName value becomes the prefix for a randomly generated name, if supplied then bucketName must be empty. If both bucketName and generateBucketName are supplied then BucketName has precedence and GenerateBucketName is ignored. If both bucketName and generateBucketName are blank or omitted then the storage class is expected to contain the name of an existing bucket. It's an error if all three bucket related names are blank or omitted. storageClassName which defines the StorageClass which contains the names of the bucket provisioner, the object-store and specifies the bucket retention policy. additionalConfig is an optional list of key-value pairs used to define attributes specific to the bucket being provisioned by this OBC. This information is typically tuned to a particular bucket provisioner and may limit application portability. Options supported: maxObjects : The maximum number of objects in the bucket maxSize : The maximum size of the bucket, please note minimum recommended value is 4K. OBC Custom Resource after Bucket Provisioning \u00b6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 apiVersion : objectbucket.io/v1alpha1 kind : ObjectBucketClaim metadata : creationTimestamp : \"2019-10-18T09:54:01Z\" generation : 2 name : ceph-bucket namespace : default [1] resourceVersion : \"559491\" spec : ObjectBucketName : obc-default-ceph-bucket [2] additionalConfig : null bucketName : photo-booth-c1178d61-1517-431f-8408-ec4c9fa50bee [3] storageClassName : rook-ceph-bucket [4] status : phase : Bound [5] namespace where OBC got created. ObjectBucketName generated OB name created using name space and OBC name. the generated (in this case), unique bucket name for the new bucket. name of the storage class from OBC got created. phases of bucket creation: Pending : the operator is processing the request. Bound : the operator finished processing the request and linked the OBC and OB Released : the OB has been deleted, leaving the OBC unclaimed but unavailable. Failed : not currently set. 
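Because the generated Secret and ConfigMap take the name of the ObjectBucketClaim, the claim's phase, credentials, and endpoint can be inspected directly. A quick sketch, assuming the ceph-bucket example above was created in the default namespace:

```shell
kubectl -n default get objectbucketclaim ceph-bucket -o yaml   # status.phase should be Bound
kubectl -n default get secret ceph-bucket -o yaml              # AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
kubectl -n default get configmap ceph-bucket -o yaml           # BUCKET_HOST / BUCKET_PORT / BUCKET_NAME
```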
App Pod \u00b6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 apiVersion : v1 kind : Pod metadata : name : app-pod namespace : dev-user spec : containers : - name : mycontainer image : redis envFrom : [ 1 ] - configMapRef : name : ceph-bucket [2] - secretRef : name : ceph-bucket [3] use env: if mapping of the defined key names to the env var names used by the app is needed. makes available to the pod as env variables: BUCKET_HOST , BUCKET_PORT , BUCKET_NAME makes available to the pod as env variables: AWS_ACCESS_KEY_ID , AWS_SECRET_ACCESS_KEY StorageClass \u00b6 1 2 3 4 5 6 7 8 9 10 11 12 apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-ceph-bucket labels : aws-s3/object [1] provisioner : rook-ceph.ceph.rook.io/bucket [2] parameters : [ 3 ] objectStoreName : my-store objectStoreNamespace : rook-ceph bucketName : ceph-bucket [4] reclaimPolicy : Delete [5] label (optional) here associates this StorageClass to a specific provisioner. provisioner responsible for handling OBCs referencing this StorageClass . all parameter required. bucketName is required for access to existing buckets but is omitted when provisioning new buckets. Unlike greenfield provisioning, the brownfield bucket name appears in the StorageClass , not the OBC . rook-ceph provisioner decides how to treat the reclaimPolicy when an OBC is deleted for the bucket. See explanation as specified in Kubernetes Delete = physically delete the bucket. Retain = do not physically delete the bucket.","title":"Bucket Claim"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-claim/#example","text":"","title":"Example"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-claim/#obc-custom-resource","text":"1 2 3 4 5 6 7 8 9 10 11 12 apiVersion : objectbucket.io/v1alpha1 kind : ObjectBucketClaim metadata : name : ceph-bucket [1] namespace : rook-ceph [2] spec : bucketName : [ 3 ] generateBucketName : photo-booth [4] storageClassName : rook-ceph-bucket [5] additionalConfig : [ 6 ] maxObjects : \"1000\" maxSize : \"2G\" name of the ObjectBucketClaim . This name becomes the name of the Secret and ConfigMap. namespace (optional) of the ObjectBucketClaim , which is also the namespace of the ConfigMap and Secret. bucketName name of the bucket . Not recommended for new buckets since names must be unique within an entire object store. generateBucketName value becomes the prefix for a randomly generated name, if supplied then bucketName must be empty. If both bucketName and generateBucketName are supplied then BucketName has precedence and GenerateBucketName is ignored. If both bucketName and generateBucketName are blank or omitted then the storage class is expected to contain the name of an existing bucket. It's an error if all three bucket related names are blank or omitted. storageClassName which defines the StorageClass which contains the names of the bucket provisioner, the object-store and specifies the bucket retention policy. additionalConfig is an optional list of key-value pairs used to define attributes specific to the bucket being provisioned by this OBC. This information is typically tuned to a particular bucket provisioner and may limit application portability. 
Options supported: maxObjects : The maximum number of objects in the bucket maxSize : The maximum size of the bucket, please note minimum recommended value is 4K.","title":"OBC Custom Resource"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-claim/#obc-custom-resource-after-bucket-provisioning","text":"1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 apiVersion : objectbucket.io/v1alpha1 kind : ObjectBucketClaim metadata : creationTimestamp : \"2019-10-18T09:54:01Z\" generation : 2 name : ceph-bucket namespace : default [1] resourceVersion : \"559491\" spec : ObjectBucketName : obc-default-ceph-bucket [2] additionalConfig : null bucketName : photo-booth-c1178d61-1517-431f-8408-ec4c9fa50bee [3] storageClassName : rook-ceph-bucket [4] status : phase : Bound [5] namespace where OBC got created. ObjectBucketName generated OB name created using name space and OBC name. the generated (in this case), unique bucket name for the new bucket. name of the storage class from OBC got created. phases of bucket creation: Pending : the operator is processing the request. Bound : the operator finished processing the request and linked the OBC and OB Released : the OB has been deleted, leaving the OBC unclaimed but unavailable. Failed : not currently set.","title":"OBC Custom Resource after Bucket Provisioning"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-claim/#app-pod","text":"1 2 3 4 5 6 7 8 9 10 11 12 13 14 apiVersion : v1 kind : Pod metadata : name : app-pod namespace : dev-user spec : containers : - name : mycontainer image : redis envFrom : [ 1 ] - configMapRef : name : ceph-bucket [2] - secretRef : name : ceph-bucket [3] use env: if mapping of the defined key names to the env var names used by the app is needed. makes available to the pod as env variables: BUCKET_HOST , BUCKET_PORT , BUCKET_NAME makes available to the pod as env variables: AWS_ACCESS_KEY_ID , AWS_SECRET_ACCESS_KEY","title":"App Pod"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-claim/#storageclass","text":"1 2 3 4 5 6 7 8 9 10 11 12 apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-ceph-bucket labels : aws-s3/object [1] provisioner : rook-ceph.ceph.rook.io/bucket [2] parameters : [ 3 ] objectStoreName : my-store objectStoreNamespace : rook-ceph bucketName : ceph-bucket [4] reclaimPolicy : Delete [5] label (optional) here associates this StorageClass to a specific provisioner. provisioner responsible for handling OBCs referencing this StorageClass . all parameter required. bucketName is required for access to existing buckets but is omitted when provisioning new buckets. Unlike greenfield provisioning, the brownfield bucket name appears in the StorageClass , not the OBC . rook-ceph provisioner decides how to treat the reclaimPolicy when an OBC is deleted for the bucket. See explanation as specified in Kubernetes Delete = physically delete the bucket. Retain = do not physically delete the bucket.","title":"StorageClass"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-notifications/","text":"Rook supports the creation of bucket notifications via two custom resources: a CephBucketNotification is a custom resource the defines: topic, events and filters of a bucket notification, and is described by a Custom Resource Definition (CRD) shown below. Bucket notifications are associated with a bucket by setting labels on the Object Bucket claim (OBC). 
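For example, labels of the following form on an ObjectBucketClaim associate its bucket with CephBucketNotifications named my-notification and another-notification; this fragment is lifted from the full OBC example further down this page.

```yaml
metadata:
  labels:
    bucket-notification-my-notification: my-notification
    bucket-notification-another-notification: another-notification
```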
See the Ceph documentation for detailed information: Bucket Notifications - Ceph Object Gateway - Ceph Documentation . a CephBucketTopic is a custom resource which represents a bucket notification topic and is described by a CRD shown below. A bucket notification topic represents an endpoint (or a \"topic\" inside this endpoint) to which bucket notifications could be sent. Notifications \u00b6 A CephBucketNotification defines what bucket actions trigger the notification and which topic to send notifications to. A CephBucketNotification may also define a filter, based on the object's name and other object attributes. Notifications can be associated with buckets created via ObjectBucketClaims by adding labels to an ObjectBucketClaim with the following format: 1 bucket-notification-<notification-name> : <notification-name> The CephBucketTopic, CephBucketNotification and ObjectBucketClaim must all belong to the same namespace. If a bucket was created manually (not via an ObjectBucketClaim), notifications on this bucket should also be created manually. However, topics in these notifications may reference topics that were created via CephBucketTopic resources. Topics \u00b6 A CephBucketTopic represents an endpoint (of types: Kafka, AMQP0.9.1 or HTTP), or a specific resource inside this endpoint (e.g., a Kafka or an AMQP topic, or a specific URI in an HTTP server). The CephBucketTopic also holds any additional info needed for a CephObjectStore's RADOS Gateways (RGW) to connect to the endpoint. Topics don't belong to a specific bucket or notification. Notifications from multiple buckets may be sent to the same topic, and one bucket (via multiple CephBucketNotifications) may send notifications to multiple topics. Notification Reliability and Delivery \u00b6 Notifications may be sent synchronously, as part of the operation that triggered them. In this mode, the operation is acknowledged only after the notification is sent to the topic\u2019s configured endpoint, which means that the round trip time of the notification is added to the latency of the operation itself. The original triggering operation will still be considered successful even if the notification fails with an error, cannot be delivered, or times out. Notifications may also be sent asynchronously. They will be committed into persistent storage and then asynchronously sent to the topic\u2019s configured endpoint. In this case, the only latency added to the original operation is that of committing the notification to persistent storage. If the notification fails with an error, cannot be delivered, or times out, it will be retried until successfully acknowledged.
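Which delivery mode a topic uses is controlled by the persistent field of the CephBucketTopic, shown in full in the example below; this fragment is only a sketch, and the HTTP endpoint URI is illustrative.

```yaml
spec:
  persistent: true    # commit notifications and deliver asynchronously; false (the default) sends synchronously
  endpoint:
    http:
      uri: http://my-notification-endpoint:8080
```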
Example \u00b6 CephBucketTopic Custom Resource \u00b6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 apiVersion : ceph.rook.io/v1 kind : CephBucketTopic metadata : name : my-topic [1] namespace : my-app-space [2] spec : objectStoreName : my-store [3] objectStoreNamespace : rook-ceph [4] opaqueData : my@email.com [5] persistent : false [6] endpoint : [ 7 ] http : [ 8 ] uri : http://my-notification-endpoint:8080 # uri: http://my-notification-endpoint:8080/my-topic # uri: https://my-notification-endpoint:8443 disableVerifySSL : true [9] sendCloudEvents : false [10] # amqp: [11] # uri: amqp://my-rabbitmq-service:5672 # uri: amqp://my-rabbitmq-service:5672/vhost1 # uri: amqps://user@password:my-rabbitmq-service:5672 # disableVerifySSL: true [12] # ackLevel: broker [13] # exchange: my-exchange [14] # kafka: [15] # uri: kafka://my-kafka-service:9092 # disableVerifySSL: true [16] # ackLevel: broker [17] # useSSL: false [18] name of the CephBucketTopic In case of AMQP endpoint, the name is used for the AMQP topic (\u201crouting key\u201d for a topic exchange) In case of Kafka endpoint, the name is used as the Kafka topic namespace (optional) of the CephBucketTopic . Should match the namespace of the CephBucketNotification associated with this CephBucketTopic, and the OBC with the label referencing the CephBucketNotification objectStoreName is the name of the object store in which the topic should be created. This must be the same object store used for the buckets associated with the notifications referencing this topic. objectStoreNamespace is the namespace of the object store in which the topic should be created opaqueData (optional) is added to all notifications triggered by a notifications associated with the topic persistent (optional) indicates whether notifications to this endpoint are persistent (=asynchronous) or sent synchronously (\u201cfalse\u201d by default) endpoint to which to send the notifications to. Exactly one of the endpoints must be defined: http , amqp , kafka http (optional) hold the spec for an HTTP endpoint. The format of the URI would be: http[s]://[:][/] port defaults to: 80/443 for HTTP/S accordingly disableVerifySSL indicates whether the RGW is going to verify the SSL certificate of the HTTP server in case HTTPS is used (\"false\" by default) sendCloudEvents : (optional) send the notifications with the CloudEvents header . Supported for Ceph Quincy (v17) or newer (\"false\" by default) amqp (optional) hold the spec for an AMQP endpoint. The format of the URI would be: amqp[s]://[:@][:][/] port defaults to: 5672/5671 for AMQP/S accordingly user/password defaults to: guest/guest user/password may only be provided if HTTPS is used with the RGW. If not, topic creation request will be rejected vhost defaults to: \u201c/\u201d disableVerifySSL (optional) indicates whether the RGW is going to verify the SSL certificate of the AMQP server in case AMQPS is used (\"false\" by default) ackLevel (optional) indicates what kind of ack the RGW is waiting for after sending the notifications: \u201cnone\u201d: message is considered \u201cdelivered\u201d if sent to broker \u201cbroker\u201d: message is considered \u201cdelivered\u201d if acked by broker (default) \u201croutable\u201d: message is considered \u201cdelivered\u201d if broker can route to a consumer exchange in the AMQP broker that would route the notifications. Different topics pointing to the same endpoint must use the same exchange kafka (optional) hold the spec for a Kafka endpoint. 
The format of the URI would be: kafka://[:@][: :  # are ignored by the operator's bucket notifications provisioning mechanism some-label : some-value # the following label adds notifications to this bucket bucket-notification-my-notification : my-notification bucket-notification-another-notification : another-notification spec : generateBucketName : ceph-bkt storageClassName : rook-ceph-delete-bucket","title":"Object Bucket Notifications"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-notifications/#notifications","text":"A CephBucketNotification defines what bucket actions trigger the notification and which topic to send notifications to. A CephBucketNotification may also define a filter, based on the object's name and other object attributes. Notifications can be associated with buckets created via ObjectBucketClaims by adding labels to an ObjectBucketClaim with the following format: 1 bucket-notification- :  The CephBucketTopic, CephBucketNotification and ObjectBucketClaim must all belong to the same namespace. If a bucket was created manually (not via an ObjectBucketClaim), notifications on this bucket should also be created manually. However, topics in these notifications may reference topics that were created via CephBucketTopic resources.","title":"Notifications"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-notifications/#topics","text":"A CephBucketTopic represents an endpoint (of types: Kafka, AMQP0.9.1 or HTTP), or a specific resource inside this endpoint (e.g a Kafka or an AMQP topic, or a specific URI in an HTTP server). The CephBucketTopic also holds any additional info needed for a CephObjectStore's RADOS Gateways (RGW) to connect to the endpoint. Topics don't belong to a specific bucket or notification. Notifications from multiple buckets may be sent to the same topic, and one bucket (via multiple CephBucketNotifications) may send notifications to multiple topics.","title":"Topics"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-notifications/#notification-reliability-and-delivery","text":"Notifications may be sent synchronously, as part of the operation that triggered them. In this mode, the operation is acknowledged only after the notification is sent to the topic\u2019s configured endpoint, which means that the round trip time of the notification is added to the latency of the operation itself. The original triggering operation will still be considered as successful even if the notification fail with an error, cannot be delivered or times out. Notifications may also be sent asynchronously. They will be committed into persistent storage and then asynchronously sent to the topic\u2019s configured endpoint. In this case, the only latency added to the original operation is of committing the notification to persistent storage. 
If the notification fail with an error, cannot be delivered or times out, it will be retried until successfully acknowledged.","title":"Notification Reliability and Delivery"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-notifications/#example","text":"","title":"Example"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-notifications/#cephbuckettopic-custom-resource","text":"1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 apiVersion : ceph.rook.io/v1 kind : CephBucketTopic metadata : name : my-topic [1] namespace : my-app-space [2] spec : objectStoreName : my-store [3] objectStoreNamespace : rook-ceph [4] opaqueData : my@email.com [5] persistent : false [6] endpoint : [ 7 ] http : [ 8 ] uri : http://my-notification-endpoint:8080 # uri: http://my-notification-endpoint:8080/my-topic # uri: https://my-notification-endpoint:8443 disableVerifySSL : true [9] sendCloudEvents : false [10] # amqp: [11] # uri: amqp://my-rabbitmq-service:5672 # uri: amqp://my-rabbitmq-service:5672/vhost1 # uri: amqps://user@password:my-rabbitmq-service:5672 # disableVerifySSL: true [12] # ackLevel: broker [13] # exchange: my-exchange [14] # kafka: [15] # uri: kafka://my-kafka-service:9092 # disableVerifySSL: true [16] # ackLevel: broker [17] # useSSL: false [18] name of the CephBucketTopic In case of AMQP endpoint, the name is used for the AMQP topic (\u201crouting key\u201d for a topic exchange) In case of Kafka endpoint, the name is used as the Kafka topic namespace (optional) of the CephBucketTopic . Should match the namespace of the CephBucketNotification associated with this CephBucketTopic, and the OBC with the label referencing the CephBucketNotification objectStoreName is the name of the object store in which the topic should be created. This must be the same object store used for the buckets associated with the notifications referencing this topic. objectStoreNamespace is the namespace of the object store in which the topic should be created opaqueData (optional) is added to all notifications triggered by a notifications associated with the topic persistent (optional) indicates whether notifications to this endpoint are persistent (=asynchronous) or sent synchronously (\u201cfalse\u201d by default) endpoint to which to send the notifications to. Exactly one of the endpoints must be defined: http , amqp , kafka http (optional) hold the spec for an HTTP endpoint. The format of the URI would be: http[s]://[:][/] port defaults to: 80/443 for HTTP/S accordingly disableVerifySSL indicates whether the RGW is going to verify the SSL certificate of the HTTP server in case HTTPS is used (\"false\" by default) sendCloudEvents : (optional) send the notifications with the CloudEvents header . Supported for Ceph Quincy (v17) or newer (\"false\" by default) amqp (optional) hold the spec for an AMQP endpoint. The format of the URI would be: amqp[s]://[:@][:][/] port defaults to: 5672/5671 for AMQP/S accordingly user/password defaults to: guest/guest user/password may only be provided if HTTPS is used with the RGW. 
If not, topic creation request will be rejected vhost defaults to: \u201c/\u201d disableVerifySSL (optional) indicates whether the RGW is going to verify the SSL certificate of the AMQP server in case AMQPS is used (\"false\" by default) ackLevel (optional) indicates what kind of ack the RGW is waiting for after sending the notifications: \u201cnone\u201d: message is considered \u201cdelivered\u201d if sent to broker \u201cbroker\u201d: message is considered \u201cdelivered\u201d if acked by broker (default) \u201croutable\u201d: message is considered \u201cdelivered\u201d if broker can route to a consumer exchange in the AMQP broker that would route the notifications. Different topics pointing to the same endpoint must use the same exchange kafka (optional) hold the spec for a Kafka endpoint. The format of the URI would be: kafka://[:@][: :  # are ignored by the operator's bucket notifications provisioning mechanism some-label : some-value # the following label adds notifications to this bucket bucket-notification-my-notification : my-notification bucket-notification-another-notification : another-notification spec : generateBucketName : ceph-bkt storageClassName : rook-ceph-delete-bucket","title":"OBC Custom Resource"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/","text":"Multisite is a feature of Ceph that allows object stores to replicate their data over multiple Ceph clusters. Multisite also allows object stores to be independent and isolated from other object stores in a cluster. When a ceph-object-store is created without the zone section; a realm, zone group, and zone is created with the same name as the ceph-object-store. Since it is the only ceph-object-store in the realm, the data in the ceph-object-store remain independent and isolated from others on the same cluster. When a ceph-object-store is created with the zone section, the ceph-object-store will join a custom created zone, zone group, and realm each with a different names than its own. This allows the ceph-object-store to replicate its data over multiple Ceph clusters. To review core multisite concepts please read the ceph-multisite design overview . Prerequisites \u00b6 This guide assumes a Rook cluster as explained in the Quickstart . Creating Object Multisite \u00b6 If an admin wants to set up multisite on a Rook Ceph cluster, the following resources must be created: A realm A zonegroup A zone A ceph object store with the zone section object-multisite.yaml in the examples directory can be used to create the multisite CRDs. 1 kubectl create -f object-multisite.yaml The first zone group created in a realm is the master zone group. The first zone created in a zone group is the master zone. When a non-master zone or non-master zone group is created, the zone group or zone is not in the Ceph Radosgw Multisite Period until an object-store is created in that zone (and zone group). The zone will create the pools for the object-store(s) that are in the zone to use. When one of the multisite CRs (realm, zone group, zone) is deleted the underlying ceph realm/zone group/zone is not deleted, neither are the pools created by the zone. See the \"Multisite Cleanup\" section for more information. For more information on the multisite CRDs, see the related CRDs: - CephObjectRealm - CephObjectZoneGroup - CephObjectZone Pulling a Realm \u00b6 If an admin wants to sync data from another cluster, the admin needs to pull a realm on a Rook Ceph cluster from another Rook Ceph (or Ceph) cluster. 
To begin doing this, the admin needs 2 pieces of information: An endpoint from the realm being pulled from The access key and the system key of the system user from the realm being pulled from. Getting the Pull Endpoint \u00b6 To pull a Ceph realm from a remote Ceph cluster, an endpoint must be added to the CephObjectRealm's pull section in the spec . This endpoint must be from the master zone in the master zone group of that realm. If an admin does not know of an endpoint that fits this criteria, the admin can find such an endpoint on the remote Ceph cluster (via the tool box if it is a Rook Ceph Cluster) by running: 1 2 3 4 5 6 $ radosgw-admin zonegroup get --rgw-realm = $REALM_NAME --rgw-zonegroup = $MASTER_ZONEGROUP_NAME { ... \"endpoints\": [http://10.17.159.77:80], ... } A list of endpoints in the master zone group in the master zone is in the endpoints section of the JSON output of the zonegoup get command. This endpoint must also be resolvable from the new Rook Ceph cluster. To test this run the curl command on the endpoint: 1 2 $ curl -L http://10.17.159.77:80 anonymous Finally add the endpoint to the pull section of the CephObjectRealm's spec. The CephObjectRealm should have the same name as the CephObjectRealm/Ceph realm it is pulling from. 1 2 3 4 5 6 7 8 apiVersion : ceph.rook.io/v1 kind : CephObjectRealm metadata : name : realm-a namespace : rook-ceph spec : pull : endpoint : http://10.17.159.77:80 Getting Realm Access Key and Secret Key \u00b6 The access key and secret key of the system user are keys that allow other Ceph clusters to pull the realm of the system user. Getting the Realm Access Key and Secret Key from the Rook Ceph Cluster \u00b6 System User for Multisite \u00b6 When an admin creates a ceph-object-realm a system user automatically gets created for the realm with an access key and a secret key. This system user has the name \"$REALM_NAME-system-user\". For the example if realm name is realm-a , then uid for the system user is \"realm-a-system-user\". These keys for the user are exported as a kubernetes secret called \"$REALM_NAME-keys\" (ex: realm-a-keys). This system user used by RGW internally for the data replication. Getting keys from k8s secret \u00b6 To get these keys from the cluster the realm was originally created on, run: 1 kubectl -n $ORIGINAL_CLUSTER_NAMESPACE get secrets realm-a-keys -o yaml > realm-a-keys.yaml Edit the realm-a-keys.yaml file, and change the namespace with the namespace that the new Rook Ceph cluster exists in. Then create a kubernetes secret on the pulling Rook Ceph cluster with the same secrets yaml file. 1 kubectl create -f realm-a-keys.yaml Getting the Realm Access Key and Secret Key from a Non Rook Ceph Cluster \u00b6 The access key and the secret key of the system user can be found in the output of running the following command on a non-rook ceph cluster: 1 radosgw-admin user info --uid=\"realm-a-system-user\" 1 2 3 4 5 6 7 8 9 10 11 { ... \"keys\" : [ { \"user\" : \"realm-a-system-user\" \"access_key\" : \"aSw4blZIKV9nKEU5VC0=\" \"secret_key\" : \"JSlDXFt5TlgjSV9QOE9XUndrLiI5JEo9YDBsJg==\" , } ], ... } Then base64 encode the each of the keys and create a .yaml file for the Kubernetes secret from the following template. Only the access-key , secret-key , and namespace sections need to be replaced. 
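Before filling in the template below, the sample keys from the radosgw-admin output above can be encoded like this (any base64 tool works); the encoded strings go into the access-key and secret-key fields.

```shell
echo -n 'aSw4blZIKV9nKEU5VC0=' | base64                         # value for access-key
echo -n 'JSlDXFt5TlgjSV9QOE9XUndrLiI5JEo9YDBsJg==' | base64     # value for secret-key
```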
1 2 3 4 5 6 7 8 9 apiVersion : v1 data : access-key : YVN3NGJsWklLVjluS0VVNVZDMD0= secret-key : SlNsRFhGdDVUbGdqU1Y5UU9FOVhVbmRyTGlJNUpFbzlZREJzSmc9PQ== kind : Secret metadata : name : realm-a-keys namespace : $NEW_ROOK_CLUSTER_NAMESPACE type : kubernetes.io/rook Finally, create a kubernetes secret on the pulling Rook Ceph cluster with the new secrets yaml file. 1 kubectl create -f realm-a-keys.yaml Pulling a Realm on a New Rook Ceph Cluster \u00b6 Once the admin knows the endpoint and the secret for the keys has been created, the admin should create: A CephObjectRealm matching to the realm on the other Ceph cluster, with an endpoint as described above. A CephObjectZoneGroup matching the master zone group name or the master CephObjectZoneGroup from the cluster the realm was pulled from. A CephObjectZone referring to the CephObjectZoneGroup created above. A CephObjectStore referring to the new CephObjectZone resource. object-multisite-pull-realm.yaml (with changes) in the examples directory can be used to create the multisite CRDs. 1 kubectl create -f object-multisite-pull-realm.yaml Scaling a Multisite \u00b6 Scaling the number of gateways that run the synchronization thread to 2 or more can increase the latency of the replication of each S3 object. The recommended way to scale a mutisite configuration is to dissociate the gateway dedicated to the synchronization from gateways that serve clients. The two types of gateways can be deployed by creating two CephObjectStores associated with the same CephObjectZone. The objectstore that deploys the gateway dedicated to the synchronization must have spec.gateway.instances set to 1 , while the objectstore that deploys the client gateways have multiple replicas and should disable the synchronization thread on the gateways by setting spec.gateway.disableMultisiteSyncTraffic to true . 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 --- apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : replication namespace : rook-ceph spec : gateway : port : 80 instances : 1 disableMultisiteSyncTraffic : false zone : name : zone-a --- apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : clients namespace : rook-ceph spec : gateway : port : 80 instances : 5 disableMultisiteSyncTraffic : true zone : name : zone-a Multisite Cleanup \u00b6 Multisite configuration must be cleaned up by hand. Deleting a realm/zone group/zone CR will not delete the underlying Ceph realm, zone group, zone, or the pools associated with a zone. Realm Deletion \u00b6 Changes made to the resource's configuration or deletion of the resource are not reflected on the Ceph cluster. When the ceph-object-realm resource is deleted or modified, the realm is not deleted from the Ceph cluster. Realm deletion must be done via the toolbox. Deleting a Realm \u00b6 The Rook toolbox can modify the Ceph Multisite state via the radosgw-admin command. The following command, run via the toolbox, deletes the realm. 1 radosgw-admin realm delete --rgw-realm=realm-a Zone Group Deletion \u00b6 Changes made to the resource's configuration or deletion of the resource are not reflected on the Ceph cluster. When the ceph-object-zone group resource is deleted or modified, the zone group is not deleted from the Ceph cluster. Zone Group deletion must be done through the toolbox. Deleting a Zone Group \u00b6 The Rook toolbox can modify the Ceph Multisite state via the radosgw-admin command. The following command, run via the toolbox, deletes the zone group. 
1 2 radosgw-admin zonegroup delete --rgw-realm=realm-a --rgw-zonegroup=zone-group-a radosgw-admin period update --commit --rgw-realm=realm-a --rgw-zonegroup=zone-group-a Deleting and Reconfiguring the Ceph Object Zone \u00b6 Changes made to the resource's configuration or deletion of the resource are not reflected on the Ceph cluster. When the ceph-object-zone resource is deleted or modified, the zone is not deleted from the Ceph cluster. Zone deletion must be done through the toolbox. Changing the Master Zone \u00b6 The Rook toolbox can change the master zone in a zone group. 1 2 3 radosgw-admin zone modify --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a --master radosgw-admin zonegroup modify --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --master radosgw-admin period update --commit --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a Deleting Zone \u00b6 The Rook toolbox can modify the Ceph Multisite state via the radosgw-admin command. There are two scenarios possible when deleting a zone. The following commands, run via the toolbox, deletes the zone if there is only one zone in the zone group. 1 2 radosgw-admin zone delete --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a radosgw-admin period update --commit --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a In the other scenario, there are more than one zones in a zone group. Care must be taken when changing which zone is the master zone. Please read the following documentation before running the below commands: The following commands, run via toolboxes, remove the zone from the zone group first, then delete the zone. 1 2 3 4 radosgw-admin zonegroup rm --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a radosgw-admin period update --commit --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a radosgw-admin zone delete --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a radosgw-admin period update --commit --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a When a zone is deleted, the pools for that zone are not deleted. Deleting Pools for a Zone \u00b6 The Rook toolbox can delete pools. Deleting pools should be done with caution. The following documentation on pools should be read before deleting any pools. When a zone is created the following pools are created for each zone: 1 2 3 4 5 6 $ ZONE_NAME.rgw.control $ ZONE_NAME.rgw.meta $ ZONE_NAME.rgw.log $ ZONE_NAME.rgw.buckets.index $ ZONE_NAME.rgw.buckets.non-ec $ ZONE_NAME.rgw.buckets.data Here is an example command to delete the .rgw.buckets.data pool for zone-a. 1 ceph osd pool rm zone-a.rgw.buckets.data zone-a.rgw.buckets.data --yes-i-really-really-mean-it In this command the pool name must be mentioned twice for the pool to be removed. Removing an Object Store from a Zone \u00b6 When an object-store (created in a zone) is deleted, the endpoint for that object store is removed from that zone, via 1 kubectl delete -f object-store.yaml Removing object store(s) from the master zone of the master zone group should be done with caution. When all of these object-stores are deleted the period cannot be updated and that realm cannot be pulled. Configure an Existing Object Store for Multisite \u00b6 When an object store is configured by Rook, it internally creates a zone, zone group, and realm with the same name as the object store. To enable multisite, you will need to create the corresponding zone, zone group, and realm CRs with the same name as the object store. 
For example, to create multisite CRs for an object store named my-store : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 apiVersion : ceph.rook.io/v1 kind : CephObjectRealm metadata : name : my-store namespace : rook-ceph # namespace:cluster --- apiVersion : ceph.rook.io/v1 kind : CephObjectZoneGroup metadata : name : my-store namespace : rook-ceph # namespace:cluster spec : realm : my-store --- apiVersion : ceph.rook.io/v1 kind : CephObjectZone metadata : name : my-store namespace : rook-ceph # namespace:cluster spec : zoneGroup : my-store metadataPool : replicated : size : 3 dataPool : replicated : size : 3 preservePoolsOnDelete : false # recommended to set this value if ingress used for exposing rgw endpoints # customEndpoints: # - \"http://rgw-a.fqdn\" Now modify the existing CephObjectStore CR to exclude pool settings and add a reference to the zone. 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : my-store namespace : rook-ceph # namespace:cluster spec : gateway : port : 80 instances : 1 zone : name : my-store Using custom names \u00b6 If names different from the object store need to be set for the realm, zone, or zone group, first rename them in the backend via toolbox pod, then following the procedure above. 1 2 3 4 radosgw-admin realm rename --rgw-realm=my-store --realm-new-name= radosgw-admin zonegroup rename --rgw-zonegroup=my-store --zonegroup-new-name= --rgw-realm= radosgw-admin zone rename --rgw-zone=my-store --zone-new-name= --rgw-zonegroup= --rgw-realm= radosgw-admin period update --commit Important Renaming in the toolbox must be performed before creating the multisite CRs","title":"Object Store Multisite"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#prerequisites","text":"This guide assumes a Rook cluster as explained in the Quickstart .","title":"Prerequisites"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#creating-object-multisite","text":"If an admin wants to set up multisite on a Rook Ceph cluster, the following resources must be created: A realm A zonegroup A zone A ceph object store with the zone section object-multisite.yaml in the examples directory can be used to create the multisite CRDs. 1 kubectl create -f object-multisite.yaml The first zone group created in a realm is the master zone group. The first zone created in a zone group is the master zone. When a non-master zone or non-master zone group is created, the zone group or zone is not in the Ceph Radosgw Multisite Period until an object-store is created in that zone (and zone group). The zone will create the pools for the object-store(s) that are in the zone to use. When one of the multisite CRs (realm, zone group, zone) is deleted the underlying ceph realm/zone group/zone is not deleted, neither are the pools created by the zone. See the \"Multisite Cleanup\" section for more information. For more information on the multisite CRDs, see the related CRDs: - CephObjectRealm - CephObjectZoneGroup - CephObjectZone","title":"Creating Object Multisite"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#pulling-a-realm","text":"If an admin wants to sync data from another cluster, the admin needs to pull a realm on a Rook Ceph cluster from another Rook Ceph (or Ceph) cluster. 
To begin doing this, the admin needs 2 pieces of information: An endpoint from the realm being pulled from The access key and the system key of the system user from the realm being pulled from.","title":"Pulling a Realm"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#getting-the-pull-endpoint","text":"To pull a Ceph realm from a remote Ceph cluster, an endpoint must be added to the CephObjectRealm's pull section in the spec . This endpoint must be from the master zone in the master zone group of that realm. If an admin does not know of an endpoint that fits this criteria, the admin can find such an endpoint on the remote Ceph cluster (via the tool box if it is a Rook Ceph Cluster) by running: 1 2 3 4 5 6 $ radosgw-admin zonegroup get --rgw-realm = $REALM_NAME --rgw-zonegroup = $MASTER_ZONEGROUP_NAME { ... \"endpoints\": [http://10.17.159.77:80], ... } A list of endpoints in the master zone group in the master zone is in the endpoints section of the JSON output of the zonegoup get command. This endpoint must also be resolvable from the new Rook Ceph cluster. To test this run the curl command on the endpoint: 1 2 $ curl -L http://10.17.159.77:80 anonymous Finally add the endpoint to the pull section of the CephObjectRealm's spec. The CephObjectRealm should have the same name as the CephObjectRealm/Ceph realm it is pulling from. 1 2 3 4 5 6 7 8 apiVersion : ceph.rook.io/v1 kind : CephObjectRealm metadata : name : realm-a namespace : rook-ceph spec : pull : endpoint : http://10.17.159.77:80","title":"Getting the Pull Endpoint"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#getting-realm-access-key-and-secret-key","text":"The access key and secret key of the system user are keys that allow other Ceph clusters to pull the realm of the system user.","title":"Getting Realm Access Key and Secret Key"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#getting-the-realm-access-key-and-secret-key-from-the-rook-ceph-cluster","text":"","title":"Getting the Realm Access Key and Secret Key from the Rook Ceph Cluster"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#system-user-for-multisite","text":"When an admin creates a ceph-object-realm a system user automatically gets created for the realm with an access key and a secret key. This system user has the name \"$REALM_NAME-system-user\". For the example if realm name is realm-a , then uid for the system user is \"realm-a-system-user\". These keys for the user are exported as a kubernetes secret called \"$REALM_NAME-keys\" (ex: realm-a-keys). This system user used by RGW internally for the data replication.","title":"System User for Multisite"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#getting-keys-from-k8s-secret","text":"To get these keys from the cluster the realm was originally created on, run: 1 kubectl -n $ORIGINAL_CLUSTER_NAMESPACE get secrets realm-a-keys -o yaml > realm-a-keys.yaml Edit the realm-a-keys.yaml file, and change the namespace with the namespace that the new Rook Ceph cluster exists in. Then create a kubernetes secret on the pulling Rook Ceph cluster with the same secrets yaml file. 
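One way to make that namespace edit from the command line is sketched below; `rook-ceph-secondary` is only a placeholder for whatever namespace the pulling cluster runs in:

```sh
# Point the exported secret at the namespace of the new (pulling) Rook Ceph cluster.
# "rook-ceph-secondary" is a placeholder namespace; adjust to your environment.
sed -i 's/^  namespace: .*/  namespace: rook-ceph-secondary/' realm-a-keys.yaml
# If kubectl rejects the exported object, it may also be necessary to strip
# read-only fields such as resourceVersion, uid, and creationTimestamp.
```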
1 kubectl create -f realm-a-keys.yaml","title":"Getting keys from k8s secret"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#getting-the-realm-access-key-and-secret-key-from-a-non-rook-ceph-cluster","text":"The access key and the secret key of the system user can be found in the output of running the following command on a non-rook ceph cluster: 1 radosgw-admin user info --uid=\"realm-a-system-user\" 1 2 3 4 5 6 7 8 9 10 11 { ... \"keys\" : [ { \"user\" : \"realm-a-system-user\" \"access_key\" : \"aSw4blZIKV9nKEU5VC0=\" \"secret_key\" : \"JSlDXFt5TlgjSV9QOE9XUndrLiI5JEo9YDBsJg==\" , } ], ... } Then base64 encode the each of the keys and create a .yaml file for the Kubernetes secret from the following template. Only the access-key , secret-key , and namespace sections need to be replaced. 1 2 3 4 5 6 7 8 9 apiVersion : v1 data : access-key : YVN3NGJsWklLVjluS0VVNVZDMD0= secret-key : SlNsRFhGdDVUbGdqU1Y5UU9FOVhVbmRyTGlJNUpFbzlZREJzSmc9PQ== kind : Secret metadata : name : realm-a-keys namespace : $NEW_ROOK_CLUSTER_NAMESPACE type : kubernetes.io/rook Finally, create a kubernetes secret on the pulling Rook Ceph cluster with the new secrets yaml file. 1 kubectl create -f realm-a-keys.yaml","title":"Getting the Realm Access Key and Secret Key from a Non Rook Ceph Cluster"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#pulling-a-realm-on-a-new-rook-ceph-cluster","text":"Once the admin knows the endpoint and the secret for the keys has been created, the admin should create: A CephObjectRealm matching to the realm on the other Ceph cluster, with an endpoint as described above. A CephObjectZoneGroup matching the master zone group name or the master CephObjectZoneGroup from the cluster the realm was pulled from. A CephObjectZone referring to the CephObjectZoneGroup created above. A CephObjectStore referring to the new CephObjectZone resource. object-multisite-pull-realm.yaml (with changes) in the examples directory can be used to create the multisite CRDs. 1 kubectl create -f object-multisite-pull-realm.yaml","title":"Pulling a Realm on a New Rook Ceph Cluster"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#scaling-a-multisite","text":"Scaling the number of gateways that run the synchronization thread to 2 or more can increase the latency of the replication of each S3 object. The recommended way to scale a mutisite configuration is to dissociate the gateway dedicated to the synchronization from gateways that serve clients. The two types of gateways can be deployed by creating two CephObjectStores associated with the same CephObjectZone. The objectstore that deploys the gateway dedicated to the synchronization must have spec.gateway.instances set to 1 , while the objectstore that deploys the client gateways have multiple replicas and should disable the synchronization thread on the gateways by setting spec.gateway.disableMultisiteSyncTraffic to true . 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 --- apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : replication namespace : rook-ceph spec : gateway : port : 80 instances : 1 disableMultisiteSyncTraffic : false zone : name : zone-a --- apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : clients namespace : rook-ceph spec : gateway : port : 80 instances : 5 disableMultisiteSyncTraffic : true zone : name : zone-a","title":"Scaling a Multisite"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#multisite-cleanup","text":"Multisite configuration must be cleaned up by hand. Deleting a realm/zone group/zone CR will not delete the underlying Ceph realm, zone group, zone, or the pools associated with a zone.","title":"Multisite Cleanup"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#realm-deletion","text":"Changes made to the resource's configuration or deletion of the resource are not reflected on the Ceph cluster. When the ceph-object-realm resource is deleted or modified, the realm is not deleted from the Ceph cluster. Realm deletion must be done via the toolbox.","title":"Realm Deletion"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#deleting-a-realm","text":"The Rook toolbox can modify the Ceph Multisite state via the radosgw-admin command. The following command, run via the toolbox, deletes the realm. 1 radosgw-admin realm delete --rgw-realm=realm-a","title":"Deleting a Realm"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#zone-group-deletion","text":"Changes made to the resource's configuration or deletion of the resource are not reflected on the Ceph cluster. When the ceph-object-zone group resource is deleted or modified, the zone group is not deleted from the Ceph cluster. Zone Group deletion must be done through the toolbox.","title":"Zone Group Deletion"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#deleting-a-zone-group","text":"The Rook toolbox can modify the Ceph Multisite state via the radosgw-admin command. The following command, run via the toolbox, deletes the zone group. 1 2 radosgw-admin zonegroup delete --rgw-realm=realm-a --rgw-zonegroup=zone-group-a radosgw-admin period update --commit --rgw-realm=realm-a --rgw-zonegroup=zone-group-a","title":"Deleting a Zone Group"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#deleting-and-reconfiguring-the-ceph-object-zone","text":"Changes made to the resource's configuration or deletion of the resource are not reflected on the Ceph cluster. When the ceph-object-zone resource is deleted or modified, the zone is not deleted from the Ceph cluster. Zone deletion must be done through the toolbox.","title":"Deleting and Reconfiguring the Ceph Object Zone"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#changing-the-master-zone","text":"The Rook toolbox can change the master zone in a zone group. 
1 2 3 radosgw-admin zone modify --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a --master radosgw-admin zonegroup modify --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --master radosgw-admin period update --commit --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a","title":"Changing the Master Zone"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#deleting-zone","text":"The Rook toolbox can modify the Ceph Multisite state via the radosgw-admin command. There are two scenarios possible when deleting a zone. The following commands, run via the toolbox, deletes the zone if there is only one zone in the zone group. 1 2 radosgw-admin zone delete --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a radosgw-admin period update --commit --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a In the other scenario, there are more than one zones in a zone group. Care must be taken when changing which zone is the master zone. Please read the following documentation before running the below commands: The following commands, run via toolboxes, remove the zone from the zone group first, then delete the zone. 1 2 3 4 radosgw-admin zonegroup rm --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a radosgw-admin period update --commit --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a radosgw-admin zone delete --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a radosgw-admin period update --commit --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a When a zone is deleted, the pools for that zone are not deleted.","title":"Deleting Zone"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#deleting-pools-for-a-zone","text":"The Rook toolbox can delete pools. Deleting pools should be done with caution. The following documentation on pools should be read before deleting any pools. When a zone is created the following pools are created for each zone: 1 2 3 4 5 6 $ ZONE_NAME.rgw.control $ ZONE_NAME.rgw.meta $ ZONE_NAME.rgw.log $ ZONE_NAME.rgw.buckets.index $ ZONE_NAME.rgw.buckets.non-ec $ ZONE_NAME.rgw.buckets.data Here is an example command to delete the .rgw.buckets.data pool for zone-a. 1 ceph osd pool rm zone-a.rgw.buckets.data zone-a.rgw.buckets.data --yes-i-really-really-mean-it In this command the pool name must be mentioned twice for the pool to be removed.","title":"Deleting Pools for a Zone"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#removing-an-object-store-from-a-zone","text":"When an object-store (created in a zone) is deleted, the endpoint for that object store is removed from that zone, via 1 kubectl delete -f object-store.yaml Removing object store(s) from the master zone of the master zone group should be done with caution. When all of these object-stores are deleted the period cannot be updated and that realm cannot be pulled.","title":"Removing an Object Store from a Zone"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#configure-an-existing-object-store-for-multisite","text":"When an object store is configured by Rook, it internally creates a zone, zone group, and realm with the same name as the object store. To enable multisite, you will need to create the corresponding zone, zone group, and realm CRs with the same name as the object store. 
For example, to create multisite CRs for an object store named my-store : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 apiVersion : ceph.rook.io/v1 kind : CephObjectRealm metadata : name : my-store namespace : rook-ceph # namespace:cluster --- apiVersion : ceph.rook.io/v1 kind : CephObjectZoneGroup metadata : name : my-store namespace : rook-ceph # namespace:cluster spec : realm : my-store --- apiVersion : ceph.rook.io/v1 kind : CephObjectZone metadata : name : my-store namespace : rook-ceph # namespace:cluster spec : zoneGroup : my-store metadataPool : replicated : size : 3 dataPool : replicated : size : 3 preservePoolsOnDelete : false # recommended to set this value if ingress used for exposing rgw endpoints # customEndpoints: # - \"http://rgw-a.fqdn\" Now modify the existing CephObjectStore CR to exclude pool settings and add a reference to the zone. 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : my-store namespace : rook-ceph # namespace:cluster spec : gateway : port : 80 instances : 1 zone : name : my-store","title":"Configure an Existing Object Store for Multisite"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#using-custom-names","text":"If names different from the object store need to be set for the realm, zone, or zone group, first rename them in the backend via toolbox pod, then following the procedure above. 1 2 3 4 radosgw-admin realm rename --rgw-realm=my-store --realm-new-name= radosgw-admin zonegroup rename --rgw-zonegroup=my-store --zonegroup-new-name= --rgw-realm= radosgw-admin zone rename --rgw-zone=my-store --zone-new-name= --rgw-zonegroup= --rgw-realm= radosgw-admin period update --commit Important Renaming in the toolbox must be performed before creating the multisite CRs","title":"Using custom names"},{"location":"Storage-Configuration/Object-Storage-RGW/cosi/","text":"The Ceph COSI driver provisions buckets for object storage. This document instructs on enabling the driver and consuming a bucket from a sample application. Note The Ceph COSI driver is currently in experimental mode. Prerequisites \u00b6 COSI requires: 1. A running Rook object store 2. COSI controller Deploy the COSI controller with these commands: 1 2 kubectl apply -k github.com/kubernetes-sigs/container-object-storage-interface-api kubectl apply -k github.com/kubernetes-sigs/container-object-storage-interface-controller Ceph COSI Driver \u00b6 The Ceph COSI driver will be started when the CephCOSIDriver CR is created and when the first CephObjectStore is created. 1 2 3 4 5 6 7 apiVersion : ceph.rook.io/v1 kind : CephCOSIDriver metadata : name : ceph-cosi-driver namespace : rook-ceph spec : deploymentStrategy : \"Auto\" 1 2 cd deploy/examples/cosi kubectl create -f cephcosidriver.yaml The driver is created in the same namespace as Rook operator. Admin Operations \u00b6 Create a Ceph Object Store User \u00b6 Create a CephObjectStoreUser to be used by the COSI driver for provisioning buckets. 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephObjectStoreUser metadata : name : cosi namespace : rook-ceph spec : displayName : \"cosi user\" store : my-store capabilities : bucket : \"*\" user : \"*\" 1 kubectl create -f cosi-user.yaml Above step will be automated in future by the Rook operator. Create a BucketClass and BucketAccessClass \u00b6 The BucketClass and BucketAccessClass are CRDs defined by COSI. The BucketClass defines the storage class for the bucket. 
The BucketAccessClass defines the access class for the bucket. The BucketClass and BucketAccessClass are defined as below: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 kind : BucketClass apiVersion : objectstorage.k8s.io/v1alpha1 metadata : name : sample-bcc driverName : ceph.objectstorage.k8s.io deletionPolicy : Delete parameters : objectStoreUserSecretName : rook-ceph-object-user-my-store-cosi objectStoreUserSecretNamespace : rook-ceph --- kind : BucketAccessClass apiVersion : objectstorage.k8s.io/v1alpha1 metadata : name : sample-bac driverName : ceph.objectstorage.k8s.io authenticationType : KEY parameters : objectStoreUserSecretName : rook-ceph-object-user-my-store-cosi objectStoreUserSecretNamespace : rook-ceph 1 kubectl create -f bucketclass.yaml -f bucketaccessclass.yaml The objectStoreUserSecretName and objectStoreUserSecretNamespace are the name and namespace of the CephObjectStoreUser created in the previous step. User Operations \u00b6 Create a Bucket \u00b6 To create a bucket, use the BucketClass to pointing the required object store and then define BucketClaim request as below: 1 2 3 4 5 6 7 8 9 kind : BucketClaim apiVersion : objectstorage.k8s.io/v1alpha1 metadata : name : sample-bc namespace : default # any namespace can be used spec : bucketClassName : sample-bcc protocols : - s3 1 kubectl create -f bucketclaim.yaml Bucket Access \u00b6 Define access to the bucket by creating the BucketAccess resource: 1 2 3 4 5 6 7 8 9 10 11 kind : BucketAccess apiVersion : objectstorage.k8s.io/v1alpha1 metadata : name : sample-access namespace : default # any namespace can be used spec : bucketAccessClassName : sample-bac bucketClaimName : sample-bc protocol : s3 # Change to the name of the secret where access details are stored credentialsSecretName : sample-secret-name 1 kubectl create -f bucketaccess.yaml The secret will be created which contains the access details for the bucket in JSON format in the namespace of BucketAccess: 1 kubectl get secret sample-secret-name -o yaml 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 { \"metadata\" : { \"name\" : \"bc-81733d1a-ac7a-4759-96f3-fbcc07c0cee9\" , \"creationTimestamp\" : null }, \"spec\" : { \"bucketName\" : \"sample-bcc1fc94b04-6011-45e0-a3d8-b6a093055783\" , \"authenticationType\" : \"KEY\" , \"secretS3\" : { \"endpoint\" : \"http://rook-ceph-rgw-my-store.rook-ceph.svc:80\" , \"region\" : \"us-east\" , \"accessKeyID\" : \"LI2LES8QMR9GB5SZLB02\" , \"accessSecretKey\" : \"s0WAmcn8N1eIBgNV0mjCwZWQmJiCF4B0SAzbhYCL\" }, \"secretAzure\" : null , \"protocols\" : [ \"s3\" ] } } Consuming the Bucket via secret \u00b6 To access the bucket from an application pod, mount the secret for accessing the bucket: 1 2 3 4 5 6 7 8 9 10 11 volumes : - name : cosi-secrets secret : # Set the name of the secret from the BucketAccess secretName : sample-secret-name spec : containers : - name : sample-app volumeMounts : - name : cosi-secrets mountPath : /data/cosi The Secret will be mounted in the pod in the path: /data/cosi/BucketInfo . The app must parse the JSON object to load the bucket connection details. Another approach is the json data can be parsed by the application to access the bucket via init container. 
Following is a sample init container which parses the json data and creates a file with the access details: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 set -e jsonfile = %s if [ -d \" $jsonfile \" ] ; then export ENDPOINT = $( jq -r '.spec.secretS3.endpoint' $jsonfile ) export BUCKET = $( jq -r '.spec.bucketName' $jsonfile ) export AWS_ACCESS_KEY_ID = $( jq -r '.spec.secretS3.accessKeyID' $jsonfile ) export AWS_SECRET_ACCESS_KEY = $( jq -r '.spec.secretS3.accessSecretKey' $jsonfile ) fi else echo \"Error: $jsonfile does not exist\" exit 1 fi 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 apiVersion : v1 kind : Pod metadata : name : sample-app namespace : rook-ceph spec : containers : - name : sample-app image : busybox command : [ \"/bin/sh\" , \"-c\" , \"sleep 3600\" ] volumeMounts : - name : cosi-secrets mountPath : /data/cosi initContainers : - name : init-cosi image : busybox command : [ \"/bin/sh\" , \"-c\" , \"setup-aws-credentials /data/cosi/BucketInfo/credentials\" ] volumeMounts : - name : cosi-secrets mountPath : /data/cosi volumes : - name : cosi-secrets secret : # Set the name of the secret from the BucketAccess secretName : sample-secret-name","title":"Container Object Storage Interface (COSI)"},{"location":"Storage-Configuration/Object-Storage-RGW/cosi/#prerequisites","text":"COSI requires: 1. A running Rook object store 2. COSI controller Deploy the COSI controller with these commands: 1 2 kubectl apply -k github.com/kubernetes-sigs/container-object-storage-interface-api kubectl apply -k github.com/kubernetes-sigs/container-object-storage-interface-controller","title":"Prerequisites"},{"location":"Storage-Configuration/Object-Storage-RGW/cosi/#ceph-cosi-driver","text":"The Ceph COSI driver will be started when the CephCOSIDriver CR is created and when the first CephObjectStore is created. 1 2 3 4 5 6 7 apiVersion : ceph.rook.io/v1 kind : CephCOSIDriver metadata : name : ceph-cosi-driver namespace : rook-ceph spec : deploymentStrategy : \"Auto\" 1 2 cd deploy/examples/cosi kubectl create -f cephcosidriver.yaml The driver is created in the same namespace as Rook operator.","title":"Ceph COSI Driver"},{"location":"Storage-Configuration/Object-Storage-RGW/cosi/#admin-operations","text":"","title":"Admin Operations"},{"location":"Storage-Configuration/Object-Storage-RGW/cosi/#create-a-ceph-object-store-user","text":"Create a CephObjectStoreUser to be used by the COSI driver for provisioning buckets. 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephObjectStoreUser metadata : name : cosi namespace : rook-ceph spec : displayName : \"cosi user\" store : my-store capabilities : bucket : \"*\" user : \"*\" 1 kubectl create -f cosi-user.yaml Above step will be automated in future by the Rook operator.","title":"Create a Ceph Object Store User"},{"location":"Storage-Configuration/Object-Storage-RGW/cosi/#create-a-bucketclass-and-bucketaccessclass","text":"The BucketClass and BucketAccessClass are CRDs defined by COSI. The BucketClass defines the storage class for the bucket. The BucketAccessClass defines the access class for the bucket. 
The BucketClass and BucketAccessClass are defined as below: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 kind : BucketClass apiVersion : objectstorage.k8s.io/v1alpha1 metadata : name : sample-bcc driverName : ceph.objectstorage.k8s.io deletionPolicy : Delete parameters : objectStoreUserSecretName : rook-ceph-object-user-my-store-cosi objectStoreUserSecretNamespace : rook-ceph --- kind : BucketAccessClass apiVersion : objectstorage.k8s.io/v1alpha1 metadata : name : sample-bac driverName : ceph.objectstorage.k8s.io authenticationType : KEY parameters : objectStoreUserSecretName : rook-ceph-object-user-my-store-cosi objectStoreUserSecretNamespace : rook-ceph 1 kubectl create -f bucketclass.yaml -f bucketaccessclass.yaml The objectStoreUserSecretName and objectStoreUserSecretNamespace are the name and namespace of the CephObjectStoreUser created in the previous step.","title":"Create a BucketClass and BucketAccessClass"},{"location":"Storage-Configuration/Object-Storage-RGW/cosi/#user-operations","text":"","title":"User Operations"},{"location":"Storage-Configuration/Object-Storage-RGW/cosi/#create-a-bucket","text":"To create a bucket, use the BucketClass to pointing the required object store and then define BucketClaim request as below: 1 2 3 4 5 6 7 8 9 kind : BucketClaim apiVersion : objectstorage.k8s.io/v1alpha1 metadata : name : sample-bc namespace : default # any namespace can be used spec : bucketClassName : sample-bcc protocols : - s3 1 kubectl create -f bucketclaim.yaml","title":"Create a Bucket"},{"location":"Storage-Configuration/Object-Storage-RGW/cosi/#bucket-access","text":"Define access to the bucket by creating the BucketAccess resource: 1 2 3 4 5 6 7 8 9 10 11 kind : BucketAccess apiVersion : objectstorage.k8s.io/v1alpha1 metadata : name : sample-access namespace : default # any namespace can be used spec : bucketAccessClassName : sample-bac bucketClaimName : sample-bc protocol : s3 # Change to the name of the secret where access details are stored credentialsSecretName : sample-secret-name 1 kubectl create -f bucketaccess.yaml The secret will be created which contains the access details for the bucket in JSON format in the namespace of BucketAccess: 1 kubectl get secret sample-secret-name -o yaml 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 { \"metadata\" : { \"name\" : \"bc-81733d1a-ac7a-4759-96f3-fbcc07c0cee9\" , \"creationTimestamp\" : null }, \"spec\" : { \"bucketName\" : \"sample-bcc1fc94b04-6011-45e0-a3d8-b6a093055783\" , \"authenticationType\" : \"KEY\" , \"secretS3\" : { \"endpoint\" : \"http://rook-ceph-rgw-my-store.rook-ceph.svc:80\" , \"region\" : \"us-east\" , \"accessKeyID\" : \"LI2LES8QMR9GB5SZLB02\" , \"accessSecretKey\" : \"s0WAmcn8N1eIBgNV0mjCwZWQmJiCF4B0SAzbhYCL\" }, \"secretAzure\" : null , \"protocols\" : [ \"s3\" ] } }","title":"Bucket Access"},{"location":"Storage-Configuration/Object-Storage-RGW/cosi/#consuming-the-bucket-via-secret","text":"To access the bucket from an application pod, mount the secret for accessing the bucket: 1 2 3 4 5 6 7 8 9 10 11 volumes : - name : cosi-secrets secret : # Set the name of the secret from the BucketAccess secretName : sample-secret-name spec : containers : - name : sample-app volumeMounts : - name : cosi-secrets mountPath : /data/cosi The Secret will be mounted in the pod in the path: /data/cosi/BucketInfo . The app must parse the JSON object to load the bucket connection details. Another approach is the json data can be parsed by the application to access the bucket via init container. 
Following is a sample init container which parses the json data and creates a file with the access details: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 set -e jsonfile = %s if [ -d \" $jsonfile \" ] ; then export ENDPOINT = $( jq -r '.spec.secretS3.endpoint' $jsonfile ) export BUCKET = $( jq -r '.spec.bucketName' $jsonfile ) export AWS_ACCESS_KEY_ID = $( jq -r '.spec.secretS3.accessKeyID' $jsonfile ) export AWS_SECRET_ACCESS_KEY = $( jq -r '.spec.secretS3.accessSecretKey' $jsonfile ) fi else echo \"Error: $jsonfile does not exist\" exit 1 fi 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 apiVersion : v1 kind : Pod metadata : name : sample-app namespace : rook-ceph spec : containers : - name : sample-app image : busybox command : [ \"/bin/sh\" , \"-c\" , \"sleep 3600\" ] volumeMounts : - name : cosi-secrets mountPath : /data/cosi initContainers : - name : init-cosi image : busybox command : [ \"/bin/sh\" , \"-c\" , \"setup-aws-credentials /data/cosi/BucketInfo/credentials\" ] volumeMounts : - name : cosi-secrets mountPath : /data/cosi volumes : - name : cosi-secrets secret : # Set the name of the secret from the BucketAccess secretName : sample-secret-name","title":"Consuming the Bucket via secret"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/","text":"Object storage exposes an S3 API to the storage cluster for applications to put and get data. Prerequisites \u00b6 This guide assumes a Rook cluster as explained in the Quickstart . Configure an Object Store \u00b6 Rook has the ability to either deploy an object store in Kubernetes or to connect to an external RGW service. Most commonly, the object store will be configured locally by Rook. Alternatively, if you have an existing Ceph cluster with Rados Gateways, see the external section to consume it from Rook. Create a Local Object Store \u00b6 The below sample will create a CephObjectStore that starts the RGW service in the cluster with an S3 API. Note This sample requires at least 3 bluestore OSDs , with each OSD located on a different node . The OSDs must be located on different nodes, because the failureDomain is set to host and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). See the Object Store CRD , for more detail on the settings available for a CephObjectStore . 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : my-store namespace : rook-ceph spec : metadataPool : failureDomain : host replicated : size : 3 dataPool : failureDomain : host erasureCoded : dataChunks : 2 codingChunks : 1 preservePoolsOnDelete : true gateway : sslCertificateRef : port : 80 # securePort: 443 instances : 1 After the CephObjectStore is created, the Rook operator will then create all the pools and other resources necessary to start the service. This may take a minute to complete. Create an object store: 1 kubectl create -f object.yaml To confirm the object store is configured, wait for the RGW pod(s) to start: 1 kubectl -n rook-ceph get pod -l app=rook-ceph-rgw Connect to an External Object Store \u00b6 Rook can connect to existing RGW gateways to work in conjunction with the external mode of the CephCluster CRD. 
First, create a rgw-admin-ops-user user in the Ceph cluster with the necessary caps: 1 radosgw-admin user create --uid=rgw-admin-ops-user --display-name=\"RGW Admin Ops User\" --caps=\"buckets=*;users=*;usage=read;metadata=read;zone=read\" --rgw-realm= --rgw-zonegroup= --rgw-zone= The rgw-admin-ops-user user is required by the Rook operator to manage buckets and users via the admin ops and s3 api. The multisite configuration needs to be specified only if the admin sets up multisite for RGW. Then create a secret with the user credentials: 1 kubectl -n rook-ceph create secret generic --type=\"kubernetes.io/rook\" rgw-admin-ops-user --from-literal=accessKey= --from-literal=secretKey= If you have an external CephCluster CR, you can instruct Rook to consume external gateways with the following: 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : external-store namespace : rook-ceph spec : gateway : port : 8080 externalRgwEndpoints : - ip : 192.168.39.182 # hostname: example.com Use the existing object-external.yaml file. Even though multiple endpoints can be specified, it is recommend to use only one endpoint. This endpoint is randomly added to configmap of OBC and secret of the cephobjectstoreuser . Rook never guarantees the randomly picked endpoint is a working one or not. If there are multiple endpoints, please add load balancer in front of them and use the load balancer endpoint in the externalRgwEndpoints list. When ready, the message in the cephobjectstore status similar to this one: 1 2 3 kubectl -n rook-ceph get cephobjectstore external-store NAME PHASE external-store Ready Any pod from your cluster can now access this endpoint: 1 2 $ curl 10 .100.28.138:8080 anonymous Create a Bucket \u00b6 Info This document is a guide for creating bucket with an Object Bucket Claim (OBC). To create a bucket with the experimental COSI Driver, see the COSI documentation . Now that the object store is configured, next we need to create a bucket where a client can read and write objects. A bucket can be created by defining a storage class, similar to the pattern used by block and file storage. First, define the storage class that will allow object clients to create a bucket. The storage class defines the object storage system, the bucket retention policy, and other properties required by the administrator. Save the following as storageclass-bucket-delete.yaml (the example is named as such due to the Delete reclaim policy). 1 2 3 4 5 6 7 8 9 10 apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-ceph-bucket # Change \"rook-ceph\" provisioner prefix to match the operator namespace if needed provisioner : rook-ceph.ceph.rook.io/bucket reclaimPolicy : Delete parameters : objectStoreName : my-store objectStoreNamespace : rook-ceph If you\u2019ve deployed the Rook operator in a namespace other than rook-ceph , change the prefix in the provisioner to match the namespace you used. For example, if the Rook operator is running in the namespace my-namespace the provisioner value should be my-namespace.ceph.rook.io/bucket . 1 kubectl create -f storageclass-bucket-delete.yaml Based on this storage class, an object client can now request a bucket by creating an Object Bucket Claim (OBC). When the OBC is created, the Rook bucket provisioner will create a new bucket. Notice that the OBC references the storage class that was created above. 
Save the following as object-bucket-claim-delete.yaml (the example is named as such due to the Delete reclaim policy): 1 2 3 4 5 6 7 apiVersion : objectbucket.io/v1alpha1 kind : ObjectBucketClaim metadata : name : ceph-bucket spec : generateBucketName : ceph-bkt storageClassName : rook-ceph-bucket 1 kubectl create -f object-bucket-claim-delete.yaml Now that the claim is created, the operator will create the bucket as well as generate other artifacts to enable access to the bucket. A secret and ConfigMap are created with the same name as the OBC and in the same namespace. The secret contains credentials used by the application pod to access the bucket. The ConfigMap contains bucket endpoint information and is also consumed by the pod. See the Object Bucket Claim Documentation for more details on the CephObjectBucketClaims . Client Connections \u00b6 The following commands extract key pieces of information from the secret and configmap:\" 1 2 3 4 5 6 # config-map, secret, OBC will part of default if no specific name space mentioned export AWS_HOST=$(kubectl -n default get cm ceph-bucket -o jsonpath='{.data.BUCKET_HOST}') export PORT=$(kubectl -n default get cm ceph-bucket -o jsonpath='{.data.BUCKET_PORT}') export BUCKET_NAME=$(kubectl -n default get cm ceph-bucket -o jsonpath='{.data.BUCKET_NAME}') export AWS_ACCESS_KEY_ID=$(kubectl -n default get secret ceph-bucket -o jsonpath='{.data.AWS_ACCESS_KEY_ID}' | base64 --decode) export AWS_SECRET_ACCESS_KEY=$(kubectl -n default get secret ceph-bucket -o jsonpath='{.data.AWS_SECRET_ACCESS_KEY}' | base64 --decode) Consume the Object Storage \u00b6 Now that you have the object store configured and a bucket created, you can consume the object storage from an S3 client. This section will guide you through testing the connection to the CephObjectStore and uploading and downloading from it. Run the following commands after you have connected to the Rook toolbox . Connection Environment Variables \u00b6 To simplify the s3 client commands, you will want to set the four environment variables for use by your client (ie. inside the toolbox). See above for retrieving the variables for a bucket created by an ObjectBucketClaim . 1 2 3 4 export AWS_HOST= export PORT= export AWS_ACCESS_KEY_ID= export AWS_SECRET_ACCESS_KEY= Host : The DNS host name where the rgw service is found in the cluster. Assuming you are using the default rook-ceph cluster, it will be rook-ceph-rgw-my-store.rook-ceph.svc . Port : The endpoint where the rgw service is listening. Run kubectl -n rook-ceph get svc rook-ceph-rgw-my-store , to get the port. Access key : The user's access_key as printed above Secret key : The user's secret_key as printed above The variables for the user generated in this example might be: 1 2 3 4 export AWS_HOST=rook-ceph-rgw-my-store.rook-ceph.svc export PORT=80 export AWS_ACCESS_KEY_ID=XEZDB3UJ6X7HVBE7X7MA export AWS_SECRET_ACCESS_KEY=7yGIZON7EhFORz0I40BFniML36D2rl8CQQ5kXU6l The access key and secret key can be retrieved as described in the section above on client connections or below in the section creating a user if you are not creating the buckets with an ObjectBucketClaim . Configure s5cmd \u00b6 To test the CephObjectStore , set the object store credentials in the toolbox pod that contains the s5cmd tool. Important The default toolbox.yaml does not contain the s5cmd. The toolbox must be started with the rook operator image (toolbox-operator-image), which does contain s5cmd. 
1 2 3 4 5 6 7 kubectl create -f deploy/examples/toolbox-operator-image.yaml mkdir ~/.aws cat > ~/.aws/credentials << EOF [default] aws_access_key_id = ${AWS_ACCESS_KEY_ID} aws_secret_access_key = ${AWS_SECRET_ACCESS_KEY} EOF PUT or GET an object \u00b6 Upload a file to the newly created bucket 1 2 echo \"Hello Rook\" > /tmp/rookObj s5cmd --endpoint-url http://$AWS_HOST:$PORT cp /tmp/rookObj s3://$BUCKET_NAME Download and verify the file from the bucket 1 2 s5cmd --endpoint-url http://$AWS_HOST:$PORT cp s3://$BUCKET_NAME/rookObj /tmp/rookObj-download cat /tmp/rookObj-download Monitoring health \u00b6 Rook configures health probes on the deployment created for CephObjectStore gateways. Refer to the CRD document for information about configuring the probes and monitoring the deployment status. Access External to the Cluster \u00b6 Rook sets up the object storage so pods will have access internal to the cluster. If your applications are running outside the cluster, you will need to setup an external service through a NodePort . First, note the service that exposes RGW internal to the cluster. We will leave this service intact and create a new service for external access. 1 2 3 $ kubectl -n rook-ceph get service rook-ceph-rgw-my-store NAME CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-rgw-my-store 10.3.0.177  80/TCP 2m Save the external service as rgw-external.yaml : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 apiVersion : v1 kind : Service metadata : name : rook-ceph-rgw-my-store-external namespace : rook-ceph labels : app : rook-ceph-rgw rook_cluster : rook-ceph rook_object_store : my-store spec : ports : - name : rgw port : 80 protocol : TCP targetPort : 80 selector : app : rook-ceph-rgw rook_cluster : rook-ceph rook_object_store : my-store sessionAffinity : None type : NodePort Now create the external service. 1 kubectl create -f rgw-external.yaml See both rgw services running and notice what port the external service is running on: 1 2 3 4 $ kubectl -n rook-ceph get service rook-ceph-rgw-my-store rook-ceph-rgw-my-store-external NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-rgw-my-store ClusterIP 10.104.82.228  80/TCP 4m rook-ceph-rgw-my-store-external NodePort 10.111.113.237  80:31536/TCP 39s Internally the rgw service is running on port 80 . The external port in this case is 31536 . Now you can access the CephObjectStore from anywhere! All you need is the hostname for any machine in the cluster, the external port, and the user credentials. Create a User \u00b6 If you need to create an independent set of user credentials to access the S3 endpoint, create a CephObjectStoreUser . The user will be used to connect to the RGW service in the cluster using the S3 API. The user will be independent of any object bucket claims that you might have created in the earlier instructions in this document. See the Object Store User CRD for more detail on the settings available for a CephObjectStoreUser . 1 2 3 4 5 6 7 8 apiVersion : ceph.rook.io/v1 kind : CephObjectStoreUser metadata : name : my-user namespace : rook-ceph spec : store : my-store displayName : \"my display name\" When the CephObjectStoreUser is created, the Rook operator will then create the RGW user on the specified CephObjectStore and store the Access Key and Secret Key in a kubernetes secret in the same namespace as the CephObjectStoreUser . 
1 2 # Create the object store user kubectl create -f object-user.yaml 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 # To confirm the object store user is configured, describe the secret $ kubectl -n rook-ceph describe secret rook-ceph-object-user-my-store-my-user Name: rook-ceph-object-user-my-store-my-user Namespace: rook-ceph Labels: app=rook-ceph-rgw rook_cluster=rook-ceph rook_object_store=my-store Annotations:  Type: kubernetes.io/rook Data ==== AccessKey: 20 bytes SecretKey: 40 bytes The AccessKey and SecretKey data fields can be mounted in a pod as an environment variable. More information on consuming kubernetes secrets can be found in the K8s secret documentation To directly retrieve the secrets: 1 2 kubectl -n rook-ceph get secret rook-ceph-object-user-my-store-my-user -o jsonpath='{.data.AccessKey}' | base64 --decode kubectl -n rook-ceph get secret rook-ceph-object-user-my-store-my-user -o jsonpath='{.data.SecretKey}' | base64 --decode Object Multisite \u00b6 Multisite is a feature of Ceph that allows object stores to replicate its data over multiple Ceph clusters. Multisite also allows object stores to be independent and isolated from other object stores in a cluster. For more information on multisite please read the ceph multisite overview for how to run it.","title":"Object Storage Overview"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#prerequisites","text":"This guide assumes a Rook cluster as explained in the Quickstart .","title":"Prerequisites"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#configure-an-object-store","text":"Rook has the ability to either deploy an object store in Kubernetes or to connect to an external RGW service. Most commonly, the object store will be configured locally by Rook. Alternatively, if you have an existing Ceph cluster with Rados Gateways, see the external section to consume it from Rook.","title":"Configure an Object Store"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#create-a-local-object-store","text":"The below sample will create a CephObjectStore that starts the RGW service in the cluster with an S3 API. Note This sample requires at least 3 bluestore OSDs , with each OSD located on a different node . The OSDs must be located on different nodes, because the failureDomain is set to host and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). See the Object Store CRD , for more detail on the settings available for a CephObjectStore . 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : my-store namespace : rook-ceph spec : metadataPool : failureDomain : host replicated : size : 3 dataPool : failureDomain : host erasureCoded : dataChunks : 2 codingChunks : 1 preservePoolsOnDelete : true gateway : sslCertificateRef : port : 80 # securePort: 443 instances : 1 After the CephObjectStore is created, the Rook operator will then create all the pools and other resources necessary to start the service. This may take a minute to complete. 
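That progress can also be followed on the CephObjectStore status once the resource has been created in the next step; for example, assuming the my-store name used above:

```sh
# Watch the object store resource until the PHASE column reports Ready.
kubectl -n rook-ceph get cephobjectstore my-store -w
```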
Create an object store: 1 kubectl create -f object.yaml To confirm the object store is configured, wait for the RGW pod(s) to start: 1 kubectl -n rook-ceph get pod -l app=rook-ceph-rgw","title":"Create a Local Object Store"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#connect-to-an-external-object-store","text":"Rook can connect to existing RGW gateways to work in conjunction with the external mode of the CephCluster CRD. First, create a rgw-admin-ops-user user in the Ceph cluster with the necessary caps: 1 radosgw-admin user create --uid=rgw-admin-ops-user --display-name=\"RGW Admin Ops User\" --caps=\"buckets=*;users=*;usage=read;metadata=read;zone=read\" --rgw-realm= --rgw-zonegroup= --rgw-zone= The rgw-admin-ops-user user is required by the Rook operator to manage buckets and users via the admin ops and s3 api. The multisite configuration needs to be specified only if the admin sets up multisite for RGW. Then create a secret with the user credentials: 1 kubectl -n rook-ceph create secret generic --type=\"kubernetes.io/rook\" rgw-admin-ops-user --from-literal=accessKey= --from-literal=secretKey= If you have an external CephCluster CR, you can instruct Rook to consume external gateways with the following: 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : external-store namespace : rook-ceph spec : gateway : port : 8080 externalRgwEndpoints : - ip : 192.168.39.182 # hostname: example.com Use the existing object-external.yaml file. Even though multiple endpoints can be specified, it is recommend to use only one endpoint. This endpoint is randomly added to configmap of OBC and secret of the cephobjectstoreuser . Rook never guarantees the randomly picked endpoint is a working one or not. If there are multiple endpoints, please add load balancer in front of them and use the load balancer endpoint in the externalRgwEndpoints list. When ready, the message in the cephobjectstore status similar to this one: 1 2 3 kubectl -n rook-ceph get cephobjectstore external-store NAME PHASE external-store Ready Any pod from your cluster can now access this endpoint: 1 2 $ curl 10 .100.28.138:8080 anonymous","title":"Connect to an External Object Store"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#create-a-bucket","text":"Info This document is a guide for creating bucket with an Object Bucket Claim (OBC). To create a bucket with the experimental COSI Driver, see the COSI documentation . Now that the object store is configured, next we need to create a bucket where a client can read and write objects. A bucket can be created by defining a storage class, similar to the pattern used by block and file storage. First, define the storage class that will allow object clients to create a bucket. The storage class defines the object storage system, the bucket retention policy, and other properties required by the administrator. Save the following as storageclass-bucket-delete.yaml (the example is named as such due to the Delete reclaim policy). 1 2 3 4 5 6 7 8 9 10 apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-ceph-bucket # Change \"rook-ceph\" provisioner prefix to match the operator namespace if needed provisioner : rook-ceph.ceph.rook.io/bucket reclaimPolicy : Delete parameters : objectStoreName : my-store objectStoreNamespace : rook-ceph If you\u2019ve deployed the Rook operator in a namespace other than rook-ceph , change the prefix in the provisioner to match the namespace you used. 
For example, if the Rook operator is running in the namespace my-namespace the provisioner value should be my-namespace.ceph.rook.io/bucket . 1 kubectl create -f storageclass-bucket-delete.yaml Based on this storage class, an object client can now request a bucket by creating an Object Bucket Claim (OBC). When the OBC is created, the Rook bucket provisioner will create a new bucket. Notice that the OBC references the storage class that was created above. Save the following as object-bucket-claim-delete.yaml (the example is named as such due to the Delete reclaim policy): 1 2 3 4 5 6 7 apiVersion : objectbucket.io/v1alpha1 kind : ObjectBucketClaim metadata : name : ceph-bucket spec : generateBucketName : ceph-bkt storageClassName : rook-ceph-bucket 1 kubectl create -f object-bucket-claim-delete.yaml Now that the claim is created, the operator will create the bucket as well as generate other artifacts to enable access to the bucket. A secret and ConfigMap are created with the same name as the OBC and in the same namespace. The secret contains credentials used by the application pod to access the bucket. The ConfigMap contains bucket endpoint information and is also consumed by the pod. See the Object Bucket Claim Documentation for more details on the CephObjectBucketClaims .","title":"Create a Bucket"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#client-connections","text":"The following commands extract key pieces of information from the secret and configmap:\" 1 2 3 4 5 6 # config-map, secret, OBC will part of default if no specific name space mentioned export AWS_HOST=$(kubectl -n default get cm ceph-bucket -o jsonpath='{.data.BUCKET_HOST}') export PORT=$(kubectl -n default get cm ceph-bucket -o jsonpath='{.data.BUCKET_PORT}') export BUCKET_NAME=$(kubectl -n default get cm ceph-bucket -o jsonpath='{.data.BUCKET_NAME}') export AWS_ACCESS_KEY_ID=$(kubectl -n default get secret ceph-bucket -o jsonpath='{.data.AWS_ACCESS_KEY_ID}' | base64 --decode) export AWS_SECRET_ACCESS_KEY=$(kubectl -n default get secret ceph-bucket -o jsonpath='{.data.AWS_SECRET_ACCESS_KEY}' | base64 --decode)","title":"Client Connections"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#consume-the-object-storage","text":"Now that you have the object store configured and a bucket created, you can consume the object storage from an S3 client. This section will guide you through testing the connection to the CephObjectStore and uploading and downloading from it. Run the following commands after you have connected to the Rook toolbox .","title":"Consume the Object Storage"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#connection-environment-variables","text":"To simplify the s3 client commands, you will want to set the four environment variables for use by your client (ie. inside the toolbox). See above for retrieving the variables for a bucket created by an ObjectBucketClaim . 1 2 3 4 export AWS_HOST= export PORT= export AWS_ACCESS_KEY_ID= export AWS_SECRET_ACCESS_KEY= Host : The DNS host name where the rgw service is found in the cluster. Assuming you are using the default rook-ceph cluster, it will be rook-ceph-rgw-my-store.rook-ceph.svc . Port : The endpoint where the rgw service is listening. Run kubectl -n rook-ceph get svc rook-ceph-rgw-my-store , to get the port. 
Access key : The user's access_key as printed above Secret key : The user's secret_key as printed above The variables for the user generated in this example might be: 1 2 3 4 export AWS_HOST=rook-ceph-rgw-my-store.rook-ceph.svc export PORT=80 export AWS_ACCESS_KEY_ID=XEZDB3UJ6X7HVBE7X7MA export AWS_SECRET_ACCESS_KEY=7yGIZON7EhFORz0I40BFniML36D2rl8CQQ5kXU6l The access key and secret key can be retrieved as described in the section above on client connections or below in the section creating a user if you are not creating the buckets with an ObjectBucketClaim .","title":"Connection Environment Variables"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#configure-s5cmd","text":"To test the CephObjectStore , set the object store credentials in the toolbox pod that contains the s5cmd tool. Important The default toolbox.yaml does not contain the s5cmd. The toolbox must be started with the rook operator image (toolbox-operator-image), which does contain s5cmd. 1 2 3 4 5 6 7 kubectl create -f deploy/examples/toolbox-operator-image.yaml mkdir ~/.aws cat > ~/.aws/credentials << EOF [default] aws_access_key_id = ${AWS_ACCESS_KEY_ID} aws_secret_access_key = ${AWS_SECRET_ACCESS_KEY} EOF","title":"Configure s5cmd"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#put-or-get-an-object","text":"Upload a file to the newly created bucket 1 2 echo \"Hello Rook\" > /tmp/rookObj s5cmd --endpoint-url http://$AWS_HOST:$PORT cp /tmp/rookObj s3://$BUCKET_NAME Download and verify the file from the bucket 1 2 s5cmd --endpoint-url http://$AWS_HOST:$PORT cp s3://$BUCKET_NAME/rookObj /tmp/rookObj-download cat /tmp/rookObj-download","title":"PUT or GET an object"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#monitoring-health","text":"Rook configures health probes on the deployment created for CephObjectStore gateways. Refer to the CRD document for information about configuring the probes and monitoring the deployment status.","title":"Monitoring health"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#access-external-to-the-cluster","text":"Rook sets up the object storage so pods will have access internal to the cluster. If your applications are running outside the cluster, you will need to setup an external service through a NodePort . First, note the service that exposes RGW internal to the cluster. We will leave this service intact and create a new service for external access. 1 2 3 $ kubectl -n rook-ceph get service rook-ceph-rgw-my-store NAME CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-rgw-my-store 10.3.0.177  80/TCP 2m Save the external service as rgw-external.yaml : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 apiVersion : v1 kind : Service metadata : name : rook-ceph-rgw-my-store-external namespace : rook-ceph labels : app : rook-ceph-rgw rook_cluster : rook-ceph rook_object_store : my-store spec : ports : - name : rgw port : 80 protocol : TCP targetPort : 80 selector : app : rook-ceph-rgw rook_cluster : rook-ceph rook_object_store : my-store sessionAffinity : None type : NodePort Now create the external service. 
1 kubectl create -f rgw-external.yaml See both rgw services running and notice what port the external service is running on: 1 2 3 4 $ kubectl -n rook-ceph get service rook-ceph-rgw-my-store rook-ceph-rgw-my-store-external NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-rgw-my-store ClusterIP 10.104.82.228  80/TCP 4m rook-ceph-rgw-my-store-external NodePort 10.111.113.237  80:31536/TCP 39s Internally the rgw service is running on port 80 . The external port in this case is 31536 . Now you can access the CephObjectStore from anywhere! All you need is the hostname for any machine in the cluster, the external port, and the user credentials.","title":"Access External to the Cluster"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#create-a-user","text":"If you need to create an independent set of user credentials to access the S3 endpoint, create a CephObjectStoreUser . The user will be used to connect to the RGW service in the cluster using the S3 API. The user will be independent of any object bucket claims that you might have created in the earlier instructions in this document. See the Object Store User CRD for more detail on the settings available for a CephObjectStoreUser . 1 2 3 4 5 6 7 8 apiVersion : ceph.rook.io/v1 kind : CephObjectStoreUser metadata : name : my-user namespace : rook-ceph spec : store : my-store displayName : \"my display name\" When the CephObjectStoreUser is created, the Rook operator will then create the RGW user on the specified CephObjectStore and store the Access Key and Secret Key in a kubernetes secret in the same namespace as the CephObjectStoreUser . 1 2 # Create the object store user kubectl create -f object-user.yaml 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 # To confirm the object store user is configured, describe the secret $ kubectl -n rook-ceph describe secret rook-ceph-object-user-my-store-my-user Name: rook-ceph-object-user-my-store-my-user Namespace: rook-ceph Labels: app=rook-ceph-rgw rook_cluster=rook-ceph rook_object_store=my-store Annotations:  Type: kubernetes.io/rook Data ==== AccessKey: 20 bytes SecretKey: 40 bytes The AccessKey and SecretKey data fields can be mounted in a pod as an environment variable. More information on consuming kubernetes secrets can be found in the K8s secret documentation To directly retrieve the secrets: 1 2 kubectl -n rook-ceph get secret rook-ceph-object-user-my-store-my-user -o jsonpath='{.data.AccessKey}' | base64 --decode kubectl -n rook-ceph get secret rook-ceph-object-user-my-store-my-user -o jsonpath='{.data.SecretKey}' | base64 --decode","title":"Create a User"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#object-multisite","text":"Multisite is a feature of Ceph that allows object stores to replicate its data over multiple Ceph clusters. Multisite also allows object stores to be independent and isolated from other object stores in a cluster. For more information on multisite please read the ceph multisite overview for how to run it.","title":"Object Multisite"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-mirroring/","text":"Ceph filesystem mirroring is a process of asynchronous replication of snapshots to a remote CephFS file system. Snapshots are synchronized by mirroring snapshot data followed by creating a snapshot with the same name (for a given directory on the remote file system) as the snapshot being synchronized. It is generally useful when planning for Disaster Recovery. 
Mirroring is for clusters that are geographically distributed and stretching a single cluster is not possible due to high latencies. Prerequisites \u00b6 This guide assumes you have created a Rook cluster as explained in the main quickstart guide Create the Filesystem with Mirroring enabled \u00b6 The following will enable mirroring on the filesystem: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 apiVersion : ceph.rook.io/v1 kind : CephFilesystem metadata : name : myfs namespace : rook-ceph spec : metadataPool : failureDomain : host replicated : size : 3 dataPools : - name : replicated failureDomain : host replicated : size : 3 preserveFilesystemOnDelete : true metadataServer : activeCount : 1 activeStandby : true mirroring : enabled : true # list of Kubernetes Secrets containing the peer token # for more details see: https://docs.ceph.com/en/latest/dev/cephfs-mirroring/#bootstrap-peers # Add the secret name if it already exists else specify the empty list here. peers : secretNames : #- secondary-cluster-peer # specify the schedule(s) on which snapshots should be taken # see the official syntax here https://docs.ceph.com/en/latest/cephfs/snap-schedule/#add-and-remove-schedules snapshotSchedules : - path : / interval : 24h # daily snapshots # The startTime should be mentioned in the format YYYY-MM-DDTHH:MM:SS # If startTime is not specified, then by default the start time is considered as midnight UTC. # see usage here https://docs.ceph.com/en/latest/cephfs/snap-schedule/#usage # startTime: 2022-07-15T11:55:00 # manage retention policies # see syntax duration here https://docs.ceph.com/en/latest/cephfs/snap-schedule/#add-and-remove-retention-policies snapshotRetention : - path : / duration : \"h 24\" Create the cephfs-mirror daemon \u00b6 Launch the rook-ceph-fs-mirror pod on the source storage cluster, which deploys the cephfs-mirror daemon in the cluster: 1 kubectl create -f deploy/examples/filesystem-mirror.yaml Please refer to Filesystem Mirror CRD for more information. Configuring mirroring peers \u00b6 Once mirroring is enabled, Rook will by default create its own bootstrap peer token so that it can be used by another cluster. The bootstrap peer token can be found in a Kubernetes Secret. The name of the Secret is present in the Status field of the CephFilesystem CR: 1 2 3 status : info : fsMirrorBootstrapPeerSecretName : fs-peer-token-myfs This secret can then be fetched like so: 1 2 # kubectl get secret -n rook-ceph fs-peer-token-myfs -o jsonpath = '{.data.token}' | base64 -d eyJmc2lkIjoiOTFlYWUwZGQtMDZiMS00ZDJjLTkxZjMtMTMxMWM5ZGYzODJiIiwiY2xpZW50X2lkIjoicmJkLW1pcnJvci1wZWVyIiwia2V5IjoiQVFEN1psOWZ3V1VGRHhBQWdmY0gyZi8xeUhYeGZDUTU5L1N0NEE9PSIsIm1vbl9ob3N0IjoiW3YyOjEwLjEwMS4xOC4yMjM6MzMwMCx2MToxMC4xMDEuMTguMjIzOjY3ODldIn0= Import the token in the Destination cluster \u00b6 The decoded secret must be saved in a file before importing. 1 # ceph fs snapshot mirror peer_bootstrap import   See the CephFS mirror documentation on how to add a bootstrap peer . Further refer to CephFS mirror documentation to configure a directory for snapshot mirroring . Verify that the snapshots have synced \u00b6 To check the mirror daemon status , please run the following command from the toolbox pod. 
For example : 1 # ceph fs snapshot mirror daemon status | jq 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 [ { \"daemon_id\" : 906790 , \"filesystems\" : [ { \"filesystem_id\" : 1 , \"name\" : \"myfs\" , \"directory_count\" : 1 , \"peers\" : [ { \"uuid\" : \"a24a3366-8130-4d55-aada-95fa9d3ff94d\" , \"remote\" : { \"client_name\" : \"client.mirror\" , \"cluster_name\" : \"91046889-a6aa-4f74-9fb0-f7bb111666b4\" , \"fs_name\" : \"myfs\" }, \"stats\" : { \"failure_count\" : 0 , \"recovery_count\" : 0 } } ] } ] } ] Please refer to the --admin-daemon socket commands from the CephFS mirror documentation to verify mirror status and peer synchronization status and run the commands from the rook-ceph-fs-mirror pod: 1 # kubectl -n rook-ceph exec -it deploy/rook-ceph-fs-mirror -- bash Fetch the ceph-client.fs-mirror daemon admin socket file from the /var/run/ceph directory: 1 # ls -lhsa /var/run/ceph/ 1 # ceph --admin-daemon /var/run/ceph/ceph-client.fs-mirror.1.93989418120648.asok fs mirror status myfs@1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 { \"rados_inst\" : \"X.X.X.X:0/2286593433\" , \"peers\" : { \"a24a3366-8130-4d55-aada-95fa9d3ff94d\" : { \"remote\" : { \"client_name\" : \"client.mirror\" , \"cluster_name\" : \"91046889-a6aa-4f74-9fb0-f7bb111666b4\" , \"fs_name\" : \"myfs\" } } }, \"snap_dirs\" : { \"dir_count\" : 1 } } For getting peer synchronization status : 1 # ceph --admin-daemon /var/run/ceph/ceph-client.fs-mirror.1.93989418120648.asok fs mirror peer status myfs@1 a24a3366-8130-4d55-aada-95fa9d3ff94d 1 2 3 4 5 6 7 8 9 10 11 12 { \"/volumes/_nogroup/subvol-1\" : { \"state\" : \"idle\" , \"last_synced_snap\" : { \"id\" : 4 , \"name\" : \"snap2\" }, \"snaps_synced\" : 0 , \"snaps_deleted\" : 0 , \"snaps_renamed\" : 0 } }","title":"Filesystem Mirroring"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-mirroring/#prerequisites","text":"This guide assumes you have created a Rook cluster as explained in the main quickstart guide","title":"Prerequisites"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-mirroring/#create-the-filesystem-with-mirroring-enabled","text":"The following will enable mirroring on the filesystem: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 apiVersion : ceph.rook.io/v1 kind : CephFilesystem metadata : name : myfs namespace : rook-ceph spec : metadataPool : failureDomain : host replicated : size : 3 dataPools : - name : replicated failureDomain : host replicated : size : 3 preserveFilesystemOnDelete : true metadataServer : activeCount : 1 activeStandby : true mirroring : enabled : true # list of Kubernetes Secrets containing the peer token # for more details see: https://docs.ceph.com/en/latest/dev/cephfs-mirroring/#bootstrap-peers # Add the secret name if it already exists else specify the empty list here. peers : secretNames : #- secondary-cluster-peer # specify the schedule(s) on which snapshots should be taken # see the official syntax here https://docs.ceph.com/en/latest/cephfs/snap-schedule/#add-and-remove-schedules snapshotSchedules : - path : / interval : 24h # daily snapshots # The startTime should be mentioned in the format YYYY-MM-DDTHH:MM:SS # If startTime is not specified, then by default the start time is considered as midnight UTC. 
# see usage here https://docs.ceph.com/en/latest/cephfs/snap-schedule/#usage # startTime: 2022-07-15T11:55:00 # manage retention policies # see syntax duration here https://docs.ceph.com/en/latest/cephfs/snap-schedule/#add-and-remove-retention-policies snapshotRetention : - path : / duration : \"h 24\"","title":"Create the Filesystem with Mirroring enabled"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-mirroring/#create-the-cephfs-mirror-daemon","text":"Launch the rook-ceph-fs-mirror pod on the source storage cluster, which deploys the cephfs-mirror daemon in the cluster: 1 kubectl create -f deploy/examples/filesystem-mirror.yaml Please refer to Filesystem Mirror CRD for more information.","title":"Create the cephfs-mirror daemon"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-mirroring/#configuring-mirroring-peers","text":"Once mirroring is enabled, Rook will by default create its own bootstrap peer token so that it can be used by another cluster. The bootstrap peer token can be found in a Kubernetes Secret. The name of the Secret is present in the Status field of the CephFilesystem CR: 1 2 3 status : info : fsMirrorBootstrapPeerSecretName : fs-peer-token-myfs This secret can then be fetched like so: 1 2 # kubectl get secret -n rook-ceph fs-peer-token-myfs -o jsonpath = '{.data.token}' | base64 -d eyJmc2lkIjoiOTFlYWUwZGQtMDZiMS00ZDJjLTkxZjMtMTMxMWM5ZGYzODJiIiwiY2xpZW50X2lkIjoicmJkLW1pcnJvci1wZWVyIiwia2V5IjoiQVFEN1psOWZ3V1VGRHhBQWdmY0gyZi8xeUhYeGZDUTU5L1N0NEE9PSIsIm1vbl9ob3N0IjoiW3YyOjEwLjEwMS4xOC4yMjM6MzMwMCx2MToxMC4xMDEuMTguMjIzOjY3ODldIn0=","title":"Configuring mirroring peers"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-mirroring/#import-the-token-in-the-destination-cluster","text":"The decoded secret must be saved in a file before importing. 1 # ceph fs snapshot mirror peer_bootstrap import   See the CephFS mirror documentation on how to add a bootstrap peer . Further refer to CephFS mirror documentation to configure a directory for snapshot mirroring .","title":"Import the token in the Destination cluster"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-mirroring/#verify-that-the-snapshots-have-synced","text":"To check the mirror daemon status , please run the following command from the toolbox pod. 
For example : 1 # ceph fs snapshot mirror daemon status | jq 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 [ { \"daemon_id\" : 906790 , \"filesystems\" : [ { \"filesystem_id\" : 1 , \"name\" : \"myfs\" , \"directory_count\" : 1 , \"peers\" : [ { \"uuid\" : \"a24a3366-8130-4d55-aada-95fa9d3ff94d\" , \"remote\" : { \"client_name\" : \"client.mirror\" , \"cluster_name\" : \"91046889-a6aa-4f74-9fb0-f7bb111666b4\" , \"fs_name\" : \"myfs\" }, \"stats\" : { \"failure_count\" : 0 , \"recovery_count\" : 0 } } ] } ] } ] Please refer to the --admin-daemon socket commands from the CephFS mirror documentation to verify mirror status and peer synchronization status and run the commands from the rook-ceph-fs-mirror pod: 1 # kubectl -n rook-ceph exec -it deploy/rook-ceph-fs-mirror -- bash Fetch the ceph-client.fs-mirror daemon admin socket file from the /var/run/ceph directory: 1 # ls -lhsa /var/run/ceph/ 1 # ceph --admin-daemon /var/run/ceph/ceph-client.fs-mirror.1.93989418120648.asok fs mirror status myfs@1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 { \"rados_inst\" : \"X.X.X.X:0/2286593433\" , \"peers\" : { \"a24a3366-8130-4d55-aada-95fa9d3ff94d\" : { \"remote\" : { \"client_name\" : \"client.mirror\" , \"cluster_name\" : \"91046889-a6aa-4f74-9fb0-f7bb111666b4\" , \"fs_name\" : \"myfs\" } } }, \"snap_dirs\" : { \"dir_count\" : 1 } } For getting peer synchronization status : 1 # ceph --admin-daemon /var/run/ceph/ceph-client.fs-mirror.1.93989418120648.asok fs mirror peer status myfs@1 a24a3366-8130-4d55-aada-95fa9d3ff94d 1 2 3 4 5 6 7 8 9 10 11 12 { \"/volumes/_nogroup/subvol-1\" : { \"state\" : \"idle\" , \"last_synced_snap\" : { \"id\" : 4 , \"name\" : \"snap2\" }, \"snaps_synced\" : 0 , \"snaps_deleted\" : 0 , \"snaps_renamed\" : 0 } }","title":"Verify that the snapshots have synced"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/","text":"A filesystem storage (also named shared filesystem) can be mounted with read/write permission from multiple pods. This may be useful for applications which can be clustered using a shared filesystem. This example runs a shared filesystem for the kube-registry . Prerequisites \u00b6 This guide assumes you have created a Rook cluster as explained in the main quickstart guide Multiple Filesystems Support \u00b6 Multiple filesystems are supported as of the Ceph Pacific release. Create the Filesystem \u00b6 Create the filesystem by specifying the desired settings for the metadata pool, data pools, and metadata server in the CephFilesystem CRD. In this example we create the metadata pool with replication of three and a single data pool with replication of three. For more options, see the documentation on creating shared filesystems . Save this shared filesystem definition as filesystem.yaml : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 apiVersion : ceph.rook.io/v1 kind : CephFilesystem metadata : name : myfs namespace : rook-ceph spec : metadataPool : replicated : size : 3 dataPools : - name : replicated replicated : size : 3 preserveFilesystemOnDelete : true metadataServer : activeCount : 1 activeStandby : true The Rook operator will create all the pools and other resources necessary to start the service. This may take a minute to complete. 1 2 3 # Create the filesystem kubectl create -f filesystem.yaml [...] 
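Before checking the MDS pods, you can also watch the CephFilesystem resource itself. This is only a sketch, assuming the filesystem is named myfs as above and that the CR reports a status phase once the operator has reconciled it:

```bash
# Print the phase reported in the CephFilesystem status (typically "Ready" once reconciled)
kubectl -n rook-ceph get cephfilesystem myfs -o jsonpath='{.status.phase}'; echo
```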
To confirm the filesystem is configured, wait for the mds pods to start: 1 2 3 4 $ kubectl -n rook-ceph get pod -l app = rook-ceph-mds NAME READY STATUS RESTARTS AGE rook-ceph-mds-myfs-7d59fdfcf4-h8kw9 1/1 Running 0 12s rook-ceph-mds-myfs-7d59fdfcf4-kgkjp 1/1 Running 0 12s To see detailed status of the filesystem, start and connect to the Rook toolbox . A new line will be shown with ceph status for the mds service. In this example, there is one active instance of MDS which is up, with one MDS instance in standby-replay mode in case of failover. 1 2 3 4 $ ceph status [...] services: mds: myfs-1/1/1 up {[myfs:0]=mzw58b=up:active}, 1 up:standby-replay Provision Storage \u00b6 Before Rook can start provisioning storage, a StorageClass needs to be created based on the filesystem. This is needed for Kubernetes to interoperate with the CSI driver to create persistent volumes. Save this storage class definition as storageclass.yaml : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-cephfs # Change \"rook-ceph\" provisioner prefix to match the operator namespace if needed provisioner : rook-ceph.cephfs.csi.ceph.com parameters : # clusterID is the namespace where the rook cluster is running # If you change this namespace, also change the namespace below where the secret namespaces are defined clusterID : rook-ceph # CephFS filesystem name into which the volume shall be created fsName : myfs # Ceph pool into which the volume shall be created # Required for provisionVolume: \"true\" pool : myfs-replicated # The secrets contain Ceph admin credentials. These are generated automatically by the operator # in the same namespace as the cluster. csi.storage.k8s.io/provisioner-secret-name : rook-csi-cephfs-provisioner csi.storage.k8s.io/provisioner-secret-namespace : rook-ceph csi.storage.k8s.io/controller-expand-secret-name : rook-csi-cephfs-provisioner csi.storage.k8s.io/controller-expand-secret-namespace : rook-ceph csi.storage.k8s.io/node-stage-secret-name : rook-csi-cephfs-node csi.storage.k8s.io/node-stage-secret-namespace : rook-ceph reclaimPolicy : Delete If you've deployed the Rook operator in a namespace other than \"rook-ceph\" as is common change the prefix in the provisioner to match the namespace you used. For example, if the Rook operator is running in \"rook-op\" the provisioner value should be \"rook-op.rbd.csi.ceph.com\". Create the storage class. 1 kubectl create -f deploy/examples/csi/cephfs/storageclass.yaml Quotas \u00b6 Attention The CephFS CSI driver uses quotas to enforce the PVC size requested. Only newer kernels support CephFS quotas (kernel version of at least 4.17). If you require quotas to be enforced and the kernel driver does not support it, you can disable the kernel driver and use the FUSE client. This can be done by setting CSI_FORCE_CEPHFS_KERNEL_CLIENT: false in the operator deployment ( operator.yaml ). However, it is important to know that when the FUSE client is enabled, there is an issue that during upgrade the application pods will be disconnected from the mount and will need to be restarted. See the upgrade guide for more details. Consume the Shared Filesystem: K8s Registry Sample \u00b6 As an example, we will start the kube-registry pod with the shared filesystem as the backing store. 
Save the following spec as kube-registry.yaml : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 apiVersion : v1 kind : PersistentVolumeClaim metadata : name : cephfs-pvc namespace : kube-system spec : accessModes : - ReadWriteMany resources : requests : storage : 1Gi storageClassName : rook-cephfs --- apiVersion : apps/v1 kind : Deployment metadata : name : kube-registry namespace : kube-system labels : k8s-app : kube-registry kubernetes.io/cluster-service : \"true\" spec : replicas : 3 selector : matchLabels : k8s-app : kube-registry template : metadata : labels : k8s-app : kube-registry kubernetes.io/cluster-service : \"true\" spec : containers : - name : registry image : registry:2 imagePullPolicy : Always resources : limits : cpu : 100m memory : 100Mi env : # Configuration reference: https://docs.docker.com/registry/configuration/ - name : REGISTRY_HTTP_ADDR value : :5000 - name : REGISTRY_HTTP_SECRET value : \"Ple4seCh4ngeThisN0tAVerySecretV4lue\" - name : REGISTRY_STORAGE_FILESYSTEM_ROOTDIRECTORY value : /var/lib/registry volumeMounts : - name : image-store mountPath : /var/lib/registry ports : - containerPort : 5000 name : registry protocol : TCP livenessProbe : httpGet : path : / port : registry readinessProbe : httpGet : path : / port : registry volumes : - name : image-store persistentVolumeClaim : claimName : cephfs-pvc readOnly : false Create the Kube registry deployment: 1 kubectl create -f deploy/examples/csi/cephfs/kube-registry.yaml You now have a docker registry which is HA with persistent storage. Kernel Version Requirement \u00b6 If the Rook cluster has more than one filesystem and the application pod is scheduled to a node with kernel version older than 4.7, inconsistent results may arise since kernels older than 4.7 do not support specifying filesystem namespaces. Consume the Shared Filesystem: Toolbox \u00b6 Once you have pushed an image to the registry (see the instructions to expose and use the kube-registry), verify that kube-registry is using the filesystem that was configured above by mounting the shared filesystem in the toolbox pod. See the Direct Filesystem topic for more details. Consume the Shared Filesystem across namespaces \u00b6 A PVC that you create using the rook-cephfs storageClass can be shared between different Pods simultaneously, either read-write or read-only, but is restricted to a single namespace (a PVC is a namespace-scoped resource, so you cannot use it in another one). However there are some use cases where you want to share the content from a CephFS-based PVC among different Pods in different namespaces, for a shared library for example, or a collaboration workspace between applications running in different namespaces. You can do that using the following recipe. Shared volume creation \u00b6 In the rook namespace, create a copy of the secret rook-csi-cephfs-node , name it rook-csi-cephfs-node-user . 
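One possible way to make that copy is to round-trip the secret through kubectl. The snippet below is a sketch, not the documented procedure: it assumes the cluster namespace is rook (as in the PV examples later in this recipe) and that jq is available to strip server-managed metadata before re-creating the object.

```bash
# Duplicate the CSI node secret under a new name in the same namespace.
# Server-managed fields (uid, resourceVersion, ownerReferences, ...) are dropped
# so the copy is a plain, independent secret.
kubectl -n rook get secret rook-csi-cephfs-node -o json \
  | jq 'del(.metadata.uid, .metadata.resourceVersion, .metadata.creationTimestamp, .metadata.ownerReferences)
        | .metadata.name = "rook-csi-cephfs-node-user"' \
  | kubectl -n rook apply -f -
```

The key rename described in the next step (adminID/adminKey to userID/userKey) still has to be applied to the new secret.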
Edit your new secret, changing the name of the keys (keep the value as it is): adminID -> userID adminKey -> userKey Create the PVC you want to share, for example: 1 2 3 4 5 6 7 8 9 10 11 12 13 kind : PersistentVolumeClaim apiVersion : v1 metadata : name : base-pvc namespace : first-namespace spec : accessModes : - ReadWriteMany resources : requests : storage : 100Gi storageClassName : rook-cephfs volumeMode : Filesystem The corresponding PV that is created will have all the necessary info to connect to the CephFS volume (all non-necessary information are removed here): 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 kind : PersistentVolume apiVersion : v1 metadata : name : pvc-a02dd277-cb26-4c1e-9434-478ebc321e22 annotations : pv.kubernetes.io/provisioned-by : rook.cephfs.csi.ceph.com finalizers : - kubernetes.io/pv-protection spec : capacity : storage : 100Gi csi : driver : rook.cephfs.csi.ceph.com volumeHandle : >- 0001-0011-rook-0000000000000001-8a528de0-e274-11ec-b069-0a580a800213 volumeAttributes : clusterID : rook fsName : rook-cephfilesystem storage.kubernetes.io/csiProvisionerIdentity : 1654174264855-8081-rook.cephfs.csi.ceph.com subvolumeName : csi-vol-8a528de0-e274-11ec-b069-0a580a800213 subvolumePath : >- /volumes/csi/csi-vol-8a528de0-e274-11ec-b069-0a580a800213/da98fb83-fff3-485a-a0a9-57c227cb67ec nodeStageSecretRef : name : rook-csi-cephfs-node namespace : rook controllerExpandSecretRef : name : rook-csi-cephfs-provisioner namespace : rook accessModes : - ReadWriteMany claimRef : kind : PersistentVolumeClaim namespace : first-namespace name : base-pvc apiVersion : v1 resourceVersion : '49728' persistentVolumeReclaimPolicy : Retain storageClassName : rook-cephfs volumeMode : Filesystem On this PV, change the persistentVolumeReclaimPolicy parameter to Retain to avoid it from being deleted when you will delete PVCs. Don't forget to change it back to Delete when you want to remove the shared volume (see full procedure in the next section). Copy the YAML content of the PV, and create a new static PV with the same information and some modifications. From the original YAML, you must: Modify the original name. To keep track, the best solution is to append to the original name the namespace name where you want your new PV. In this example newnamespace . Modify the volumeHandle. Again append the targeted namespace. Add the staticVolume: \"true\" entry to the volumeAttributes. Add the rootPath entry to the volumeAttributes, with the same content as subvolumePath . In the nodeStageSecretRef section, change the name to point to the secret you created earlier, rook-csi-cephfs-node-user . 
Remove the unnecessary information before applying the YAML (claimRef, managedFields,...): Your YAML should look like this: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 kind : PersistentVolume apiVersion : v1 metadata : name : pvc-a02dd277-cb26-4c1e-9434-478ebc321e22-newnamespace spec : capacity : storage : 100Gi csi : driver : rook.cephfs.csi.ceph.com volumeHandle : >- 0001-0011-rook-0000000000000001-8a528de0-e274-11ec-b069-0a580a800213-newnamespace volumeAttributes : clusterID : rook fsName : rook-cephfilesystem storage.kubernetes.io/csiProvisionerIdentity : 1654174264855-8081-rook.cephfs.csi.ceph.com subvolumeName : csi-vol-8a528de0-e274-11ec-b069-0a580a800213 subvolumePath : >- /volumes/csi/csi-vol-8a528de0-e274-11ec-b069-0a580a800213/da98fb83-fff3-485a-a0a9-57c227cb67ec rootPath : >- /volumes/csi/csi-vol-8a528de0-e274-11ec-b069-0a580a800213/da98fb83-fff3-485a-a0a9-57c227cb67ec staticVolume : \"true\" nodeStageSecretRef : name : rook-csi-cephfs-node namespace : rook accessModes : - ReadWriteMany persistentVolumeReclaimPolicy : Retain storageClassName : rook-cephfs volumeMode : Filesystem In a new or other namespace, create a new PVC that will use this new PV you created. You simply have to point to it in the volumeName parameter. Make sure you enter the same size as the original PVC!: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 kind : PersistentVolumeClaim apiVersion : v1 metadata : name : second-pvc namespace : newnamespace finalizers : - kubernetes.io/pvc-protection spec : accessModes : - ReadWriteMany resources : requests : storage : 100Gi volumeName : pvc-a02dd277-cb26-4c1e-9434-478ebc321e22-newnamespace storageClassName : rook-cephfs volumeMode : Filesystem You have now access to the same CephFS subvolume from different PVCs in different namespaces. Redo the previous steps (copy PV with a new name, create a PVC pointing to it) in each namespace you want to use this subvolume. Note : the new PVCs/PVs we have created are static. Therefore CephCSI does not support snapshots, clones, resizing or delete operations for them. If those operations are required, you must make them on the original PVC. Shared volume removal \u00b6 As the same CephFS volume is used by different PVCs/PVs, you must proceed very orderly to remove it properly. Delete the static PVCs in the different namespaces, but keep the original one! Delete the corresponding static PVs that should now have been marked as \"Released\". Again, don't delete the original one yet! Edit the original PV, changing back the persistentVolumeReclaimPolicy from Retain to Delete . Delete the original PVC. It will now properly delete the original PV, as well as the subvolume in CephFS. Pending Issue \u00b6 Due to this bug , the global mount for a Volume that is mounted multiple times on the same node will not be unmounted. This does not result in any particular problem, apart from polluting the logs with unmount error messages, or having many different mounts hanging if you create and delete many shared PVCs, or you don't really use them. Until this issue is solved, either on the Rook or Kubelet side, you can always manually unmount the unwanted hanging global mounts on the nodes: Log onto each node where the volume has been mounted. Check for hanging mounts using their volumeHandle . Unmount the unwanted volumes. Teardown \u00b6 To clean up all the artifacts created by the filesystem demo: 1 kubectl delete -f kube-registry.yaml To delete the filesystem components and backing data, delete the Filesystem CRD. 
Warning Data will be deleted if preserveFilesystemOnDelete=false**. 1 kubectl -n rook-ceph delete cephfilesystem myfs Note: If the \"preserveFilesystemOnDelete\" filesystem attribute is set to true, the above command won't delete the filesystem. Recreating the same CRD will reuse the existing filesystem. Advanced Example: Erasure Coded Filesystem \u00b6 The Ceph filesystem example can be found here: Ceph Shared Filesystem - Samples - Erasure Coded .","title":"Filesystem Storage Overview"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#prerequisites","text":"This guide assumes you have created a Rook cluster as explained in the main quickstart guide","title":"Prerequisites"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#multiple-filesystems-support","text":"Multiple filesystems are supported as of the Ceph Pacific release.","title":"Multiple Filesystems Support"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#create-the-filesystem","text":"Create the filesystem by specifying the desired settings for the metadata pool, data pools, and metadata server in the CephFilesystem CRD. In this example we create the metadata pool with replication of three and a single data pool with replication of three. For more options, see the documentation on creating shared filesystems . Save this shared filesystem definition as filesystem.yaml : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 apiVersion : ceph.rook.io/v1 kind : CephFilesystem metadata : name : myfs namespace : rook-ceph spec : metadataPool : replicated : size : 3 dataPools : - name : replicated replicated : size : 3 preserveFilesystemOnDelete : true metadataServer : activeCount : 1 activeStandby : true The Rook operator will create all the pools and other resources necessary to start the service. This may take a minute to complete. 1 2 3 # Create the filesystem kubectl create -f filesystem.yaml [...] To confirm the filesystem is configured, wait for the mds pods to start: 1 2 3 4 $ kubectl -n rook-ceph get pod -l app = rook-ceph-mds NAME READY STATUS RESTARTS AGE rook-ceph-mds-myfs-7d59fdfcf4-h8kw9 1/1 Running 0 12s rook-ceph-mds-myfs-7d59fdfcf4-kgkjp 1/1 Running 0 12s To see detailed status of the filesystem, start and connect to the Rook toolbox . A new line will be shown with ceph status for the mds service. In this example, there is one active instance of MDS which is up, with one MDS instance in standby-replay mode in case of failover. 1 2 3 4 $ ceph status [...] services: mds: myfs-1/1/1 up {[myfs:0]=mzw58b=up:active}, 1 up:standby-replay","title":"Create the Filesystem"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#provision-storage","text":"Before Rook can start provisioning storage, a StorageClass needs to be created based on the filesystem. This is needed for Kubernetes to interoperate with the CSI driver to create persistent volumes. 
Save this storage class definition as storageclass.yaml : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-cephfs # Change \"rook-ceph\" provisioner prefix to match the operator namespace if needed provisioner : rook-ceph.cephfs.csi.ceph.com parameters : # clusterID is the namespace where the rook cluster is running # If you change this namespace, also change the namespace below where the secret namespaces are defined clusterID : rook-ceph # CephFS filesystem name into which the volume shall be created fsName : myfs # Ceph pool into which the volume shall be created # Required for provisionVolume: \"true\" pool : myfs-replicated # The secrets contain Ceph admin credentials. These are generated automatically by the operator # in the same namespace as the cluster. csi.storage.k8s.io/provisioner-secret-name : rook-csi-cephfs-provisioner csi.storage.k8s.io/provisioner-secret-namespace : rook-ceph csi.storage.k8s.io/controller-expand-secret-name : rook-csi-cephfs-provisioner csi.storage.k8s.io/controller-expand-secret-namespace : rook-ceph csi.storage.k8s.io/node-stage-secret-name : rook-csi-cephfs-node csi.storage.k8s.io/node-stage-secret-namespace : rook-ceph reclaimPolicy : Delete If you've deployed the Rook operator in a namespace other than \"rook-ceph\" as is common change the prefix in the provisioner to match the namespace you used. For example, if the Rook operator is running in \"rook-op\" the provisioner value should be \"rook-op.rbd.csi.ceph.com\". Create the storage class. 1 kubectl create -f deploy/examples/csi/cephfs/storageclass.yaml","title":"Provision Storage"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#quotas","text":"Attention The CephFS CSI driver uses quotas to enforce the PVC size requested. Only newer kernels support CephFS quotas (kernel version of at least 4.17). If you require quotas to be enforced and the kernel driver does not support it, you can disable the kernel driver and use the FUSE client. This can be done by setting CSI_FORCE_CEPHFS_KERNEL_CLIENT: false in the operator deployment ( operator.yaml ). However, it is important to know that when the FUSE client is enabled, there is an issue that during upgrade the application pods will be disconnected from the mount and will need to be restarted. See the upgrade guide for more details.","title":"Quotas"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#consume-the-shared-filesystem-k8s-registry-sample","text":"As an example, we will start the kube-registry pod with the shared filesystem as the backing store. 
Save the following spec as kube-registry.yaml : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 apiVersion : v1 kind : PersistentVolumeClaim metadata : name : cephfs-pvc namespace : kube-system spec : accessModes : - ReadWriteMany resources : requests : storage : 1Gi storageClassName : rook-cephfs --- apiVersion : apps/v1 kind : Deployment metadata : name : kube-registry namespace : kube-system labels : k8s-app : kube-registry kubernetes.io/cluster-service : \"true\" spec : replicas : 3 selector : matchLabels : k8s-app : kube-registry template : metadata : labels : k8s-app : kube-registry kubernetes.io/cluster-service : \"true\" spec : containers : - name : registry image : registry:2 imagePullPolicy : Always resources : limits : cpu : 100m memory : 100Mi env : # Configuration reference: https://docs.docker.com/registry/configuration/ - name : REGISTRY_HTTP_ADDR value : :5000 - name : REGISTRY_HTTP_SECRET value : \"Ple4seCh4ngeThisN0tAVerySecretV4lue\" - name : REGISTRY_STORAGE_FILESYSTEM_ROOTDIRECTORY value : /var/lib/registry volumeMounts : - name : image-store mountPath : /var/lib/registry ports : - containerPort : 5000 name : registry protocol : TCP livenessProbe : httpGet : path : / port : registry readinessProbe : httpGet : path : / port : registry volumes : - name : image-store persistentVolumeClaim : claimName : cephfs-pvc readOnly : false Create the Kube registry deployment: 1 kubectl create -f deploy/examples/csi/cephfs/kube-registry.yaml You now have a docker registry which is HA with persistent storage.","title":"Consume the Shared Filesystem: K8s Registry Sample"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#kernel-version-requirement","text":"If the Rook cluster has more than one filesystem and the application pod is scheduled to a node with kernel version older than 4.7, inconsistent results may arise since kernels older than 4.7 do not support specifying filesystem namespaces.","title":"Kernel Version Requirement"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#consume-the-shared-filesystem-toolbox","text":"Once you have pushed an image to the registry (see the instructions to expose and use the kube-registry), verify that kube-registry is using the filesystem that was configured above by mounting the shared filesystem in the toolbox pod. See the Direct Filesystem topic for more details.","title":"Consume the Shared Filesystem: Toolbox"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#consume-the-shared-filesystem-across-namespaces","text":"A PVC that you create using the rook-cephfs storageClass can be shared between different Pods simultaneously, either read-write or read-only, but is restricted to a single namespace (a PVC is a namespace-scoped resource, so you cannot use it in another one). However there are some use cases where you want to share the content from a CephFS-based PVC among different Pods in different namespaces, for a shared library for example, or a collaboration workspace between applications running in different namespaces. 
You can do that using the following recipe.","title":"Consume the Shared Filesystem across namespaces"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#shared-volume-creation","text":"In the rook namespace, create a copy of the secret rook-csi-cephfs-node , name it rook-csi-cephfs-node-user . Edit your new secret, changing the name of the keys (keep the value as it is): adminID -> userID adminKey -> userKey Create the PVC you want to share, for example: 1 2 3 4 5 6 7 8 9 10 11 12 13 kind : PersistentVolumeClaim apiVersion : v1 metadata : name : base-pvc namespace : first-namespace spec : accessModes : - ReadWriteMany resources : requests : storage : 100Gi storageClassName : rook-cephfs volumeMode : Filesystem The corresponding PV that is created will have all the necessary info to connect to the CephFS volume (all non-necessary information are removed here): 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 kind : PersistentVolume apiVersion : v1 metadata : name : pvc-a02dd277-cb26-4c1e-9434-478ebc321e22 annotations : pv.kubernetes.io/provisioned-by : rook.cephfs.csi.ceph.com finalizers : - kubernetes.io/pv-protection spec : capacity : storage : 100Gi csi : driver : rook.cephfs.csi.ceph.com volumeHandle : >- 0001-0011-rook-0000000000000001-8a528de0-e274-11ec-b069-0a580a800213 volumeAttributes : clusterID : rook fsName : rook-cephfilesystem storage.kubernetes.io/csiProvisionerIdentity : 1654174264855-8081-rook.cephfs.csi.ceph.com subvolumeName : csi-vol-8a528de0-e274-11ec-b069-0a580a800213 subvolumePath : >- /volumes/csi/csi-vol-8a528de0-e274-11ec-b069-0a580a800213/da98fb83-fff3-485a-a0a9-57c227cb67ec nodeStageSecretRef : name : rook-csi-cephfs-node namespace : rook controllerExpandSecretRef : name : rook-csi-cephfs-provisioner namespace : rook accessModes : - ReadWriteMany claimRef : kind : PersistentVolumeClaim namespace : first-namespace name : base-pvc apiVersion : v1 resourceVersion : '49728' persistentVolumeReclaimPolicy : Retain storageClassName : rook-cephfs volumeMode : Filesystem On this PV, change the persistentVolumeReclaimPolicy parameter to Retain to avoid it from being deleted when you will delete PVCs. Don't forget to change it back to Delete when you want to remove the shared volume (see full procedure in the next section). Copy the YAML content of the PV, and create a new static PV with the same information and some modifications. From the original YAML, you must: Modify the original name. To keep track, the best solution is to append to the original name the namespace name where you want your new PV. In this example newnamespace . Modify the volumeHandle. Again append the targeted namespace. Add the staticVolume: \"true\" entry to the volumeAttributes. Add the rootPath entry to the volumeAttributes, with the same content as subvolumePath . In the nodeStageSecretRef section, change the name to point to the secret you created earlier, rook-csi-cephfs-node-user . 
Remove the unnecessary information before applying the YAML (claimRef, managedFields,...): Your YAML should look like this: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 kind : PersistentVolume apiVersion : v1 metadata : name : pvc-a02dd277-cb26-4c1e-9434-478ebc321e22-newnamespace spec : capacity : storage : 100Gi csi : driver : rook.cephfs.csi.ceph.com volumeHandle : >- 0001-0011-rook-0000000000000001-8a528de0-e274-11ec-b069-0a580a800213-newnamespace volumeAttributes : clusterID : rook fsName : rook-cephfilesystem storage.kubernetes.io/csiProvisionerIdentity : 1654174264855-8081-rook.cephfs.csi.ceph.com subvolumeName : csi-vol-8a528de0-e274-11ec-b069-0a580a800213 subvolumePath : >- /volumes/csi/csi-vol-8a528de0-e274-11ec-b069-0a580a800213/da98fb83-fff3-485a-a0a9-57c227cb67ec rootPath : >- /volumes/csi/csi-vol-8a528de0-e274-11ec-b069-0a580a800213/da98fb83-fff3-485a-a0a9-57c227cb67ec staticVolume : \"true\" nodeStageSecretRef : name : rook-csi-cephfs-node namespace : rook accessModes : - ReadWriteMany persistentVolumeReclaimPolicy : Retain storageClassName : rook-cephfs volumeMode : Filesystem In a new or other namespace, create a new PVC that will use this new PV you created. You simply have to point to it in the volumeName parameter. Make sure you enter the same size as the original PVC!: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 kind : PersistentVolumeClaim apiVersion : v1 metadata : name : second-pvc namespace : newnamespace finalizers : - kubernetes.io/pvc-protection spec : accessModes : - ReadWriteMany resources : requests : storage : 100Gi volumeName : pvc-a02dd277-cb26-4c1e-9434-478ebc321e22-newnamespace storageClassName : rook-cephfs volumeMode : Filesystem You have now access to the same CephFS subvolume from different PVCs in different namespaces. Redo the previous steps (copy PV with a new name, create a PVC pointing to it) in each namespace you want to use this subvolume. Note : the new PVCs/PVs we have created are static. Therefore CephCSI does not support snapshots, clones, resizing or delete operations for them. If those operations are required, you must make them on the original PVC.","title":"Shared volume creation"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#shared-volume-removal","text":"As the same CephFS volume is used by different PVCs/PVs, you must proceed very orderly to remove it properly. Delete the static PVCs in the different namespaces, but keep the original one! Delete the corresponding static PVs that should now have been marked as \"Released\". Again, don't delete the original one yet! Edit the original PV, changing back the persistentVolumeReclaimPolicy from Retain to Delete . Delete the original PVC. It will now properly delete the original PV, as well as the subvolume in CephFS.","title":"Shared volume removal"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#pending-issue","text":"Due to this bug , the global mount for a Volume that is mounted multiple times on the same node will not be unmounted. This does not result in any particular problem, apart from polluting the logs with unmount error messages, or having many different mounts hanging if you create and delete many shared PVCs, or you don't really use them. Until this issue is solved, either on the Rook or Kubelet side, you can always manually unmount the unwanted hanging global mounts on the nodes: Log onto each node where the volume has been mounted. 
Check for hanging mounts using their volumeHandle . Unmount the unwanted volumes.","title":"Pending Issue"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#teardown","text":"To clean up all the artifacts created by the filesystem demo: 1 kubectl delete -f kube-registry.yaml To delete the filesystem components and backing data, delete the Filesystem CRD. Warning Data will be deleted if preserveFilesystemOnDelete=false**. 1 kubectl -n rook-ceph delete cephfilesystem myfs Note: If the \"preserveFilesystemOnDelete\" filesystem attribute is set to true, the above command won't delete the filesystem. Recreating the same CRD will reuse the existing filesystem.","title":"Teardown"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#advanced-example-erasure-coded-filesystem","text":"The Ceph filesystem example can be found here: Ceph Shared Filesystem - Samples - Erasure Coded .","title":"Advanced Example: Erasure Coded Filesystem"},{"location":"Troubleshooting/ceph-common-issues/","text":"Many of these problem cases are hard to summarize down to a short phrase that adequately describes the problem. Each problem will start with a bulleted list of symptoms. Keep in mind that all symptoms may not apply depending on the configuration of Rook. If the majority of the symptoms are seen there is a fair chance you are experiencing that problem. If after trying the suggestions found on this page and the problem is not resolved, the Rook team is very happy to help you troubleshoot the issues in their Slack channel. Once you have registered for the Rook Slack , proceed to the #ceph channel to ask for assistance. See also the CSI Troubleshooting Guide . Troubleshooting Techniques \u00b6 There are two main categories of information you will need to investigate issues in the cluster: Kubernetes status and logs documented here Ceph cluster status (see upcoming Ceph tools section) Ceph Tools \u00b6 After you verify the basic health of the running pods, next you will want to run Ceph tools for status of the storage components. There are two ways to run the Ceph tools, either in the Rook toolbox or inside other Rook pods that are already running. Logs on a specific node to find why a PVC is failing to mount See the log collection topic for a script that will help you gather the logs Other artifacts: The monitors that are expected to be in quorum: kubectl -n  get configmap rook-ceph-mon-endpoints -o yaml | grep data Tools in the Rook Toolbox \u00b6 The rook-ceph-tools pod provides a simple environment to run Ceph tools. Once the pod is up and running, connect to the pod to execute Ceph commands to evaluate that current state of the cluster. 1 kubectl -n rook-ceph exec -it $(kubectl -n rook-ceph get pod -l \"app=rook-ceph-tools\" -o jsonpath='{.items[*].metadata.name}') bash Ceph Commands \u00b6 Here are some common commands to troubleshoot a Ceph cluster: ceph status ceph osd status ceph osd df ceph osd utilization ceph osd pool stats ceph osd tree ceph pg stat The first two status commands provide the overall cluster health. The normal state for cluster operations is HEALTH_OK, but will still function when the state is in a HEALTH_WARN state. If you are in a WARN state, then the cluster is in a condition that it may enter the HEALTH_ERROR state at which point all disk I/O operations are halted. If a HEALTH_WARN state is observed, then one should take action to prevent the cluster from halting when it enters the HEALTH_ERROR state. 
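For a quick look without opening an interactive shell, the same commands can be run non-interactively against the toolbox deployment. A minimal sketch, assuming the default rook-ceph namespace and the rook-ceph-tools deployment from the toolbox guide:

```bash
# Overall cluster state, plus the detailed reasons behind any WARN/ERR condition
kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph status
kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph health detail

# Per-OSD capacity and utilization, useful when a warning is space related
kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph osd df
```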
There are many Ceph sub-commands to look at and manipulate Ceph objects, well beyond the scope this document. See the Ceph documentation for more details of gathering information about the health of the cluster. In addition, there are other helpful hints and some best practices located in the Advanced Configuration section . Of particular note, there are scripts for collecting logs and gathering OSD information there. Cluster failing to service requests \u00b6 Symptoms \u00b6 Execution of the ceph command hangs PersistentVolumes are not being created Large amount of slow requests are blocking Large amount of stuck requests are blocking One or more MONs are restarting periodically Investigation \u00b6 Create a rook-ceph-tools pod to investigate the current state of Ceph. Here is an example of what one might see. In this case the ceph status command would just hang so a CTRL-C needed to be sent. 1 2 3 4 kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- ceph status ceph status ^CCluster connection interrupted or timed out Another indication is when one or more of the MON pods restart frequently. Note the 'mon107' that has only been up for 16 minutes in the following output. 1 2 3 4 5 6 7 8 $ kubectl -n rook-ceph get all -o wide --show-all NAME READY STATUS RESTARTS AGE IP NODE po/rook-ceph-mgr0-2487684371-gzlbq 1/1 Running 0 17h 192.168.224.46 k8-host-0402 po/rook-ceph-mon107-p74rj 1/1 Running 0 16m 192.168.224.28 k8-host-0402 rook-ceph-mon1-56fgm 1/1 Running 0 2d 192.168.91.135 k8-host-0404 rook-ceph-mon2-rlxcd 1/1 Running 0 2d 192.168.123.33 k8-host-0403 rook-ceph-osd-bg2vj 1/1 Running 0 2d 192.168.91.177 k8-host-0404 rook-ceph-osd-mwxdm 1/1 Running 0 2d 192.168.123.31 k8-host-0403 Solution \u00b6 What is happening here is that the MON pods are restarting and one or more of the Ceph daemons are not getting configured with the proper cluster information. This is commonly the result of not specifying a value for dataDirHostPath in your Cluster CRD. The dataDirHostPath setting specifies a path on the local host for the Ceph daemons to store configuration and data. Setting this to a path like /var/lib/rook , reapplying your Cluster CRD and restarting all the Ceph daemons (MON, MGR, OSD, RGW) should solve this problem. After the Ceph daemons have been restarted, it is advisable to restart the rook-tools pod . Monitors are the only pods running \u00b6 Symptoms \u00b6 Rook operator is running Either a single mon starts or the mons start very slowly (at least several minutes apart) The crash-collector pods are crashing No mgr, osd, or other daemons are created except the CSI driver Investigation \u00b6 When the operator is starting a cluster, the operator will start one mon at a time and check that they are healthy before continuing to bring up all three mons. If the first mon is not detected healthy, the operator will continue to check until it is healthy. If the first mon fails to start, a second and then a third mon may attempt to start. However, they will never form quorum and the orchestration will be blocked from proceeding. The crash-collector pods will be blocked from starting until the mons have formed quorum the first time. There are several common causes for the mons failing to form quorum: The operator pod does not have network connectivity to the mon pod(s). The network may be configured incorrectly. One or more mon pods are in running state, but the operator log shows they are not able to form quorum A mon is using configuration from a previous installation. 
See the cleanup guide for cleaning the previous cluster. A firewall may be blocking the ports required for the Ceph mons to form quorum. Ensure ports 6789 and 3300 are enabled. See the Ceph networking guide for more details. There may be MTU mismatch between different networking components. Some networks may be more susceptible to mismatch than others. If Kubernetes CNI or hosts enable jumbo frames (MTU 9000), Ceph will use large packets to maximize network bandwidth. If other parts of the networking chain don't support jumbo frames, this could result in lost or rejected packets unexpectedly. Operator fails to connect to the mon \u00b6 First look at the logs of the operator to confirm if it is able to connect to the mons. 1 kubectl -n rook-ceph logs -l app=rook-ceph-operator Likely you will see an error similar to the following that the operator is timing out when connecting to the mon. The last command is ceph mon_status , followed by a timeout message five minutes later. 1 2 3 4 5 6 2018-01-21 21:47:32.375833 I | exec: Running command: ceph mon_status --cluster=rook --conf=/var/lib/rook/rook-ceph/rook.config --keyring=/var/lib/rook/rook-ceph/client.admin.keyring --format json --out-file /tmp/442263890 2018-01-21 21:52:35.370533 I | exec: 2018-01-21 21:52:35.071462 7f96a3b82700 0 monclient(hunting): authenticate timed out after 300 2018-01-21 21:52:35.071462 7f96a3b82700 0 monclient(hunting): authenticate timed out after 300 2018-01-21 21:52:35.071524 7f96a3b82700 0 librados: client.admin authentication error (110) Connection timed out 2018-01-21 21:52:35.071524 7f96a3b82700 0 librados: client.admin authentication error (110) Connection timed out [errno 110] error connecting to the cluster The error would appear to be an authentication error, but it is misleading. The real issue is a timeout. Solution \u00b6 If you see the timeout in the operator log, verify if the mon pod is running (see the next section). If the mon pod is running, check the network connectivity between the operator pod and the mon pod. A common issue is that the CNI is not configured correctly. To verify the network connectivity: Get the endpoint for a mon Curl the mon from the operator pod For example, this command will curl the first mon from the operator: 1 2 $ kubectl -n rook-ceph exec deploy/rook-ceph-operator -- curl $( kubectl -n rook-ceph get svc -l app = rook-ceph-mon -o jsonpath = '{.items[0].spec.clusterIP}' ) :3300 2 >/dev/null ceph v2 If \"ceph v2\" is printed to the console, the connection was successful. If the command does not respond or otherwise fails, the network connection cannot be established. Failing mon pod \u00b6 Second we need to verify if the mon pod started successfully. 1 2 3 $ kubectl -n rook-ceph get pod -l app = rook-ceph-mon NAME READY STATUS RESTARTS AGE rook-ceph-mon-a-69fb9c78cd-58szd 1/1 CrashLoopBackOff 2 47s If the mon pod is failing as in this example, you will need to look at the mon pod status or logs to determine the cause. If the pod is in a crash loop backoff state, you should see the reason by describing the pod. 1 2 3 4 5 6 7 8 # The pod shows a termination status that the keyring does not match the existing keyring $ kubectl -n rook-ceph describe pod -l mon = rook-ceph-mon0 ... Last State: Terminated Reason: Error Message: The keyring does not match the existing keyring in /var/lib/rook/rook-ceph-mon0/data/keyring. You may need to delete the contents of dataDirHostPath on the host from a previous deployment. ... 
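If describing the pod does not surface a clear termination message, the mon logs are the next place to look. A sketch, assuming the mon pods carry the app=rook-ceph-mon label shown earlier and run their daemon in a container named mon:

```bash
# Tail the logs of all mon pods (the daemon container is assumed to be named "mon")
kubectl -n rook-ceph logs -l app=rook-ceph-mon -c mon --tail=50
```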
See the solution in the next section regarding cleaning up the dataDirHostPath on the nodes. Solution \u00b6 This is a common problem reinitializing the Rook cluster when the local directory used for persistence has not been purged. This directory is the dataDirHostPath setting in the cluster CRD and is typically set to /var/lib/rook . To fix the issue you will need to delete all components of Rook and then delete the contents of /var/lib/rook (or the directory specified by dataDirHostPath ) on each of the hosts in the cluster. Then when the cluster CRD is applied to start a new cluster, the rook-operator should start all the pods as expected. Caution Deleting the dataDirHostPath folder is destructive to the storage. Only delete the folder if you are trying to permanently purge the Rook cluster. See the Cleanup Guide for more details. PVCs stay in pending state \u00b6 Symptoms \u00b6 When you create a PVC based on a rook storage class, it stays pending indefinitely For the Wordpress example, you might see two PVCs in pending state. 1 2 3 4 $ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE mysql-pv-claim Pending rook-ceph-block 8s wp-pv-claim Pending rook-ceph-block 16s Investigation \u00b6 There are two common causes for the PVCs staying in pending state: There are no OSDs in the cluster The CSI provisioner pod is not running or is not responding to the request to provision the storage Confirm if there are OSDs \u00b6 To confirm if you have OSDs in your cluster, connect to the Rook Toolbox and run the ceph status command. You should see that you have at least one OSD up and in . The minimum number of OSDs required depends on the replicated.size setting in the pool created for the storage class. In a \"test\" cluster, only one OSD is required (see storageclass-test.yaml ). In the production storage class example ( storageclass.yaml ), three OSDs would be required. 1 2 3 4 5 6 7 8 9 $ ceph status cluster: id: a0452c76-30d9-4c1a-a948-5d8405f19a7c health: HEALTH_OK services: mon: 3 daemons, quorum a,b,c (age 11m) mgr: a(active, since 10m) osd: 1 osds: 1 up (since 46s), 1 in (since 109m) OSD Prepare Logs \u00b6 If you don't see the expected number of OSDs, let's investigate why they weren't created. On each node where Rook looks for OSDs to configure, you will see an \"osd prepare\" pod. 1 2 3 $ kubectl -n rook-ceph get pod -l app = rook-ceph-osd-prepare NAME ... READY STATUS RESTARTS AGE rook-ceph-osd-prepare-minikube-9twvk 0/2 Completed 0 30m See the section on why OSDs are not getting created to investigate the logs. CSI Driver \u00b6 The CSI driver may not be responding to the requests. Look in the logs of the CSI provisioner pod to see if there are any errors during the provisioning. There are two provisioner pods: 1 kubectl -n rook-ceph get pod -l app=csi-rbdplugin-provisioner Get the logs of each of the pods. One of them should be the \"leader\" and be responding to requests. 1 kubectl -n rook-ceph logs csi-cephfsplugin-provisioner-d77bb49c6-q9hwq csi-provisioner See also the CSI Troubleshooting Guide . Operator unresponsiveness \u00b6 Lastly, if you have OSDs up and in , the next step is to confirm the operator is responding to the requests. Look in the Operator pod logs around the time when the PVC was created to confirm if the request is being raised. If the operator does not show requests to provision the block image, the operator may be stuck on some other operation. In this case, restart the operator pod to get things going again. 
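As a minimal sketch (the grep pattern below is only illustrative, not an exact log message), you could scan the operator log for provisioning activity around the time the PVC was created, and restart the operator if it appears stuck:
kubectl -n rook-ceph logs deploy/rook-ceph-operator --since=30m | grep -i provision
kubectl -n rook-ceph delete pod -l app=rook-ceph-operator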
Solution \u00b6 If the \"osd prepare\" logs didn't give you enough clues about why the OSDs were not being created, please review your cluster.yaml configuration. The common misconfigurations include: If useAllDevices: true , Rook expects to find local devices attached to the nodes. If no devices are found, no OSDs will be created. If useAllDevices: false , OSDs will only be created if deviceFilter is specified. Only local devices attached to the nodes will be configurable by Rook. In other words, the devices must show up under /dev . The devices must not have any partitions or filesystems on them. Rook will only configure raw devices. Partitions are not yet supported. OSD pods are failing to start \u00b6 Symptoms \u00b6 OSD pods are failing to start You have started a cluster after tearing down another cluster Investigation \u00b6 When an OSD starts, the device or directory will be configured for consumption. If there is an error with the configuration, the pod will crash and you will see the CrashLoopBackoff status for the pod. Look in the osd pod logs for an indication of the failure. 1 2 $ kubectl -n rook-ceph logs rook-ceph-osd-fl8fs ... One common case for failure is that you have re-deployed a test cluster and some state may remain from a previous deployment. If your cluster is larger than a few nodes, you may get lucky enough that the monitors were able to start and form quorum. However, now the OSDs pods may fail to start due to the old state. Looking at the OSD pod logs you will see an error about the file already existing. 1 2 3 4 5 6 7 $ kubectl -n rook-ceph logs rook-ceph-osd-fl8fs ... 2017-10-31 20:13:11.187106 I | mkfs-osd0: 2017-10-31 20:13:11.186992 7f0059d62e00 -1 bluestore(/var/lib/rook/osd0) _read_fsid unparsable uuid 2017-10-31 20:13:11.187208 I | mkfs-osd0: 2017-10-31 20:13:11.187026 7f0059d62e00 -1 bluestore(/var/lib/rook/osd0) _setup_block_symlink_or_file failed to create block symlink to /dev/disk/by-partuuid/651153ba-2dfc-4231-ba06-94759e5ba273: (17) File exists 2017-10-31 20:13:11.187233 I | mkfs-osd0: 2017-10-31 20:13:11.187038 7f0059d62e00 -1 bluestore(/var/lib/rook/osd0) mkfs failed, (17) File exists 2017-10-31 20:13:11.187254 I | mkfs-osd0: 2017-10-31 20:13:11.187042 7f0059d62e00 -1 OSD::mkfs: ObjectStore::mkfs failed with error (17) File exists 2017-10-31 20:13:11.187275 I | mkfs-osd0: 2017-10-31 20:13:11.187121 7f0059d62e00 -1 ** ERROR: error creating empty object store in /var/lib/rook/osd0: (17) File exists Solution \u00b6 If the error is from the file that already exists, this is a common problem reinitializing the Rook cluster when the local directory used for persistence has not been purged. This directory is the dataDirHostPath setting in the cluster CRD and is typically set to /var/lib/rook . To fix the issue you will need to delete all components of Rook and then delete the contents of /var/lib/rook (or the directory specified by dataDirHostPath ) on each of the hosts in the cluster. Then when the cluster CRD is applied to start a new cluster, the rook-operator should start all the pods as expected. OSD pods are not created on my devices \u00b6 Symptoms \u00b6 No OSD pods are started in the cluster Devices are not configured with OSDs even though specified in the Cluster CRD One OSD pod is started on each node instead of multiple pods for each device Investigation \u00b6 First, ensure that you have specified the devices correctly in the CRD. 
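For illustration, here is a hedged sketch of one way to specify devices explicitly in the CephCluster CR (the node and device names below are placeholders, not values taken from this guide):
spec:
  storage:
    useAllNodes: false
    useAllDevices: false
    nodes:
      - name: node1
        devices:
          - name: sdb
          - name: sdc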
The Cluster CRD has several ways to specify the devices that are to be consumed by the Rook storage: useAllDevices: true : Rook will consume all devices it determines to be available deviceFilter : Consume all devices that match this regular expression devices : Explicit list of device names on each node to consume Second, if Rook determines that a device is not available (has existing partitions or a formatted filesystem), Rook will skip consuming the devices. If Rook is not starting OSDs on the devices you expect, Rook may have skipped it for this reason. To see if a device was skipped, view the OSD preparation log on the node where the device was skipped. Note that it is completely normal and expected for OSD prepare pod to be in the completed state. After the job is complete, Rook leaves the pod around in case the logs need to be investigated. 1 2 3 4 5 6 # Get the prepare pods in the cluster $ kubectl -n rook-ceph get pod -l app = rook-ceph-osd-prepare NAME READY STATUS RESTARTS AGE rook-ceph-osd-prepare-node1-fvmrp 0/1 Completed 0 18m rook-ceph-osd-prepare-node2-w9xv9 0/1 Completed 0 22m rook-ceph-osd-prepare-node3-7rgnv 0/1 Completed 0 22m 1 2 3 # view the logs for the node of interest in the \"provision\" container $ kubectl -n rook-ceph logs rook-ceph-osd-prepare-node1-fvmrp provision [...] Here are some key lines to look for in the log: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 # A device will be skipped if Rook sees it has partitions or a filesystem 2019-05-30 19:02:57.353171 W | cephosd: skipping device sda that is in use 2019-05-30 19:02:57.452168 W | skipping device \"sdb5\": [\"Used by ceph-disk\"] # Other messages about a disk being unusable by ceph include: Insufficient space (<5GB) on vgs Insufficient space (<5GB) LVM detected Has BlueStore device label locked read-only # A device is going to be configured 2019-05-30 19:02:57.535598 I | cephosd: device sdc to be configured by ceph-volume # For each device configured you will see a report printed to the log 2019-05-30 19:02:59.844642 I | Type Path LV Size % of device 2019-05-30 19:02:59.844651 I | ---------------------------------------------------------------------------------------------------- 2019-05-30 19:02:59.844677 I | [data] /dev/sdc 7.00 GB 100% Solution \u00b6 Either update the CR with the correct settings, or clean the partitions or filesystem from your devices. To clean devices from a previous install see the cleanup guide . After the settings are updated or the devices are cleaned, trigger the operator to analyze the devices again by restarting the operator. Each time the operator starts, it will ensure all the desired devices are configured. The operator does automatically deploy OSDs in most scenarios, but an operator restart will cover any scenarios that the operator doesn't detect automatically. 1 2 3 # Restart the operator to ensure devices are configured. A new pod will automatically be started when the current operator pod is deleted. $ kubectl -n rook-ceph delete pod -l app = rook-ceph-operator [...] Node hangs after reboot \u00b6 This issue is fixed in Rook v1.3 or later. Symptoms \u00b6 After issuing a reboot command, node never returned online Only a power cycle helps Investigation \u00b6 On a node running a pod with a Ceph persistent volume 1 2 3 4 mount | grep rbd # _netdev mount option is absent, also occurs for cephfs # OS is not aware PV is mounted over network /dev/rbdx on ... 
(rw,relatime, ..., noquota) When the reboot command is issued, network interfaces are terminated before disks are unmounted. This results in the node hanging as repeated attempts to unmount Ceph persistent volumes fail with the following error: 1 libceph: connect [monitor-ip]:6789 error -101 Solution \u00b6 The node needs to be drained before reboot. After the successful drain, the node can be rebooted as usual. Because the kubectl drain command automatically marks the node as unschedulable (the kubectl cordon effect), the node needs to be uncordoned once it's back online. Drain the node: 1 kubectl drain <node> --ignore-daemonsets --delete-local-data Uncordon the node: 1 kubectl uncordon <node> Using multiple shared filesystem (CephFS) is attempted on a kernel version older than 4.7 \u00b6 Symptoms \u00b6 More than one shared filesystem (CephFS) has been created in the cluster A pod attempts to mount any other shared filesystem besides the first one that was created The pod incorrectly gets the first filesystem mounted instead of the intended filesystem Solution \u00b6 The only solution to this problem is to upgrade your kernel to 4.7 or higher. This is due to a mount flag added in kernel version 4.7 which allows choosing the filesystem by name. For additional info on the kernel version requirement for multiple shared filesystems (CephFS), see Filesystem - Kernel version requirement . Set debug log level for all Ceph daemons \u00b6 You can set a given log level and apply it to all the Ceph daemons at the same time. For this, make sure the toolbox pod is running, then determine the level you want (between 0 and 20). You can find the list of all subsystems and their default values in the Ceph logging and debug official guide . Be careful when increasing the level as it will produce very verbose logs. Assuming you want a log level of 1, you will run: 1 2 3 4 $ kubectl -n rook-ceph exec deploy/rook-ceph-tools -- set-ceph-debug-level 1 ceph config set global debug_context 1 ceph config set global debug_lockdep 1 [...] Once you are done debugging, you can revert all the debug flags to their default values by running the following: 1 kubectl -n rook-ceph exec deploy/rook-ceph-tools -- set-ceph-debug-level default Activate log to file for a particular Ceph daemon \u00b6 There are cases where looking at the Kubernetes logs is not enough, for various reasons; just to name a few: not everyone is familiar with Kubernetes logging and expects to find logs in traditional directories logs get eaten (buffer limit from the log engine) and thus are not retrievable from Kubernetes So for each daemon, dataDirHostPath is used to store logs if logging is activated. Rook will bindmount dataDirHostPath for every pod. Let's say you want to enable logging for mon.a , but only for this daemon. Using the toolbox or from inside the operator run: 1 ceph config set mon.a log_to_file true This will activate logging on the filesystem; you will be able to find the logs in dataDirHostPath/$NAMESPACE/log , so typically this would mean /var/lib/rook/rook-ceph/log . You don't need to restart the pod; the effect will be immediate. To disable logging to file, simply set log_to_file to false . A worker node using RBD devices hangs up \u00b6 Symptoms \u00b6 There is no progress on I/O from/to one of the RBD devices ( /dev/rbd* or /dev/nbd* ). After that, the whole worker node hangs up. Investigation \u00b6 This happens when the following conditions are satisfied: The problematic RBD device and the corresponding OSDs are co-located. 
There is an XFS filesystem on top of this device. In addition, when this problem happens, you can see the following messages in dmesg . 1 2 3 4 5 6 $ dmesg ... [51717.039319] INFO: task kworker/2:1:5938 blocked for more than 120 seconds. [51717.039361] Not tainted 4.15.0-72-generic #81-Ubuntu [51717.039388] \"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\" disables this message. ... This is the so-called hung_task problem, which means that there is a deadlock in the kernel. For more detail, please refer to the corresponding issue comment . Solution \u00b6 This problem will be solved by the following two fixes. Linux kernel: A minor feature introduced by this commit . It will be included in Linux v5.6. Ceph: A fix that uses the above-mentioned kernel feature. The Ceph community will probably discuss this fix after releasing Linux v5.6. You can bypass this problem by using ext4 or any other filesystem instead of XFS. The filesystem type can be specified with csi.storage.k8s.io/fstype in the StorageClass resource. Too few PGs per OSD warning is shown \u00b6 Symptoms \u00b6 ceph status shows the \"too few PGs per OSD\" warning as follows. 1 2 3 4 5 6 $ ceph status cluster: id: fd06d7c3-5c5c-45ca-bdea-1cf26b783065 health: HEALTH_WARN too few PGs per OSD (16 < min 30) [...] Solution \u00b6 The meaning of this warning is explained in the document . However, in many cases it is benign. For more information, please see the blog entry . Please refer to Configuring Pools if you want to know the proper pg_num of pools and change these values. LVM metadata can be corrupted with OSD on LV-backed PVC \u00b6 Symptoms \u00b6 There is a critical flaw in OSD on LV-backed PVC. LVM metadata can be corrupted if both the host and OSD container modify it simultaneously. For example, the administrator might modify it on the host, while the OSD initialization process in a container could modify it too. In addition, if lvmetad is running, the likelihood of occurrence is higher. In this case, a change to the LVM metadata made in the OSD container is not reflected in the host's LVM metadata cache for a while. If you still decide to configure an OSD on LVM, please keep the following in mind to reduce the probability of this issue. Solution \u00b6 Disable lvmetad (a sketch of how to do this follows at the end of this section). Avoid configuration of LVs from the host. In addition, don't touch the VGs and physical volumes that back these LVs. Avoid incrementing the count field of storageClassDeviceSets and creating a new LV that backs an OSD at the same time. You can check the LV tags with the command: sudo lvs -o lv_name,lv_tags . If the lv_tags field is empty for the LV corresponding to an OSD, that OSD has encountered the problem. In this case, please retire this OSD or replace it with a new OSD before restarting. This problem doesn't happen in newly created LV-backed PVCs because the OSD container doesn't modify LVM metadata anymore. The existing lvm mode OSDs continue to work even after you upgrade Rook. However, using raw mode OSDs is recommended because of the above-mentioned problem. You can replace the existing OSDs with raw mode OSDs by retiring them and adding new OSDs one by one. See the documents Remove an OSD and Add an OSD on a PVC . 
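Here is that sketch of disabling lvmetad (this assumes a systemd-based host with the lvm2-lvmetad service; unit names and the lvm.conf path may differ by distribution):
sudo systemctl disable --now lvm2-lvmetad.service lvm2-lvmetad.socket
sudo sed -i 's/use_lvmetad = 1/use_lvmetad = 0/' /etc/lvm/lvm.conf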
OSD prepare job fails due to low aio-max-nr setting \u00b6 If the Kernel is configured with a low aio-max-nr setting , the OSD prepare job might fail with the following error: 1 exec: stderr: 2020-09-17T00:30:12.145+0000 7f0c17632f40 -1 bdev(0x56212de88700 /var/lib/ceph/osd/ceph-0//block) _aio_start io_setup(2) failed with EAGAIN; try increasing /proc/sys/fs/aio-max-nr To overcome this, you need to increase the value of fs.aio-max-nr of your sysctl configuration (typically /etc/sysctl.conf ). You can do this with your favorite configuration management system. Alternatively, you can have a DaemonSet to apply the configuration for you on all your nodes. Unexpected partitions created \u00b6 Symptoms \u00b6 Users running Rook versions v1.6.0-v1.6.7 may observe unwanted OSDs on partitions that appear unexpectedly and seemingly randomly, which can corrupt existing OSDs. Unexpected partitions are created on host disks that are used by Ceph OSDs. This happens more often on SSDs than HDDs and usually only on disks that are 875GB or larger. Many tools like lsblk , blkid , udevadm , and parted will not show a partition table type for the partition. Newer versions of blkid are generally able to recognize the type as \"atari\". The underlying issue causing this is Atari partition (sometimes identified as AHDI) support in the Linux kernel. Atari partitions have very relaxed specifications compared to other partition types, and it is relatively easy for random data written to a disk to appear as an Atari partition to the Linux kernel. Ceph's Bluestore OSDs have an anecdotally high probability of writing data on to disks that can appear to the kernel as an Atari partition. Below is an example of lsblk output from a node where phantom Atari partitions are present. Note that sdX1 is never present for the phantom partitions, and sdX2 is 48G on all disks. sdX3 is a variable size and may not always be present. It is possible for sdX4 to appear, though it is an anecdotally rare event. 1 2 3 4 5 6 7 8 9 10 11 # lsblk NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT sdb 8:16 0 3T 0 disk \u251c\u2500sdb2 8:18 0 48G 0 part \u2514\u2500sdb3 8:19 0 6.1M 0 part sdc 8:32 0 3T 0 disk \u251c\u2500sdc2 8:34 0 48G 0 part \u2514\u2500sdc3 8:35 0 6.2M 0 part sdd 8:48 0 3T 0 disk \u251c\u2500sdd2 8:50 0 48G 0 part \u2514\u2500sdd3 8:51 0 6.3M 0 part You can see GitHub rook/rook - Issue 7940 unexpected partition on disks >= 1TB (atari partitions) for more detailed information and discussion. Solution \u00b6 Recover from corruption (v1.6.0-v1.6.7) \u00b6 If you are using Rook v1.6, you must first update to v1.6.8 or higher to avoid further incidents of OSD corruption caused by these Atari partitions. An old workaround suggested using deviceFilter: ^sd[a-z]+$ , but this still results in unexpected partitions. Rook will merely stop creating new OSDs on the partitions. It does not fix a related issue that ceph-volume that is unaware of the Atari partition problem. Users who used this workaround are still at risk for OSD failures in the future. To resolve the issue, immediately update to v1.6.8 or higher. After the update, no corruption should occur on OSDs created in the future. Next, to get back to a healthy Ceph cluster state, focus on one corrupted disk at a time and remove all OSDs on each corrupted disk one disk at a time. As an example, you may have /dev/sdb with two unexpected partitions ( /dev/sdb2 and /dev/sdb3 ) as well as a second corrupted disk /dev/sde with one unexpected partition ( /dev/sde2 ). 
First, remove the OSDs associated with /dev/sdb , /dev/sdb2 , and /dev/sdb3 . There might be only one, or up to 3 OSDs depending on how your system was affected. Again see the OSD management doc . Use dd to wipe the first sectors of the partitions followed by the disk itself. E.g., dd if=/dev/zero of=/dev/sdb2 bs=1M dd if=/dev/zero of=/dev/sdb3 bs=1M dd if=/dev/zero of=/dev/sdb bs=1M Then wipe clean /dev/sdb to prepare it for a new OSD. See the teardown document for details. After this, scale up the Rook operator to deploy a new OSD to /dev/sdb . This will allow Ceph to use /dev/sdb for data recovery and replication while the next OSDs are removed. Now Repeat steps 1-4 for /dev/sde and /dev/sde2 , and continue for any other corrupted disks. If your Rook cluster does not have any critical data stored in it, it may be simpler to uninstall Rook completely and redeploy with v1.6.8 or higher. Operator environment variables are ignored \u00b6 Symptoms \u00b6 Configuration settings passed as environment variables do not take effect as expected. For example, the discover daemonset is not created, even though ROOK_ENABLE_DISCOVERY_DAEMON=\"true\" is set. Investigation \u00b6 Inspect the rook-ceph-operator-config ConfigMap for conflicting settings. The ConfigMap takes precedence over the environment. The ConfigMap must exist , even if all actual configuration is supplied through the environment. Look for lines with the op-k8sutil prefix in the operator logs. These lines detail the final values, and source, of the different configuration variables. Verify that both of the following messages are present in the operator logs: 1 2 rook-ceph-operator-config-controller successfully started rook-ceph-operator-config-controller done reconciling Solution \u00b6 If it does not exist, create an empty ConfigMap: 1 2 3 4 5 6 kind : ConfigMap apiVersion : v1 metadata : name : rook-ceph-operator-config namespace : rook-ceph # namespace:operator data : {} If the ConfigMap exists, remove any keys that you wish to configure through the environment.","title":"Ceph Common Issues"},{"location":"Troubleshooting/ceph-common-issues/#troubleshooting-techniques","text":"There are two main categories of information you will need to investigate issues in the cluster: Kubernetes status and logs documented here Ceph cluster status (see upcoming Ceph tools section)","title":"Troubleshooting Techniques"},{"location":"Troubleshooting/ceph-common-issues/#ceph-tools","text":"After you verify the basic health of the running pods, next you will want to run Ceph tools for status of the storage components. There are two ways to run the Ceph tools, either in the Rook toolbox or inside other Rook pods that are already running. Logs on a specific node to find why a PVC is failing to mount See the log collection topic for a script that will help you gather the logs Other artifacts: The monitors that are expected to be in quorum: kubectl -n  get configmap rook-ceph-mon-endpoints -o yaml | grep data","title":"Ceph Tools"},{"location":"Troubleshooting/ceph-common-issues/#tools-in-the-rook-toolbox","text":"The rook-ceph-tools pod provides a simple environment to run Ceph tools. Once the pod is up and running, connect to the pod to execute Ceph commands to evaluate that current state of the cluster. 
1 kubectl -n rook-ceph exec -it $(kubectl -n rook-ceph get pod -l \"app=rook-ceph-tools\" -o jsonpath='{.items[*].metadata.name}') bash","title":"Tools in the Rook Toolbox"},{"location":"Troubleshooting/ceph-common-issues/#ceph-commands","text":"Here are some common commands to troubleshoot a Ceph cluster: ceph status ceph osd status ceph osd df ceph osd utilization ceph osd pool stats ceph osd tree ceph pg stat The first two status commands provide the overall cluster health. The normal state for cluster operations is HEALTH_OK, but will still function when the state is in a HEALTH_WARN state. If you are in a WARN state, then the cluster is in a condition that it may enter the HEALTH_ERROR state at which point all disk I/O operations are halted. If a HEALTH_WARN state is observed, then one should take action to prevent the cluster from halting when it enters the HEALTH_ERROR state. There are many Ceph sub-commands to look at and manipulate Ceph objects, well beyond the scope this document. See the Ceph documentation for more details of gathering information about the health of the cluster. In addition, there are other helpful hints and some best practices located in the Advanced Configuration section . Of particular note, there are scripts for collecting logs and gathering OSD information there.","title":"Ceph Commands"},{"location":"Troubleshooting/ceph-common-issues/#cluster-failing-to-service-requests","text":"","title":"Cluster failing to service requests"},{"location":"Troubleshooting/ceph-common-issues/#symptoms","text":"Execution of the ceph command hangs PersistentVolumes are not being created Large amount of slow requests are blocking Large amount of stuck requests are blocking One or more MONs are restarting periodically","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#investigation","text":"Create a rook-ceph-tools pod to investigate the current state of Ceph. Here is an example of what one might see. In this case the ceph status command would just hang so a CTRL-C needed to be sent. 1 2 3 4 kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- ceph status ceph status ^CCluster connection interrupted or timed out Another indication is when one or more of the MON pods restart frequently. Note the 'mon107' that has only been up for 16 minutes in the following output. 1 2 3 4 5 6 7 8 $ kubectl -n rook-ceph get all -o wide --show-all NAME READY STATUS RESTARTS AGE IP NODE po/rook-ceph-mgr0-2487684371-gzlbq 1/1 Running 0 17h 192.168.224.46 k8-host-0402 po/rook-ceph-mon107-p74rj 1/1 Running 0 16m 192.168.224.28 k8-host-0402 rook-ceph-mon1-56fgm 1/1 Running 0 2d 192.168.91.135 k8-host-0404 rook-ceph-mon2-rlxcd 1/1 Running 0 2d 192.168.123.33 k8-host-0403 rook-ceph-osd-bg2vj 1/1 Running 0 2d 192.168.91.177 k8-host-0404 rook-ceph-osd-mwxdm 1/1 Running 0 2d 192.168.123.31 k8-host-0403","title":"Investigation"},{"location":"Troubleshooting/ceph-common-issues/#solution","text":"What is happening here is that the MON pods are restarting and one or more of the Ceph daemons are not getting configured with the proper cluster information. This is commonly the result of not specifying a value for dataDirHostPath in your Cluster CRD. The dataDirHostPath setting specifies a path on the local host for the Ceph daemons to store configuration and data. Setting this to a path like /var/lib/rook , reapplying your Cluster CRD and restarting all the Ceph daemons (MON, MGR, OSD, RGW) should solve this problem. 
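For reference, a minimal sketch of where this setting lives in the CephCluster CR (the cluster name and namespace here simply follow the examples used in this guide):
apiVersion: ceph.rook.io/v1
kind: CephCluster
metadata:
  name: rook-ceph
  namespace: rook-ceph
spec:
  dataDirHostPath: /var/lib/rook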
After the Ceph daemons have been restarted, it is advisable to restart the rook-tools pod .","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#monitors-are-the-only-pods-running","text":"","title":"Monitors are the only pods running"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_1","text":"Rook operator is running Either a single mon starts or the mons start very slowly (at least several minutes apart) The crash-collector pods are crashing No mgr, osd, or other daemons are created except the CSI driver","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#investigation_1","text":"When the operator is starting a cluster, the operator will start one mon at a time and check that they are healthy before continuing to bring up all three mons. If the first mon is not detected healthy, the operator will continue to check until it is healthy. If the first mon fails to start, a second and then a third mon may attempt to start. However, they will never form quorum and the orchestration will be blocked from proceeding. The crash-collector pods will be blocked from starting until the mons have formed quorum the first time. There are several common causes for the mons failing to form quorum: The operator pod does not have network connectivity to the mon pod(s). The network may be configured incorrectly. One or more mon pods are in running state, but the operator log shows they are not able to form quorum A mon is using configuration from a previous installation. See the cleanup guide for cleaning the previous cluster. A firewall may be blocking the ports required for the Ceph mons to form quorum. Ensure ports 6789 and 3300 are enabled. See the Ceph networking guide for more details. There may be MTU mismatch between different networking components. Some networks may be more susceptible to mismatch than others. If Kubernetes CNI or hosts enable jumbo frames (MTU 9000), Ceph will use large packets to maximize network bandwidth. If other parts of the networking chain don't support jumbo frames, this could result in lost or rejected packets unexpectedly.","title":"Investigation"},{"location":"Troubleshooting/ceph-common-issues/#operator-fails-to-connect-to-the-mon","text":"First look at the logs of the operator to confirm if it is able to connect to the mons. 1 kubectl -n rook-ceph logs -l app=rook-ceph-operator Likely you will see an error similar to the following that the operator is timing out when connecting to the mon. The last command is ceph mon_status , followed by a timeout message five minutes later. 1 2 3 4 5 6 2018-01-21 21:47:32.375833 I | exec: Running command: ceph mon_status --cluster=rook --conf=/var/lib/rook/rook-ceph/rook.config --keyring=/var/lib/rook/rook-ceph/client.admin.keyring --format json --out-file /tmp/442263890 2018-01-21 21:52:35.370533 I | exec: 2018-01-21 21:52:35.071462 7f96a3b82700 0 monclient(hunting): authenticate timed out after 300 2018-01-21 21:52:35.071462 7f96a3b82700 0 monclient(hunting): authenticate timed out after 300 2018-01-21 21:52:35.071524 7f96a3b82700 0 librados: client.admin authentication error (110) Connection timed out 2018-01-21 21:52:35.071524 7f96a3b82700 0 librados: client.admin authentication error (110) Connection timed out [errno 110] error connecting to the cluster The error would appear to be an authentication error, but it is misleading. 
The real issue is a timeout.","title":"Operator fails to connect to the mon"},{"location":"Troubleshooting/ceph-common-issues/#solution_1","text":"If you see the timeout in the operator log, verify if the mon pod is running (see the next section). If the mon pod is running, check the network connectivity between the operator pod and the mon pod. A common issue is that the CNI is not configured correctly. To verify the network connectivity: Get the endpoint for a mon Curl the mon from the operator pod For example, this command will curl the first mon from the operator: 1 2 $ kubectl -n rook-ceph exec deploy/rook-ceph-operator -- curl $( kubectl -n rook-ceph get svc -l app = rook-ceph-mon -o jsonpath = '{.items[0].spec.clusterIP}' ) :3300 2 >/dev/null ceph v2 If \"ceph v2\" is printed to the console, the connection was successful. If the command does not respond or otherwise fails, the network connection cannot be established.","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#failing-mon-pod","text":"Second we need to verify if the mon pod started successfully. 1 2 3 $ kubectl -n rook-ceph get pod -l app = rook-ceph-mon NAME READY STATUS RESTARTS AGE rook-ceph-mon-a-69fb9c78cd-58szd 1/1 CrashLoopBackOff 2 47s If the mon pod is failing as in this example, you will need to look at the mon pod status or logs to determine the cause. If the pod is in a crash loop backoff state, you should see the reason by describing the pod. 1 2 3 4 5 6 7 8 # The pod shows a termination status that the keyring does not match the existing keyring $ kubectl -n rook-ceph describe pod -l mon = rook-ceph-mon0 ... Last State: Terminated Reason: Error Message: The keyring does not match the existing keyring in /var/lib/rook/rook-ceph-mon0/data/keyring. You may need to delete the contents of dataDirHostPath on the host from a previous deployment. ... See the solution in the next section regarding cleaning up the dataDirHostPath on the nodes.","title":"Failing mon pod"},{"location":"Troubleshooting/ceph-common-issues/#solution_2","text":"This is a common problem reinitializing the Rook cluster when the local directory used for persistence has not been purged. This directory is the dataDirHostPath setting in the cluster CRD and is typically set to /var/lib/rook . To fix the issue you will need to delete all components of Rook and then delete the contents of /var/lib/rook (or the directory specified by dataDirHostPath ) on each of the hosts in the cluster. Then when the cluster CRD is applied to start a new cluster, the rook-operator should start all the pods as expected. Caution Deleting the dataDirHostPath folder is destructive to the storage. Only delete the folder if you are trying to permanently purge the Rook cluster. See the Cleanup Guide for more details.","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#pvcs-stay-in-pending-state","text":"","title":"PVCs stay in pending state"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_2","text":"When you create a PVC based on a rook storage class, it stays pending indefinitely For the Wordpress example, you might see two PVCs in pending state. 
1 2 3 4 $ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE mysql-pv-claim Pending rook-ceph-block 8s wp-pv-claim Pending rook-ceph-block 16s","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#investigation_2","text":"There are two common causes for the PVCs staying in pending state: There are no OSDs in the cluster The CSI provisioner pod is not running or is not responding to the request to provision the storage","title":"Investigation"},{"location":"Troubleshooting/ceph-common-issues/#confirm-if-there-are-osds","text":"To confirm if you have OSDs in your cluster, connect to the Rook Toolbox and run the ceph status command. You should see that you have at least one OSD up and in . The minimum number of OSDs required depends on the replicated.size setting in the pool created for the storage class. In a \"test\" cluster, only one OSD is required (see storageclass-test.yaml ). In the production storage class example ( storageclass.yaml ), three OSDs would be required. 1 2 3 4 5 6 7 8 9 $ ceph status cluster: id: a0452c76-30d9-4c1a-a948-5d8405f19a7c health: HEALTH_OK services: mon: 3 daemons, quorum a,b,c (age 11m) mgr: a(active, since 10m) osd: 1 osds: 1 up (since 46s), 1 in (since 109m)","title":"Confirm if there are OSDs"},{"location":"Troubleshooting/ceph-common-issues/#osd-prepare-logs","text":"If you don't see the expected number of OSDs, let's investigate why they weren't created. On each node where Rook looks for OSDs to configure, you will see an \"osd prepare\" pod. 1 2 3 $ kubectl -n rook-ceph get pod -l app = rook-ceph-osd-prepare NAME ... READY STATUS RESTARTS AGE rook-ceph-osd-prepare-minikube-9twvk 0/2 Completed 0 30m See the section on why OSDs are not getting created to investigate the logs.","title":"OSD Prepare Logs"},{"location":"Troubleshooting/ceph-common-issues/#csi-driver","text":"The CSI driver may not be responding to the requests. Look in the logs of the CSI provisioner pod to see if there are any errors during the provisioning. There are two provisioner pods: 1 kubectl -n rook-ceph get pod -l app=csi-rbdplugin-provisioner Get the logs of each of the pods. One of them should be the \"leader\" and be responding to requests. 1 kubectl -n rook-ceph logs csi-cephfsplugin-provisioner-d77bb49c6-q9hwq csi-provisioner See also the CSI Troubleshooting Guide .","title":"CSI Driver"},{"location":"Troubleshooting/ceph-common-issues/#operator-unresponsiveness","text":"Lastly, if you have OSDs up and in , the next step is to confirm the operator is responding to the requests. Look in the Operator pod logs around the time when the PVC was created to confirm if the request is being raised. If the operator does not show requests to provision the block image, the operator may be stuck on some other operation. In this case, restart the operator pod to get things going again.","title":"Operator unresponsiveness"},{"location":"Troubleshooting/ceph-common-issues/#solution_3","text":"If the \"osd prepare\" logs didn't give you enough clues about why the OSDs were not being created, please review your cluster.yaml configuration. The common misconfigurations include: If useAllDevices: true , Rook expects to find local devices attached to the nodes. If no devices are found, no OSDs will be created. If useAllDevices: false , OSDs will only be created if deviceFilter is specified. Only local devices attached to the nodes will be configurable by Rook. In other words, the devices must show up under /dev . 
The devices must not have any partitions or filesystems on them. Rook will only configure raw devices. Partitions are not yet supported.","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#osd-pods-are-failing-to-start","text":"","title":"OSD pods are failing to start"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_3","text":"OSD pods are failing to start You have started a cluster after tearing down another cluster","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#investigation_3","text":"When an OSD starts, the device or directory will be configured for consumption. If there is an error with the configuration, the pod will crash and you will see the CrashLoopBackoff status for the pod. Look in the osd pod logs for an indication of the failure. 1 2 $ kubectl -n rook-ceph logs rook-ceph-osd-fl8fs ... One common case for failure is that you have re-deployed a test cluster and some state may remain from a previous deployment. If your cluster is larger than a few nodes, you may get lucky enough that the monitors were able to start and form quorum. However, now the OSDs pods may fail to start due to the old state. Looking at the OSD pod logs you will see an error about the file already existing. 1 2 3 4 5 6 7 $ kubectl -n rook-ceph logs rook-ceph-osd-fl8fs ... 2017-10-31 20:13:11.187106 I | mkfs-osd0: 2017-10-31 20:13:11.186992 7f0059d62e00 -1 bluestore(/var/lib/rook/osd0) _read_fsid unparsable uuid 2017-10-31 20:13:11.187208 I | mkfs-osd0: 2017-10-31 20:13:11.187026 7f0059d62e00 -1 bluestore(/var/lib/rook/osd0) _setup_block_symlink_or_file failed to create block symlink to /dev/disk/by-partuuid/651153ba-2dfc-4231-ba06-94759e5ba273: (17) File exists 2017-10-31 20:13:11.187233 I | mkfs-osd0: 2017-10-31 20:13:11.187038 7f0059d62e00 -1 bluestore(/var/lib/rook/osd0) mkfs failed, (17) File exists 2017-10-31 20:13:11.187254 I | mkfs-osd0: 2017-10-31 20:13:11.187042 7f0059d62e00 -1 OSD::mkfs: ObjectStore::mkfs failed with error (17) File exists 2017-10-31 20:13:11.187275 I | mkfs-osd0: 2017-10-31 20:13:11.187121 7f0059d62e00 -1 ** ERROR: error creating empty object store in /var/lib/rook/osd0: (17) File exists","title":"Investigation"},{"location":"Troubleshooting/ceph-common-issues/#solution_4","text":"If the error is from the file that already exists, this is a common problem reinitializing the Rook cluster when the local directory used for persistence has not been purged. This directory is the dataDirHostPath setting in the cluster CRD and is typically set to /var/lib/rook . To fix the issue you will need to delete all components of Rook and then delete the contents of /var/lib/rook (or the directory specified by dataDirHostPath ) on each of the hosts in the cluster. Then when the cluster CRD is applied to start a new cluster, the rook-operator should start all the pods as expected.","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#osd-pods-are-not-created-on-my-devices","text":"","title":"OSD pods are not created on my devices"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_4","text":"No OSD pods are started in the cluster Devices are not configured with OSDs even though specified in the Cluster CRD One OSD pod is started on each node instead of multiple pods for each device","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#investigation_4","text":"First, ensure that you have specified the devices correctly in the CRD. 
The Cluster CRD has several ways to specify the devices that are to be consumed by the Rook storage: useAllDevices: true : Rook will consume all devices it determines to be available deviceFilter : Consume all devices that match this regular expression devices : Explicit list of device names on each node to consume Second, if Rook determines that a device is not available (has existing partitions or a formatted filesystem), Rook will skip consuming the devices. If Rook is not starting OSDs on the devices you expect, Rook may have skipped it for this reason. To see if a device was skipped, view the OSD preparation log on the node where the device was skipped. Note that it is completely normal and expected for OSD prepare pod to be in the completed state. After the job is complete, Rook leaves the pod around in case the logs need to be investigated. 1 2 3 4 5 6 # Get the prepare pods in the cluster $ kubectl -n rook-ceph get pod -l app = rook-ceph-osd-prepare NAME READY STATUS RESTARTS AGE rook-ceph-osd-prepare-node1-fvmrp 0/1 Completed 0 18m rook-ceph-osd-prepare-node2-w9xv9 0/1 Completed 0 22m rook-ceph-osd-prepare-node3-7rgnv 0/1 Completed 0 22m 1 2 3 # view the logs for the node of interest in the \"provision\" container $ kubectl -n rook-ceph logs rook-ceph-osd-prepare-node1-fvmrp provision [...] Here are some key lines to look for in the log: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 # A device will be skipped if Rook sees it has partitions or a filesystem 2019-05-30 19:02:57.353171 W | cephosd: skipping device sda that is in use 2019-05-30 19:02:57.452168 W | skipping device \"sdb5\": [\"Used by ceph-disk\"] # Other messages about a disk being unusable by ceph include: Insufficient space (<5GB) on vgs Insufficient space (<5GB) LVM detected Has BlueStore device label locked read-only # A device is going to be configured 2019-05-30 19:02:57.535598 I | cephosd: device sdc to be configured by ceph-volume # For each device configured you will see a report printed to the log 2019-05-30 19:02:59.844642 I | Type Path LV Size % of device 2019-05-30 19:02:59.844651 I | ---------------------------------------------------------------------------------------------------- 2019-05-30 19:02:59.844677 I | [data] /dev/sdc 7.00 GB 100%","title":"Investigation"},{"location":"Troubleshooting/ceph-common-issues/#solution_5","text":"Either update the CR with the correct settings, or clean the partitions or filesystem from your devices. To clean devices from a previous install see the cleanup guide . After the settings are updated or the devices are cleaned, trigger the operator to analyze the devices again by restarting the operator. Each time the operator starts, it will ensure all the desired devices are configured. The operator does automatically deploy OSDs in most scenarios, but an operator restart will cover any scenarios that the operator doesn't detect automatically. 1 2 3 # Restart the operator to ensure devices are configured. A new pod will automatically be started when the current operator pod is deleted. 
$ kubectl -n rook-ceph delete pod -l app = rook-ceph-operator [...]","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#node-hangs-after-reboot","text":"This issue is fixed in Rook v1.3 or later.","title":"Node hangs after reboot"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_5","text":"After issuing a reboot command, node never returned online Only a power cycle helps","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#investigation_5","text":"On a node running a pod with a Ceph persistent volume 1 2 3 4 mount | grep rbd # _netdev mount option is absent, also occurs for cephfs # OS is not aware PV is mounted over network /dev/rbdx on ... (rw,relatime, ..., noquota) When the reboot command is issued, network interfaces are terminated before disks are unmounted. This results in the node hanging as repeated attempts to unmount Ceph persistent volumes fail with the following error: 1 libceph: connect [monitor-ip]:6789 error -101","title":"Investigation"},{"location":"Troubleshooting/ceph-common-issues/#solution_6","text":"The node needs to be drained before reboot. After the successful drain, the node can be rebooted as usual. Because kubectl drain command automatically marks the node as unschedulable ( kubectl cordon effect), the node needs to be uncordoned once it's back online. Drain the node: 1 kubectl drain  --ignore-daemonsets --delete-local-data Uncordon the node: 1 kubectl uncordon ","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#using-multiple-shared-filesystem-cephfs-is-attempted-on-a-kernel-version-older-than-47","text":"","title":"Using multiple shared filesystem (CephFS) is attempted on a kernel version older than 4.7"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_6","text":"More than one shared filesystem (CephFS) has been created in the cluster A pod attempts to mount any other shared filesystem besides the first one that was created The pod incorrectly gets the first filesystem mounted instead of the intended filesystem","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#solution_7","text":"The only solution to this problem is to upgrade your kernel to 4.7 or higher. This is due to a mount flag added in the kernel version 4.7 which allows to chose the filesystem by name. For additional info on the kernel version requirement for multiple shared filesystems (CephFS), see Filesystem - Kernel version requirement .","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#set-debug-log-level-for-all-ceph-daemons","text":"You can set a given log level and apply it to all the Ceph daemons at the same time. For this, make sure the toolbox pod is running, then determine the level you want (between 0 and 20). You can find the list of all subsystems and their default values in Ceph logging and debug official guide . Be careful when increasing the level as it will produce very verbose logs. Assuming you want a log level of 1, you will run: 1 2 3 4 $ kubectl -n rook-ceph exec deploy/rook-ceph-tools -- set-ceph-debug-level 1 ceph config set global debug_context 1 ceph config set global debug_lockdep 1 [...] 
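To confirm which debug options are currently set, you could dump the centralized Ceph configuration from the toolbox and filter for debug settings (a hedged example; the exact options listed will vary):
kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph config dump | grep debug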
Once you are done debugging, you can revert all the debug flags to their default values by running the following: 1 kubectl -n rook-ceph exec deploy/rook-ceph-tools -- set-ceph-debug-level default","title":"Set debug log level for all Ceph daemons"},{"location":"Troubleshooting/ceph-common-issues/#activate-log-to-file-for-a-particular-ceph-daemon","text":"There are cases where looking at the Kubernetes logs is not enough, for various reasons; just to name a few: not everyone is familiar with Kubernetes logging and expects to find logs in traditional directories logs get eaten (buffer limit from the log engine) and thus are not retrievable from Kubernetes So for each daemon, dataDirHostPath is used to store logs if logging is activated. Rook will bindmount dataDirHostPath for every pod. Let's say you want to enable logging for mon.a , but only for this daemon. Using the toolbox or from inside the operator run: 1 ceph config set mon.a log_to_file true This will activate logging on the filesystem; you will be able to find the logs in dataDirHostPath/$NAMESPACE/log , so typically this would mean /var/lib/rook/rook-ceph/log . You don't need to restart the pod; the effect will be immediate. To disable logging to file, simply set log_to_file to false .","title":"Activate log to file for a particular Ceph daemon"},{"location":"Troubleshooting/ceph-common-issues/#a-worker-node-using-rbd-devices-hangs-up","text":"","title":"A worker node using RBD devices hangs up"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_7","text":"There is no progress on I/O from/to one of the RBD devices ( /dev/rbd* or /dev/nbd* ). After that, the whole worker node hangs up.","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#investigation_6","text":"This happens when the following conditions are satisfied: The problematic RBD device and the corresponding OSDs are co-located. There is an XFS filesystem on top of this device. In addition, when this problem happens, you can see the following messages in dmesg . 1 2 3 4 5 6 $ dmesg ... [51717.039319] INFO: task kworker/2:1:5938 blocked for more than 120 seconds. [51717.039361] Not tainted 4.15.0-72-generic #81-Ubuntu [51717.039388] \"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\" disables this message. ... This is the so-called hung_task problem, which means that there is a deadlock in the kernel. For more detail, please refer to the corresponding issue comment .","title":"Investigation"},{"location":"Troubleshooting/ceph-common-issues/#solution_8","text":"This problem will be solved by the following two fixes. Linux kernel: A minor feature introduced by this commit . It will be included in Linux v5.6. Ceph: A fix that uses the above-mentioned kernel feature. The Ceph community will probably discuss this fix after releasing Linux v5.6. You can bypass this problem by using ext4 or any other filesystem instead of XFS. The filesystem type can be specified with csi.storage.k8s.io/fstype in the StorageClass resource.","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#too-few-pgs-per-osd-warning-is-shown","text":"","title":"Too few PGs per OSD warning is shown"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_8","text":"ceph status shows the \"too few PGs per OSD\" warning as follows. 
1 2 3 4 5 6 $ ceph status cluster: id: fd06d7c3-5c5c-45ca-bdea-1cf26b783065 health: HEALTH_WARN too few PGs per OSD (16 < min 30) [...]","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#solution_9","text":"The meaning of this warning is explained in the document . However, in many cases it is benign. For more information, please see the blog entry . Please refer to Configuring Pools if you want to know the proper pg_num of pools and change these values.","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#lvm-metadata-can-be-corrupted-with-osd-on-lv-backed-pvc","text":"","title":"LVM metadata can be corrupted with OSD on LV-backed PVC"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_9","text":"There is a critical flaw in OSD on LV-backed PVC. LVM metadata can be corrupted if both the host and OSD container modify it simultaneously. For example, the administrator might modify it on the host, while the OSD initialization process in a container could modify it too. In addition, if lvmetad is running, the likelihood of occurrence is higher. In this case, a change to the LVM metadata made in the OSD container is not reflected in the host's LVM metadata cache for a while. If you still decide to configure an OSD on LVM, please keep the following in mind to reduce the probability of this issue.","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#solution_10","text":"Disable lvmetad. Avoid configuration of LVs from the host. In addition, don't touch the VGs and physical volumes that back these LVs. Avoid incrementing the count field of storageClassDeviceSets and creating a new LV that backs an OSD at the same time. You can check the LV tags with the command: sudo lvs -o lv_name,lv_tags . If the lv_tags field is empty for the LV corresponding to an OSD, that OSD has encountered the problem. In this case, please retire this OSD or replace it with a new OSD before restarting. This problem doesn't happen in newly created LV-backed PVCs because the OSD container doesn't modify LVM metadata anymore. The existing lvm mode OSDs continue to work even after you upgrade Rook. However, using raw mode OSDs is recommended because of the above-mentioned problem. You can replace the existing OSDs with raw mode OSDs by retiring them and adding new OSDs one by one. See the documents Remove an OSD and Add an OSD on a PVC .","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#osd-prepare-job-fails-due-to-low-aio-max-nr-setting","text":"If the kernel is configured with a low aio-max-nr setting , the OSD prepare job might fail with the following error: 1 exec: stderr: 2020-09-17T00:30:12.145+0000 7f0c17632f40 -1 bdev(0x56212de88700 /var/lib/ceph/osd/ceph-0//block) _aio_start io_setup(2) failed with EAGAIN; try increasing /proc/sys/fs/aio-max-nr To overcome this, you need to increase the value of fs.aio-max-nr in your sysctl configuration (typically /etc/sysctl.conf ). You can do this with your favorite configuration management system. 
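For example, a sketch of applying the setting directly on a node (the value 1048576 is only illustrative; choose a limit appropriate for your environment):
echo 'fs.aio-max-nr = 1048576' | sudo tee -a /etc/sysctl.conf
sudo sysctl -p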
Alternatively, you can have a DaemonSet to apply the configuration for you on all your nodes.","title":"OSD prepare job fails due to low aio-max-nr setting"},{"location":"Troubleshooting/ceph-common-issues/#unexpected-partitions-created","text":"","title":"Unexpected partitions created"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_10","text":"Users running Rook versions v1.6.0-v1.6.7 may observe unwanted OSDs on partitions that appear unexpectedly and seemingly randomly, which can corrupt existing OSDs. Unexpected partitions are created on host disks that are used by Ceph OSDs. This happens more often on SSDs than HDDs and usually only on disks that are 875GB or larger. Many tools like lsblk , blkid , udevadm , and parted will not show a partition table type for the partition. Newer versions of blkid are generally able to recognize the type as \"atari\". The underlying issue causing this is Atari partition (sometimes identified as AHDI) support in the Linux kernel. Atari partitions have very relaxed specifications compared to other partition types, and it is relatively easy for random data written to a disk to appear as an Atari partition to the Linux kernel. Ceph's Bluestore OSDs have an anecdotally high probability of writing data on to disks that can appear to the kernel as an Atari partition. Below is an example of lsblk output from a node where phantom Atari partitions are present. Note that sdX1 is never present for the phantom partitions, and sdX2 is 48G on all disks. sdX3 is a variable size and may not always be present. It is possible for sdX4 to appear, though it is an anecdotally rare event. 1 2 3 4 5 6 7 8 9 10 11 # lsblk NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT sdb 8:16 0 3T 0 disk \u251c\u2500sdb2 8:18 0 48G 0 part \u2514\u2500sdb3 8:19 0 6.1M 0 part sdc 8:32 0 3T 0 disk \u251c\u2500sdc2 8:34 0 48G 0 part \u2514\u2500sdc3 8:35 0 6.2M 0 part sdd 8:48 0 3T 0 disk \u251c\u2500sdd2 8:50 0 48G 0 part \u2514\u2500sdd3 8:51 0 6.3M 0 part You can see GitHub rook/rook - Issue 7940 unexpected partition on disks >= 1TB (atari partitions) for more detailed information and discussion.","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#solution_11","text":"","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#recover-from-corruption-v160-v167","text":"If you are using Rook v1.6, you must first update to v1.6.8 or higher to avoid further incidents of OSD corruption caused by these Atari partitions. An old workaround suggested using deviceFilter: ^sd[a-z]+$ , but this still results in unexpected partitions. Rook will merely stop creating new OSDs on the partitions. It does not fix a related issue that ceph-volume that is unaware of the Atari partition problem. Users who used this workaround are still at risk for OSD failures in the future. To resolve the issue, immediately update to v1.6.8 or higher. After the update, no corruption should occur on OSDs created in the future. Next, to get back to a healthy Ceph cluster state, focus on one corrupted disk at a time and remove all OSDs on each corrupted disk one disk at a time. As an example, you may have /dev/sdb with two unexpected partitions ( /dev/sdb2 and /dev/sdb3 ) as well as a second corrupted disk /dev/sde with one unexpected partition ( /dev/sde2 ). First, remove the OSDs associated with /dev/sdb , /dev/sdb2 , and /dev/sdb3 . There might be only one, or up to 3 OSDs depending on how your system was affected. Again see the OSD management doc . 
Use dd to wipe the first sectors of the partitions followed by the disk itself. E.g., dd if=/dev/zero of=/dev/sdb2 bs=1M dd if=/dev/zero of=/dev/sdb3 bs=1M dd if=/dev/zero of=/dev/sdb bs=1M Then wipe clean /dev/sdb to prepare it for a new OSD. See the teardown document for details. After this, scale up the Rook operator to deploy a new OSD to /dev/sdb . This will allow Ceph to use /dev/sdb for data recovery and replication while the next OSDs are removed. Now Repeat steps 1-4 for /dev/sde and /dev/sde2 , and continue for any other corrupted disks. If your Rook cluster does not have any critical data stored in it, it may be simpler to uninstall Rook completely and redeploy with v1.6.8 or higher.","title":"Recover from corruption (v1.6.0-v1.6.7)"},{"location":"Troubleshooting/ceph-common-issues/#operator-environment-variables-are-ignored","text":"","title":"Operator environment variables are ignored"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_11","text":"Configuration settings passed as environment variables do not take effect as expected. For example, the discover daemonset is not created, even though ROOK_ENABLE_DISCOVERY_DAEMON=\"true\" is set.","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#investigation_7","text":"Inspect the rook-ceph-operator-config ConfigMap for conflicting settings. The ConfigMap takes precedence over the environment. The ConfigMap must exist , even if all actual configuration is supplied through the environment. Look for lines with the op-k8sutil prefix in the operator logs. These lines detail the final values, and source, of the different configuration variables. Verify that both of the following messages are present in the operator logs: 1 2 rook-ceph-operator-config-controller successfully started rook-ceph-operator-config-controller done reconciling","title":"Investigation"},{"location":"Troubleshooting/ceph-common-issues/#solution_12","text":"If it does not exist, create an empty ConfigMap: 1 2 3 4 5 6 kind : ConfigMap apiVersion : v1 metadata : name : rook-ceph-operator-config namespace : rook-ceph # namespace:operator data : {} If the ConfigMap exists, remove any keys that you wish to configure through the environment.","title":"Solution"},{"location":"Troubleshooting/ceph-csi-common-issues/","text":"Issues when provisioning volumes with the Ceph CSI driver can happen for many reasons such as: Network connectivity between CSI pods and ceph Cluster health issues Slow operations Kubernetes issues Ceph-CSI configuration or bugs The following troubleshooting steps can help identify a number of issues. Block (RBD) \u00b6 If you are mounting block volumes (usually RWO), these are referred to as RBD volumes in Ceph. See the sections below for RBD if you are having block volume issues. Shared Filesystem (CephFS) \u00b6 If you are mounting shared filesystem volumes (usually RWX), these are referred to as CephFS volumes in Ceph. See the sections below for CephFS if you are having filesystem volume issues. Network Connectivity \u00b6 The Ceph monitors are the most critical component of the cluster to check first. 
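Before digging into endpoints, a quick sanity check that the mon pods themselves are running can save time. A minimal sketch, assuming the default rook-ceph namespace:

```console
kubectl -n rook-ceph get pod -l app=rook-ceph-mon -o wide
```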
Retrieve the mon endpoints from the services: 1 2 3 4 5 $ kubectl -n rook-ceph get svc -l app = rook-ceph-mon NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-mon-a ClusterIP 10.104.165.31  6789/TCP,3300/TCP 18h rook-ceph-mon-b ClusterIP 10.97.244.93  6789/TCP,3300/TCP 21s rook-ceph-mon-c ClusterIP 10.99.248.163  6789/TCP,3300/TCP 8s If host networking is enabled in the CephCluster CR, you will instead need to find the node IPs for the hosts where the mons are running. The clusterIP is the mon IP and 3300 is the port that will be used by Ceph-CSI to connect to the ceph cluster. These endpoints must be accessible by all clients in the cluster, including the CSI driver. If you are seeing issues provisioning the PVC then you need to check the network connectivity from the provisioner pods. For CephFS PVCs, check network connectivity from the csi-cephfsplugin container of the csi-cephfsplugin-provisioner pods For Block PVCs, check network connectivity from the csi-rbdplugin container of the csi-rbdplugin-provisioner pods For redundancy, there are two provisioner pods for each type. Make sure to test connectivity from all provisioner pods. Connect to the provisioner pods and verify the connection to the mon endpoints such as the following: 1 2 3 4 5 6 # Connect to the csi-cephfsplugin container in the provisioner pod kubectl -n rook-ceph exec -ti deploy/csi-cephfsplugin-provisioner -c csi-cephfsplugin -- bash # Test the network connection to the mon endpoint curl 10.104.165.31:3300 2>/dev/null ceph v2 If you see the response \"ceph v2\", the connection succeeded. If there is no response then there is a network issue connecting to the ceph cluster. Check network connectivity for all monitor IP\u2019s and ports which are passed to ceph-csi. Ceph Health \u00b6 Sometimes an unhealthy Ceph cluster can contribute to the issues in creating or mounting the PVC. Check that your Ceph cluster is healthy by connecting to the Toolbox and running the ceph commands: 1 ceph health detail 1 HEALTH_OK Slow Operations \u00b6 Even slow ops in the ceph cluster can contribute to the issues. In the toolbox, make sure that no slow ops are present and the ceph cluster is healthy 1 2 3 4 5 6 $ ceph -s cluster: id: ba41ac93-3b55-4f32-9e06-d3d8c6ff7334 health: HEALTH_WARN 30 slow ops, oldest one blocked for 10624 sec, mon.a has slow ops [...] If Ceph is not healthy, check the following health for more clues: The Ceph monitor logs for errors The OSD logs for errors Disk Health Network Health Ceph Troubleshooting \u00b6 Check if the RBD Pool exists \u00b6 Make sure the pool you have specified in the storageclass.yaml exists in the ceph cluster. Suppose the pool name mentioned in the storageclass.yaml is replicapool . It can be verified to exist in the toolbox: 1 2 3 $ ceph osd lspools 1 device_health_metrics 2 replicapool If the pool is not in the list, create the CephBlockPool CR for the pool if you have not already. If you have already created the pool, check the Rook operator log for errors creating the pool. Check if the Filesystem exists \u00b6 For the shared filesystem (CephFS), check that the filesystem and pools you have specified in the storageclass.yaml exist in the Ceph cluster. Suppose the fsName name mentioned in the storageclass.yaml is myfs . It can be verified in the toolbox: 1 2 $ ceph fs ls name: myfs, metadata pool: myfs-metadata, data pools: [myfs-data0 ] Now verify the pool mentioned in the storageclass.yaml exists, such as the example myfs-data0 . 
1 2 3 4 5 ceph osd lspools 1 device_health_metrics 2 replicapool 3 myfs-metadata0 4 myfs-data0 The pool for the filesystem will have the suffix -data0 compared the filesystem name that is created by the CephFilesystem CR. subvolumegroups \u00b6 If the subvolumegroup is not specified in the ceph-csi configmap (where you have passed the ceph monitor information), Ceph-CSI creates the default subvolumegroup with the name csi. Verify that the subvolumegroup exists: 1 2 3 4 5 6 $ ceph fs subvolumegroup ls myfs [ { \"name\": \"csi\" } ] If you don\u2019t see any issues with your Ceph cluster, the following sections will start debugging the issue from the CSI side. Provisioning Volumes \u00b6 At times the issue can also exist in the Ceph-CSI or the sidecar containers used in Ceph-CSI. Ceph-CSI has included number of sidecar containers in the provisioner pods such as: csi-attacher , csi-resizer , csi-provisioner , csi-cephfsplugin , csi-snapshotter , and liveness-prometheus . The CephFS provisioner core CSI driver container name is csi-cephfsplugin as one of the container names. For the RBD (Block) provisioner you will see csi-rbdplugin as the container name. Here is a summary of the sidecar containers: csi-provisioner \u00b6 The external-provisioner is a sidecar container that dynamically provisions volumes by calling ControllerCreateVolume() and ControllerDeleteVolume() functions of CSI drivers. More details about external-provisioner can be found here. If there is an issue with PVC Create or Delete, check the logs of the csi-provisioner sidecar container. 1 kubectl -n rook-ceph logs deploy/csi-rbdplugin-provisioner -c csi-provisioner csi-resizer \u00b6 The CSI external-resizer is a sidecar container that watches the Kubernetes API server for PersistentVolumeClaim updates and triggers ControllerExpandVolume operations against a CSI endpoint if the user requested more storage on the PersistentVolumeClaim object. More details about external-provisioner can be found here. If any issue exists in PVC expansion you can check the logs of the csi-resizer sidecar container. 1 kubectl -n rook-ceph logs deploy/csi-rbdplugin-provisioner -c csi-resizer csi-snapshotter \u00b6 The CSI external-snapshotter sidecar only watches for VolumeSnapshotContent create/update/delete events. It will talk to ceph-csi containers to create or delete snapshots. More details about external-snapshotter can be found here . In Kubernetes 1.17 the volume snapshot feature was promoted to beta. In Kubernetes 1.20, the feature gate is enabled by default on standard Kubernetes deployments and cannot be turned off. Make sure you have installed the correct snapshotter CRD version. If you have not installed the snapshotter controller, see the Snapshots guide . 1 2 3 4 $ kubectl get crd | grep snapshot volumesnapshotclasses.snapshot.storage.k8s.io 2021-01-25T11:19:38Z volumesnapshotcontents.snapshot.storage.k8s.io 2021-01-25T11:19:39Z volumesnapshots.snapshot.storage.k8s.io 2021-01-25T11:19:40Z The above CRDs must have the matching version in your snapshotclass.yaml or snapshot.yaml . Otherwise, the VolumeSnapshot and VolumesnapshotContent will not be created. The snapshot controller is responsible for creating both VolumeSnapshot and VolumesnapshotContent object. If the objects are not getting created, you may need to check the logs of the snapshot-controller container. Rook only installs the snapshotter sidecar container, not the controller. 
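As a quick check, confirm which API versions those CRDs actually serve so that snapshotclass.yaml and snapshot.yaml can reference a matching one (the jsonpath expression below is only a suggested way to read it):

```console
kubectl get crd volumesnapshots.snapshot.storage.k8s.io -o jsonpath='{.spec.versions[*].name}{"\n"}'
```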
It is recommended that Kubernetes distributors bundle and deploy the controller and CRDs as part of their Kubernetes cluster management process (independent of any CSI Driver). If your Kubernetes distribution does not bundle the snapshot controller, you may manually install these components. If any issue exists in the snapshot Create/Delete operation you can check the logs of the csi-snapshotter sidecar container. 1 kubectl -n rook-ceph logs deploy/csi-rbdplugin-provisioner -c csi-snapshotter If you see an error about a volume already existing such as: 1 2 GRPC error: rpc error: code = Aborted desc = an operation with the given Volume ID 0001-0009-rook-ceph-0000000000000001-8d0ba728-0e17-11eb-a680-ce6eecc894de already exists. The issue typically is in the Ceph cluster or network connectivity. If the issue is in Provisioning the PVC Restarting the Provisioner pods help(for CephFS issue restart csi-cephfsplugin-provisioner-xxxxxx CephFS Provisioner. For RBD, restart the csi-rbdplugin-provisioner-xxxxxx pod. If the issue is in mounting the PVC, restart the csi-rbdplugin-xxxxx pod (for RBD) and the csi-cephfsplugin-xxxxx pod for CephFS issue. Mounting the volume to application pods \u00b6 When a user requests to create the application pod with PVC, there is a three-step process CSI driver registration Create volume attachment object Stage and publish the volume csi-driver registration \u00b6 csi-cephfsplugin-xxxx or csi-rbdplugin-xxxx is a daemonset pod running on all the nodes where your application gets scheduled. If the plugin pods are not running on the node where your application is scheduled might cause the issue, make sure plugin pods are always running. Each plugin pod has two important containers: one is driver-registrar and csi-rbdplugin or csi-cephfsplugin . Sometimes there is also a liveness-prometheus container. driver-registrar \u00b6 The node-driver-registrar is a sidecar container that registers the CSI driver with Kubelet. More details can be found here . If any issue exists in attaching the PVC to the application pod check logs from driver-registrar sidecar container in plugin pod where your application pod is scheduled. 1 2 3 4 5 6 7 8 9 10 11 12 $ kubectl -n rook-ceph logs deploy/csi-rbdplugin -c driver-registrar [...] I0120 12:28:34.231761 124018 main.go:112] Version: v2.0.1 I0120 12:28:34.233910 124018 connection.go:151] Connecting to unix:///csi/csi.sock I0120 12:28:35.242469 124018 node_register.go:55] Starting Registration Server at: /registration/rook-ceph.rbd.csi.ceph.com-reg.sock I0120 12:28:35.243364 124018 node_register.go:64] Registration Server started at: /registration/rook-ceph.rbd.csi.ceph.com-reg.sock I0120 12:28:35.243673 124018 node_register.go:86] Skipping healthz server because port set to: 0 I0120 12:28:36.318482 124018 main.go:79] Received GetInfo call: &InfoRequest{} I0120 12:28:37.455211 124018 main.go:89] Received NotifyRegistrationStatus call: &RegistrationStatus{PluginRegistered:true,Error:,} E0121 05:19:28.658390 124018 connection.go:129] Lost connection to unix:///csi/csi.sock. E0125 07:11:42.926133 124018 connection.go:129] Lost connection to unix:///csi/csi.sock. [...] You should see the response RegistrationStatus{PluginRegistered:true,Error:,} in the logs to confirm that plugin is registered with kubelet. If you see a driver not found an error in the application pod describe output. Restarting the csi-xxxxplugin-xxx pod on the node may help. Volume Attachment \u00b6 Each provisioner pod also has a sidecar container called csi-attacher . 
csi-attacher \u00b6 The external-attacher is a sidecar container that attaches volumes to nodes by calling ControllerPublish and ControllerUnpublish functions of CSI drivers. It is necessary because the internal Attach/Detach controller running in Kubernetes controller-manager does not have any direct interfaces to CSI drivers. More details can be found here . If any issue exists in attaching the PVC to the application pod first check the volumeattachment object created and also log from csi-attacher sidecar container in provisioner pod. 1 2 3 $ kubectl get volumeattachment NAME ATTACHER PV NODE ATTACHED AGE csi-75903d8a902744853900d188f12137ea1cafb6c6f922ebc1c116fd58e950fc92 rook-ceph.cephfs.csi.ceph.com pvc-5c547d2a-fdb8-4cb2-b7fe-e0f30b88d454 minikube true 4m26s 1 kubectl logs po/csi-rbdplugin-provisioner-d857bfb5f-ddctl -c csi-attacher CephFS Stale operations \u00b6 Check for any stale mount commands on the csi-cephfsplugin-xxxx pod on the node where your application pod is scheduled. You need to exec in the csi-cephfsplugin-xxxx pod and grep for stale mount operators. Identify the csi-cephfsplugin-xxxx pod running on the node where your application is scheduled with kubectl get po -o wide and match the node names. 1 2 3 4 $ kubectl exec -it csi-cephfsplugin-tfk2g -c csi-cephfsplugin -- sh $ ps -ef | grep mount [...] root 67 60 0 11:55 pts/0 00:00:00 grep mount 1 2 3 4 ps -ef |grep ceph [...] root 1 0 0 Jan20 ? 00:00:26 /usr/local/bin/cephcsi --nodeid=minikube --type=cephfs --endpoint=unix:///csi/csi.sock --v=0 --nodeserver=true --drivername=rook-ceph.cephfs.csi.ceph.com --pidlimit=-1 --metricsport=9091 --forcecephkernelclient=true --metricspath=/metrics --enablegrpcmetrics=true root 69 60 0 11:55 pts/0 00:00:00 grep ceph If any commands are stuck check the dmesg logs from the node. Restarting the csi-cephfsplugin pod may also help sometimes. If you don\u2019t see any stuck messages, confirm the network connectivity, Ceph health, and slow ops. RBD Stale operations \u00b6 Check for any stale map/mkfs/mount commands on the csi-rbdplugin-xxxx pod on the node where your application pod is scheduled. You need to exec in the csi-rbdplugin-xxxx pod and grep for stale operators like ( rbd map, rbd unmap, mkfs, mount and umount ). Identify the csi-rbdplugin-xxxx pod running on the node where your application is scheduled with kubectl get po -o wide and match the node names. 1 2 3 4 $ kubectl exec -it csi-rbdplugin-vh8d5 -c csi-rbdplugin -- sh $ ps -ef | grep map [...] root 1297024 1296907 0 12:00 pts/0 00:00:00 grep map 1 2 3 4 5 $ ps -ef | grep mount [...] root 1824 1 0 Jan19 ? 00:00:00 /usr/sbin/rpc.mountd ceph 1041020 1040955 1 07:11 ? 00:03:43 ceph-mgr --fsid=ba41ac93-3b55-4f32-9e06-d3d8c6ff7334 --keyring=/etc/ceph/keyring-store/keyring --log-to-stderr=true --err-to-stderr=true --mon-cluster-log-to-stderr=true --log-stderr-prefix=debug --default-log-to-file=false --default-mon-cluster-log-to-file=false --mon-host=[v2:10.111.136.166:3300,v1:10.111.136.166:6789] --mon-initial-members=a --id=a --setuser=ceph --setgroup=ceph --client-mount-uid=0 --client-mount-gid=0 --foreground --public-addr=172.17.0.6 root 1297115 1296907 0 12:00 pts/0 00:00:00 grep mount 1 2 3 $ ps -ef | grep mkfs [...] root 1297291 1296907 0 12:00 pts/0 00:00:00 grep mkfs 1 2 3 $ ps -ef | grep umount [...] root 1298500 1296907 0 12:01 pts/0 00:00:00 grep umount 1 2 3 $ ps -ef | grep unmap [...] root 1298578 1296907 0 12:01 pts/0 00:00:00 grep unmap If any commands are stuck check the dmesg logs from the node. 
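A minimal way to scan the node's kernel log for related messages (the grep pattern is only a suggestion):

```console
dmesg -T | grep -iE 'ceph|rbd|libceph' | tail -n 50
```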
Restarting the csi-rbdplugin pod also may help sometimes. If you don\u2019t see any stuck messages, confirm the network connectivity, Ceph health, and slow ops. dmesg logs \u00b6 Check the dmesg logs on the node where pvc mounting is failing or the csi-rbdplugin container of the csi-rbdplugin-xxxx pod on that node. 1 dmesg RBD Commands \u00b6 If nothing else helps, get the last executed command from the ceph-csi pod logs and run it manually inside the provisioner or plugin pod to see if there are errors returned even if they couldn't be seen in the logs. 1 rbd ls --id=csi-rbd-node -m=10.111.136.166:6789 --key=AQDpIQhg+v83EhAAgLboWIbl+FL/nThJzoI3Fg== Where -m is one of the mon endpoints and the --key is the key used by the CSI driver for accessing the Ceph cluster. Node Loss \u00b6 When a node is lost, you will see application pods on the node stuck in the Terminating state while another pod is rescheduled and is in the ContainerCreating state. Important For clusters with Kubernetes version 1.26 or greater, see the improved automation to recover from the node loss. If using K8s 1.25 or older, continue with these instructions. Force deleting the pod \u00b6 To force delete the pod stuck in the Terminating state: 1 kubectl -n rook-ceph delete pod my-app-69cd495f9b-nl6hf --grace-period 0 --force After the force delete, wait for a timeout of about 8-10 minutes. If the pod still not in the running state, continue with the next section to blocklist the node. Blocklisting a node \u00b6 To shorten the timeout, you can mark the node as \"blocklisted\" from the Rook toolbox so Rook can safely failover the pod sooner. 1 2 $ ceph osd blocklist add  # get the node IP you want to blocklist blocklisting  After running the above command within a few minutes the pod will be running. Removing a node blocklist \u00b6 After you are absolutely sure the node is permanently offline and that the node no longer needs to be blocklisted, remove the node from the blocklist. 1 2 $ ceph osd blocklist rm  un-blocklisting ","title":"CSI Common Issues"},{"location":"Troubleshooting/ceph-csi-common-issues/#block-rbd","text":"If you are mounting block volumes (usually RWO), these are referred to as RBD volumes in Ceph. See the sections below for RBD if you are having block volume issues.","title":"Block (RBD)"},{"location":"Troubleshooting/ceph-csi-common-issues/#shared-filesystem-cephfs","text":"If you are mounting shared filesystem volumes (usually RWX), these are referred to as CephFS volumes in Ceph. See the sections below for CephFS if you are having filesystem volume issues.","title":"Shared Filesystem (CephFS)"},{"location":"Troubleshooting/ceph-csi-common-issues/#network-connectivity","text":"The Ceph monitors are the most critical component of the cluster to check first. Retrieve the mon endpoints from the services: 1 2 3 4 5 $ kubectl -n rook-ceph get svc -l app = rook-ceph-mon NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-mon-a ClusterIP 10.104.165.31  6789/TCP,3300/TCP 18h rook-ceph-mon-b ClusterIP 10.97.244.93  6789/TCP,3300/TCP 21s rook-ceph-mon-c ClusterIP 10.99.248.163  6789/TCP,3300/TCP 8s If host networking is enabled in the CephCluster CR, you will instead need to find the node IPs for the hosts where the mons are running. The clusterIP is the mon IP and 3300 is the port that will be used by Ceph-CSI to connect to the ceph cluster. These endpoints must be accessible by all clients in the cluster, including the CSI driver. 
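The operator records the current mon endpoints in the rook-ceph-mon-endpoints ConfigMap; comparing it with the services above is a quick consistency check. A sketch, assuming the data key used by current Rook releases:

```console
kubectl -n rook-ceph get configmap rook-ceph-mon-endpoints -o jsonpath='{.data.data}{"\n"}'
```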
If you are seeing issues provisioning the PVC then you need to check the network connectivity from the provisioner pods. For CephFS PVCs, check network connectivity from the csi-cephfsplugin container of the csi-cephfsplugin-provisioner pods For Block PVCs, check network connectivity from the csi-rbdplugin container of the csi-rbdplugin-provisioner pods For redundancy, there are two provisioner pods for each type. Make sure to test connectivity from all provisioner pods. Connect to the provisioner pods and verify the connection to the mon endpoints such as the following: 1 2 3 4 5 6 # Connect to the csi-cephfsplugin container in the provisioner pod kubectl -n rook-ceph exec -ti deploy/csi-cephfsplugin-provisioner -c csi-cephfsplugin -- bash # Test the network connection to the mon endpoint curl 10.104.165.31:3300 2>/dev/null ceph v2 If you see the response \"ceph v2\", the connection succeeded. If there is no response then there is a network issue connecting to the ceph cluster. Check network connectivity for all monitor IP\u2019s and ports which are passed to ceph-csi.","title":"Network Connectivity"},{"location":"Troubleshooting/ceph-csi-common-issues/#ceph-health","text":"Sometimes an unhealthy Ceph cluster can contribute to the issues in creating or mounting the PVC. Check that your Ceph cluster is healthy by connecting to the Toolbox and running the ceph commands: 1 ceph health detail 1 HEALTH_OK","title":"Ceph Health"},{"location":"Troubleshooting/ceph-csi-common-issues/#slow-operations","text":"Even slow ops in the ceph cluster can contribute to the issues. In the toolbox, make sure that no slow ops are present and the ceph cluster is healthy 1 2 3 4 5 6 $ ceph -s cluster: id: ba41ac93-3b55-4f32-9e06-d3d8c6ff7334 health: HEALTH_WARN 30 slow ops, oldest one blocked for 10624 sec, mon.a has slow ops [...] If Ceph is not healthy, check the following health for more clues: The Ceph monitor logs for errors The OSD logs for errors Disk Health Network Health","title":"Slow Operations"},{"location":"Troubleshooting/ceph-csi-common-issues/#ceph-troubleshooting","text":"","title":"Ceph Troubleshooting"},{"location":"Troubleshooting/ceph-csi-common-issues/#check-if-the-rbd-pool-exists","text":"Make sure the pool you have specified in the storageclass.yaml exists in the ceph cluster. Suppose the pool name mentioned in the storageclass.yaml is replicapool . It can be verified to exist in the toolbox: 1 2 3 $ ceph osd lspools 1 device_health_metrics 2 replicapool If the pool is not in the list, create the CephBlockPool CR for the pool if you have not already. If you have already created the pool, check the Rook operator log for errors creating the pool.","title":"Check if the RBD Pool exists"},{"location":"Troubleshooting/ceph-csi-common-issues/#check-if-the-filesystem-exists","text":"For the shared filesystem (CephFS), check that the filesystem and pools you have specified in the storageclass.yaml exist in the Ceph cluster. Suppose the fsName name mentioned in the storageclass.yaml is myfs . It can be verified in the toolbox: 1 2 $ ceph fs ls name: myfs, metadata pool: myfs-metadata, data pools: [myfs-data0 ] Now verify the pool mentioned in the storageclass.yaml exists, such as the example myfs-data0 . 
1 2 3 4 5 ceph osd lspools 1 device_health_metrics 2 replicapool 3 myfs-metadata0 4 myfs-data0 The pool for the filesystem will have the suffix -data0 compared the filesystem name that is created by the CephFilesystem CR.","title":"Check if the Filesystem exists"},{"location":"Troubleshooting/ceph-csi-common-issues/#subvolumegroups","text":"If the subvolumegroup is not specified in the ceph-csi configmap (where you have passed the ceph monitor information), Ceph-CSI creates the default subvolumegroup with the name csi. Verify that the subvolumegroup exists: 1 2 3 4 5 6 $ ceph fs subvolumegroup ls myfs [ { \"name\": \"csi\" } ] If you don\u2019t see any issues with your Ceph cluster, the following sections will start debugging the issue from the CSI side.","title":"subvolumegroups"},{"location":"Troubleshooting/ceph-csi-common-issues/#provisioning-volumes","text":"At times the issue can also exist in the Ceph-CSI or the sidecar containers used in Ceph-CSI. Ceph-CSI has included number of sidecar containers in the provisioner pods such as: csi-attacher , csi-resizer , csi-provisioner , csi-cephfsplugin , csi-snapshotter , and liveness-prometheus . The CephFS provisioner core CSI driver container name is csi-cephfsplugin as one of the container names. For the RBD (Block) provisioner you will see csi-rbdplugin as the container name. Here is a summary of the sidecar containers:","title":"Provisioning Volumes"},{"location":"Troubleshooting/ceph-csi-common-issues/#csi-provisioner","text":"The external-provisioner is a sidecar container that dynamically provisions volumes by calling ControllerCreateVolume() and ControllerDeleteVolume() functions of CSI drivers. More details about external-provisioner can be found here. If there is an issue with PVC Create or Delete, check the logs of the csi-provisioner sidecar container. 1 kubectl -n rook-ceph logs deploy/csi-rbdplugin-provisioner -c csi-provisioner","title":"csi-provisioner"},{"location":"Troubleshooting/ceph-csi-common-issues/#csi-resizer","text":"The CSI external-resizer is a sidecar container that watches the Kubernetes API server for PersistentVolumeClaim updates and triggers ControllerExpandVolume operations against a CSI endpoint if the user requested more storage on the PersistentVolumeClaim object. More details about external-provisioner can be found here. If any issue exists in PVC expansion you can check the logs of the csi-resizer sidecar container. 1 kubectl -n rook-ceph logs deploy/csi-rbdplugin-provisioner -c csi-resizer","title":"csi-resizer"},{"location":"Troubleshooting/ceph-csi-common-issues/#csi-snapshotter","text":"The CSI external-snapshotter sidecar only watches for VolumeSnapshotContent create/update/delete events. It will talk to ceph-csi containers to create or delete snapshots. More details about external-snapshotter can be found here . In Kubernetes 1.17 the volume snapshot feature was promoted to beta. In Kubernetes 1.20, the feature gate is enabled by default on standard Kubernetes deployments and cannot be turned off. Make sure you have installed the correct snapshotter CRD version. If you have not installed the snapshotter controller, see the Snapshots guide . 1 2 3 4 $ kubectl get crd | grep snapshot volumesnapshotclasses.snapshot.storage.k8s.io 2021-01-25T11:19:38Z volumesnapshotcontents.snapshot.storage.k8s.io 2021-01-25T11:19:39Z volumesnapshots.snapshot.storage.k8s.io 2021-01-25T11:19:40Z The above CRDs must have the matching version in your snapshotclass.yaml or snapshot.yaml . 
Otherwise, the VolumeSnapshot and VolumesnapshotContent will not be created. The snapshot controller is responsible for creating both VolumeSnapshot and VolumesnapshotContent object. If the objects are not getting created, you may need to check the logs of the snapshot-controller container. Rook only installs the snapshotter sidecar container, not the controller. It is recommended that Kubernetes distributors bundle and deploy the controller and CRDs as part of their Kubernetes cluster management process (independent of any CSI Driver). If your Kubernetes distribution does not bundle the snapshot controller, you may manually install these components. If any issue exists in the snapshot Create/Delete operation you can check the logs of the csi-snapshotter sidecar container. 1 kubectl -n rook-ceph logs deploy/csi-rbdplugin-provisioner -c csi-snapshotter If you see an error about a volume already existing such as: 1 2 GRPC error: rpc error: code = Aborted desc = an operation with the given Volume ID 0001-0009-rook-ceph-0000000000000001-8d0ba728-0e17-11eb-a680-ce6eecc894de already exists. The issue typically is in the Ceph cluster or network connectivity. If the issue is in Provisioning the PVC Restarting the Provisioner pods help(for CephFS issue restart csi-cephfsplugin-provisioner-xxxxxx CephFS Provisioner. For RBD, restart the csi-rbdplugin-provisioner-xxxxxx pod. If the issue is in mounting the PVC, restart the csi-rbdplugin-xxxxx pod (for RBD) and the csi-cephfsplugin-xxxxx pod for CephFS issue.","title":"csi-snapshotter"},{"location":"Troubleshooting/ceph-csi-common-issues/#mounting-the-volume-to-application-pods","text":"When a user requests to create the application pod with PVC, there is a three-step process CSI driver registration Create volume attachment object Stage and publish the volume","title":"Mounting the volume to application pods"},{"location":"Troubleshooting/ceph-csi-common-issues/#csi-driver-registration","text":"csi-cephfsplugin-xxxx or csi-rbdplugin-xxxx is a daemonset pod running on all the nodes where your application gets scheduled. If the plugin pods are not running on the node where your application is scheduled might cause the issue, make sure plugin pods are always running. Each plugin pod has two important containers: one is driver-registrar and csi-rbdplugin or csi-cephfsplugin . Sometimes there is also a liveness-prometheus container.","title":"csi-driver registration"},{"location":"Troubleshooting/ceph-csi-common-issues/#driver-registrar","text":"The node-driver-registrar is a sidecar container that registers the CSI driver with Kubelet. More details can be found here . If any issue exists in attaching the PVC to the application pod check logs from driver-registrar sidecar container in plugin pod where your application pod is scheduled. 1 2 3 4 5 6 7 8 9 10 11 12 $ kubectl -n rook-ceph logs deploy/csi-rbdplugin -c driver-registrar [...] 
I0120 12:28:34.231761 124018 main.go:112] Version: v2.0.1 I0120 12:28:34.233910 124018 connection.go:151] Connecting to unix:///csi/csi.sock I0120 12:28:35.242469 124018 node_register.go:55] Starting Registration Server at: /registration/rook-ceph.rbd.csi.ceph.com-reg.sock I0120 12:28:35.243364 124018 node_register.go:64] Registration Server started at: /registration/rook-ceph.rbd.csi.ceph.com-reg.sock I0120 12:28:35.243673 124018 node_register.go:86] Skipping healthz server because port set to: 0 I0120 12:28:36.318482 124018 main.go:79] Received GetInfo call: &InfoRequest{} I0120 12:28:37.455211 124018 main.go:89] Received NotifyRegistrationStatus call: &RegistrationStatus{PluginRegistered:true,Error:,} E0121 05:19:28.658390 124018 connection.go:129] Lost connection to unix:///csi/csi.sock. E0125 07:11:42.926133 124018 connection.go:129] Lost connection to unix:///csi/csi.sock. [...] You should see the response RegistrationStatus{PluginRegistered:true,Error:,} in the logs to confirm that plugin is registered with kubelet. If you see a driver not found an error in the application pod describe output. Restarting the csi-xxxxplugin-xxx pod on the node may help.","title":"driver-registrar"},{"location":"Troubleshooting/ceph-csi-common-issues/#volume-attachment","text":"Each provisioner pod also has a sidecar container called csi-attacher .","title":"Volume Attachment"},{"location":"Troubleshooting/ceph-csi-common-issues/#csi-attacher","text":"The external-attacher is a sidecar container that attaches volumes to nodes by calling ControllerPublish and ControllerUnpublish functions of CSI drivers. It is necessary because the internal Attach/Detach controller running in Kubernetes controller-manager does not have any direct interfaces to CSI drivers. More details can be found here . If any issue exists in attaching the PVC to the application pod first check the volumeattachment object created and also log from csi-attacher sidecar container in provisioner pod. 1 2 3 $ kubectl get volumeattachment NAME ATTACHER PV NODE ATTACHED AGE csi-75903d8a902744853900d188f12137ea1cafb6c6f922ebc1c116fd58e950fc92 rook-ceph.cephfs.csi.ceph.com pvc-5c547d2a-fdb8-4cb2-b7fe-e0f30b88d454 minikube true 4m26s 1 kubectl logs po/csi-rbdplugin-provisioner-d857bfb5f-ddctl -c csi-attacher","title":"csi-attacher"},{"location":"Troubleshooting/ceph-csi-common-issues/#cephfs-stale-operations","text":"Check for any stale mount commands on the csi-cephfsplugin-xxxx pod on the node where your application pod is scheduled. You need to exec in the csi-cephfsplugin-xxxx pod and grep for stale mount operators. Identify the csi-cephfsplugin-xxxx pod running on the node where your application is scheduled with kubectl get po -o wide and match the node names. 1 2 3 4 $ kubectl exec -it csi-cephfsplugin-tfk2g -c csi-cephfsplugin -- sh $ ps -ef | grep mount [...] root 67 60 0 11:55 pts/0 00:00:00 grep mount 1 2 3 4 ps -ef |grep ceph [...] root 1 0 0 Jan20 ? 00:00:26 /usr/local/bin/cephcsi --nodeid=minikube --type=cephfs --endpoint=unix:///csi/csi.sock --v=0 --nodeserver=true --drivername=rook-ceph.cephfs.csi.ceph.com --pidlimit=-1 --metricsport=9091 --forcecephkernelclient=true --metricspath=/metrics --enablegrpcmetrics=true root 69 60 0 11:55 pts/0 00:00:00 grep ceph If any commands are stuck check the dmesg logs from the node. Restarting the csi-cephfsplugin pod may also help sometimes. 
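If a restart is needed, deleting the plugin pod is enough since the DaemonSet recreates it automatically; the pod name below is the example one used above:

```console
kubectl -n rook-ceph delete pod csi-cephfsplugin-tfk2g
```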
If you don\u2019t see any stuck messages, confirm the network connectivity, Ceph health, and slow ops.","title":"CephFS Stale operations"},{"location":"Troubleshooting/ceph-csi-common-issues/#rbd-stale-operations","text":"Check for any stale map/mkfs/mount commands on the csi-rbdplugin-xxxx pod on the node where your application pod is scheduled. You need to exec in the csi-rbdplugin-xxxx pod and grep for stale operators like ( rbd map, rbd unmap, mkfs, mount and umount ). Identify the csi-rbdplugin-xxxx pod running on the node where your application is scheduled with kubectl get po -o wide and match the node names. 1 2 3 4 $ kubectl exec -it csi-rbdplugin-vh8d5 -c csi-rbdplugin -- sh $ ps -ef | grep map [...] root 1297024 1296907 0 12:00 pts/0 00:00:00 grep map 1 2 3 4 5 $ ps -ef | grep mount [...] root 1824 1 0 Jan19 ? 00:00:00 /usr/sbin/rpc.mountd ceph 1041020 1040955 1 07:11 ? 00:03:43 ceph-mgr --fsid=ba41ac93-3b55-4f32-9e06-d3d8c6ff7334 --keyring=/etc/ceph/keyring-store/keyring --log-to-stderr=true --err-to-stderr=true --mon-cluster-log-to-stderr=true --log-stderr-prefix=debug --default-log-to-file=false --default-mon-cluster-log-to-file=false --mon-host=[v2:10.111.136.166:3300,v1:10.111.136.166:6789] --mon-initial-members=a --id=a --setuser=ceph --setgroup=ceph --client-mount-uid=0 --client-mount-gid=0 --foreground --public-addr=172.17.0.6 root 1297115 1296907 0 12:00 pts/0 00:00:00 grep mount 1 2 3 $ ps -ef | grep mkfs [...] root 1297291 1296907 0 12:00 pts/0 00:00:00 grep mkfs 1 2 3 $ ps -ef | grep umount [...] root 1298500 1296907 0 12:01 pts/0 00:00:00 grep umount 1 2 3 $ ps -ef | grep unmap [...] root 1298578 1296907 0 12:01 pts/0 00:00:00 grep unmap If any commands are stuck check the dmesg logs from the node. Restarting the csi-rbdplugin pod also may help sometimes. If you don\u2019t see any stuck messages, confirm the network connectivity, Ceph health, and slow ops.","title":"RBD Stale operations"},{"location":"Troubleshooting/ceph-csi-common-issues/#dmesg-logs","text":"Check the dmesg logs on the node where pvc mounting is failing or the csi-rbdplugin container of the csi-rbdplugin-xxxx pod on that node. 1 dmesg","title":"dmesg logs"},{"location":"Troubleshooting/ceph-csi-common-issues/#rbd-commands","text":"If nothing else helps, get the last executed command from the ceph-csi pod logs and run it manually inside the provisioner or plugin pod to see if there are errors returned even if they couldn't be seen in the logs. 1 rbd ls --id=csi-rbd-node -m=10.111.136.166:6789 --key=AQDpIQhg+v83EhAAgLboWIbl+FL/nThJzoI3Fg== Where -m is one of the mon endpoints and the --key is the key used by the CSI driver for accessing the Ceph cluster.","title":"RBD Commands"},{"location":"Troubleshooting/ceph-csi-common-issues/#node-loss","text":"When a node is lost, you will see application pods on the node stuck in the Terminating state while another pod is rescheduled and is in the ContainerCreating state. Important For clusters with Kubernetes version 1.26 or greater, see the improved automation to recover from the node loss. If using K8s 1.25 or older, continue with these instructions.","title":"Node Loss"},{"location":"Troubleshooting/ceph-csi-common-issues/#force-deleting-the-pod","text":"To force delete the pod stuck in the Terminating state: 1 kubectl -n rook-ceph delete pod my-app-69cd495f9b-nl6hf --grace-period 0 --force After the force delete, wait for a timeout of about 8-10 minutes. 
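While waiting, it can help to watch the replacement pod and the volume attachments; the names and namespace below follow the earlier example and are illustrative:

```console
kubectl -n rook-ceph get pod -o wide | grep my-app
kubectl get volumeattachment
```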
If the pod is still not in the Running state, continue with the next section to blocklist the node.","title":"Force deleting the pod"},{"location":"Troubleshooting/ceph-csi-common-issues/#blocklisting-a-node","text":"To shorten the timeout, you can mark the node as \"blocklisted\" from the Rook toolbox so Rook can safely fail over the pod sooner. 1 2 $ ceph osd blocklist add  # get the node IP you want to blocklist blocklisting  Within a few minutes of running the above command, the pod will be running.","title":"Blocklisting a node"},{"location":"Troubleshooting/ceph-csi-common-issues/#removing-a-node-blocklist","text":"After you are absolutely sure the node is permanently offline and that the node no longer needs to be blocklisted, remove the node from the blocklist. 1 2 $ ceph osd blocklist rm  un-blocklisting ","title":"Removing a node blocklist"},{"location":"Troubleshooting/ceph-toolbox/","text":"The Rook toolbox is a container with common tools used for Rook debugging and testing. The toolbox is based on CentOS, so more tools of your choosing can be easily installed with yum . The toolbox can be run in two modes: Interactive : Start a toolbox pod where you can connect and execute Ceph commands from a shell One-time job : Run a script with Ceph commands and collect the results from the job log Hint Before running the toolbox you should have a running Rook cluster deployed (see the Quickstart Guide ). Note The toolbox is not necessary if you are using the Krew plugin to execute Ceph commands. Interactive Toolbox \u00b6 The Rook toolbox can run as a deployment in a Kubernetes cluster where you can connect and run arbitrary Ceph commands. Launch the rook-ceph-tools pod: 1 kubectl create -f deploy/examples/toolbox.yaml Wait for the toolbox pod to download its container and get to the running state: 1 kubectl -n rook-ceph rollout status deploy/rook-ceph-tools Once the rook-ceph-tools pod is running, you can connect to it with: 1 kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- bash All available tools in the toolbox are ready for your troubleshooting needs. 
Example : ceph status ceph osd status ceph df rados df When you are done with the toolbox, you can remove the deployment: 1 kubectl -n rook-ceph delete deploy/rook-ceph-tools","title":"Interactive Toolbox"},{"location":"Troubleshooting/ceph-toolbox/#toolbox-job","text":"If you want to run Ceph commands as a one-time operation and collect the results later from the logs, you can run a script as a Kubernetes Job. The toolbox job will run a script that is embedded in the job spec. The script has the full flexibility of a bash script. In this example, the ceph status command is executed when the job is created. Create the toolbox job: 1 kubectl create -f deploy/examples/toolbox-job.yaml After the job completes, see the results of the script: 1 kubectl -n rook-ceph logs -l job-name=rook-ceph-toolbox-job","title":"Toolbox Job"},{"location":"Troubleshooting/common-issues/","text":"To help troubleshoot your Rook clusters, here are some tips on what information will help solve the issues you might be seeing. If after trying the suggestions found on this page and the problem is not resolved, the Rook team is very happy to help you troubleshoot the issues in their Slack channel. Once you have registered for the Rook Slack , proceed to the General channel to ask for assistance. Ceph Common Issues \u00b6 For common issues specific to Ceph, see the Ceph Common Issues page. Troubleshooting Techniques \u00b6 Kubernetes status and logs are the main resources needed to investigate issues in any Rook cluster. Kubernetes Tools \u00b6 Kubernetes status is the first line of investigating when something goes wrong with the cluster. Here are a few artifacts that are helpful to gather: Rook pod status: kubectl get pod -n  -o wide e.g., kubectl get pod -n rook-ceph -o wide Logs for Rook pods Logs for the operator: kubectl logs -n  -l app= e.g., kubectl logs -n rook-ceph -l app=rook-ceph-operator Logs for a specific pod: kubectl logs -n   , or a pod using a label such as mon1: kubectl logs -n  -l  e.g., kubectl logs -n rook-ceph -l mon=a Logs on a specific node to find why a PVC is failing to mount: Connect to the node, then get kubelet logs (if your distro is using systemd): journalctl -u kubelet Pods with multiple containers For all containers, in order: kubectl -n  logs  --all-containers For a single container: kubectl -n  logs  -c  Logs for pods which are no longer running: kubectl -n  logs --previous  Some pods have specialized init containers, so you may need to look at logs for different containers within the pod. kubectl -n  logs  -c  Other Rook artifacts: kubectl -n  get all","title":"Common Issues"},{"location":"Troubleshooting/common-issues/#ceph-common-issues","text":"For common issues specific to Ceph, see the Ceph Common Issues page.","title":"Ceph Common Issues"},{"location":"Troubleshooting/common-issues/#troubleshooting-techniques","text":"Kubernetes status and logs are the main resources needed to investigate issues in any Rook cluster.","title":"Troubleshooting Techniques"},{"location":"Troubleshooting/common-issues/#kubernetes-tools","text":"Kubernetes status is the first line of investigating when something goes wrong with the cluster. 
Here are a few artifacts that are helpful to gather: Rook pod status: kubectl get pod -n  -o wide e.g., kubectl get pod -n rook-ceph -o wide Logs for Rook pods Logs for the operator: kubectl logs -n  -l app= e.g., kubectl logs -n rook-ceph -l app=rook-ceph-operator Logs for a specific pod: kubectl logs -n   , or a pod using a label such as mon1: kubectl logs -n  -l  e.g., kubectl logs -n rook-ceph -l mon=a Logs on a specific node to find why a PVC is failing to mount: Connect to the node, then get kubelet logs (if your distro is using systemd): journalctl -u kubelet Pods with multiple containers For all containers, in order: kubectl -n  logs  --all-containers For a single container: kubectl -n  logs  -c  Logs for pods which are no longer running: kubectl -n  logs --previous  Some pods have specialized init containers, so you may need to look at logs for different containers within the pod. kubectl -n  logs  -c  Other Rook artifacts: kubectl -n  get all","title":"Kubernetes Tools"},{"location":"Troubleshooting/direct-tools/","text":"Rook is designed with Kubernetes design principles from the ground up. This topic is going to escape the bounds of Kubernetes storage and show you how to use block and file storage directly from a pod without any of the Kubernetes magic. The purpose of this topic is to help you quickly test a new configuration, although it is not meant to be used in production. All of the benefits of Kubernetes storage including failover, detach, and attach will not be available. If your pod dies, your mount will die with it. Start the Direct Mount Pod \u00b6 To test mounting your Ceph volumes, start a pod with the necessary mounts. An example is provided in the examples test directory: 1 kubectl create -f deploy/examples/direct-mount.yaml After the pod is started, connect to it like this: 1 2 kubectl -n rook-ceph get pod -l app=rook-direct-mount $ kubectl -n rook-ceph exec -it  bash Block Storage Tools \u00b6 After you have created a pool as described in the Block Storage topic, you can create a block image and mount it directly in a pod. This example will show how the Ceph rbd volume can be mounted in the direct mount pod. Create the Direct Mount Pod . Create a volume image (10MB): 1 2 3 4 5 rbd create replicapool/test --size 10 rbd info replicapool/test # Disable the rbd features that are not in the kernel module rbd feature disable replicapool/test fast-diff deep-flatten object-map Map the block volume and format it and mount it: 1 2 3 4 5 6 7 8 9 10 11 12 13 # Map the rbd device. If the Direct Mount Pod was started with \"hostNetwork: false\" this hangs and you have to stop it with Ctrl-C, # however the command still succeeds ; see https://github.com/rook/rook/issues/2021 rbd map replicapool/test # Find the device name, such as rbd0 lsblk | grep rbd # Format the volume ( only do this the first time or you will lose data ) mkfs.ext4 -m0 /dev/rbd0 # Mount the block device mkdir /tmp/rook-volume mount /dev/rbd0 /tmp/rook-volume Write and read a file: 1 2 echo \"Hello Rook\" > /tmp/rook-volume/hello cat /tmp/rook-volume/hello Unmount the Block device \u00b6 Unmount the volume and unmap the kernel device: 1 2 umount /tmp/rook-volume rbd unmap /dev/rbd0 Shared Filesystem Tools \u00b6 After you have created a filesystem as described in the Shared Filesystem topic, you can mount the filesystem from multiple pods. The the other topic you may have mounted the filesystem already in the registry pod. Now we will mount the same filesystem in the Direct Mount pod. 
This is just a simple way to validate the Ceph filesystem and is not recommended for production Kubernetes pods. Follow Direct Mount Pod to start a pod with the necessary mounts and then proceed with the following commands after connecting to the pod. 1 2 3 4 5 6 7 8 9 10 11 12 # Create the directory mkdir /tmp/registry # Detect the mon endpoints and the user secret for the connection mon_endpoints=$(grep mon_host /etc/ceph/ceph.conf | awk '{print $3}') my_secret=$(grep key /etc/ceph/keyring | awk '{print $3}') # Mount the filesystem mount -t ceph -o mds_namespace=myfs,name=admin,secret=$my_secret $mon_endpoints:/ /tmp/registry # See your mounted filesystem df -h Now you should have a mounted filesystem. If you have pushed images to the registry you will see a directory called docker . 1 ls /tmp/registry Try writing and reading a file to the shared filesystem. 1 2 3 4 5 echo \"Hello Rook\" > /tmp/registry/hello cat /tmp/registry/hello # delete the file when you ' re done rm -f /tmp/registry/hello Unmount the Filesystem \u00b6 To unmount the shared filesystem from the Direct Mount Pod: 1 2 umount /tmp/registry rmdir /tmp/registry No data will be deleted by unmounting the filesystem.","title":"Direct Tools"},{"location":"Troubleshooting/direct-tools/#start-the-direct-mount-pod","text":"To test mounting your Ceph volumes, start a pod with the necessary mounts. An example is provided in the examples test directory: 1 kubectl create -f deploy/examples/direct-mount.yaml After the pod is started, connect to it like this: 1 2 kubectl -n rook-ceph get pod -l app=rook-direct-mount $ kubectl -n rook-ceph exec -it  bash","title":"Start the Direct Mount Pod"},{"location":"Troubleshooting/direct-tools/#block-storage-tools","text":"After you have created a pool as described in the Block Storage topic, you can create a block image and mount it directly in a pod. This example will show how the Ceph rbd volume can be mounted in the direct mount pod. Create the Direct Mount Pod . Create a volume image (10MB): 1 2 3 4 5 rbd create replicapool/test --size 10 rbd info replicapool/test # Disable the rbd features that are not in the kernel module rbd feature disable replicapool/test fast-diff deep-flatten object-map Map the block volume and format it and mount it: 1 2 3 4 5 6 7 8 9 10 11 12 13 # Map the rbd device. If the Direct Mount Pod was started with \"hostNetwork: false\" this hangs and you have to stop it with Ctrl-C, # however the command still succeeds ; see https://github.com/rook/rook/issues/2021 rbd map replicapool/test # Find the device name, such as rbd0 lsblk | grep rbd # Format the volume ( only do this the first time or you will lose data ) mkfs.ext4 -m0 /dev/rbd0 # Mount the block device mkdir /tmp/rook-volume mount /dev/rbd0 /tmp/rook-volume Write and read a file: 1 2 echo \"Hello Rook\" > /tmp/rook-volume/hello cat /tmp/rook-volume/hello","title":"Block Storage Tools"},{"location":"Troubleshooting/direct-tools/#unmount-the-block-device","text":"Unmount the volume and unmap the kernel device: 1 2 umount /tmp/rook-volume rbd unmap /dev/rbd0","title":"Unmount the Block device"},{"location":"Troubleshooting/direct-tools/#shared-filesystem-tools","text":"After you have created a filesystem as described in the Shared Filesystem topic, you can mount the filesystem from multiple pods. The the other topic you may have mounted the filesystem already in the registry pod. Now we will mount the same filesystem in the Direct Mount pod. 
This is just a simple way to validate the Ceph filesystem and is not recommended for production Kubernetes pods. Follow Direct Mount Pod to start a pod with the necessary mounts and then proceed with the following commands after connecting to the pod. 1 2 3 4 5 6 7 8 9 10 11 12 # Create the directory mkdir /tmp/registry # Detect the mon endpoints and the user secret for the connection mon_endpoints=$(grep mon_host /etc/ceph/ceph.conf | awk '{print $3}') my_secret=$(grep key /etc/ceph/keyring | awk '{print $3}') # Mount the filesystem mount -t ceph -o mds_namespace=myfs,name=admin,secret=$my_secret $mon_endpoints:/ /tmp/registry # See your mounted filesystem df -h Now you should have a mounted filesystem. If you have pushed images to the registry you will see a directory called docker . 1 ls /tmp/registry Try writing and reading a file to the shared filesystem. 1 2 3 4 5 echo \"Hello Rook\" > /tmp/registry/hello cat /tmp/registry/hello # delete the file when you ' re done rm -f /tmp/registry/hello","title":"Shared Filesystem Tools"},{"location":"Troubleshooting/direct-tools/#unmount-the-filesystem","text":"To unmount the shared filesystem from the Direct Mount Pod: 1 2 umount /tmp/registry rmdir /tmp/registry No data will be deleted by unmounting the filesystem.","title":"Unmount the Filesystem"},{"location":"Troubleshooting/disaster-recovery/","text":"Under extenuating circumstances, steps may be necessary to recover the cluster health. There are several types of recovery addressed in this document. Restoring Mon Quorum \u00b6 Under extenuating circumstances, the mons may lose quorum. If the mons cannot form quorum again, there is a manual procedure to get the quorum going again. The only requirement is that at least one mon is still healthy. The following steps will remove the unhealthy mons from quorum and allow you to form a quorum again with a single mon, then grow the quorum back to the original size. The Rook Krew Plugin has a command restore-quorum that will walk you through the mon quorum automated restoration process. If the name of the healthy mon is c , you would run the command: 1 kubectl rook-ceph mons restore-quorum c See the restore-quorum documentation for more details. Restoring CRDs After Deletion \u00b6 When the Rook CRDs are deleted, the Rook operator will respond to the deletion event to attempt to clean up the cluster resources. If any data appears present in the cluster, Rook will refuse to allow the resources to be deleted since the operator will refuse to remove the finalizer on the CRs until the underlying data is deleted. For more details, see the dependency design doc . While it is good that the CRs will not be deleted and the underlying Ceph data and daemons continue to be available, the CRs will be stuck indefinitely in a Deleting state in which the operator will not continue to ensure cluster health. Upgrades will be blocked, further updates to the CRs are prevented, and so on. Since Kubernetes does not allow undeleting resources, the following procedure will allow you to restore the CRs to their prior state without even necessarily suffering cluster downtime. Note In the following commands, the affected CephCluster resource is called rook-ceph . If yours is named differently, the commands will need to be adjusted. Scale down the operator. 1 kubectl -n rook-ceph scale --replicas=0 deploy/rook-ceph-operator Backup all Rook CRs and critical metadata 1 2 3 4 5 6 # Store the ` CephCluster ` CR settings. Also, save other Rook CRs that are in terminating state. 
kubectl -n rook-ceph get cephcluster rook-ceph -o yaml > cluster.yaml # Backup critical secrets and configmaps in case something goes wrong later in the procedure kubectl -n rook-ceph get secret -o yaml > secrets.yaml kubectl -n rook-ceph get configmap -o yaml > configmaps.yaml (Optional, if webhook is enabled) Delete the ValidatingWebhookConfiguration . This is the resource which connects Rook custom resources to the operator pod's validating webhook. Because the operator is unavailable, we must temporarily disable the valdiating webhook in order to make changes. 1 2 3 ```console kubectl delete ValidatingWebhookConfiguration rook-ceph-webhook ``` Remove the owner references from all critical Rook resources that were referencing the CephCluster CR. Programmatically determine all such resources, using this command: 1 2 3 4 5 6 # Determine the ` CephCluster ` UID ROOK_UID=$(kubectl -n rook-ceph get cephcluster rook-ceph -o 'jsonpath={.metadata.uid}') # List all secrets, configmaps, services, deployments, and PVCs with that ownership UID. RESOURCES=$(kubectl -n rook-ceph get secret,configmap,service,deployment,pvc -o jsonpath='{range .items[?(@.metadata.ownerReferences[*].uid==\"'\"$ROOK_UID\"'\")]}{.kind}{\"/\"}{.metadata.name}{\"\\n\"}{end}') # Show the collected resources. kubectl -n rook-ceph get $RESOURCES Verify that all critical resources are shown in the output. The critical resources are these: Secrets: rook-ceph-admin-keyring , rook-ceph-config , rook-ceph-mon , rook-ceph-mons-keyring ConfigMap: rook-ceph-mon-endpoints Services: rook-ceph-mon-* , rook-ceph-mgr-* Deployments: rook-ceph-mon-* , rook-ceph-osd-* , rook-ceph-mgr-* PVCs (if applicable): rook-ceph-mon-* and the OSD PVCs (named -* , for example set1-data-* ) For each listed resource, remove the ownerReferences metadata field, in order to unlink it from the deleting CephCluster CR. To do so programmatically, use the command: 1 2 3 for resource in $(kubectl -n rook-ceph get $RESOURCES -o name); do kubectl -n rook-ceph patch $resource -p '{\"metadata\": {\"ownerReferences\":null}}' done For a manual alternative, issue kubectl edit on each resource, and remove the block matching: 1 2 3 4 5 6 7 ownerReferences : - apiVersion : ceph.rook.io/v1 blockOwnerDeletion : true controller : true kind : ` CephCluster` name : rook-ceph uid :  Before completing this step, validate these things. Failing to do so could result in data loss. Confirm that cluster.yaml contains the CephCluster CR. Confirm all critical resources listed above have had the ownerReference to the CephCluster CR removed. Remove the finalizer from the CephCluster resource. This will cause the resource to be immediately deleted by Kubernetes. 1 kubectl -n rook-ceph patch cephcluster/rook-ceph --type json --patch='[ { \"op\": \"remove\", \"path\": \"/metadata/finalizers\" } ]' After the finalizer is removed, the CephCluster will be immediately deleted. If all owner references were properly removed, all ceph daemons will continue running and there will be no downtime. Create the CephCluster CR with the same settings as previously 1 2 # Use the same cluster settings as exported in step 2 . 
kubectl create -f cluster.yaml If there are other CRs in terminating state such as CephBlockPools, CephObjectStores, or CephFilesystems, follow the above steps as well for those CRs: Backup the CR Remove the finalizer and confirm the CR is deleted (the underlying Ceph resources will be preserved) Create the CR again Scale up the operator 1 kubectl -n rook-ceph scale --replicas=1 deploy/rook-ceph-operator Watch the operator log to confirm that the reconcile completes successfully. 1 kubectl -n rook-ceph logs -f deployment/rook-ceph-operator Adopt an existing Rook Ceph cluster into a new Kubernetes cluster \u00b6 Situations this section can help resolve: The Kubernetes environment underlying a running Rook Ceph cluster failed catastrophically, requiring a new Kubernetes environment in which the user wishes to recover the previous Rook Ceph cluster. The user wishes to migrate their existing Rook Ceph cluster to a new Kubernetes environment, and downtime can be tolerated. Prerequisites \u00b6 A working Kubernetes cluster to which we will migrate the previous Rook Ceph cluster. At least one Ceph mon db is in quorum, and a sufficient number of Ceph OSDs were up and in before the disaster. The previous Rook Ceph cluster is not running. Overview for Steps below \u00b6 Start a new and clean Rook Ceph cluster, with the old CephCluster CephBlockPool CephFilesystem CephNFS CephObjectStore resources. Shut the new cluster down when it has been created successfully. Replace ceph-mon data with that of the old cluster. Replace fsid in secrets/rook-ceph-mon with that of the old one. Fix monmap in ceph-mon db. Fix ceph mon auth key. Disable auth. Start the new cluster, watch it resurrect. Fix admin auth key, and enable auth. Restart cluster for the final time. Steps \u00b6 Assuming dataDirHostPath is /var/lib/rook , and the CephCluster to adopt is named rook-ceph . Make sure the old Kubernetes cluster is completely torn down and the new Kubernetes cluster is up and running without Rook Ceph. Backup /var/lib/rook on all the Rook Ceph nodes to a different directory. Backups will be used later. Pick a /var/lib/rook/rook-ceph/rook-ceph.config from any previous Rook Ceph node and save the old cluster fsid from its content. Remove /var/lib/rook from all the Rook Ceph nodes. Add an identical CephCluster descriptor to the new Kubernetes cluster, especially identical spec.storage.config and spec.storage.nodes , except mon.count , which should be set to 1 . Add identical CephFilesystem CephBlockPool CephNFS CephObjectStore descriptors (if any) to the new Kubernetes cluster. Install Rook Ceph in the new Kubernetes cluster. Watch the operator logs with kubectl -n rook-ceph logs -f rook-ceph-operator-xxxxxxx , and wait until the orchestration has settled. STATE : Now the cluster will have rook-ceph-mon-a , rook-ceph-mgr-a , and all the auxiliary pods up and running, and zero (hopefully) rook-ceph-osd-ID-xxxxxx running. ceph -s output should report 1 mon, 1 mgr running, and all of the OSDs down, with all PGs in unknown state. Rook should not start any OSD daemon since all devices belong to the old cluster (which has a different fsid ). Run kubectl -n rook-ceph exec -it rook-ceph-mon-a-xxxxxxxx bash to enter the rook-ceph-mon-a pod: 1 2 mon-a# cat /etc/ceph/keyring-store/keyring # save this keyring content for later use mon-a# exit Stop the Rook operator by running kubectl -n rook-ceph edit deploy/rook-ceph-operator and setting replicas to 0 .
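If you prefer a single non-interactive command over editing the deployment, the same result can be achieved with kubectl scale , as used elsewhere in this guide: 1 kubectl -n rook-ceph scale --replicas=0 deploy/rook-ceph-operator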
Stop cluster daemons by running kubectl -n rook-ceph delete deploy/X where X is every deployment in namespace rook-ceph , except rook-ceph-operator and rook-ceph-tools . Save the rook-ceph-mon-a address with kubectl -n rook-ceph get cm/rook-ceph-mon-endpoints -o yaml in the new Kubernetes cluster for later use. SSH to the host where rook-ceph-mon-a in the new Kubernetes cluster resides. Remove /var/lib/rook/mon-a . Pick a healthy rook-ceph-mon-ID directory ( /var/lib/rook/mon-ID ) in the previous backup and copy it to /var/lib/rook/mon-a . ID is any healthy mon node ID of the old cluster. Replace /var/lib/rook/mon-a/keyring with the saved keyring, preserving only the [mon.] section and removing the [client.admin] section. Run docker run -it --rm -v /var/lib/rook:/var/lib/rook ceph/ceph:v14.2.1-20190430 bash . The Docker image tag should match the Ceph version used in the Rook cluster. The /etc/ceph/ceph.conf file needs to exist for ceph-mon to work. 1 2 3 4 5 6 7 8 9 10 11 12 13 touch /etc/ceph/ceph.conf cd /var/lib/rook ceph-mon --extract-monmap monmap --mon-data ./mon-a/data # Extract monmap from old ceph-mon db and save as monmap monmaptool --print monmap # Print the monmap content, which reflects the old cluster ceph-mon configuration. monmaptool --rm a monmap # Delete `a` from monmap. monmaptool --rm b monmap # Repeat, and delete `b` from monmap. monmaptool --rm c monmap # Repeat this pattern until all the old ceph-mons are removed monmaptool --rm d monmap monmaptool --rm e monmap monmaptool --addv a [v2:10.77.2.216:3300,v1:10.77.2.216:6789] monmap # Replace this address with the rook-ceph-mon-a address you saved from the previous command. ceph-mon --inject-monmap monmap --mon-data ./mon-a/data # Replace monmap in ceph-mon db with our modified version. rm monmap exit Tell Rook to run as the old cluster by running kubectl -n rook-ceph edit secret/rook-ceph-mon and changing fsid to the original fsid . Note that the fsid is base64 encoded and must not contain a trailing carriage return. For example: 1 echo -n a811f99a-d865-46b7-8f2c-f94c064e4356 | base64 # Replace with the fsid from your old cluster. Disable authentication by running kubectl -n rook-ceph edit cm/rook-config-override and adding the content below: 1 2 3 4 5 6 7 data : config : | [global] auth cluster required = none auth service required = none auth client required = none auth supported = none Bring the Rook Ceph operator back online by running kubectl -n rook-ceph edit deploy/rook-ceph-operator and setting replicas to 1 . Watch the operator logs with kubectl -n rook-ceph logs -f rook-ceph-operator-xxxxxxx , and wait until the orchestration has settled. STATE : Now the new cluster should be up and running with authentication disabled. ceph -s should report 1 mon & 1 mgr & all of the OSDs up and running, and all PGs in either active or degraded state. Run kubectl -n rook-ceph exec -it rook-ceph-tools-XXXXXXX bash to enter the tools pod: 1 2 3 4 vi key # [ paste the keyring content saved before, preserving only the [client.admin] section ] ceph auth import -i key rm key Re-enable authentication by running kubectl -n rook-ceph edit cm/rook-config-override and removing the auth configuration added in the previous steps. Stop the Rook operator by running kubectl -n rook-ceph edit deploy/rook-ceph-operator and setting replicas to 0 . Shut down the entire new cluster by running kubectl -n rook-ceph delete deploy/X where X is every deployment in namespace rook-ceph , except rook-ceph-operator and rook-ceph-tools , again. This time OSD daemons are present and should be removed too.
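One possible way to script this shutdown step is a small shell loop that skips the operator and toolbox deployments (a sketch; adjust the pattern if your deployment names differ): 1 2 3 for d in $(kubectl -n rook-ceph get deploy -o name | grep -vE 'rook-ceph-operator|rook-ceph-tools'); do kubectl -n rook-ceph delete $d ; done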
Bring the Rook Ceph operator back online by running kubectl -n rook-ceph edit deploy/rook-ceph-operator and setting replicas to 1 . Watch the operator logs with kubectl -n rook-ceph logs -f rook-ceph-operator-xxxxxxx , and wait until the orchestration has settled. STATE : Now the new cluster should be up and running with authentication enabled. ceph -s output should not change much compared to the previous steps. Backing up and restoring a cluster based on PVCs into a new Kubernetes cluster \u00b6 It is possible to migrate/restore a Rook/Ceph cluster from an existing Kubernetes cluster to a new one without resorting to SSH access or Ceph tooling. This allows doing the migration using standard Kubernetes resources only. This guide assumes the following: You have a CephCluster that uses PVCs to persist mon and osd data. Let's call it the \"old cluster\" . You can restore the PVCs as-is in the new cluster. Usually this is done by taking regular snapshots of the PVC volumes and using a tool that can re-create PVCs from these snapshots in the underlying cloud provider. Velero is one such tool. You have regular backups of the secrets and configmaps in the rook-ceph namespace. Velero provides this functionality too. Do the following in the new cluster: Stop the rook operator by scaling the deployment rook-ceph-operator down to zero: kubectl -n rook-ceph scale deployment rook-ceph-operator --replicas 0 and deleting the other deployments. An example command to do this is kubectl -n rook-ceph delete deployment -l operator!=rook Restore the rook PVCs to the new cluster. Copy the keyring and fsid secrets from the old cluster: rook-ceph-mgr-a-keyring , rook-ceph-mon , rook-ceph-mons-keyring , rook-ceph-osd-0-keyring , ... Delete the mon services and copy them from the old cluster: rook-ceph-mon-a , rook-ceph-mon-b , ... Note that simply re-applying won't work because the goal here is to restore the clusterIP in each service, and this field is immutable in Service resources. Copy the endpoints configmap from the old cluster: rook-ceph-mon-endpoints Scale the rook operator up again: kubectl -n rook-ceph scale deployment rook-ceph-operator --replicas 1 Wait until the reconciliation is over. Restoring the Rook cluster after the Rook namespace is deleted \u00b6 When the rook-ceph namespace is accidentally deleted, the good news is that the cluster can be restored. With the content in the directory dataDirHostPath and the original OSD disks, the Ceph cluster can be restored with this guide. You need to manually create a ConfigMap and a Secret to make it work. The information required for the ConfigMap and Secret can be found in the dataDirHostPath directory. The first resource is the secret named rook-ceph-mon as seen in the example below: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 apiVersion : v1 data : ceph-secret : QVFCZ0h6VmorcVNhSGhBQXVtVktNcjcrczNOWW9Oa2psYkErS0E9PQ== ceph-username : Y2xpZW50LmFkbWlu fsid : M2YyNzE4NDEtNjE4OC00N2MxLWIzZmQtOTBmZDRmOTc4Yzc2 mon-secret : QVFCZ0h6VmorcVNhSGhBQXVtVktNcjcrczNOWW9Oa2psYkErS0E9PQ== kind : Secret metadata : finalizers : - ceph.rook.io/disaster-protection name : rook-ceph-mon namespace : rook-ceph ownerReferences : null type : kubernetes.io/rook The values for the secret can be found in $dataDirHostPath/rook-ceph/client.admin.keyring and $dataDirHostPath/rook-ceph/rook-ceph.config . - ceph-secret and mon-secret are to be filled with the client.admin keyring contents. - ceph-username : set to the string client.admin - fsid : set to the original Ceph cluster id.
All the fields in the data section need to be base64 encoded. Encoding can be done like this: 1 echo -n \"string to code\" | base64 -i - Now save the secret as rook-ceph-mon.yaml , to be created later in the restore. The second resource is the configmap named rook-ceph-mon-endpoints as seen in the example below: 1 2 3 4 5 6 7 8 9 10 11 12 13 apiVersion : v1 data : csi-cluster-config-json : '[{\"clusterID\":\"rook-ceph\",\"monitors\":[\"169.169.241.153:6789\",\"169.169.82.57:6789\",\"169.169.7.81:6789\"],\"namespace\":\"\"}]' data : k=169.169.241.153:6789,m=169.169.82.57:6789,o=169.169.7.81:6789 mapping : '{\"node\":{\"k\":{\"Name\":\"10.138.55.111\",\"Hostname\":\"10.138.55.111\",\"Address\":\"10.138.55.111\"},\"m\":{\"Name\":\"10.138.55.120\",\"Hostname\":\"10.138.55.120\",\"Address\":\"10.138.55.120\"},\"o\":{\"Name\":\"10.138.55.112\",\"Hostname\":\"10.138.55.112\",\"Address\":\"10.138.55.112\"}}}' maxMonId : \"15\" kind : ConfigMap metadata : finalizers : - ceph.rook.io/disaster-protection name : rook-ceph-mon-endpoints namespace : rook-ceph ownerReferences : null The monitors' service IPs are kept in the monitor data store, so the services need to be re-created with their original IPs. After you create this configmap with the original service IPs, the rook operator will create the correct services for you, with IPs matching those in the monitor data store. The monitor IDs, their service IPs, and the mapping between them can be found in dataDirHostPath/rook-ceph/rook-ceph.config, for example: 1 2 3 4 [global] fsid = 3f271841-6188-47c1-b3fd-90fd4f978c76 mon initial members = m o k mon host = [v2:169.169.82.57:3300,v1:169.169.82.57:6789],[v2:169.169.7.81:3300,v1:169.169.7.81:6789],[v2:169.169.241.153:3300,v1:169.169.241.153:6789] mon initial members and mon host hold the monitors' IDs and IPs respectively, listed in the same order, so you can tell which monitor has which service IP address. Modify the csi-cluster-config-json and data fields of your rook-ceph-mon-endpoints.yaml based on this reading of rook-ceph.config . The mapping field tells rook where to schedule the monitors' pods. Search the dataDirHostPath on all Ceph cluster hosts for mon-m , mon-o , and mon-k . If you find mon-m on host 10.138.55.120 , fill in 10.138.55.120 in the mapping field for m , and do the same for the others. Update maxMonId to the 0-based numeric ID of the highest monitor ID. For example, 15 is the 0-based ID for mon o . Now save this configmap in the file rook-ceph-mon-endpoints.yaml, to be created later in the restore. Now that you have the info for the secret and the configmap, you are ready to restore the running cluster. Deploy Rook Ceph using the YAML files or Helm, with the same settings you had previously. 1 kubectl create -f crds.yaml -f common.yaml -f operator.yaml After the operator is running, create the configmap and secret you have just crafted: 1 kubectl create -f rook-ceph-mon.yaml -f rook-ceph-mon-endpoints.yaml Create your Ceph cluster CR (if possible, with the same settings as existed previously): 1 kubectl create -f cluster.yaml Now your Rook Ceph cluster should be running again.","title":"Disaster Recovery"},{"location":"Troubleshooting/disaster-recovery/#restoring-mon-quorum","text":"Under extenuating circumstances, the mons may lose quorum. If the mons cannot form quorum again, there is a manual procedure to get the quorum going again. The only requirement is that at least one mon is still healthy.
The following steps will remove the unhealthy mons from quorum and allow you to form a quorum again with a single mon, then grow the quorum back to the original size. The Rook Krew Plugin has a command restore-quorum that will walk you through the mon quorum automated restoration process. If the name of the healthy mon is c , you would run the command: 1 kubectl rook-ceph mons restore-quorum c See the restore-quorum documentation for more details.","title":"Restoring Mon Quorum"},{"location":"Troubleshooting/disaster-recovery/#restoring-crds-after-deletion","text":"When the Rook CRDs are deleted, the Rook operator will respond to the deletion event to attempt to clean up the cluster resources. If any data appears present in the cluster, Rook will refuse to allow the resources to be deleted since the operator will refuse to remove the finalizer on the CRs until the underlying data is deleted. For more details, see the dependency design doc . While it is good that the CRs will not be deleted and the underlying Ceph data and daemons continue to be available, the CRs will be stuck indefinitely in a Deleting state in which the operator will not continue to ensure cluster health. Upgrades will be blocked, further updates to the CRs are prevented, and so on. Since Kubernetes does not allow undeleting resources, the following procedure will allow you to restore the CRs to their prior state without even necessarily suffering cluster downtime. Note In the following commands, the affected CephCluster resource is called rook-ceph . If yours is named differently, the commands will need to be adjusted. Scale down the operator. 1 kubectl -n rook-ceph scale --replicas=0 deploy/rook-ceph-operator Backup all Rook CRs and critical metadata 1 2 3 4 5 6 # Store the ` CephCluster ` CR settings. Also, save other Rook CRs that are in terminating state. kubectl -n rook-ceph get cephcluster rook-ceph -o yaml > cluster.yaml # Backup critical secrets and configmaps in case something goes wrong later in the procedure kubectl -n rook-ceph get secret -o yaml > secrets.yaml kubectl -n rook-ceph get configmap -o yaml > configmaps.yaml (Optional, if webhook is enabled) Delete the ValidatingWebhookConfiguration . This is the resource which connects Rook custom resources to the operator pod's validating webhook. Because the operator is unavailable, we must temporarily disable the valdiating webhook in order to make changes. 1 2 3 ```console kubectl delete ValidatingWebhookConfiguration rook-ceph-webhook ``` Remove the owner references from all critical Rook resources that were referencing the CephCluster CR. Programmatically determine all such resources, using this command: 1 2 3 4 5 6 # Determine the ` CephCluster ` UID ROOK_UID=$(kubectl -n rook-ceph get cephcluster rook-ceph -o 'jsonpath={.metadata.uid}') # List all secrets, configmaps, services, deployments, and PVCs with that ownership UID. RESOURCES=$(kubectl -n rook-ceph get secret,configmap,service,deployment,pvc -o jsonpath='{range .items[?(@.metadata.ownerReferences[*].uid==\"'\"$ROOK_UID\"'\")]}{.kind}{\"/\"}{.metadata.name}{\"\\n\"}{end}') # Show the collected resources. kubectl -n rook-ceph get $RESOURCES Verify that all critical resources are shown in the output. 
The critical resources are these: Secrets: rook-ceph-admin-keyring , rook-ceph-config , rook-ceph-mon , rook-ceph-mons-keyring ConfigMap: rook-ceph-mon-endpoints Services: rook-ceph-mon-* , rook-ceph-mgr-* Deployments: rook-ceph-mon-* , rook-ceph-osd-* , rook-ceph-mgr-* PVCs (if applicable): rook-ceph-mon-* and the OSD PVCs (named -* , for example set1-data-* ) For each listed resource, remove the ownerReferences metadata field, in order to unlink it from the deleting CephCluster CR. To do so programmatically, use the command: 1 2 3 for resource in $(kubectl -n rook-ceph get $RESOURCES -o name); do kubectl -n rook-ceph patch $resource -p '{\"metadata\": {\"ownerReferences\":null}}' done For a manual alternative, issue kubectl edit on each resource, and remove the block matching: 1 2 3 4 5 6 7 ownerReferences : - apiVersion : ceph.rook.io/v1 blockOwnerDeletion : true controller : true kind : ` CephCluster` name : rook-ceph uid :  Before completing this step, validate these things. Failing to do so could result in data loss. Confirm that cluster.yaml contains the CephCluster CR. Confirm all critical resources listed above have had the ownerReference to the CephCluster CR removed. Remove the finalizer from the CephCluster resource. This will cause the resource to be immediately deleted by Kubernetes. 1 kubectl -n rook-ceph patch cephcluster/rook-ceph --type json --patch='[ { \"op\": \"remove\", \"path\": \"/metadata/finalizers\" } ]' After the finalizer is removed, the CephCluster will be immediately deleted. If all owner references were properly removed, all ceph daemons will continue running and there will be no downtime. Create the CephCluster CR with the same settings as previously 1 2 # Use the same cluster settings as exported in step 2 . kubectl create -f cluster.yaml If there are other CRs in terminating state such as CephBlockPools, CephObjectStores, or CephFilesystems, follow the above steps as well for those CRs: Backup the CR Remove the finalizer and confirm the CR is deleted (the underlying Ceph resources will be preserved) Create the CR again Scale up the operator 1 kubectl -n rook-ceph scale --replicas=1 deploy/rook-ceph-operator Watch the operator log to confirm that the reconcile completes successfully. 1 kubectl -n rook-ceph logs -f deployment/rook-ceph-operator","title":"Restoring CRDs After Deletion"},{"location":"Troubleshooting/disaster-recovery/#adopt-an-existing-rook-ceph-cluster-into-a-new-kubernetes-cluster","text":"Situations this section can help resolve: The Kubernetes environment underlying a running Rook Ceph cluster failed catastrophically, requiring a new Kubernetes environment in which the user wishes to recover the previous Rook Ceph cluster. The user wishes to migrate their existing Rook Ceph cluster to a new Kubernetes environment, and downtime can be tolerated.","title":"Adopt an existing Rook Ceph cluster into a new Kubernetes cluster"},{"location":"Troubleshooting/disaster-recovery/#prerequisites","text":"A working Kubernetes cluster to which we will migrate the previous Rook Ceph cluster. At least one Ceph mon db is in quorum, and sufficient number of Ceph OSD is up and in before disaster. The previous Rook Ceph cluster is not running.","title":"Prerequisites"},{"location":"Troubleshooting/disaster-recovery/#overview-for-steps-below","text":"Start a new and clean Rook Ceph cluster, with old CephCluster CephBlockPool CephFilesystem CephNFS CephObjectStore . Shut the new cluster down when it has been created successfully. 
Replace ceph-mon data with that of the old cluster. Replace fsid in secrets/rook-ceph-mon with that of the old one. Fix monmap in ceph-mon db. Fix ceph mon auth key. Disable auth. Start the new cluster, watch it resurrect. Fix admin auth key, and enable auth. Restart cluster for the final time.","title":"Overview for Steps below"},{"location":"Troubleshooting/disaster-recovery/#steps","text":"Assuming dataHostPathData is /var/lib/rook , and the CephCluster trying to adopt is named rook-ceph . Make sure the old Kubernetes cluster is completely torn down and the new Kubernetes cluster is up and running without Rook Ceph. Backup /var/lib/rook in all the Rook Ceph nodes to a different directory. Backups will be used later. Pick a /var/lib/rook/rook-ceph/rook-ceph.config from any previous Rook Ceph node and save the old cluster fsid from its content. Remove /var/lib/rook from all the Rook Ceph nodes. Add identical CephCluster descriptor to the new Kubernetes cluster, especially identical spec.storage.config and spec.storage.nodes , except mon.count , which should be set to 1 . Add identical CephFilesystem CephBlockPool CephNFS CephObjectStore descriptors (if any) to the new Kubernetes cluster. Install Rook Ceph in the new Kubernetes cluster. Watch the operator logs with kubectl -n rook-ceph logs -f rook-ceph-operator-xxxxxxx , and wait until the orchestration has settled. STATE : Now the cluster will have rook-ceph-mon-a , rook-ceph-mgr-a , and all the auxiliary pods up and running, and zero (hopefully) rook-ceph-osd-ID-xxxxxx running. ceph -s output should report 1 mon, 1 mgr running, and all of the OSDs down, all PGs are in unknown state. Rook should not start any OSD daemon since all devices belongs to the old cluster (which have a different fsid ). Run kubectl -n rook-ceph exec -it rook-ceph-mon-a-xxxxxxxx bash to enter the rook-ceph-mon-a pod, 1 2 mon-a# cat /etc/ceph/keyring-store/keyring # save this keyring content for later use mon-a# exit Stop the Rook operator by running kubectl -n rook-ceph edit deploy/rook-ceph-operator and set replicas to 0 . Stop cluster daemons by running kubectl -n rook-ceph delete deploy/X where X is every deployment in namespace rook-ceph , except rook-ceph-operator and rook-ceph-tools . Save the rook-ceph-mon-a address with kubectl -n rook-ceph get cm/rook-ceph-mon-endpoints -o yaml in the new Kubernetes cluster for later use. SSH to the host where rook-ceph-mon-a in the new Kubernetes cluster resides. Remove /var/lib/rook/mon-a Pick a healthy rook-ceph-mon-ID directory ( /var/lib/rook/mon-ID ) in the previous backup, copy to /var/lib/rook/mon-a . ID is any healthy mon node ID of the old cluster. Replace /var/lib/rook/mon-a/keyring with the saved keyring, preserving only the [mon.] section, remove [client.admin] section. Run docker run -it --rm -v /var/lib/rook:/var/lib/rook ceph/ceph:v14.2.1-20190430 bash . The Docker image tag should match the Ceph version used in the Rook cluster. The /etc/ceph/ceph.conf file needs to exist for ceph-mon to work. 1 2 3 4 5 6 7 8 9 10 11 12 13 touch /etc/ceph/ceph.conf cd /var/lib/rook ceph-mon --extract-monmap monmap --mon-data ./mon-a/data # Extract monmap from old ceph-mon db and save as monmap monmaptool --print monmap # Print the monmap content, which reflects the old cluster ceph-mon configuration. monmaptool --rm a monmap # Delete `a` from monmap. monmaptool --rm b monmap # Repeat, and delete `b` from monmap. 
monmaptool --rm c monmap # Repeat this pattern until all the old ceph-mons are removed monmaptool --rm d monmap monmaptool --rm e monmap monmaptool --addv a [v2:10.77.2.216:3300,v1:10.77.2.216:6789] monmap # Replace it with the rook-ceph-mon-a address you got from previous command. ceph-mon --inject-monmap monmap --mon-data ./mon-a/data # Replace monmap in ceph-mon db with our modified version. rm monmap exit Tell Rook to run as old cluster by running kubectl -n rook-ceph edit secret/rook-ceph-mon and changing fsid to the original fsid . Note that the fsid is base64 encoded and must not contain a trailing carriage return. For example: 1 echo -n a811f99a-d865-46b7-8f2c-f94c064e4356 | base64 # Replace with the fsid from your old cluster. Disable authentication by running kubectl -n rook-ceph edit cm/rook-config-override and adding content below: 1 2 3 4 5 6 7 data : config : | [global] auth cluster required = none auth service required = none auth client required = none auth supported = none Bring the Rook Ceph operator back online by running kubectl -n rook-ceph edit deploy/rook-ceph-operator and set replicas to 1 . Watch the operator logs with kubectl -n rook-ceph logs -f rook-ceph-operator-xxxxxxx , and wait until the orchestration has settled. STATE : Now the new cluster should be up and running with authentication disabled. ceph -s should report 1 mon & 1 mgr & all of the OSDs up and running, and all PGs in either active or degraded state. Run kubectl -n rook-ceph exec -it rook-ceph-tools-XXXXXXX bash to enter tools pod: 1 2 3 4 vi key # [ paste keyring content saved before, preserving only ` [ client admin ] ` section ] ceph auth import -i key rm key Re-enable authentication by running kubectl -n rook-ceph edit cm/rook-config-override and removing auth configuration added in previous steps. Stop the Rook operator by running kubectl -n rook-ceph edit deploy/rook-ceph-operator and set replicas to 0 . Shut down entire new cluster by running kubectl -n rook-ceph delete deploy/X where X is every deployment in namespace rook-ceph , except rook-ceph-operator and rook-ceph-tools , again. This time OSD daemons are present and should be removed too. Bring the Rook Ceph operator back online by running kubectl -n rook-ceph edit deploy/rook-ceph-operator and set replicas to 1 . Watch the operator logs with kubectl -n rook-ceph logs -f rook-ceph-operator-xxxxxxx , and wait until the orchestration has settled. STATE : Now the new cluster should be up and running with authentication enabled. ceph -s output should not change much comparing to previous steps.","title":"Steps"},{"location":"Troubleshooting/disaster-recovery/#backing-up-and-restoring-a-cluster-based-on-pvcs-into-a-new-kubernetes-cluster","text":"It is possible to migrate/restore an rook/ceph cluster from an existing Kubernetes cluster to a new one without resorting to SSH access or ceph tooling. This allows doing the migration using standard kubernetes resources only. This guide assumes the following: You have a CephCluster that uses PVCs to persist mon and osd data. Let's call it the \"old cluster\" You can restore the PVCs as-is in the new cluster. Usually this is done by taking regular snapshots of the PVC volumes and using a tool that can re-create PVCs from these snapshots in the underlying cloud provider. Velero is one such tool. You have regular backups of the secrets and configmaps in the rook-ceph namespace. Velero provides this functionality too. 
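For example, if Velero is used for those backups, a namespace-scoped backup of the old cluster could be taken with a command along these lines (the backup name is arbitrary and shown only as an illustration): 1 velero backup create rook-ceph-backup --include-namespaces rook-ceph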
Do the following in the new cluster: Stop the rook operator by scaling the deployment rook-ceph-operator down to zero: kubectl -n rook-ceph scale deployment rook-ceph-operator --replicas 0 and deleting the other deployments. An example command to do this is k -n rook-ceph delete deployment -l operator!=rook Restore the rook PVCs to the new cluster. Copy the keyring and fsid secrets from the old cluster: rook-ceph-mgr-a-keyring , rook-ceph-mon , rook-ceph-mons-keyring , rook-ceph-osd-0-keyring , ... Delete mon services and copy them from the old cluster: rook-ceph-mon-a , rook-ceph-mon-b , ... Note that simply re-applying won't work because the goal here is to restore the clusterIP in each service and this field is immutable in Service resources. Copy the endpoints configmap from the old cluster: rook-ceph-mon-endpoints Scale the rook operator up again : kubectl -n rook-ceph scale deployment rook-ceph-operator --replicas 1 Wait until the reconciliation is over.","title":"Backing up and restoring a cluster based on PVCs into a new Kubernetes cluster"},{"location":"Troubleshooting/disaster-recovery/#restoring-the-rook-cluster-after-the-rook-namespace-is-deleted","text":"When the rook-ceph namespace is accidentally deleted, the good news is that the cluster can be restored. With the content in the directory dataDirHostPath and the original OSD disks, the ceph cluster could be restored with this guide. You need to manually create a ConfigMap and a Secret to make it work. The information required for the ConfigMap and Secret can be found in the dataDirHostPath directory. The first resource is the secret named rook-ceph-mon as seen in this example below: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 apiVersion : v1 data : ceph-secret : QVFCZ0h6VmorcVNhSGhBQXVtVktNcjcrczNOWW9Oa2psYkErS0E9PQ== ceph-username : Y2xpZW50LmFkbWlu fsid : M2YyNzE4NDEtNjE4OC00N2MxLWIzZmQtOTBmZDRmOTc4Yzc2 mon-secret : QVFCZ0h6VmorcVNhSGhBQXVtVktNcjcrczNOWW9Oa2psYkErS0E9PQ== kind : Secret metadata : finalizers : - ceph.rook.io/disaster-protection name : rook-ceph-mon namespace : rook-ceph ownerReferences : null type : kubernetes.io/rook The values for the secret can be found in $dataDirHostPath/rook-ceph/client.admin.keyring and $dataDirHostPath/rook-ceph/rook-ceph.config . - ceph-secret and mon-secret are to be filled with the client.admin 's keyring contents. - ceph-username : set to the string client.admin - fsid : set to the original ceph cluster id. All the fields in data section need to be encoded in base64. Coding could be done like this: 1 echo -n \"string to code\" | base64 -i - Now save the secret as rook-ceph-mon.yaml , to be created later in the restore. 
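As an illustrative sketch of producing those base64 values (assuming dataDirHostPath is /var/lib/rook , matching the paths above): 1 2 3 4 # Encode the client.admin key for the ceph-secret and mon-secret fields echo -n $(grep key /var/lib/rook/rook-ceph/client.admin.keyring | awk '{print $3}') | base64 # Encode the fsid from rook-ceph.config echo -n $(grep fsid /var/lib/rook/rook-ceph/rook-ceph.config | awk '{print $3}') | base64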
The second resource is the configmap named rook-ceph-mon-endpoints as seen in the example below: 1 2 3 4 5 6 7 8 9 10 11 12 13 apiVersion : v1 data : csi-cluster-config-json : '[{\"clusterID\":\"rook-ceph\",\"monitors\":[\"169.169.241.153:6789\",\"169.169.82.57:6789\",\"169.169.7.81:6789\"],\"namespace\":\"\"}]' data : k=169.169.241.153:6789,m=169.169.82.57:6789,o=169.169.7.81:6789 mapping : '{\"node\":{\"k\":{\"Name\":\"10.138.55.111\",\"Hostname\":\"10.138.55.111\",\"Address\":\"10.138.55.111\"},\"m\":{\"Name\":\"10.138.55.120\",\"Hostname\":\"10.138.55.120\",\"Address\":\"10.138.55.120\"},\"o\":{\"Name\":\"10.138.55.112\",\"Hostname\":\"10.138.55.112\",\"Address\":\"10.138.55.112\"}}}' maxMonId : \"15\" kind : ConfigMap metadata : finalizers : - ceph.rook.io/disaster-protection name : rook-ceph-mon-endpoints namespace : rook-ceph ownerReferences : null The monitors' service IPs are kept in the monitor data store, so the services need to be re-created with their original IPs. After you create this configmap with the original service IPs, the rook operator will create the correct services for you, with IPs matching those in the monitor data store. The monitor IDs, their service IPs, and the mapping between them can be found in dataDirHostPath/rook-ceph/rook-ceph.config, for example: 1 2 3 4 [global] fsid = 3f271841-6188-47c1-b3fd-90fd4f978c76 mon initial members = m o k mon host = [v2:169.169.82.57:3300,v1:169.169.82.57:6789],[v2:169.169.7.81:3300,v1:169.169.7.81:6789],[v2:169.169.241.153:3300,v1:169.169.241.153:6789] mon initial members and mon host hold the monitors' IDs and IPs respectively, listed in the same order, so you can tell which monitor has which service IP address. Modify the csi-cluster-config-json and data fields of your rook-ceph-mon-endpoints.yaml based on this reading of rook-ceph.config . The mapping field tells rook where to schedule the monitors' pods. Search the dataDirHostPath on all Ceph cluster hosts for mon-m , mon-o , and mon-k . If you find mon-m on host 10.138.55.120 , fill in 10.138.55.120 in the mapping field for m , and do the same for the others. Update maxMonId to the 0-based numeric ID of the highest monitor ID. For example, 15 is the 0-based ID for mon o . Now save this configmap in the file rook-ceph-mon-endpoints.yaml, to be created later in the restore. Now that you have the info for the secret and the configmap, you are ready to restore the running cluster. Deploy Rook Ceph using the YAML files or Helm, with the same settings you had previously. 1 kubectl create -f crds.yaml -f common.yaml -f operator.yaml After the operator is running, create the configmap and secret you have just crafted: 1 kubectl create -f rook-ceph-mon.yaml -f rook-ceph-mon-endpoints.yaml Create your Ceph cluster CR (if possible, with the same settings as existed previously): 1 kubectl create -f cluster.yaml Now your Rook Ceph cluster should be running again.","title":"Restoring the Rook cluster after the Rook namespace is deleted"},{"location":"Troubleshooting/krew-plugin/","text":"The Rook Krew plugin is a tool to help troubleshoot your Rook cluster. Here are a few of the operations that the plugin will assist with: - Health of the Rook pods - Health of the Ceph cluster - Create \"debug\" pods for mons and OSDs that are in need of special Ceph maintenance operations - Restart the operator - Purge an OSD - Run any ceph command See the kubectl-rook-ceph documentation for more details.
Installation \u00b6 Install Krew Install Rook plugin 1 kubectl krew install rook-ceph Ceph Commands \u00b6 Run any ceph command with kubectl rook-ceph ceph  . For example, get the Ceph status: 1 kubectl rook-ceph ceph status Output: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 cluster: id: a1ac6554-4cc8-4c3b-a8a3-f17f5ec6f529 health: HEALTH_OK services: mon: 3 daemons, quorum a,b,c (age 11m) mgr: a(active, since 10m) mds: 1/1 daemons up, 1 hot standby osd: 3 osds: 3 up (since 10m), 3 in (since 8d) data: volumes: 1/1 healthy pools: 6 pools, 137 pgs objects: 34 objects, 4.1 KiB usage: 58 MiB used, 59 GiB / 59 GiB avail pgs: 137 active+clean io: client: 1.2 KiB/s rd, 2 op/s rd, 0 op/s wr Reference: Ceph Status Debug Mode \u00b6 Debug mode can be useful when a MON or OSD needs advanced maintenance operations that require the daemon to be stopped. Ceph tools such as ceph-objectstore-tool , ceph-bluestore-tool , or ceph-monstore-tool are commonly used in these scenarios. Debug mode will set up the MON or OSD so that these commands can be run. Start the debug pod for mon b 1 kubectl rook-ceph debug start rook-ceph-mon-b Stop the debug pod for mon b 1 kubectl rook-ceph debug stop rook-ceph-mon-b Reference: Debug Mode","title":"Krew Plugin"},{"location":"Troubleshooting/krew-plugin/#installation","text":"Install Krew Install Rook plugin 1 kubectl krew install rook-ceph","title":"Installation"},{"location":"Troubleshooting/krew-plugin/#ceph-commands","text":"Run any ceph command with kubectl rook-ceph ceph  . For example, get the Ceph status: 1 kubectl rook-ceph ceph status Output: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 cluster: id: a1ac6554-4cc8-4c3b-a8a3-f17f5ec6f529 health: HEALTH_OK services: mon: 3 daemons, quorum a,b,c (age 11m) mgr: a(active, since 10m) mds: 1/1 daemons up, 1 hot standby osd: 3 osds: 3 up (since 10m), 3 in (since 8d) data: volumes: 1/1 healthy pools: 6 pools, 137 pgs objects: 34 objects, 4.1 KiB usage: 58 MiB used, 59 GiB / 59 GiB avail pgs: 137 active+clean io: client: 1.2 KiB/s rd, 2 op/s rd, 0 op/s wr Reference: Ceph Status","title":"Ceph Commands"},{"location":"Troubleshooting/krew-plugin/#debug-mode","text":"Debug mode can be useful when a MON or OSD needs advanced maintenance operations that require the daemon to be stopped. Ceph tools such as ceph-objectstore-tool , ceph-bluestore-tool , or ceph-monstore-tool are commonly used in these scenarios. Debug mode will set up the MON or OSD so that these commands can be run. Start the debug pod for mon b 1 kubectl rook-ceph debug start rook-ceph-mon-b Stop the debug pod for mon b 1 kubectl rook-ceph debug stop rook-ceph-mon-b Reference: Debug Mode","title":"Debug Mode"},{"location":"Troubleshooting/openshift-common-issues/","text":"Enable Monitoring in the Storage Dashboard \u00b6 OpenShift Console uses OpenShift Prometheus for monitoring and populating data in Storage Dashboard. Additional configuration is required to monitor the Ceph Cluster from the storage dashboard. Change the monitoring namespace to openshift-monitoring Change the namespace of the RoleBinding rook-ceph-metrics from rook-ceph to openshift-monitoring for the prometheus-k8s ServiceAccount in rbac.yaml . 1 2 3 4 subjects : - kind : ServiceAccount name : prometheus-k8s namespace : openshift-monitoring Enable Ceph Cluster monitoring Follow ceph-monitoring/prometheus-alerts . 
Set the required label on the namespace 1 oc label namespace rook-ceph \"openshift.io/cluster-monitoring=true\" Troubleshoot Monitoring Issues \u00b6 Attention Switch to rook-ceph namespace using oc project rook-ceph . Ensure ceph-mgr pod is Running 1 2 3 $ oc get pods -l app = rook-ceph-mgr NAME READY STATUS RESTARTS AGE rook-ceph-mgr 1/1 Running 0 14h Ensure service monitor is present 1 2 3 $ oc get servicemonitor rook-ceph-mgr NAME AGE rook-ceph-mgr 14h Ensure the prometheus rules object has been created 1 2 3 $ oc get prometheusrules -l prometheus = rook-prometheus NAME AGE prometheus-ceph-rules 14h","title":"OpenShift Common Issues"},{"location":"Troubleshooting/openshift-common-issues/#enable-monitoring-in-the-storage-dashboard","text":"OpenShift Console uses OpenShift Prometheus for monitoring and populating data in Storage Dashboard. Additional configuration is required to monitor the Ceph Cluster from the storage dashboard. Change the monitoring namespace to openshift-monitoring Change the namespace of the RoleBinding rook-ceph-metrics from rook-ceph to openshift-monitoring for the prometheus-k8s ServiceAccount in rbac.yaml . 1 2 3 4 subjects : - kind : ServiceAccount name : prometheus-k8s namespace : openshift-monitoring Enable Ceph Cluster monitoring Follow ceph-monitoring/prometheus-alerts . Set the required label on the namespace 1 oc label namespace rook-ceph \"openshift.io/cluster-monitoring=true\"","title":"Enable Monitoring in the Storage Dashboard"},{"location":"Troubleshooting/openshift-common-issues/#troubleshoot-monitoring-issues","text":"Attention Switch to rook-ceph namespace using oc project rook-ceph . Ensure ceph-mgr pod is Running 1 2 3 $ oc get pods -l app = rook-ceph-mgr NAME READY STATUS RESTARTS AGE rook-ceph-mgr 1/1 Running 0 14h Ensure service monitor is present 1 2 3 $ oc get servicemonitor rook-ceph-mgr NAME AGE rook-ceph-mgr 14h Ensure the prometheus rules object has been created 1 2 3 $ oc get prometheusrules -l prometheus = rook-prometheus NAME AGE prometheus-ceph-rules 14h","title":"Troubleshoot Monitoring Issues"},{"location":"Troubleshooting/performance-profiling/","text":"Collect perf data of a ceph process at runtime \u00b6 Warn This is an advanced topic. Please be aware of the steps you're performing, or reach out to the experts for further guidance. There are some cases where the debug logs are not sufficient to investigate issues like high CPU utilization of a Ceph process. In that situation, it is useful to collect coredump and perf information from the Ceph process, which can then be shared with the Ceph team in an issue. To collect this information, please follow these steps: Edit the rook-ceph-operator deployment and set ROOK_HOSTPATH_REQUIRES_PRIVILEGED to true . Wait for the pods to get reinitialized: 1 # watch kubectl -n rook-ceph get pods Enter the respective pod of the Ceph process which needs to be investigated. For example: 1 # kubectl -n rook-ceph exec -it deploy/rook-ceph-mon-a -- bash Install gdb , perf , and git inside the pod.
For example: 1 # dnf install gdb git perf -y Capture perf data of the respective Ceph process: 1 2 # perf record -e cycles --call-graph dwarf -p  # perf report > perf_report_ Grab the pid of the respective Ceph process to collect its backtrace at multiple time instances, attach gdb to it and share the output gdb.txt : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 # gdb -p  - set pag off - set log on - thr a a bt full # This captures the complete backtrace of the process - backtrace - Ctrl+C - backtrace - Ctrl+C - backtrace - Ctrl+C - backtrace - set log off - q (to exit out of gdb) Grab the live coredump of the respective process using gcore : 1 # gcore  Capture the Wallclock Profiler data for the respective Ceph process and share the output gdbpmp.data generated: 1 2 3 # git clone https://github.com/markhpc/gdbpmp # cd gdbpmp # ./gdbpmp.py -p  -n 100 -o gdbpmp.data Collect the perf.data , perf_report , backtrace of the process gdb.txt , core file and profiler data gdbpmp.data and upload it to the tracker issue for troubleshooting purposes.","title":"Performance Profiling"},{"location":"Troubleshooting/performance-profiling/#collect-perf-data-of-a-ceph-process-at-runtime","text":"Warn This is an advanced topic please be aware of the steps you're performing or reach out to the experts for further guidance. There are some cases where the debug logs are not sufficient to investigate issues like high CPU utilization of a Ceph process. In that situation, coredump and perf information of a Ceph process is useful to be collected which can be shared with the Ceph team in an issue. To collect this information, please follow these steps: Edit the rook-ceph-operator deployment and set ROOK_HOSTPATH_REQUIRES_PRIVILEGED to true . Wait for the pods to get reinitialized: 1 # watch kubectl -n rook-ceph get pods Enter the respective pod of the Ceph process which needs to be investigated. For example: 1 # kubectl -n rook-ceph exec -it deploy/rook-ceph-mon-a -- bash Install gdb , perf and git inside the pod. For example: 1 # dnf install gdb git perf -y Capture perf data of the respective Ceph process: 1 2 # perf record -e cycles --call-graph dwarf -p  # perf report > perf_report_ Grab the pid of the respective Ceph process to collect its backtrace at multiple time instances, attach gdb to it and share the output gdb.txt : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 # gdb -p  - set pag off - set log on - thr a a bt full # This captures the complete backtrace of the process - backtrace - Ctrl+C - backtrace - Ctrl+C - backtrace - Ctrl+C - backtrace - set log off - q (to exit out of gdb) Grab the live coredump of the respective process using gcore : 1 # gcore  Capture the Wallclock Profiler data for the respective Ceph process and share the output gdbpmp.data generated: 1 2 3 # git clone https://github.com/markhpc/gdbpmp # cd gdbpmp # ./gdbpmp.py -p  -n 100 -o gdbpmp.data Collect the perf.data , perf_report , backtrace of the process gdb.txt , core file and profiler data gdbpmp.data and upload it to the tracker issue for troubleshooting purposes.","title":"Collect perf data of a ceph process at runtime"},{"location":"Upgrade/ceph-upgrade/","text":"This guide will walk through the steps to upgrade the version of Ceph in a Rook cluster. Rook and Ceph upgrades are designed to ensure data remains available even while the upgrade is proceeding. Rook will perform the upgrades in a rolling fashion such that application pods are not disrupted. Rook is cautious when performing upgrades. 
When an upgrade is requested (the Ceph image has been updated in the CR), Rook will go through all the daemons one by one and will individually perform checks on them. It will make sure a particular daemon can be stopped before performing the upgrade. Once the deployment has been updated, it checks if this is ok to continue. After each daemon is updated we wait for things to settle (monitors to be in a quorum, PGs to be clean for OSDs, up for MDSes, etc.), then only when the condition is met we move to the next daemon. We repeat this process until all the daemons have been updated. Considerations \u00b6 WARNING : Upgrading a Rook cluster is not without risk. There may be unexpected issues or obstacles that damage the integrity and health of the storage cluster, including data loss. The Rook cluster's storage may be unavailable for short periods during the upgrade process. Read this document in full before undertaking a Rook cluster upgrade. Supported Versions \u00b6 Rook v1.12 supports the following Ceph versions: Ceph Reef v18.2.0 or newer Ceph Quincy v17.2.0 or newer Ceph Pacific v16.2.7 or newer Support for Ceph Pacific (16.2.x) will be removed in the next Rook release. It will be mandatory to upgrade to Quincy or Reef before upgrading to the Rook release after v1.12.x. Important When an update is requested, the operator will check Ceph's status, if it is in HEALTH_ERR the operator will refuse to proceed with the upgrade. Warning Ceph v17.2.2 has a blocking issue when running with Rook. Use v17.2.3 or newer when possible. Quincy Consideration \u00b6 In Ceph Quincy (v17), the device_health_metrics pool was renamed to .mgr . Ceph will perform this migration automatically. The pool rename will be automatically handled by Rook if the configuration of the device_health_metrics pool is not customized via CephBlockPool. If the configuration of the device_health_metrics pool is customized via CephBlockPool, two extra steps are required after the Ceph upgrade is complete. Once upgrade is complete: Create a new CephBlockPool to configure the .mgr built-in pool. For an example, see builtin mgr pool . Delete the old CephBlockPool that represents the device_health_metrics pool. CephNFS User Consideration \u00b6 Ceph Quincy v17.2.1 has a potentially breaking regression with CephNFS. See the NFS documentation's known issue for more detail. Ceph Images \u00b6 Official Ceph container images can be found on Quay . These images are tagged in a few ways: The most explicit form of tags are full-ceph-version-and-build tags (e.g., v17.2.6-20230410 ). These tags are recommended for production clusters, as there is no possibility for the cluster to be heterogeneous with respect to the version of Ceph running in containers. Ceph major version tags (e.g., v17 ) are useful for development and test clusters so that the latest version of Ceph is always available. Ceph containers other than the official images from the registry above will not be supported. Example Upgrade to Ceph Quincy \u00b6 1. Update the Ceph daemons \u00b6 The upgrade will be automated by the Rook operator after the desired Ceph image is changed in the CephCluster CRD ( spec.cephVersion.image ). 1 2 3 ROOK_CLUSTER_NAMESPACE=rook-ceph NEW_CEPH_IMAGE='quay.io/ceph/ceph:v17.2.6-20230410' kubectl -n $ROOK_CLUSTER_NAMESPACE patch CephCluster $ROOK_CLUSTER_NAMESPACE --type=merge -p \"{\\\"spec\\\": {\\\"cephVersion\\\": {\\\"image\\\": \\\"$NEW_CEPH_IMAGE\\\"}}}\" 2. 
Wait for the pod updates \u00b6 As with upgrading Rook, now wait for the upgrade to complete. Status can be determined in a similar way to the Rook upgrade as well. 1 watch --exec kubectl -n $ROOK_CLUSTER_NAMESPACE get deployments -l rook_cluster=$ROOK_CLUSTER_NAMESPACE -o jsonpath='{range .items[*]}{.metadata.name}{\" \\treq/upd/avl: \"}{.spec.replicas}{\"/\"}{.status.updatedReplicas}{\"/\"}{.status.readyReplicas}{\" \\tceph-version=\"}{.metadata.labels.ceph-version}{\"\\n\"}{end}' Confirm the upgrade is completed when the versions are all on the desired Ceph version. 1 2 3 4 5 6 kubectl -n $ROOK_CLUSTER_NAMESPACE get deployment -l rook_cluster=$ROOK_CLUSTER_NAMESPACE -o jsonpath='{range .items[*]}{\"ceph-version=\"}{.metadata.labels.ceph-version}{\"\\n\"}{end}' | sort | uniq This cluster is not yet finished: ceph-version=15.2.13-0 ceph-version=v17.2.6-0 This cluster is finished: ceph-version=v17.2.6-0 3. Verify cluster health \u00b6 Verify the Ceph cluster's health using the health verification .","title":"Ceph Upgrades"},{"location":"Upgrade/ceph-upgrade/#considerations","text":"WARNING : Upgrading a Rook cluster is not without risk. There may be unexpected issues or obstacles that damage the integrity and health of the storage cluster, including data loss. The Rook cluster's storage may be unavailable for short periods during the upgrade process. Read this document in full before undertaking a Rook cluster upgrade.","title":"Considerations"},{"location":"Upgrade/ceph-upgrade/#supported-versions","text":"Rook v1.12 supports the following Ceph versions: Ceph Reef v18.2.0 or newer Ceph Quincy v17.2.0 or newer Ceph Pacific v16.2.7 or newer Support for Ceph Pacific (16.2.x) will be removed in the next Rook release. It will be mandatory to upgrade to Quincy or Reef before upgrading to the Rook release after v1.12.x. Important When an update is requested, the operator will check Ceph's status, if it is in HEALTH_ERR the operator will refuse to proceed with the upgrade. Warning Ceph v17.2.2 has a blocking issue when running with Rook. Use v17.2.3 or newer when possible.","title":"Supported Versions"},{"location":"Upgrade/ceph-upgrade/#quincy-consideration","text":"In Ceph Quincy (v17), the device_health_metrics pool was renamed to .mgr . Ceph will perform this migration automatically. The pool rename will be automatically handled by Rook if the configuration of the device_health_metrics pool is not customized via CephBlockPool. If the configuration of the device_health_metrics pool is customized via CephBlockPool, two extra steps are required after the Ceph upgrade is complete. Once upgrade is complete: Create a new CephBlockPool to configure the .mgr built-in pool. For an example, see builtin mgr pool . Delete the old CephBlockPool that represents the device_health_metrics pool.","title":"Quincy Consideration"},{"location":"Upgrade/ceph-upgrade/#cephnfs-user-consideration","text":"Ceph Quincy v17.2.1 has a potentially breaking regression with CephNFS. See the NFS documentation's known issue for more detail.","title":"CephNFS User Consideration"},{"location":"Upgrade/ceph-upgrade/#ceph-images","text":"Official Ceph container images can be found on Quay . These images are tagged in a few ways: The most explicit form of tags are full-ceph-version-and-build tags (e.g., v17.2.6-20230410 ). These tags are recommended for production clusters, as there is no possibility for the cluster to be heterogeneous with respect to the version of Ceph running in containers. 
Ceph major version tags (e.g., v17 ) are useful for development and test clusters so that the latest version of Ceph is always available. Ceph containers other than the official images from the registry above will not be supported.","title":"Ceph Images"},{"location":"Upgrade/ceph-upgrade/#example-upgrade-to-ceph-quincy","text":"","title":"Example Upgrade to Ceph Quincy"},{"location":"Upgrade/ceph-upgrade/#1-update-the-ceph-daemons","text":"The upgrade will be automated by the Rook operator after the desired Ceph image is changed in the CephCluster CRD ( spec.cephVersion.image ). 1 2 3 ROOK_CLUSTER_NAMESPACE=rook-ceph NEW_CEPH_IMAGE='quay.io/ceph/ceph:v17.2.6-20230410' kubectl -n $ROOK_CLUSTER_NAMESPACE patch CephCluster $ROOK_CLUSTER_NAMESPACE --type=merge -p \"{\\\"spec\\\": {\\\"cephVersion\\\": {\\\"image\\\": \\\"$NEW_CEPH_IMAGE\\\"}}}\"","title":"1. Update the Ceph daemons"},{"location":"Upgrade/ceph-upgrade/#2-wait-for-the-pod-updates","text":"As with upgrading Rook, now wait for the upgrade to complete. Status can be determined in a similar way to the Rook upgrade as well. 1 watch --exec kubectl -n $ROOK_CLUSTER_NAMESPACE get deployments -l rook_cluster=$ROOK_CLUSTER_NAMESPACE -o jsonpath='{range .items[*]}{.metadata.name}{\" \\treq/upd/avl: \"}{.spec.replicas}{\"/\"}{.status.updatedReplicas}{\"/\"}{.status.readyReplicas}{\" \\tceph-version=\"}{.metadata.labels.ceph-version}{\"\\n\"}{end}' Confirm the upgrade is completed when the versions are all on the desired Ceph version. 1 2 3 4 5 6 kubectl -n $ROOK_CLUSTER_NAMESPACE get deployment -l rook_cluster=$ROOK_CLUSTER_NAMESPACE -o jsonpath='{range .items[*]}{\"ceph-version=\"}{.metadata.labels.ceph-version}{\"\\n\"}{end}' | sort | uniq This cluster is not yet finished: ceph-version=15.2.13-0 ceph-version=v17.2.6-0 This cluster is finished: ceph-version=v17.2.6-0","title":"2. Wait for the pod updates"},{"location":"Upgrade/ceph-upgrade/#3-verify-cluster-health","text":"Verify the Ceph cluster's health using the health verification .","title":"3. Verify cluster health"},{"location":"Upgrade/health-verification/","text":"Rook and Ceph upgrades are designed to ensure data remains available even while the upgrade is proceeding. Rook will perform the upgrades in a rolling fashion such that application pods are not disrupted. To ensure the upgrades are seamless, it is important to begin the upgrades with Ceph in a fully healthy state. This guide reviews ways of verifying the health of a CephCluster. See the troubleshooting documentation for any issues during upgrades: General K8s troubleshooting Ceph common issues CSI common issues Pods all Running \u00b6 In a healthy Rook cluster, all pods in the Rook namespace should be in the Running (or Completed ) state and have few, if any, pod restarts. 1 2 ROOK_CLUSTER_NAMESPACE=rook-ceph kubectl -n $ROOK_CLUSTER_NAMESPACE get pods Status Output \u00b6 The Rook toolbox contains the Ceph tools that gives status details of the cluster with the ceph status command. 
Below is an output sample: 1 2 TOOLS_POD=$(kubectl -n $ROOK_CLUSTER_NAMESPACE get pod -l \"app=rook-ceph-tools\" -o jsonpath='{.items[*].metadata.name}') kubectl -n $ROOK_CLUSTER_NAMESPACE exec -it $TOOLS_POD -- ceph status The output should look similar to the following: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 cluster: id: a3f4d647-9538-4aff-9fd1-b845873c3fe9 health: HEALTH_OK services: mon: 3 daemons, quorum b,c,a mgr: a(active) mds: myfs-1/1/1 up {0=myfs-a=up:active}, 1 up:standby-replay osd: 6 osds: 6 up, 6 in rgw: 1 daemon active data: pools: 9 pools, 900 pgs objects: 67 objects, 11 KiB usage: 6.1 GiB used, 54 GiB / 60 GiB avail pgs: 900 active+clean io: client: 7.4 KiB/s rd, 681 B/s wr, 11 op/s rd, 4 op/s wr recovery: 164 B/s, 1 objects/s In the output above, note the following indications that the cluster is in a healthy state: Cluster health: The overall cluster status is HEALTH_OK and there are no warning or error status messages displayed. Monitors (mon): All of the monitors are included in the quorum list. Manager (mgr): The Ceph manager is in the active state. OSDs (osd): All OSDs are up and in . Placement groups (pgs): All PGs are in the active+clean state. (If applicable) Ceph filesystem metadata server (mds): all MDSes are active for all filesystems (If applicable) Ceph object store RADOS gateways (rgw): all daemons are active If the ceph status output has deviations from the general good health described above, there may be an issue that needs to be investigated further. Other commands may show more relevant details on the health of the system, such as ceph osd status . See the Ceph troubleshooting docs for help. Upgrading an unhealthy cluster \u00b6 Rook will not upgrade Ceph daemons if the health is in a HEALTH_ERR state. Rook can be configured to proceed with the (potentially unsafe) upgrade by setting either skipUpgradeChecks: true or continueUpgradeAfterChecksEvenIfNotHealthy: true as described in the cluster CR settings . Container Versions \u00b6 The container version running in a specific pod in the Rook cluster can be verified in its pod spec output. For example, for the monitor pod mon-b , verify the container version it is running with the below commands: 1 2 POD_NAME=$(kubectl -n $ROOK_CLUSTER_NAMESPACE get pod -o custom-columns=name:.metadata.name --no-headers | grep rook-ceph-mon-b) kubectl -n $ROOK_CLUSTER_NAMESPACE get pod ${POD_NAME} -o jsonpath='{.spec.containers[0].image}' The status and container versions for all Rook pods can be collected all at once with the following commands: 1 2 kubectl -n $ROOK_OPERATOR_NAMESPACE get pod -o jsonpath='{range .items[*]}{.metadata.name}{\"\\n\\t\"}{.status.phase}{\"\\t\\t\"}{.spec.containers[0].image}{\"\\t\"}{.spec.initContainers[0]}{\"\\n\"}{end}' && \\ kubectl -n $ROOK_CLUSTER_NAMESPACE get pod -o jsonpath='{range .items[*]}{.metadata.name}{\"\\n\\t\"}{.status.phase}{\"\\t\\t\"}{.spec.containers[0].image}{\"\\t\"}{.spec.initContainers[0].image}{\"\\n\"}{end}' The rook-version label exists on Ceph resources. For various resource controllers, a summary of the resource controllers can be gained with the commands below. These will report the requested, updated, and currently available replicas for various Rook resources in addition to the version of Rook for resources managed by Rook. Note that the operator and toolbox deployments do not have a rook-version label set. 
1 2 3 kubectl -n $ROOK_CLUSTER_NAMESPACE get deployments -o jsonpath='{range .items[*]}{.metadata.name}{\" \\treq/upd/avl: \"}{.spec.replicas}{\"/\"}{.status.updatedReplicas}{\"/\"}{.status.readyReplicas}{\" \\trook-version=\"}{.metadata.labels.rook-version}{\"\\n\"}{end}' kubectl -n $ROOK_CLUSTER_NAMESPACE get jobs -o jsonpath='{range .items[*]}{.metadata.name}{\" \\tsucceeded: \"}{.status.succeeded}{\" \\trook-version=\"}{.metadata.labels.rook-version}{\"\\n\"}{end}' Rook Volume Health \u00b6 Any pod that is using a Rook volume should also remain healthy: The pod should be in the Running state with few, if any, restarts There should be no errors in its logs The pod should still be able to read and write to the attached Rook volume.","title":"Health Verification"},{"location":"Upgrade/health-verification/#pods-all-running","text":"In a healthy Rook cluster, all pods in the Rook namespace should be in the Running (or Completed ) state and have few, if any, pod restarts. 1 2 ROOK_CLUSTER_NAMESPACE=rook-ceph kubectl -n $ROOK_CLUSTER_NAMESPACE get pods","title":"Pods all Running"},{"location":"Upgrade/health-verification/#status-output","text":"The Rook toolbox contains the Ceph tools that gives status details of the cluster with the ceph status command. Below is an output sample: 1 2 TOOLS_POD=$(kubectl -n $ROOK_CLUSTER_NAMESPACE get pod -l \"app=rook-ceph-tools\" -o jsonpath='{.items[*].metadata.name}') kubectl -n $ROOK_CLUSTER_NAMESPACE exec -it $TOOLS_POD -- ceph status The output should look similar to the following: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 cluster: id: a3f4d647-9538-4aff-9fd1-b845873c3fe9 health: HEALTH_OK services: mon: 3 daemons, quorum b,c,a mgr: a(active) mds: myfs-1/1/1 up {0=myfs-a=up:active}, 1 up:standby-replay osd: 6 osds: 6 up, 6 in rgw: 1 daemon active data: pools: 9 pools, 900 pgs objects: 67 objects, 11 KiB usage: 6.1 GiB used, 54 GiB / 60 GiB avail pgs: 900 active+clean io: client: 7.4 KiB/s rd, 681 B/s wr, 11 op/s rd, 4 op/s wr recovery: 164 B/s, 1 objects/s In the output above, note the following indications that the cluster is in a healthy state: Cluster health: The overall cluster status is HEALTH_OK and there are no warning or error status messages displayed. Monitors (mon): All of the monitors are included in the quorum list. Manager (mgr): The Ceph manager is in the active state. OSDs (osd): All OSDs are up and in . Placement groups (pgs): All PGs are in the active+clean state. (If applicable) Ceph filesystem metadata server (mds): all MDSes are active for all filesystems (If applicable) Ceph object store RADOS gateways (rgw): all daemons are active If the ceph status output has deviations from the general good health described above, there may be an issue that needs to be investigated further. Other commands may show more relevant details on the health of the system, such as ceph osd status . See the Ceph troubleshooting docs for help.","title":"Status Output"},{"location":"Upgrade/health-verification/#upgrading-an-unhealthy-cluster","text":"Rook will not upgrade Ceph daemons if the health is in a HEALTH_ERR state. Rook can be configured to proceed with the (potentially unsafe) upgrade by setting either skipUpgradeChecks: true or continueUpgradeAfterChecksEvenIfNotHealthy: true as described in the cluster CR settings .","title":"Upgrading an unhealthy cluster"},{"location":"Upgrade/health-verification/#container-versions","text":"The container version running in a specific pod in the Rook cluster can be verified in its pod spec output. 
For example, for the monitor pod mon-b , verify the container version it is running with the below commands: 1 2 POD_NAME=$(kubectl -n $ROOK_CLUSTER_NAMESPACE get pod -o custom-columns=name:.metadata.name --no-headers | grep rook-ceph-mon-b) kubectl -n $ROOK_CLUSTER_NAMESPACE get pod ${POD_NAME} -o jsonpath='{.spec.containers[0].image}' The status and container versions for all Rook pods can be collected all at once with the following commands: 1 2 kubectl -n $ROOK_OPERATOR_NAMESPACE get pod -o jsonpath='{range .items[*]}{.metadata.name}{\"\\n\\t\"}{.status.phase}{\"\\t\\t\"}{.spec.containers[0].image}{\"\\t\"}{.spec.initContainers[0]}{\"\\n\"}{end}' && \\ kubectl -n $ROOK_CLUSTER_NAMESPACE get pod -o jsonpath='{range .items[*]}{.metadata.name}{\"\\n\\t\"}{.status.phase}{\"\\t\\t\"}{.spec.containers[0].image}{\"\\t\"}{.spec.initContainers[0].image}{\"\\n\"}{end}' The rook-version label exists on Ceph resources. For various resource controllers, a summary of the resource controllers can be gained with the commands below. These will report the requested, updated, and currently available replicas for various Rook resources in addition to the version of Rook for resources managed by Rook. Note that the operator and toolbox deployments do not have a rook-version label set. 1 2 3 kubectl -n $ROOK_CLUSTER_NAMESPACE get deployments -o jsonpath='{range .items[*]}{.metadata.name}{\" \\treq/upd/avl: \"}{.spec.replicas}{\"/\"}{.status.updatedReplicas}{\"/\"}{.status.readyReplicas}{\" \\trook-version=\"}{.metadata.labels.rook-version}{\"\\n\"}{end}' kubectl -n $ROOK_CLUSTER_NAMESPACE get jobs -o jsonpath='{range .items[*]}{.metadata.name}{\" \\tsucceeded: \"}{.status.succeeded}{\" \\trook-version=\"}{.metadata.labels.rook-version}{\"\\n\"}{end}'","title":"Container Versions"},{"location":"Upgrade/health-verification/#rook-volume-health","text":"Any pod that is using a Rook volume should also remain healthy: The pod should be in the Running state with few, if any, restarts There should be no errors in its logs The pod should still be able to read and write to the attached Rook volume.","title":"Rook Volume Health"},{"location":"Upgrade/rook-upgrade/","text":"This guide will walk through the steps to upgrade the software in a Rook cluster from one version to the next. This guide focuses on updating the Rook version for the management layer, while the Ceph upgrade guide focuses on updating the data layer. Upgrades for both the operator and for Ceph are entirely automated except where Rook's permissions need to be explicitly updated by an admin or when incompatibilities need to be addressed manually due to customizations. We welcome feedback and opening issues! Supported Versions \u00b6 This guide is for upgrading from Rook v1.11.x to Rook v1.12.x . Please refer to the upgrade guides from previous releases for supported upgrade paths. Rook upgrades are only supported between official releases. For a guide to upgrade previous versions of Rook, please refer to the version of documentation for those releases. Upgrade 1.10 to 1.11 Upgrade 1.9 to 1.10 Upgrade 1.8 to 1.9 Upgrade 1.7 to 1.8 Upgrade 1.6 to 1.7 Upgrade 1.5 to 1.6 Upgrade 1.4 to 1.5 Upgrade 1.3 to 1.4 Upgrade 1.2 to 1.3 Upgrade 1.1 to 1.2 Upgrade 1.0 to 1.1 Upgrade 0.9 to 1.0 Upgrade 0.8 to 0.9 Upgrade 0.7 to 0.8 Upgrade 0.6 to 0.7 Upgrade 0.5 to 0.6 Important Rook releases from master are expressly unsupported. It is strongly recommended to use official releases of Rook. 
Unreleased versions from the master branch are subject to changes and incompatibilities that will not be supported in the official releases. Builds from the master branch can have functionality changed or removed at any time without compatibility support and without prior notice. Breaking changes in v1.12 \u00b6 The minimum supported version of Kubernetes is v1.22. CephCSI CephFS driver introduced a breaking change in v3.9.0. If any existing CephFS storageclass in the cluster has MountOptions parameter set, follow the steps mentioned in the CephCSI upgrade guide to ensure a smooth upgrade. Considerations \u00b6 With this upgrade guide, there are a few notes to consider: WARNING : Upgrading a Rook cluster is not without risk. There may be unexpected issues or obstacles that damage the integrity and health the storage cluster, including data loss. The Rook cluster's storage may be unavailable for short periods during the upgrade process for both Rook operator updates and for Ceph version updates. Read this document in full before undertaking a Rook cluster upgrade. Patch Release Upgrades \u00b6 Unless otherwise noted due to extenuating requirements, upgrades from one patch release of Rook to another are as simple as updating the common resources and the image of the Rook operator. For example, when Rook v1.12.1 is released, the process of updating from v1.12.0 is as simple as running the following: 1 2 git clone --single-branch --depth=1 --branch v1.12.1 https://github.com/rook/rook.git cd rook/deploy/examples If the Rook Operator or CephCluster are deployed into a different namespace than rook-ceph , see the Update common resources and CRDs section for instructions on how to change the default namespaces in common.yaml . Then, apply the latest changes from v1.12, and update the Rook Operator image. 1 2 kubectl apply -f common.yaml -f crds.yaml kubectl -n rook-ceph set image deploy/rook-ceph-operator rook-ceph-operator=rook/ceph:v1.12.1 As exemplified above, it is a good practice to update Rook common resources from the example manifests before any update. The common resources and CRDs might not be updated with every release, but Kubernetes will only apply updates to the ones that changed. Also update optional resources like Prometheus monitoring noted more fully in the upgrade section below . Helm \u00b6 If Rook is installed via the Helm chart, Helm will handle some details of the upgrade itself. The upgrade steps in this guide will clarify what Helm handles automatically. The rook-ceph helm chart upgrade performs the Rook upgrade. The rook-ceph-cluster helm chart upgrade performs a Ceph upgrade if the Ceph image is updated. Note Be sure to update to a supported Helm version Cluster Health \u00b6 In order to successfully upgrade a Rook cluster, the following prerequisites must be met: The cluster should be in a healthy state with full functionality. Review the health verification guide in order to verify a CephCluster is in a good starting state. All pods consuming Rook storage should be created, running, and in a steady state. Rook Operator Upgrade \u00b6 The examples given in this guide upgrade a live Rook cluster running v1.11.7 to the version v1.12.0 . This upgrade should work from any official patch release of Rook v1.11 to any official patch release of v1.12. Let's get started! Environment \u00b6 These instructions will work for as long the environment is parameterized correctly. Set the following environment variables, which will be used throughout this document. 
1 2 3 # Parameterize the environment export ROOK_OPERATOR_NAMESPACE=rook-ceph export ROOK_CLUSTER_NAMESPACE=rook-ceph 1. Update common resources and CRDs \u00b6 Hint Common resources and CRDs are automatically updated when using Helm charts. First, apply updates to Rook common resources. This includes modified privileges (RBAC) needed by the Operator. Also update the Custom Resource Definitions (CRDs). Get the latest common resources manifests that contain the latest changes. 1 2 git clone --single-branch --depth=1 --branch master https://github.com/rook/rook.git cd rook/deploy/examples If the Rook Operator or CephCluster are deployed into a different namespace than rook-ceph , update the common resource manifests to use your ROOK_OPERATOR_NAMESPACE and ROOK_CLUSTER_NAMESPACE using sed . 1 2 3 4 sed -i.bak \\ -e \"s/\\(.*\\):.*# namespace:operator/\\1: $ROOK_OPERATOR_NAMESPACE # namespace:operator/g\" \\ -e \"s/\\(.*\\):.*# namespace:cluster/\\1: $ROOK_CLUSTER_NAMESPACE # namespace:cluster/g\" \\ common.yaml Apply the resources. 1 kubectl apply -f common.yaml -f crds.yaml Prometheus Updates \u00b6 If Prometheus monitoring is enabled, follow this step to upgrade the Prometheus RBAC resources as well. 1 kubectl apply -f deploy/examples/monitoring/rbac.yaml 2. Update the Rook Operator \u00b6 Hint The operator is automatically updated when using Helm charts. The largest portion of the upgrade is triggered when the operator's image is updated to v1.12.x . When the operator is updated, it will proceed to update all of the Ceph daemons. 1 kubectl -n $ROOK_OPERATOR_NAMESPACE set image deploy/rook-ceph-operator rook-ceph-operator=rook/ceph:master 3. Update Ceph CSI \u00b6 Hint This is automatically updated if custom CSI image versions are not set. Important The minimum supported version of Ceph-CSI is v3.8.0. Update to the latest Ceph-CSI drivers if custom CSI images are specified. See the CSI Custom Images documentation. Note If using snapshots, refer to the Upgrade Snapshot API guide . 4. Wait for the upgrade to complete \u00b6 Watch now in amazement as the Ceph mons, mgrs, OSDs, rbd-mirrors, MDSes and RGWs are terminated and replaced with updated versions in sequence. The cluster may be unresponsive very briefly as mons update, and the Ceph Filesystem may fall offline a few times while the MDSes are upgrading. This is normal. The versions of the components can be viewed as they are updated: 1 watch --exec kubectl -n $ROOK_CLUSTER_NAMESPACE get deployments -l rook_cluster=$ROOK_CLUSTER_NAMESPACE -o jsonpath='{range .items[*]}{.metadata.name}{\" \\treq/upd/avl: \"}{.spec.replicas}{\"/\"}{.status.updatedReplicas}{\"/\"}{.status.readyReplicas}{\" \\trook-version=\"}{.metadata.labels.rook-version}{\"\\n\"}{end}' As an example, this cluster is midway through updating the OSDs. When all deployments report 1/1/1 availability and rook-version=v1.12.0 , the Ceph cluster's core components are fully updated. 1 2 3 4 5 6 7 8 9 Every 2.0s: kubectl -n rook-ceph get deployment -o j... rook-ceph-mgr-a req/upd/avl: 1/1/1 rook-version=v1.12.0 rook-ceph-mon-a req/upd/avl: 1/1/1 rook-version=v1.12.0 rook-ceph-mon-b req/upd/avl: 1/1/1 rook-version=v1.12.0 rook-ceph-mon-c req/upd/avl: 1/1/1 rook-version=v1.12.0 rook-ceph-osd-0 req/upd/avl: 1// rook-version=v1.12.0 rook-ceph-osd-1 req/upd/avl: 1/1/1 rook-version=v1.11.7 rook-ceph-osd-2 req/upd/avl: 1/1/1 rook-version=v1.11.7 An easy check to see if the upgrade is totally finished is to check that there is only one rook-version reported across the cluster. 
1 2 3 4 5 6 # kubectl -n $ROOK_CLUSTER_NAMESPACE get deployment -l rook_cluster = $ROOK_CLUSTER_NAMESPACE -o jsonpath = '{range .items[*]}{\"rook-version=\"}{.metadata.labels.rook-version}{\"\\n\"}{end}' | sort | uniq This cluster is not yet finished: rook-version=v1.11.7 rook-version=v1.12.0 This cluster is finished: rook-version=v1.12.0 5. Verify the updated cluster \u00b6 At this point, the Rook operator should be running version rook/ceph:v1.12.0 . Verify the CephCluster health using the health verification doc .","title":"Rook Upgrades"},{"location":"Upgrade/rook-upgrade/#supported-versions","text":"This guide is for upgrading from Rook v1.11.x to Rook v1.12.x . Please refer to the upgrade guides from previous releases for supported upgrade paths. Rook upgrades are only supported between official releases. For a guide to upgrade previous versions of Rook, please refer to the version of documentation for those releases. Upgrade 1.10 to 1.11 Upgrade 1.9 to 1.10 Upgrade 1.8 to 1.9 Upgrade 1.7 to 1.8 Upgrade 1.6 to 1.7 Upgrade 1.5 to 1.6 Upgrade 1.4 to 1.5 Upgrade 1.3 to 1.4 Upgrade 1.2 to 1.3 Upgrade 1.1 to 1.2 Upgrade 1.0 to 1.1 Upgrade 0.9 to 1.0 Upgrade 0.8 to 0.9 Upgrade 0.7 to 0.8 Upgrade 0.6 to 0.7 Upgrade 0.5 to 0.6 Important Rook releases from master are expressly unsupported. It is strongly recommended to use official releases of Rook. Unreleased versions from the master branch are subject to changes and incompatibilities that will not be supported in the official releases. Builds from the master branch can have functionality changed or removed at any time without compatibility support and without prior notice.","title":"Supported Versions"},{"location":"Upgrade/rook-upgrade/#breaking-changes-in-v112","text":"The minimum supported version of Kubernetes is v1.22. CephCSI CephFS driver introduced a breaking change in v3.9.0. If any existing CephFS storageclass in the cluster has MountOptions parameter set, follow the steps mentioned in the CephCSI upgrade guide to ensure a smooth upgrade.","title":"Breaking changes in v1.12"},{"location":"Upgrade/rook-upgrade/#considerations","text":"With this upgrade guide, there are a few notes to consider: WARNING : Upgrading a Rook cluster is not without risk. There may be unexpected issues or obstacles that damage the integrity and health the storage cluster, including data loss. The Rook cluster's storage may be unavailable for short periods during the upgrade process for both Rook operator updates and for Ceph version updates. Read this document in full before undertaking a Rook cluster upgrade.","title":"Considerations"},{"location":"Upgrade/rook-upgrade/#patch-release-upgrades","text":"Unless otherwise noted due to extenuating requirements, upgrades from one patch release of Rook to another are as simple as updating the common resources and the image of the Rook operator. For example, when Rook v1.12.1 is released, the process of updating from v1.12.0 is as simple as running the following: 1 2 git clone --single-branch --depth=1 --branch v1.12.1 https://github.com/rook/rook.git cd rook/deploy/examples If the Rook Operator or CephCluster are deployed into a different namespace than rook-ceph , see the Update common resources and CRDs section for instructions on how to change the default namespaces in common.yaml . Then, apply the latest changes from v1.12, and update the Rook Operator image. 
1 2 kubectl apply -f common.yaml -f crds.yaml kubectl -n rook-ceph set image deploy/rook-ceph-operator rook-ceph-operator=rook/ceph:v1.12.1 As exemplified above, it is a good practice to update Rook common resources from the example manifests before any update. The common resources and CRDs might not be updated with every release, but Kubernetes will only apply updates to the ones that changed. Also update optional resources like Prometheus monitoring noted more fully in the upgrade section below .","title":"Patch Release Upgrades"},{"location":"Upgrade/rook-upgrade/#helm","text":"If Rook is installed via the Helm chart, Helm will handle some details of the upgrade itself. The upgrade steps in this guide will clarify what Helm handles automatically. The rook-ceph helm chart upgrade performs the Rook upgrade. The rook-ceph-cluster helm chart upgrade performs a Ceph upgrade if the Ceph image is updated. Note Be sure to update to a supported Helm version","title":"Helm"},{"location":"Upgrade/rook-upgrade/#cluster-health","text":"In order to successfully upgrade a Rook cluster, the following prerequisites must be met: The cluster should be in a healthy state with full functionality. Review the health verification guide in order to verify a CephCluster is in a good starting state. All pods consuming Rook storage should be created, running, and in a steady state.","title":"Cluster Health"},{"location":"Upgrade/rook-upgrade/#rook-operator-upgrade","text":"The examples given in this guide upgrade a live Rook cluster running v1.11.7 to the version v1.12.0 . This upgrade should work from any official patch release of Rook v1.11 to any official patch release of v1.12. Let's get started!","title":"Rook Operator Upgrade"},{"location":"Upgrade/rook-upgrade/#environment","text":"These instructions will work for as long the environment is parameterized correctly. Set the following environment variables, which will be used throughout this document. 1 2 3 # Parameterize the environment export ROOK_OPERATOR_NAMESPACE=rook-ceph export ROOK_CLUSTER_NAMESPACE=rook-ceph","title":"Environment"},{"location":"Upgrade/rook-upgrade/#1-update-common-resources-and-crds","text":"Hint Common resources and CRDs are automatically updated when using Helm charts. First, apply updates to Rook common resources. This includes modified privileges (RBAC) needed by the Operator. Also update the Custom Resource Definitions (CRDs). Get the latest common resources manifests that contain the latest changes. 1 2 git clone --single-branch --depth=1 --branch master https://github.com/rook/rook.git cd rook/deploy/examples If the Rook Operator or CephCluster are deployed into a different namespace than rook-ceph , update the common resource manifests to use your ROOK_OPERATOR_NAMESPACE and ROOK_CLUSTER_NAMESPACE using sed . 1 2 3 4 sed -i.bak \\ -e \"s/\\(.*\\):.*# namespace:operator/\\1: $ROOK_OPERATOR_NAMESPACE # namespace:operator/g\" \\ -e \"s/\\(.*\\):.*# namespace:cluster/\\1: $ROOK_CLUSTER_NAMESPACE # namespace:cluster/g\" \\ common.yaml Apply the resources. 1 kubectl apply -f common.yaml -f crds.yaml","title":"1. Update common resources and CRDs"},{"location":"Upgrade/rook-upgrade/#prometheus-updates","text":"If Prometheus monitoring is enabled, follow this step to upgrade the Prometheus RBAC resources as well. 1 kubectl apply -f deploy/examples/monitoring/rbac.yaml","title":"Prometheus Updates"},{"location":"Upgrade/rook-upgrade/#2-update-the-rook-operator","text":"Hint The operator is automatically updated when using Helm charts. 
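For Helm-based installs, that automatic update is normally driven by upgrading the operator chart to the new release rather than patching the image by hand (a hedged sketch; the release name rook-ceph and the use of a values.yaml are assumptions about how the chart was originally installed):

    helm repo update
    helm upgrade --namespace rook-ceph rook-ceph rook-release/rook-ceph --version v1.12.0 -f values.yaml

Manual installs instead update the operator image directly, as shown next.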
The largest portion of the upgrade is triggered when the operator's image is updated to v1.12.x . When the operator is updated, it will proceed to update all of the Ceph daemons. 1 kubectl -n $ROOK_OPERATOR_NAMESPACE set image deploy/rook-ceph-operator rook-ceph-operator=rook/ceph:master","title":"2. Update the Rook Operator"},{"location":"Upgrade/rook-upgrade/#3-update-ceph-csi","text":"Hint This is automatically updated if custom CSI image versions are not set. Important The minimum supported version of Ceph-CSI is v3.8.0. Update to the latest Ceph-CSI drivers if custom CSI images are specified. See the CSI Custom Images documentation. Note If using snapshots, refer to the Upgrade Snapshot API guide .","title":"3. Update Ceph CSI"},{"location":"Upgrade/rook-upgrade/#4-wait-for-the-upgrade-to-complete","text":"Watch now in amazement as the Ceph mons, mgrs, OSDs, rbd-mirrors, MDSes and RGWs are terminated and replaced with updated versions in sequence. The cluster may be unresponsive very briefly as mons update, and the Ceph Filesystem may fall offline a few times while the MDSes are upgrading. This is normal. The versions of the components can be viewed as they are updated: 1 watch --exec kubectl -n $ROOK_CLUSTER_NAMESPACE get deployments -l rook_cluster=$ROOK_CLUSTER_NAMESPACE -o jsonpath='{range .items[*]}{.metadata.name}{\" \\treq/upd/avl: \"}{.spec.replicas}{\"/\"}{.status.updatedReplicas}{\"/\"}{.status.readyReplicas}{\" \\trook-version=\"}{.metadata.labels.rook-version}{\"\\n\"}{end}' As an example, this cluster is midway through updating the OSDs. When all deployments report 1/1/1 availability and rook-version=v1.12.0 , the Ceph cluster's core components are fully updated. 1 2 3 4 5 6 7 8 9 Every 2.0s: kubectl -n rook-ceph get deployment -o j... rook-ceph-mgr-a req/upd/avl: 1/1/1 rook-version=v1.12.0 rook-ceph-mon-a req/upd/avl: 1/1/1 rook-version=v1.12.0 rook-ceph-mon-b req/upd/avl: 1/1/1 rook-version=v1.12.0 rook-ceph-mon-c req/upd/avl: 1/1/1 rook-version=v1.12.0 rook-ceph-osd-0 req/upd/avl: 1// rook-version=v1.12.0 rook-ceph-osd-1 req/upd/avl: 1/1/1 rook-version=v1.11.7 rook-ceph-osd-2 req/upd/avl: 1/1/1 rook-version=v1.11.7 An easy check to see if the upgrade is totally finished is to check that there is only one rook-version reported across the cluster. 1 2 3 4 5 6 # kubectl -n $ROOK_CLUSTER_NAMESPACE get deployment -l rook_cluster = $ROOK_CLUSTER_NAMESPACE -o jsonpath = '{range .items[*]}{\"rook-version=\"}{.metadata.labels.rook-version}{\"\\n\"}{end}' | sort | uniq This cluster is not yet finished: rook-version=v1.11.7 rook-version=v1.12.0 This cluster is finished: rook-version=v1.12.0","title":"4. Wait for the upgrade to complete"},{"location":"Upgrade/rook-upgrade/#5-verify-the-updated-cluster","text":"At this point, the Rook operator should be running version rook/ceph:v1.12.0 . Verify the CephCluster health using the health verification doc .","title":"5. Verify the updated cluster"}]}
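One way to double-check that final state is to read the image straight off the operator deployment (a minimal sketch; it assumes the default rook-ceph-operator deployment name):

    kubectl -n $ROOK_OPERATOR_NAMESPACE get deployment rook-ceph-operator -o jsonpath='{.spec.template.spec.containers[0].image}'

Once the upgrade has fully rolled out, the command should print rook/ceph:v1.12.0.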
\ No newline at end of file
+{"config":{"indexing":"full","lang":["en"],"min_search_length":3,"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"CRDs/ceph-client-crd/","text":"Rook allows creation and updating clients through the custom resource definitions (CRDs). For more information about user management and capabilities see the Ceph docs . Use Case: Connecting to Ceph \u00b6 Use Client CRD in case you want to integrate Rook with applications that are using LibRBD directly. For example for OpenStack deployment with Ceph backend use Client CRD to create OpenStack services users. The Client CRD is not needed for Flex or CSI driver users. The drivers create the needed users automatically. Prerequisites \u00b6 This guide assumes you have created a Rook cluster as explained in the main Quickstart guide . 1. Creating Ceph User \u00b6 To get you started, here is a simple example of a CRD to configure a Ceph client with capabilities. 1 2 3 4 5 6 7 8 9 10 --- apiVersion : ceph.rook.io/v1 kind : CephClient metadata : name : example namespace : rook-ceph spec : caps : mon : 'profile rbd, allow r' osd : 'profile rbd pool=volumes, profile rbd pool=vms, profile rbd-read-only pool=images' To use CephClient to connect to a Ceph cluster: 2. Find the generated secret for the CephClient \u00b6 Once your CephClient has been processed by Rook, it will be updated to include your secret: 1 kubectl -n rook-ceph get cephclient example -o jsonpath='{.status.info.secretName}' 3. Extract Ceph cluster credentials from the generated secret \u00b6 Extract Ceph cluster credentials from the generated secret (note that the subkey will be your original client name): 1 kubectl --namespace rook-ceph get secret rook-ceph-client-example -o jsonpath=\"{.data.example}\" | base64 -d The base64 encoded value that is returned is the password for your ceph client. 4. Retrieve the mon endpoints \u00b6 To send writes to the cluster, you must retrieve the mons in use: 1 kubectl --namespace rook-ceph get configmap rook-ceph-mon-endpoints -o jsonpath='{.data.data}' | sed 's/.=//g'` This command should produce a line that looks somewhat like this: 1 10.107.72.122:6789,10.103.244.218:6789,10.99.33.227:6789 5. (optional) Generate Ceph configuration files \u00b6 If you choose to generate files for Ceph to use you will need to generate the following files: General configuration file (ex. ceph.conf ) Keyring file (ex. ceph.keyring ) Examples of the files follow: ceph.conf 1 2 3 [global] mon_host = 10.107.72.122:6789,10.103.244.218:6789,10.99.33.227:6789 log file = /tmp/ceph-$pid.log ceph.keyring 1 2 3 4 5 6 [client.example] key = < key, decoded from k8s secret> # The caps below are for a rbd workload -- you may need to edit/modify these capabilities for other workloads # see https://docs.ceph.com/en/latest/cephfs/capabilities caps mon = 'allow r' caps osd = 'profile rbd pool=, profile rb pool=' 6. Connect to the Ceph cluster with your given client ID \u00b6 With the files we've created, you should be able to query the cluster by setting Ceph ENV variables and running ceph status : 1 2 3 4 export CEPH_CONF=/libsqliteceph/ceph.conf; export CEPH_KEYRING=/libsqliteceph/ceph.keyring; export CEPH_ARGS=--id example; ceph status With this config, the ceph tools ( ceph CLI, in-program access, etc) can connect to and utilize the Ceph cluster. Use Case: SQLite \u00b6 The Ceph project contains a SQLite VFS that interacts with RBD directly, called libcephsqlite . 
First, on your workload ensure that you have the appropriate packages installed that make libcephsqlite.so available: ceph on Alpine libsqlite3-mod-ceph on Ubuntu libcephsqlite on Fedora ceph on CentOS Without the appropriate package (or a from-scratch build of SQLite), you will be unable to load libcephsqlite.so . After creating a CephClient similar to deploy/examples/sqlitevfs-client.yaml and retrieving it's credentials, you may set the following ENV variables: 1 2 3 export CEPH_CONF=/libsqliteceph/ceph.conf; export CEPH_KEYRING=/libsqliteceph/ceph.keyring; export CEPH_ARGS=--id sqlitevfs Then start your SQLite database: 1 2 3 sqlite> .load libcephsqlite.so sqlite> .open file:///poolname:/test.db?vfs=ceph sqlite> If those lines complete without error, you have successfully set up SQLite to access Ceph. See the libcephsqlite documentation for more information on the file URL format.","title":"CephClient CRD"},{"location":"CRDs/ceph-client-crd/#use-case-connecting-to-ceph","text":"Use Client CRD in case you want to integrate Rook with applications that are using LibRBD directly. For example for OpenStack deployment with Ceph backend use Client CRD to create OpenStack services users. The Client CRD is not needed for Flex or CSI driver users. The drivers create the needed users automatically.","title":"Use Case: Connecting to Ceph"},{"location":"CRDs/ceph-client-crd/#prerequisites","text":"This guide assumes you have created a Rook cluster as explained in the main Quickstart guide .","title":"Prerequisites"},{"location":"CRDs/ceph-client-crd/#1-creating-ceph-user","text":"To get you started, here is a simple example of a CRD to configure a Ceph client with capabilities. 1 2 3 4 5 6 7 8 9 10 --- apiVersion : ceph.rook.io/v1 kind : CephClient metadata : name : example namespace : rook-ceph spec : caps : mon : 'profile rbd, allow r' osd : 'profile rbd pool=volumes, profile rbd pool=vms, profile rbd-read-only pool=images' To use CephClient to connect to a Ceph cluster:","title":"1. Creating Ceph User"},{"location":"CRDs/ceph-client-crd/#2-find-the-generated-secret-for-the-cephclient","text":"Once your CephClient has been processed by Rook, it will be updated to include your secret: 1 kubectl -n rook-ceph get cephclient example -o jsonpath='{.status.info.secretName}'","title":"2. Find the generated secret for the CephClient"},{"location":"CRDs/ceph-client-crd/#3-extract-ceph-cluster-credentials-from-the-generated-secret","text":"Extract Ceph cluster credentials from the generated secret (note that the subkey will be your original client name): 1 kubectl --namespace rook-ceph get secret rook-ceph-client-example -o jsonpath=\"{.data.example}\" | base64 -d The base64 encoded value that is returned is the password for your ceph client.","title":"3. Extract Ceph cluster credentials from the generated secret"},{"location":"CRDs/ceph-client-crd/#4-retrieve-the-mon-endpoints","text":"To send writes to the cluster, you must retrieve the mons in use: 1 kubectl --namespace rook-ceph get configmap rook-ceph-mon-endpoints -o jsonpath='{.data.data}' | sed 's/.=//g'` This command should produce a line that looks somewhat like this: 1 10.107.72.122:6789,10.103.244.218:6789,10.99.33.227:6789","title":"4. Retrieve the mon endpoints"},{"location":"CRDs/ceph-client-crd/#5-optional-generate-ceph-configuration-files","text":"If you choose to generate files for Ceph to use you will need to generate the following files: General configuration file (ex. ceph.conf ) Keyring file (ex. 
ceph.keyring ) Examples of the files follow: ceph.conf 1 2 3 [global] mon_host = 10.107.72.122:6789,10.103.244.218:6789,10.99.33.227:6789 log file = /tmp/ceph-$pid.log ceph.keyring 1 2 3 4 5 6 [client.example] key = < key, decoded from k8s secret> # The caps below are for an rbd workload -- you may need to edit/modify these capabilities for other workloads # see https://docs.ceph.com/en/latest/cephfs/capabilities caps mon = 'allow r' caps osd = 'profile rbd pool=, profile rbd pool='","title":"5. (optional) Generate Ceph configuration files"},{"location":"CRDs/ceph-client-crd/#6-connect-to-the-ceph-cluster-with-your-given-client-id","text":"With the files we've created, you should be able to query the cluster by setting Ceph ENV variables and running ceph status : 1 2 3 4 export CEPH_CONF=/libsqliteceph/ceph.conf; export CEPH_KEYRING=/libsqliteceph/ceph.keyring; export CEPH_ARGS=--id example; ceph status With this config, the ceph tools ( ceph CLI, in-program access, etc) can connect to and utilize the Ceph cluster.","title":"6. Connect to the Ceph cluster with your given client ID"},{"location":"CRDs/ceph-client-crd/#use-case-sqlite","text":"The Ceph project contains a SQLite VFS that interacts with RBD directly, called libcephsqlite . First, on your workload ensure that you have the appropriate packages installed that make libcephsqlite.so available: ceph on Alpine libsqlite3-mod-ceph on Ubuntu libcephsqlite on Fedora ceph on CentOS Without the appropriate package (or a from-scratch build of SQLite), you will be unable to load libcephsqlite.so . After creating a CephClient similar to deploy/examples/sqlitevfs-client.yaml and retrieving its credentials, you may set the following ENV variables: 1 2 3 export CEPH_CONF=/libsqliteceph/ceph.conf; export CEPH_KEYRING=/libsqliteceph/ceph.keyring; export CEPH_ARGS=--id sqlitevfs Then start your SQLite database: 1 2 3 sqlite> .load libcephsqlite.so sqlite> .open file:///poolname:/test.db?vfs=ceph sqlite> If those lines complete without error, you have successfully set up SQLite to access Ceph. See the libcephsqlite documentation for more information on the file URL format.","title":"Use Case: SQLite"},{"location":"CRDs/ceph-nfs-crd/","text":"Rook allows exporting NFS shares of a CephFilesystem or CephObjectStore through the CephNFS custom resource definition. 
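The CephNFS resource provisions the NFS-Ganesha server(s); the individual exports are then created against that server, typically with the ceph nfs mgr commands from the toolbox once a server such as the example below is running (a hedged sketch; the filesystem name myfs and the pseudo path are placeholders, and the exact flag syntax of ceph nfs export create differs slightly between Ceph releases):

    kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph nfs export create cephfs --cluster-id my-nfs --pseudo-path /my-share --fsname myfs

Here the cluster-id corresponds to the name of the CephNFS resource (my-nfs in the example that follows).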
Example \u00b6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 apiVersion : ceph.rook.io/v1 kind : CephNFS metadata : name : my-nfs namespace : rook-ceph spec : # Settings for the NFS server server : active : 1 placement : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : role operator : In values : - nfs-node topologySpreadConstraints : tolerations : - key : nfs-node operator : Exists podAffinity : podAntiAffinity : annotations : my-annotation : something labels : my-label : something resources : limits : cpu : \"3\" memory : \"8Gi\" requests : cpu : \"3\" memory : \"8Gi\" priorityClassName : \"\" logLevel : NIV_INFO security : kerberos : principalName : \"nfs\" domainName : \"DOMAIN1.EXAMPLE.COM\" configFiles : volumeSource : configMap : name : my-krb5-config-files keytabFile : volumeSource : secret : secretName : my-nfs-keytab defaultMode : 0600 # mode must be 0600 sssd : sidecar : image : registry.access.redhat.com/rhel7/sssd:latest sssdConfigFile : volumeSource : configMap : name : my-nfs-sssd-config defaultMode : 0600 # mode must be 0600 debugLevel : 0 resources : {} NFS Settings \u00b6 Server \u00b6 The server spec sets configuration for Rook-created NFS-Ganesha server pods. active : The number of active NFS servers. Rook supports creating more than one active NFS server, but cannot guarantee high availability. For values greater than 1, see the known issue below. placement : Kubernetes placement restrictions to apply to NFS server Pod(s). This is similar to placement defined for daemons configured by the CephCluster CRD . annotations : Kubernetes annotations to apply to NFS server Pod(s) labels : Kubernetes labels to apply to NFS server Pod(s) resources : Kubernetes resource requests and limits to set on NFS server containers priorityClassName : Set priority class name for the NFS server Pod(s) logLevel : The log level that NFS-Ganesha servers should output. Default value: NIV_INFO Supported values: NIV_NULL | NIV_FATAL | NIV_MAJ | NIV_CRIT | NIV_WARN | NIV_EVENT | NIV_INFO | NIV_DEBUG | NIV_MID_DEBUG | NIV_FULL_DEBUG | NB_LOG_LEVEL hostNetwork : Whether host networking is enabled for the NFS server pod(s). If not set, the network settings from the CephCluster CR will be applied. Security \u00b6 The security spec sets security configuration for the NFS cluster. kerberos : Kerberos configures NFS-Ganesha to secure NFS client connections with Kerberos. principalName : this value is combined with (a) the namespace and name of the CephNFS (with a hyphen between) and (b) the Realm configured in the user-provided kerberos config file(s) to determine the full service principal name: /-@ . e.g., nfs/rook-ceph-my-nfs@example.net. For full details, see the NFS security doc . domainName : this is the domain name used in the kerberos credentials. This is used to configure idmap to map the kerberos credentials to uid/gid. Without this configured, NFS-Ganesha will use the anonuid/anongid configured (default: -2) when accessing the local filesystem. eg., DOMAIN1.EXAMPLE.COM. NFS security doc . configFiles : defines where the Kerberos configuration should be sourced from. Config files will be placed into the /etc/krb5.conf.rook/ directory. For advanced usage, see the NFS security doc . 
volumeSource : this is a standard Kubernetes VolumeSource for Kerberos configuration files like what is normally used to configure Volumes for a Pod. For example, a ConfigMap, Secret, or HostPath. The volume may contain multiple files, all of which will be loaded. keytabFile : defines where the Kerberos keytab should be sourced from. The keytab file will be placed into /etc/krb5.keytab . For advanced usage, see the NFS security doc . volumeSource : this is a standard Kubernetes VolumeSource for the Kerberos keytab file like what is normally used to configure Volumes for a Pod. For example, a Secret or HostPath. There are two requirements for the source's content: The config file must be mountable via subPath: krb5.keytab . For example, in a Secret, the data item must be named krb5.keytab , or items must be defined to select the key and give it path krb5.keytab . A HostPath directory must have the krb5.keytab file. The volume or config file must have mode 0600. sssd : SSSD enables integration with System Security Services Daemon (SSSD). See also: ID mapping via SSSD . sidecar : Specifying this configuration tells Rook to run SSSD in a sidecar alongside the NFS server in each NFS pod. image : defines the container image that should be used for the SSSD sidecar. sssdConfigFile : defines where the SSSD configuration should be sourced from. The config file will be placed into /etc/sssd/sssd.conf . For advanced usage, see the NFS security doc . volumeSource : this is a standard Kubernetes VolumeSource like what is normally used to configure Volumes for a Pod. For example, a ConfigMap, Secret, or HostPath. There are two requirements for the source's content: The config file must be mountable via subPath: sssd.conf . For example, in a ConfigMap, the data item must be named sssd.conf , or items must be defined to select the key and give it path sssd.conf . A HostPath directory must have the sssd.conf file. The volume or config file must have mode 0600. additionalFiles : adds any number of additional files into the SSSD sidecar. All files will be placed into /etc/sssd/rook-additional/ and can be referenced by the SSSD config file. For example, CA and/or TLS certificates to authenticate with Kerberos. subPath : the sub-path of /etc/sssd/rook-additional to add files into. This can include / to create arbitrarily deep sub-paths if desired. If the volumeSource is a file, this will refer to a file name. volumeSource : this is a standard Kubernetes VolumeSource for additional files like what is normally used to configure Volumes for a Pod. For example, a ConfigMap, Secret, or HostPath. The volume may contain multiple files, a single file, or may be a file on its own (e.g., a host path with type: File ). debugLevel : sets the debug level for SSSD. If unset or 0 , Rook does nothing. Otherwise, this may be a value between 1 and 10. See the SSSD docs for more info. resources : Kubernetes resource requests and limits to set on NFS server containers Scaling the active server count \u00b6 It is possible to scale the size of the cluster up or down by modifying the spec.server.active field. Scaling the cluster size up can be done at will. Once the new server comes up, clients can be assigned to it immediately. The CRD always eliminates the highest index servers first, in reverse order from how they were started. Scaling down the cluster requires that clients be migrated from servers that will be eliminated to others. 
That process is currently a manual one and should be performed before reducing the size of the cluster. Warning See the known issue below about setting this value greater than one. Known issues \u00b6 server.active count greater than 1 \u00b6 Active-active scale out does not work well with the NFS protocol. If one NFS server in a cluster is offline, other servers may block client requests until the offline server returns, which may not always happen due to the Kubernetes scheduler. Workaround: It is safest to run only a single NFS server, but we do not limit this if it benefits your use case. Ceph v17.2.1 \u00b6 Ceph NFS management with the Rook mgr module enabled has a breaking regression with the Ceph Quincy v17.2.1 release. Workaround: Leave Ceph's Rook orchestrator mgr module disabled. If you have enabled it, you must disable it using the snippet below from the toolbox. 1 2 ceph orch set backend \"\" ceph mgr module disable rook","title":"CephNFS CRD"},{"location":"CRDs/ceph-nfs-crd/#example","text":"1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 apiVersion : ceph.rook.io/v1 kind : CephNFS metadata : name : my-nfs namespace : rook-ceph spec : # Settings for the NFS server server : active : 1 placement : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : role operator : In values : - nfs-node topologySpreadConstraints : tolerations : - key : nfs-node operator : Exists podAffinity : podAntiAffinity : annotations : my-annotation : something labels : my-label : something resources : limits : cpu : \"3\" memory : \"8Gi\" requests : cpu : \"3\" memory : \"8Gi\" priorityClassName : \"\" logLevel : NIV_INFO security : kerberos : principalName : \"nfs\" domainName : \"DOMAIN1.EXAMPLE.COM\" configFiles : volumeSource : configMap : name : my-krb5-config-files keytabFile : volumeSource : secret : secretName : my-nfs-keytab defaultMode : 0600 # mode must be 0600 sssd : sidecar : image : registry.access.redhat.com/rhel7/sssd:latest sssdConfigFile : volumeSource : configMap : name : my-nfs-sssd-config defaultMode : 0600 # mode must be 0600 debugLevel : 0 resources : {}","title":"Example"},{"location":"CRDs/ceph-nfs-crd/#nfs-settings","text":"","title":"NFS Settings"},{"location":"CRDs/ceph-nfs-crd/#server","text":"The server spec sets configuration for Rook-created NFS-Ganesha server pods. active : The number of active NFS servers. Rook supports creating more than one active NFS server, but cannot guarantee high availability. For values greater than 1, see the known issue below. placement : Kubernetes placement restrictions to apply to NFS server Pod(s). This is similar to placement defined for daemons configured by the CephCluster CRD . annotations : Kubernetes annotations to apply to NFS server Pod(s) labels : Kubernetes labels to apply to NFS server Pod(s) resources : Kubernetes resource requests and limits to set on NFS server containers priorityClassName : Set priority class name for the NFS server Pod(s) logLevel : The log level that NFS-Ganesha servers should output. Default value: NIV_INFO Supported values: NIV_NULL | NIV_FATAL | NIV_MAJ | NIV_CRIT | NIV_WARN | NIV_EVENT | NIV_INFO | NIV_DEBUG | NIV_MID_DEBUG | NIV_FULL_DEBUG | NB_LOG_LEVEL hostNetwork : Whether host networking is enabled for the NFS server pod(s). 
If not set, the network settings from the CephCluster CR will be applied.","title":"Server"},{"location":"CRDs/ceph-nfs-crd/#security","text":"The security spec sets security configuration for the NFS cluster. kerberos : Kerberos configures NFS-Ganesha to secure NFS client connections with Kerberos. principalName : this value is combined with (a) the namespace and name of the CephNFS (with a hyphen between) and (b) the Realm configured in the user-provided kerberos config file(s) to determine the full service principal name: /-@ . e.g., nfs/rook-ceph-my-nfs@example.net. For full details, see the NFS security doc . domainName : this is the domain name used in the kerberos credentials. This is used to configure idmap to map the kerberos credentials to uid/gid. Without this configured, NFS-Ganesha will use the anonuid/anongid configured (default: -2) when accessing the local filesystem. eg., DOMAIN1.EXAMPLE.COM. NFS security doc . configFiles : defines where the Kerberos configuration should be sourced from. Config files will be placed into the /etc/krb5.conf.rook/ directory. For advanced usage, see the NFS security doc . volumeSource : this is a standard Kubernetes VolumeSource for Kerberos configuration files like what is normally used to configure Volumes for a Pod. For example, a ConfigMap, Secret, or HostPath. The volume may contain multiple files, all of which will be loaded. keytabFile : defines where the Kerberos keytab should be sourced from. The keytab file will be placed into /etc/krb5.keytab . For advanced usage, see the NFS security doc . volumeSource : this is a standard Kubernetes VolumeSource for the Kerberos keytab file like what is normally used to configure Volumes for a Pod. For example, a Secret or HostPath. There are two requirements for the source's content: The config file must be mountable via subPath: krb5.keytab . For example, in a Secret, the data item must be named krb5.keytab , or items must be defined to select the key and give it path krb5.keytab . A HostPath directory must have the krb5.keytab file. The volume or config file must have mode 0600. sssd : SSSD enables integration with System Security Services Daemon (SSSD). See also: ID mapping via SSSD . sidecar : Specifying this configuration tells Rook to run SSSD in a sidecar alongside the NFS server in each NFS pod. image : defines the container image that should be used for the SSSD sidecar. sssdConfigFile : defines where the SSSD configuration should be sourced from. The config file will be placed into /etc/sssd/sssd.conf . For advanced usage, see the NFS security doc . volumeSource : this is a standard Kubernetes VolumeSource like what is normally used to configure Volumes for a Pod. For example, a ConfigMap, Secret, or HostPath. There are two requirements for the source's content: The config file must be mountable via subPath: sssd.conf . For example, in a ConfigMap, the data item must be named sssd.conf , or items must be defined to select the key and give it path sssd.conf . A HostPath directory must have the sssd.conf file. The volume or config file must have mode 0600. additionalFiles : adds any number of additional files into the SSSD sidecar. All files will be placed into /etc/sssd/rook-additional/ and can be referenced by the SSSD config file. For example, CA and/or TLS certificates to authenticate with Kerberos. subPath : the sub-path of /etc/sssd/rook-additional to add files into. This can include / to create arbitrarily deep sub-paths if desired. 
If the volumeSource is a file, this will refer to a file name. volumeSource : this is a standard Kubernetes VolumeSource for additional files like what is normally used to configure Volumes for a Pod. For example, a ConfigMap, Secret, or HostPath. The volume may contain multiple files, a single file, or may be a file on its own (e.g., a host path with type: File ). debugLevel : sets the debug level for SSSD. If unset or 0 , Rook does nothing. Otherwise, this may be a value between 1 and 10. See the SSSD docs for more info. resources : Kubernetes resource requests and limits to set on NFS server containers","title":"Security"},{"location":"CRDs/ceph-nfs-crd/#scaling-the-active-server-count","text":"It is possible to scale the size of the cluster up or down by modifying the spec.server.active field. Scaling the cluster size up can be done at will. Once the new server comes up, clients can be assigned to it immediately. The CRD always eliminates the highest index servers first, in reverse order from how they were started. Scaling down the cluster requires that clients be migrated from servers that will be eliminated to others. That process is currently a manual one and should be performed before reducing the size of the cluster. Warning See the known issue below about setting this value greater than one.","title":"Scaling the active server count"},{"location":"CRDs/ceph-nfs-crd/#known-issues","text":"","title":"Known issues"},{"location":"CRDs/ceph-nfs-crd/#serveractive-count-greater-than-1","text":"Active-active scale out does not work well with the NFS protocol. If one NFS server in a cluster is offline, other servers may block client requests until the offline server returns, which may not always happen due to the Kubernetes scheduler. Workaround: It is safest to run only a single NFS server, but we do not limit this if it benefits your use case.","title":"server.active count greater than 1"},{"location":"CRDs/ceph-nfs-crd/#ceph-v1721","text":"Ceph NFS management with the Rook mgr module enabled has a breaking regression with the Ceph Quincy v17.2.1 release. Workaround: Leave Ceph's Rook orchestrator mgr module disabled. If you have enabled it, you must disable it using the snippet below from the toolbox. 1 2 ceph orch set backend \"\" ceph mgr module disable rook","title":"Ceph v17.2.1"},{"location":"CRDs/specification/","text":"Packages: ceph.rook.io/v1 ceph.rook.io/v1 Package v1 is the v1 version of the API. Resource Types: CephBlockPool CephBlockPoolRadosNamespace CephBucketNotification CephBucketTopic CephCOSIDriver CephClient CephCluster CephFilesystem CephFilesystemMirror CephFilesystemSubVolumeGroup CephNFS CephObjectRealm CephObjectStore CephObjectStoreUser CephObjectZone CephObjectZoneGroup CephRBDMirror CephBlockPool CephBlockPool represents a Ceph Storage Pool Field Description apiVersion string ceph.rook.io/v1 kind string CephBlockPool metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec NamedBlockPoolSpec name string (Optional) The desired name of the pool if different from the CephBlockPool CR name. PoolSpec PoolSpec (Members of PoolSpec are embedded into this type.) 
The core pool configuration status CephBlockPoolStatus CephBlockPoolRadosNamespace CephBlockPoolRadosNamespace represents a Ceph BlockPool Rados Namespace Field Description apiVersion string ceph.rook.io/v1 kind string CephBlockPoolRadosNamespace metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec CephBlockPoolRadosNamespaceSpec Spec represents the specification of a Ceph BlockPool Rados Namespace blockPoolName string BlockPoolName is the name of Ceph BlockPool. Typically it\u2019s the name of the CephBlockPool CR. status CephBlockPoolRadosNamespaceStatus (Optional) Status represents the status of a CephBlockPool Rados Namespace CephBucketNotification CephBucketNotification represents a Bucket Notifications Field Description apiVersion string ceph.rook.io/v1 kind string CephBucketNotification metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec BucketNotificationSpec topic string The name of the topic associated with this notification events []BucketNotificationEvent (Optional) List of events that should trigger the notification filter NotificationFilterSpec (Optional) Spec of notification filter status Status (Optional) CephBucketTopic CephBucketTopic represents a Ceph Object Topic for Bucket Notifications Field Description apiVersion string ceph.rook.io/v1 kind string CephBucketTopic metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec BucketTopicSpec objectStoreName string The name of the object store on which to define the topic objectStoreNamespace string The namespace of the object store on which to define the topic opaqueData string (Optional) Data which is sent in each event persistent bool (Optional) Indication whether notifications to this endpoint are persistent or not endpoint TopicEndpointSpec Contains the endpoint spec of the topic status BucketTopicStatus (Optional) CephCOSIDriver CephCOSIDriver represents the CRD for the Ceph COSI Driver Deployment Field Description apiVersion string ceph.rook.io/v1 kind string CephCOSIDriver metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec CephCOSIDriverSpec Spec represents the specification of a Ceph COSI Driver image string (Optional) Image is the container image to run the Ceph COSI driver objectProvisionerImage string (Optional) ObjectProvisionerImage is the container image to run the COSI driver sidecar deploymentStrategy COSIDeploymentStrategy (Optional) DeploymentStrategy is the strategy to use to deploy the COSI driver. placement Placement (Optional) Placement is the placement strategy to use for the COSI driver resources Kubernetes core/v1.ResourceRequirements (Optional) Resources is the resource requirements for the COSI driver CephClient CephClient represents a Ceph Client Field Description apiVersion string ceph.rook.io/v1 kind string CephClient metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. 
spec ClientSpec Spec represents the specification of a Ceph Client name string (Optional) caps map[string]string status CephClientStatus (Optional) Status represents the status of a Ceph Client CephCluster CephCluster is a Ceph storage cluster Field Description apiVersion string ceph.rook.io/v1 kind string CephCluster metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec ClusterSpec cephVersion CephVersionSpec (Optional) The version information that instructs Rook to orchestrate a particular version of Ceph. storage StorageScopeSpec (Optional) A spec for available storage in the cluster and how it should be used annotations AnnotationsSpec (Optional) The annotations-related configuration to add/set on each Pod related object. labels LabelsSpec (Optional) The labels-related configuration to add/set on each Pod related object. placement PlacementSpec (Optional) The placement-related configuration to pass to kubernetes (affinity, node selector, tolerations). network NetworkSpec (Optional) Network related configuration resources ResourceSpec (Optional) Resources set resource requests and limits priorityClassNames PriorityClassNamesSpec (Optional) PriorityClassNames sets priority classes on components dataDirHostPath string (Optional) The path on the host where config and data can be persisted skipUpgradeChecks bool (Optional) SkipUpgradeChecks defines if an upgrade should be forced even if one of the check fails continueUpgradeAfterChecksEvenIfNotHealthy bool (Optional) ContinueUpgradeAfterChecksEvenIfNotHealthy defines if an upgrade should continue even if PGs are not clean waitTimeoutForHealthyOSDInMinutes time.Duration (Optional) WaitTimeoutForHealthyOSDInMinutes defines the time the operator would wait before an OSD can be stopped for upgrade or restart. If the timeout exceeds and OSD is not ok to stop, then the operator would skip upgrade for the current OSD and proceed with the next one if continueUpgradeAfterChecksEvenIfNotHealthy is false . If continueUpgradeAfterChecksEvenIfNotHealthy is true , then operator would continue with the upgrade of an OSD even if its not ok to stop after the timeout. This timeout won\u2019t be applied if skipUpgradeChecks is true . The default wait timeout is 10 minutes. disruptionManagement DisruptionManagementSpec (Optional) A spec for configuring disruption management. mon MonSpec (Optional) A spec for mon related options crashCollector CrashCollectorSpec (Optional) A spec for the crash controller dashboard DashboardSpec (Optional) Dashboard settings monitoring MonitoringSpec (Optional) Prometheus based Monitoring settings external ExternalSpec (Optional) Whether the Ceph Cluster is running external to this Kubernetes cluster mon, mgr, osd, mds, and discover daemons will not be created for external clusters. mgr MgrSpec (Optional) A spec for mgr related options removeOSDsIfOutAndSafeToRemove bool (Optional) Remove the OSD that is out and safe to remove only if this option is true cleanupPolicy CleanupPolicySpec (Optional) Indicates user intent when deleting a cluster; blocks orchestration and should not be set if cluster deletion is not imminent. 
healthCheck CephClusterHealthCheckSpec (Optional) Internal daemon healthchecks and liveness probe security SecuritySpec (Optional) Security represents security settings logCollector LogCollectorSpec (Optional) Logging represents loggings settings status ClusterStatus (Optional) CephFilesystem CephFilesystem represents a Ceph Filesystem Field Description apiVersion string ceph.rook.io/v1 kind string CephFilesystem metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec FilesystemSpec metadataPool PoolSpec The metadata pool settings dataPools []NamedPoolSpec The data pool settings, with optional predefined pool name. preservePoolsOnDelete bool (Optional) Preserve pools on filesystem deletion preserveFilesystemOnDelete bool (Optional) Preserve the fs in the cluster on CephFilesystem CR deletion. Setting this to true automatically implies PreservePoolsOnDelete is true. metadataServer MetadataServerSpec The mds pod info mirroring FSMirroringSpec (Optional) The mirroring settings statusCheck MirrorHealthCheckSpec The mirroring statusCheck status CephFilesystemStatus CephFilesystemMirror CephFilesystemMirror is the Ceph Filesystem Mirror object definition Field Description apiVersion string ceph.rook.io/v1 kind string CephFilesystemMirror metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec FilesystemMirroringSpec placement Placement (Optional) The affinity to place the rgw pods (default is to place on any available node) annotations Annotations (Optional) The annotations-related configuration to add/set on each Pod related object. labels Labels (Optional) The labels-related configuration to add/set on each Pod related object. resources Kubernetes core/v1.ResourceRequirements (Optional) The resource requirements for the cephfs-mirror pods priorityClassName string (Optional) PriorityClassName sets priority class on the cephfs-mirror pods status Status (Optional) CephFilesystemSubVolumeGroup CephFilesystemSubVolumeGroup represents a Ceph Filesystem SubVolumeGroup Field Description apiVersion string ceph.rook.io/v1 kind string CephFilesystemSubVolumeGroup metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec CephFilesystemSubVolumeGroupSpec Spec represents the specification of a Ceph Filesystem SubVolumeGroup filesystemName string FilesystemName is the name of Ceph Filesystem SubVolumeGroup volume name. Typically it\u2019s the name of the CephFilesystem CR. If not coming from the CephFilesystem CR, it can be retrieved from the list of Ceph Filesystem volumes with ceph fs volume ls . To learn more about Ceph Filesystem abstractions see https://docs.ceph.com/en/latest/cephfs/fs-volumes/#fs-volumes-and-subvolumes status CephFilesystemSubVolumeGroupStatus (Optional) Status represents the status of a CephFilesystem SubvolumeGroup CephNFS CephNFS represents a Ceph NFS Field Description apiVersion string ceph.rook.io/v1 kind string CephNFS metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. 
spec NFSGaneshaSpec rados GaneshaRADOSSpec (Optional) RADOS is the Ganesha RADOS specification server GaneshaServerSpec Server is the Ganesha Server specification security NFSSecuritySpec (Optional) Security allows specifying security configurations for the NFS cluster status Status (Optional) CephObjectRealm CephObjectRealm represents a Ceph Object Store Gateway Realm Field Description apiVersion string ceph.rook.io/v1 kind string CephObjectRealm metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec ObjectRealmSpec (Optional) pull PullSpec status Status (Optional) CephObjectStore CephObjectStore represents a Ceph Object Store Gateway Field Description apiVersion string ceph.rook.io/v1 kind string CephObjectStore metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec ObjectStoreSpec metadataPool PoolSpec (Optional) The metadata pool settings dataPool PoolSpec (Optional) The data pool settings preservePoolsOnDelete bool (Optional) Preserve pools on object store deletion gateway GatewaySpec (Optional) The rgw pod info zone ZoneSpec (Optional) The multisite info healthCheck ObjectHealthCheckSpec (Optional) The RGW health probes security ObjectStoreSecuritySpec (Optional) Security represents security settings status ObjectStoreStatus CephObjectStoreUser CephObjectStoreUser represents a Ceph Object Store Gateway User Field Description apiVersion string ceph.rook.io/v1 kind string CephObjectStoreUser metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec ObjectStoreUserSpec store string (Optional) The store the user will be created in displayName string (Optional) The display name for the ceph users capabilities ObjectUserCapSpec (Optional) quotas ObjectUserQuotaSpec (Optional) status ObjectStoreUserStatus (Optional) CephObjectZone CephObjectZone represents a Ceph Object Store Gateway Zone Field Description apiVersion string ceph.rook.io/v1 kind string CephObjectZone metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec ObjectZoneSpec zoneGroup string The display name for the ceph users metadataPool PoolSpec The metadata pool settings dataPool PoolSpec The data pool settings customEndpoints []string (Optional) If this zone cannot be accessed from other peer Ceph clusters via the ClusterIP Service endpoint created by Rook, you must set this to the externally reachable endpoint(s). You may include the port in the definition. For example: \u201c https://my-object-store.my-domain.net:443\u201d . In many cases, you should set this to the endpoint of the ingress resource that makes the CephObjectStore associated with this CephObjectStoreZone reachable to peer clusters. The list can have one or more endpoints pointing to different RGW servers in the zone. If a CephObjectStore endpoint is omitted from this list, that object store\u2019s gateways will not receive multisite replication data (see CephObjectStore.spec.gateway.disableMultisiteSyncTraffic). 
preservePoolsOnDelete bool (Optional) Preserve pools on object zone deletion status Status (Optional) CephObjectZoneGroup CephObjectZoneGroup represents a Ceph Object Store Gateway Zone Group Field Description apiVersion string ceph.rook.io/v1 kind string CephObjectZoneGroup metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec ObjectZoneGroupSpec realm string The display name for the ceph users status Status (Optional) CephRBDMirror CephRBDMirror represents a Ceph RBD Mirror Field Description apiVersion string ceph.rook.io/v1 kind string CephRBDMirror metadata Kubernetes meta/v1.ObjectMeta Refer to the Kubernetes API documentation for the fields of the metadata field. spec RBDMirroringSpec count int Count represents the number of rbd mirror instance to run peers MirroringPeerSpec (Optional) Peers represents the peers spec placement Placement (Optional) The affinity to place the rgw pods (default is to place on any available node) annotations Annotations (Optional) The annotations-related configuration to add/set on each Pod related object. labels Labels (Optional) The labels-related configuration to add/set on each Pod related object. resources Kubernetes core/v1.ResourceRequirements (Optional) The resource requirements for the rbd mirror pods priorityClassName string (Optional) PriorityClassName sets priority class on the rbd mirror pods status Status (Optional) AMQPEndpointSpec ( Appears on: TopicEndpointSpec ) AMQPEndpointSpec represent the spec of an AMQP endpoint of a Bucket Topic Field Description uri string The URI of the AMQP endpoint to push notification to exchange string Name of the exchange that is used to route messages based on topics disableVerifySSL bool (Optional) Indicate whether the server certificate is validated by the client or not ackLevel string (Optional) The ack level required for this topic (none/broker/routeable) Annotations ( map[string]string alias) ( Appears on: FilesystemMirroringSpec , GaneshaServerSpec , GatewaySpec , MetadataServerSpec , RBDMirroringSpec , RGWServiceSpec ) Annotations are annotations AnnotationsSpec ( map[github.com/rook/rook/pkg/apis/ceph.rook.io/v1.KeyType]github.com/rook/rook/pkg/apis/ceph.rook.io/v1.Annotations alias) ( Appears on: ClusterSpec ) AnnotationsSpec is the main spec annotation for all daemons BucketNotificationEvent ( string alias) ( Appears on: BucketNotificationSpec ) BucketNotificationSpec represent the event type of the bucket notification BucketNotificationSpec ( Appears on: CephBucketNotification ) BucketNotificationSpec represent the spec of a Bucket Notification Field Description topic string The name of the topic associated with this notification events []BucketNotificationEvent (Optional) List of events that should trigger the notification filter NotificationFilterSpec (Optional) Spec of notification filter BucketTopicSpec ( Appears on: CephBucketTopic ) BucketTopicSpec represent the spec of a Bucket Topic Field Description objectStoreName string The name of the object store on which to define the topic objectStoreNamespace string The namespace of the object store on which to define the topic opaqueData string (Optional) Data which is sent in each event persistent bool (Optional) Indication whether notifications to this endpoint are persistent or not endpoint TopicEndpointSpec Contains the endpoint spec of the topic BucketTopicStatus ( Appears on: CephBucketTopic ) BucketTopicStatus represents the Status of a CephBucketTopic Field Description 
phase string (Optional) ARN string (Optional) The ARN of the topic generated by the RGW observedGeneration int64 (Optional) ObservedGeneration is the latest generation observed by the controller. COSIDeploymentStrategy ( string alias) ( Appears on: CephCOSIDriverSpec ) COSIDeploymentStrategy represents the strategy to use to deploy the Ceph COSI driver Value Description "Always" Always means the Ceph COSI driver will be deployed even if the object store is not present "Auto" Auto means the Ceph COSI driver will be deployed automatically if an object store is present "Never" Never means the Ceph COSI driver will never be deployed Capacity ( Appears on: CephStatus ) Capacity is the capacity information of a Ceph Cluster Field Description bytesTotal uint64 bytesUsed uint64 bytesAvailable uint64 lastUpdated string CephBlockPoolRadosNamespaceSpec ( Appears on: CephBlockPoolRadosNamespace ) CephBlockPoolRadosNamespaceSpec represents the specification of a CephBlockPool Rados Namespace Field Description blockPoolName string BlockPoolName is the name of the Ceph BlockPool. Typically it's the name of the CephBlockPool CR. CephBlockPoolRadosNamespaceStatus ( Appears on: CephBlockPoolRadosNamespace ) CephBlockPoolRadosNamespaceStatus represents the Status of a Ceph BlockPool Rados Namespace Field Description phase ConditionType (Optional) info map[string]string (Optional) CephBlockPoolStatus ( Appears on: CephBlockPool ) CephBlockPoolStatus represents the mirroring status of a Ceph Storage Pool Field Description phase ConditionType (Optional) mirroringStatus MirroringStatusSpec (Optional) mirroringInfo MirroringInfoSpec (Optional) snapshotScheduleStatus SnapshotScheduleStatusSpec (Optional) info map[string]string (Optional) observedGeneration int64 (Optional) ObservedGeneration is the latest generation observed by the controller. conditions []Condition CephCOSIDriverSpec ( Appears on: CephCOSIDriver ) CephCOSIDriverSpec represents the specification of a Ceph COSI Driver Field Description image string (Optional) Image is the container image to run the Ceph COSI driver objectProvisionerImage string (Optional) ObjectProvisionerImage is the container image to run the COSI driver sidecar deploymentStrategy COSIDeploymentStrategy (Optional) DeploymentStrategy is the strategy to use to deploy the COSI driver. placement Placement (Optional) Placement is the placement strategy to use for the COSI driver resources Kubernetes core/v1.ResourceRequirements (Optional) Resources is the resource requirements for the COSI driver CephClientStatus ( Appears on: CephClient ) CephClientStatus represents the Status of a Ceph Client Field Description phase ConditionType (Optional) info map[string]string (Optional) observedGeneration int64 (Optional) ObservedGeneration is the latest generation observed by the controller.
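To make the CephBlockPoolRadosNamespaceSpec above concrete, here is a minimal, illustrative manifest; the resource name, the namespace, and the referenced pool name (replicapool) are placeholder values, not defaults.

```yaml
apiVersion: ceph.rook.io/v1
kind: CephBlockPoolRadosNamespace
metadata:
  # Placeholder name; the RADOS namespace created in Ceph takes this name
  name: namespace-a
  # Deploy into the same namespace as the CephCluster (commonly rook-ceph)
  namespace: rook-ceph
spec:
  # Must match the metadata.name of an existing CephBlockPool CR
  blockPoolName: replicapool
```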
CephClusterHealthCheckSpec ( Appears on: ClusterSpec ) CephClusterHealthCheckSpec represent the healthcheck for Ceph daemons Field Description daemonHealth DaemonHealthSpec (Optional) DaemonHealth is the health check for a given daemon livenessProbe map[github.com/rook/rook/pkg/apis/ceph.rook.io/v1.KeyType]*github.com/rook/rook/pkg/apis/ceph.rook.io/v1.ProbeSpec (Optional) LivenessProbe allows changing the livenessProbe configuration for a given daemon startupProbe map[github.com/rook/rook/pkg/apis/ceph.rook.io/v1.KeyType]*github.com/rook/rook/pkg/apis/ceph.rook.io/v1.ProbeSpec (Optional) StartupProbe allows changing the startupProbe configuration for a given daemon CephDaemonsVersions ( Appears on: CephStatus ) CephDaemonsVersions show the current ceph version for different ceph daemons Field Description mon map[string]int (Optional) Mon shows Mon Ceph version mgr map[string]int (Optional) Mgr shows Mgr Ceph version osd map[string]int (Optional) Osd shows Osd Ceph version rgw map[string]int (Optional) Rgw shows Rgw Ceph version mds map[string]int (Optional) Mds shows Mds Ceph version rbd-mirror map[string]int (Optional) RbdMirror shows RbdMirror Ceph version cephfs-mirror map[string]int (Optional) CephFSMirror shows CephFSMirror Ceph version overall map[string]int (Optional) Overall shows overall Ceph version CephFilesystemStatus ( Appears on: CephFilesystem ) CephFilesystemStatus represents the status of a Ceph Filesystem Field Description phase ConditionType (Optional) snapshotScheduleStatus FilesystemSnapshotScheduleStatusSpec (Optional) info map[string]string (Optional) Use only info and put mirroringStatus in it? mirroringStatus FilesystemMirroringInfoSpec (Optional) MirroringStatus is the filesystem mirroring status conditions []Condition observedGeneration int64 (Optional) ObservedGeneration is the latest generation observed by the controller. CephFilesystemSubVolumeGroupSpec ( Appears on: CephFilesystemSubVolumeGroup ) CephFilesystemSubVolumeGroupSpec represents the specification of a Ceph Filesystem SubVolumeGroup Field Description filesystemName string FilesystemName is the name of Ceph Filesystem SubVolumeGroup volume name. Typically it\u2019s the name of the CephFilesystem CR. If not coming from the CephFilesystem CR, it can be retrieved from the list of Ceph Filesystem volumes with ceph fs volume ls . To learn more about Ceph Filesystem abstractions see https://docs.ceph.com/en/latest/cephfs/fs-volumes/#fs-volumes-and-subvolumes CephFilesystemSubVolumeGroupStatus ( Appears on: CephFilesystemSubVolumeGroup ) CephFilesystemSubVolumeGroupStatus represents the Status of Ceph Filesystem SubVolumeGroup Field Description phase ConditionType (Optional) info map[string]string (Optional) observedGeneration int64 (Optional) ObservedGeneration is the latest generation observed by the controller. 
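As a sketch of how CephFilesystemSubVolumeGroupSpec is typically used, the manifest below creates a subvolume group in an existing filesystem; the names group-a and myfs are illustrative, not defaults.

```yaml
apiVersion: ceph.rook.io/v1
kind: CephFilesystemSubVolumeGroup
metadata:
  # Placeholder name for the subvolume group
  name: group-a
  namespace: rook-ceph
spec:
  # Typically the metadata.name of the CephFilesystem CR; otherwise one of
  # the volumes reported by `ceph fs volume ls`
  filesystemName: myfs
```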
CephHealthMessage ( Appears on: CephStatus ) CephHealthMessage represents the health message of a Ceph Cluster Field Description severity string message string CephStatus ( Appears on: ClusterStatus ) CephStatus is the detailed health of a Ceph Cluster Field Description health string details map[string]github.com/rook/rook/pkg/apis/ceph.rook.io/v1.CephHealthMessage lastChecked string lastChanged string previousHealth string capacity Capacity versions CephDaemonsVersions (Optional) fsid string CephStorage ( Appears on: ClusterStatus ) CephStorage represents flavors of Ceph Cluster Storage Field Description deviceClasses []DeviceClasses osd OSDStatus CephVersionSpec ( Appears on: ClusterSpec ) CephVersionSpec represents the settings for the Ceph version that Rook is orchestrating. Field Description image string (Optional) Image is the container image used to launch the ceph daemons, such as an image from quay.io/ceph/ceph (the full list of image tags can be found at https://quay.io/repository/ceph/ceph?tab=tags ) allowUnsupported bool (Optional) Whether to allow unsupported versions (do not set to true in production) imagePullPolicy Kubernetes core/v1.PullPolicy (Optional) ImagePullPolicy describes a policy for if/when to pull a container image. One of Always, Never, IfNotPresent. CleanupConfirmationProperty ( string alias) ( Appears on: CleanupPolicySpec ) CleanupConfirmationProperty represents the cleanup confirmation Value Description "yes-really-destroy-data" DeleteDataDirOnHostsConfirmation represents the validation to destroy dataDirHostPath CleanupPolicySpec ( Appears on: ClusterSpec ) CleanupPolicySpec represents a Ceph Cluster cleanup policy Field Description confirmation CleanupConfirmationProperty (Optional) Confirmation represents the cleanup confirmation sanitizeDisks SanitizeDisksSpec (Optional) SanitizeDisks represents the way we sanitize disks allowUninstallWithVolumes bool (Optional) AllowUninstallWithVolumes defines whether we can proceed with the uninstall if there are RBD images still present ClientSpec ( Appears on: CephClient ) ClientSpec represents the specification of a Ceph Client Field Description name string (Optional) caps map[string]string ClusterSpec ( Appears on: CephCluster ) ClusterSpec represents the specification of a Ceph Cluster Field Description cephVersion CephVersionSpec (Optional) The version information that instructs Rook to orchestrate a particular version of Ceph. storage StorageScopeSpec (Optional) A spec for available storage in the cluster and how it should be used annotations AnnotationsSpec (Optional) The annotations-related configuration to add/set on each Pod related object. labels LabelsSpec (Optional) The labels-related configuration to add/set on each Pod related object. placement PlacementSpec (Optional) The placement-related configuration to pass to kubernetes (affinity, node selector, tolerations).
network NetworkSpec (Optional) Network related configuration resources ResourceSpec (Optional) Resources set resource requests and limits priorityClassNames PriorityClassNamesSpec (Optional) PriorityClassNames sets priority classes on components dataDirHostPath string (Optional) The path on the host where config and data can be persisted skipUpgradeChecks bool (Optional) SkipUpgradeChecks defines if an upgrade should be forced even if one of the check fails continueUpgradeAfterChecksEvenIfNotHealthy bool (Optional) ContinueUpgradeAfterChecksEvenIfNotHealthy defines if an upgrade should continue even if PGs are not clean waitTimeoutForHealthyOSDInMinutes time.Duration (Optional) WaitTimeoutForHealthyOSDInMinutes defines the time the operator would wait before an OSD can be stopped for upgrade or restart. If the timeout exceeds and OSD is not ok to stop, then the operator would skip upgrade for the current OSD and proceed with the next one if continueUpgradeAfterChecksEvenIfNotHealthy is false . If continueUpgradeAfterChecksEvenIfNotHealthy is true , then operator would continue with the upgrade of an OSD even if its not ok to stop after the timeout. This timeout won\u2019t be applied if skipUpgradeChecks is true . The default wait timeout is 10 minutes. disruptionManagement DisruptionManagementSpec (Optional) A spec for configuring disruption management. mon MonSpec (Optional) A spec for mon related options crashCollector CrashCollectorSpec (Optional) A spec for the crash controller dashboard DashboardSpec (Optional) Dashboard settings monitoring MonitoringSpec (Optional) Prometheus based Monitoring settings external ExternalSpec (Optional) Whether the Ceph Cluster is running external to this Kubernetes cluster mon, mgr, osd, mds, and discover daemons will not be created for external clusters. mgr MgrSpec (Optional) A spec for mgr related options removeOSDsIfOutAndSafeToRemove bool (Optional) Remove the OSD that is out and safe to remove only if this option is true cleanupPolicy CleanupPolicySpec (Optional) Indicates user intent when deleting a cluster; blocks orchestration and should not be set if cluster deletion is not imminent. healthCheck CephClusterHealthCheckSpec (Optional) Internal daemon healthchecks and liveness probe security SecuritySpec (Optional) Security represents security settings logCollector LogCollectorSpec (Optional) Logging represents loggings settings ClusterState ( string alias) ( Appears on: ClusterStatus ) ClusterState represents the state of a Ceph Cluster Value Description \"Connected\" ClusterStateConnected represents the Connected state of a Ceph Cluster \"Connecting\" ClusterStateConnecting represents the Connecting state of a Ceph Cluster \"Created\" ClusterStateCreated represents the Created state of a Ceph Cluster \"Creating\" ClusterStateCreating represents the Creating state of a Ceph Cluster \"Error\" ClusterStateError represents the Error state of a Ceph Cluster \"Updating\" ClusterStateUpdating represents the Updating state of a Ceph Cluster ClusterStatus ( Appears on: CephCluster ) ClusterStatus represents the status of a Ceph cluster Field Description state ClusterState phase ConditionType message string conditions []Condition ceph CephStatus storage CephStorage version ClusterVersion observedGeneration int64 (Optional) ObservedGeneration is the latest generation observed by the controller. 
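Pulling the ClusterSpec fields above together, a minimal CephCluster manifest might look like the following sketch; the Ceph image tag, the dataDirHostPath, and the storage selection are illustrative choices rather than recommended defaults.

```yaml
apiVersion: ceph.rook.io/v1
kind: CephCluster
metadata:
  name: rook-ceph
  namespace: rook-ceph
spec:
  cephVersion:
    # Illustrative tag; pick a Ceph release supported by your Rook version
    image: quay.io/ceph/ceph:v17.2.6
  # Host path where mon and config data are persisted
  dataDirHostPath: /var/lib/rook
  mon:
    count: 3
    allowMultiplePerNode: false
  dashboard:
    enabled: true
  storage:
    # Let Rook consume all nodes and all empty devices it discovers
    useAllNodes: true
    useAllDevices: true
```

Optional fields that are omitted keep the operator's defaults.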
ClusterVersion ( Appears on: ClusterStatus ) ClusterVersion represents the version of a Ceph Cluster Field Description image string version string CompressionSpec ( Appears on: ConnectionsSpec ) Field Description enabled bool (Optional) Whether to compress the data in transit across the wire. The default is not set. Requires Ceph Quincy (v17) or newer. Condition ( Appears on: CephBlockPoolStatus , CephFilesystemStatus , ClusterStatus , ObjectStoreStatus , Status ) Condition represents a status condition on any Rook-Ceph Custom Resource. Field Description type ConditionType status Kubernetes core/v1.ConditionStatus reason ConditionReason message string lastHeartbeatTime Kubernetes meta/v1.Time lastTransitionTime Kubernetes meta/v1.Time ConditionReason ( string alias) ( Appears on: Condition ) ConditionReason is a reason for a condition Value Description \"ClusterConnected\" ClusterConnectedReason is cluster connected reason \"ClusterConnecting\" ClusterConnectingReason is cluster connecting reason \"ClusterCreated\" ClusterCreatedReason is cluster created reason \"ClusterDeleting\" ClusterDeletingReason is cluster deleting reason \"ClusterProgressing\" ClusterProgressingReason is cluster progressing reason \"Deleting\" DeletingReason represents when Rook has detected a resource object should be deleted. \"ObjectHasDependents\" ObjectHasDependentsReason represents when a resource object has dependents that are blocking deletion. \"ObjectHasNoDependents\" ObjectHasNoDependentsReason represents when a resource object has no dependents that are blocking deletion. \"ReconcileFailed\" ReconcileFailed represents when a resource reconciliation failed. \"ReconcileStarted\" ReconcileStarted represents when a resource reconciliation started. \"ReconcileSucceeded\" ReconcileSucceeded represents when a resource reconciliation was successful. ConditionType ( string alias) ( Appears on: CephBlockPoolRadosNamespaceStatus , CephBlockPoolStatus , CephClientStatus , CephFilesystemStatus , CephFilesystemSubVolumeGroupStatus , ClusterStatus , Condition , ObjectStoreStatus ) ConditionType represent a resource\u2019s status Value Description \"Connected\" ConditionConnected represents Connected state of an object \"Connecting\" ConditionConnecting represents Connecting state of an object \"Deleting\" ConditionDeleting represents Deleting state of an object \"DeletionIsBlocked\" ConditionDeletionIsBlocked represents when deletion of the object is blocked. \"Failure\" ConditionFailure represents Failure state of an object \"Progressing\" ConditionProgressing represents Progressing state of an object \"Ready\" ConditionReady represents Ready state of an object ConfigFileVolumeSource ( Appears on: KerberosConfigFiles , KerberosKeytabFile , SSSDSidecarAdditionalFile , SSSDSidecarConfigFile ) Represents the source of a volume to mount. Only one of its members may be specified. This is a subset of the full Kubernetes API\u2019s VolumeSource that is reduced to what is most likely to be useful for mounting config files/dirs into Rook pods. Field Description hostPath Kubernetes core/v1.HostPathVolumeSource (Optional) hostPath represents a pre-existing file or directory on the host machine that is directly exposed to the container. This is generally used for system agents or other privileged things that are allowed to see the host machine. Most containers will NOT need this. 
More info: https://kubernetes.io/docs/concepts/storage/volumes#hostpath emptyDir Kubernetes core/v1.EmptyDirVolumeSource (Optional) emptyDir represents a temporary directory that shares a pod\u2019s lifetime. More info: https://kubernetes.io/docs/concepts/storage/volumes#emptydir secret Kubernetes core/v1.SecretVolumeSource (Optional) secret represents a secret that should populate this volume. More info: https://kubernetes.io/docs/concepts/storage/volumes#secret persistentVolumeClaim Kubernetes core/v1.PersistentVolumeClaimVolumeSource (Optional) persistentVolumeClaimVolumeSource represents a reference to a PersistentVolumeClaim in the same namespace. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#persistentvolumeclaims configMap Kubernetes core/v1.ConfigMapVolumeSource (Optional) configMap represents a configMap that should populate this volume projected Kubernetes core/v1.ProjectedVolumeSource projected items for all in one resources secrets, configmaps, and downward API ConnectionsSpec ( Appears on: NetworkSpec ) Field Description encryption EncryptionSpec (Optional) Encryption settings for the network connections. compression CompressionSpec (Optional) Compression settings for the network connections. requireMsgr2 bool (Optional) Whether to require msgr2 (port 3300) even if compression or encryption are not enabled. If true, the msgr1 port (6789) will be disabled. Requires a kernel that supports msgr2 (kernel 5.11 or CentOS 8.4 or newer). CrashCollectorSpec ( Appears on: ClusterSpec ) CrashCollectorSpec represents options to configure the crash controller Field Description disable bool (Optional) Disable determines whether we should enable the crash collector daysToRetain uint (Optional) DaysToRetain represents the number of days to retain crash until they get pruned DaemonHealthSpec ( Appears on: CephClusterHealthCheckSpec ) DaemonHealthSpec is a daemon health check Field Description status HealthCheckSpec (Optional) Status represents the health check settings for the Ceph health mon HealthCheckSpec (Optional) Monitor represents the health check settings for the Ceph monitor osd HealthCheckSpec (Optional) ObjectStorageDaemon represents the health check settings for the Ceph OSDs DashboardSpec ( Appears on: ClusterSpec ) DashboardSpec represents the settings for the Ceph dashboard Field Description enabled bool (Optional) Enabled determines whether to enable the dashboard urlPrefix string (Optional) URLPrefix is a prefix for all URLs to use the dashboard with a reverse proxy port int (Optional) Port is the dashboard webserver port ssl bool (Optional) SSL determines whether SSL should be used Device ( Appears on: Selection ) Device represents a disk to use in the cluster Field Description name string (Optional) fullpath string (Optional) config map[string]string (Optional) DeviceClasses ( Appears on: CephStorage ) DeviceClasses represents device classes of a Ceph Cluster Field Description name string DisruptionManagementSpec ( Appears on: ClusterSpec ) DisruptionManagementSpec configures management of daemon disruptions Field Description managePodBudgets bool (Optional) This enables management of poddisruptionbudgets osdMaintenanceTimeout time.Duration (Optional) OSDMaintenanceTimeout sets how many additional minutes the DOWN/OUT interval is for drained failure domains it only works if managePodBudgets is true. 
The default is 30 minutes. pgHealthCheckTimeout time.Duration (Optional) PGHealthCheckTimeout is the time (in minutes) that the operator will wait for the placement groups to become healthy (active+clean) after a drain was completed and OSDs came back up. Rook will continue with the next drain if the timeout is exceeded. It only works if managePodBudgets is true. No value or 0 means that the operator will wait until the placement groups are healthy before unblocking the next drain. manageMachineDisruptionBudgets bool (Optional) Deprecated. This enables management of machinedisruptionbudgets. machineDisruptionBudgetNamespace string (Optional) Deprecated. Namespace to look for MDBs by the machineDisruptionBudgetController EncryptionSpec ( Appears on: ConnectionsSpec ) Field Description enabled bool (Optional) Whether to encrypt the data in transit across the wire to prevent eavesdropping on the data on the network. The default is not set. Even if encryption is not enabled, clients still establish a strong initial authentication for the connection and data integrity is still validated with a crc check. When encryption is enabled, all communication between clients and Ceph daemons, or between Ceph daemons, will be encrypted. EndpointAddress ( Appears on: GatewaySpec ) EndpointAddress is a tuple that describes a single IP address or host name. This is a subset of Kubernetes's v1.EndpointAddress. Field Description ip string (Optional) The IP of this endpoint. As a legacy behavior, this supports being given a DNS-addressable hostname as well. hostname string (Optional) The DNS-addressable Hostname of this endpoint. This field will be preferred over IP if both are given. ErasureCodedSpec ( Appears on: PoolSpec ) ErasureCodedSpec represents the spec for erasure code in a pool Field Description codingChunks uint Number of coding chunks per object in an erasure coded storage pool (required for erasure-coded pool type). This is the number of OSDs that can be lost simultaneously before data cannot be recovered. dataChunks uint Number of data chunks per object in an erasure coded storage pool (required for erasure-coded pool type). The number of chunks required to recover an object when any single OSD is lost is the same as dataChunks, so be aware that the larger the number of data chunks, the higher the cost of recovery. algorithm string (Optional) The algorithm for erasure coding ExternalSpec ( Appears on: ClusterSpec ) ExternalSpec represents the options supported by an external cluster Field Description enable bool (Optional) Enable determines whether external mode is enabled or not FSMirroringSpec ( Appears on: FilesystemSpec ) FSMirroringSpec represents the settings for a mirrored filesystem Field Description enabled bool (Optional) Enabled defines whether this filesystem is mirrored or not peers MirroringPeerSpec (Optional) Peers represents the peers spec snapshotSchedules []SnapshotScheduleSpec (Optional) SnapshotSchedules is the scheduling of snapshots for mirrored filesystems snapshotRetention []SnapshotScheduleRetentionSpec (Optional) Retention is the retention policy for a snapshot schedule. One path has exactly one retention policy.
A policy can however contain multiple count-time period pairs in order to specify complex retention policies FilesystemMirrorInfoPeerSpec ( Appears on: FilesystemsSpec ) FilesystemMirrorInfoPeerSpec is the specification of a filesystem peer mirror Field Description uuid string (Optional) UUID is the peer unique identifier remote PeerRemoteSpec (Optional) Remote are the remote cluster information stats PeerStatSpec (Optional) Stats are the stat a peer mirror FilesystemMirroringInfo ( Appears on: FilesystemMirroringInfoSpec ) FilesystemMirrorInfoSpec is the filesystem mirror status of a given filesystem Field Description daemon_id int (Optional) DaemonID is the cephfs-mirror name filesystems []FilesystemsSpec (Optional) Filesystems is the list of filesystems managed by a given cephfs-mirror daemon FilesystemMirroringInfoSpec ( Appears on: CephFilesystemStatus ) FilesystemMirroringInfo is the status of the pool mirroring Field Description daemonsStatus []FilesystemMirroringInfo (Optional) PoolMirroringStatus is the mirroring status of a filesystem lastChecked string (Optional) LastChecked is the last time time the status was checked lastChanged string (Optional) LastChanged is the last time time the status last changed details string (Optional) Details contains potential status errors FilesystemMirroringSpec ( Appears on: CephFilesystemMirror ) FilesystemMirroringSpec is the filesystem mirroring specification Field Description placement Placement (Optional) The affinity to place the rgw pods (default is to place on any available node) annotations Annotations (Optional) The annotations-related configuration to add/set on each Pod related object. labels Labels (Optional) The labels-related configuration to add/set on each Pod related object. resources Kubernetes core/v1.ResourceRequirements (Optional) The resource requirements for the cephfs-mirror pods priorityClassName string (Optional) PriorityClassName sets priority class on the cephfs-mirror pods FilesystemSnapshotScheduleStatusRetention ( Appears on: FilesystemSnapshotSchedulesSpec ) FilesystemSnapshotScheduleStatusRetention is the retention specification for a filesystem snapshot schedule Field Description start string (Optional) Start is when the snapshot schedule starts created string (Optional) Created is when the snapshot schedule was created first string (Optional) First is when the first snapshot schedule was taken last string (Optional) Last is when the last snapshot schedule was taken last_pruned string (Optional) LastPruned is when the last snapshot schedule was pruned created_count int (Optional) CreatedCount is total amount of snapshots pruned_count int (Optional) PrunedCount is total amount of pruned snapshots active bool (Optional) Active is whether the scheduled is active or not FilesystemSnapshotScheduleStatusSpec ( Appears on: CephFilesystemStatus ) FilesystemSnapshotScheduleStatusSpec is the status of the snapshot schedule Field Description snapshotSchedules []FilesystemSnapshotSchedulesSpec (Optional) SnapshotSchedules is the list of snapshots scheduled lastChecked string (Optional) LastChecked is the last time time the status was checked lastChanged string (Optional) LastChanged is the last time time the status last changed details string (Optional) Details contains potential status errors FilesystemSnapshotSchedulesSpec ( Appears on: FilesystemSnapshotScheduleStatusSpec ) FilesystemSnapshotSchedulesSpec is the list of snapshot scheduled for images in a pool Field Description fs string (Optional) Fs is the name of the 
Ceph Filesystem subvol string (Optional) Subvol is the name of the sub volume path string (Optional) Path is the path on the filesystem rel_path string (Optional) schedule string (Optional) retention FilesystemSnapshotScheduleStatusRetention (Optional) FilesystemSpec ( Appears on: CephFilesystem ) FilesystemSpec represents the spec of a file system Field Description metadataPool PoolSpec The metadata pool settings dataPools []NamedPoolSpec The data pool settings, with optional predefined pool name. preservePoolsOnDelete bool (Optional) Preserve pools on filesystem deletion preserveFilesystemOnDelete bool (Optional) Preserve the fs in the cluster on CephFilesystem CR deletion. Setting this to true automatically implies PreservePoolsOnDelete is true. metadataServer MetadataServerSpec The mds pod info mirroring FSMirroringSpec (Optional) The mirroring settings statusCheck MirrorHealthCheckSpec The mirroring statusCheck FilesystemsSpec ( Appears on: FilesystemMirroringInfo ) FilesystemsSpec is spec for the mirrored filesystem Field Description filesystem_id int (Optional) FilesystemID is the filesystem identifier name string (Optional) Name is name of the filesystem directory_count int (Optional) DirectoryCount is the number of directories in the filesystem peers []FilesystemMirrorInfoPeerSpec (Optional) Peers represents the mirroring peers GaneshaRADOSSpec ( Appears on: NFSGaneshaSpec ) GaneshaRADOSSpec represents the specification of a Ganesha RADOS object Field Description pool string (Optional) The Ceph pool used store the shared configuration for NFS-Ganesha daemons. This setting is required for Ceph v15 and ignored for Ceph v16. As of Ceph Pacific 16.2.7+, this is internally hardcoded to \u201c.nfs\u201d. namespace string (Optional) The namespace inside the Ceph pool (set by \u2018pool\u2019) where shared NFS-Ganesha config is stored. This setting is required for Ceph v15 and ignored for Ceph v16. As of Ceph Pacific v16+, this is internally set to the name of the CephNFS. GaneshaServerSpec ( Appears on: NFSGaneshaSpec ) GaneshaServerSpec represents the specification of a Ganesha Server Field Description active int The number of active Ganesha servers placement Placement (Optional) The affinity to place the ganesha pods annotations Annotations (Optional) The annotations-related configuration to add/set on each Pod related object. labels Labels (Optional) The labels-related configuration to add/set on each Pod related object. resources Kubernetes core/v1.ResourceRequirements (Optional) Resources set resource requests and limits priorityClassName string (Optional) PriorityClassName sets the priority class on the pods logLevel string (Optional) LogLevel set logging level hostNetwork bool (Optional) Whether host networking is enabled for the Ganesha server. If not set, the network settings from the cluster CR will be applied. GatewaySpec ( Appears on: ObjectStoreSpec ) GatewaySpec represents the specification of Ceph Object Store Gateway Field Description port int32 (Optional) The port the rgw service will be listening on (http) securePort int32 (Optional) The port the rgw service will be listening on (https) instances int32 (Optional) The number of pods in the rgw replicaset. sslCertificateRef string (Optional) The name of the secret that stores the ssl certificate for secure rgw connections caBundleRef string (Optional) The name of the secret that stores custom ca-bundle with root and intermediate certificates. 
placement Placement (Optional) The affinity to place the rgw pods (default is to place on any available node) disableMultisiteSyncTraffic bool (Optional) DisableMultisiteSyncTraffic, when true, prevents this object store\u2019s gateways from transmitting multisite replication data. Note that this value does not affect whether gateways receive multisite replication traffic: see ObjectZone.spec.customEndpoints for that. If false or unset, this object store\u2019s gateways will be able to transmit multisite replication data. annotations Annotations (Optional) The annotations-related configuration to add/set on each Pod related object. labels Labels (Optional) The labels-related configuration to add/set on each Pod related object. resources Kubernetes core/v1.ResourceRequirements (Optional) The resource requirements for the rgw pods priorityClassName string (Optional) PriorityClassName sets priority classes on the rgw pods externalRgwEndpoints []EndpointAddress (Optional) ExternalRgwEndpoints points to external RGW endpoint(s). Multiple endpoints can be given, but for stability of ObjectBucketClaims, we highly recommend that users give only a single external RGW endpoint that is a load balancer that sends requests to the multiple RGWs. service RGWServiceSpec (Optional) The configuration related to add/set on each rgw service. hostNetwork bool (Optional) Whether host networking is enabled for the rgw daemon. If not set, the network settings from the cluster CR will be applied. dashboardEnabled bool (Optional) Whether rgw dashboard is enabled for the rgw daemon. If not set, the rgw dashboard will be enabled. HTTPEndpointSpec ( Appears on: TopicEndpointSpec ) HTTPEndpointSpec represent the spec of an HTTP endpoint of a Bucket Topic Field Description uri string The URI of the HTTP endpoint to push notification to disableVerifySSL bool (Optional) Indicate whether the server certificate is validated by the client or not sendCloudEvents bool (Optional) Send the notifications with the CloudEvents header: https://github.com/cloudevents/spec/blob/main/cloudevents/adapters/aws-s3.md Supported for Ceph Quincy (v17) or newer. HealthCheckSpec ( Appears on: DaemonHealthSpec , MirrorHealthCheckSpec ) HealthCheckSpec represents the health check of an object store bucket Field Description disabled bool (Optional) interval Kubernetes meta/v1.Duration (Optional) Interval is the internal in second or minute for the health check to run like 60s for 60 seconds timeout string (Optional) HybridStorageSpec ( Appears on: ReplicatedSpec ) HybridStorageSpec represents the settings for hybrid storage pool Field Description primaryDeviceClass string PrimaryDeviceClass represents high performance tier (for example SSD or NVME) for Primary OSD secondaryDeviceClass string SecondaryDeviceClass represents low performance tier (for example HDDs) for remaining OSDs IPFamilyType ( string alias) ( Appears on: NetworkSpec ) IPFamilyType represents the single stack Ipv4 or Ipv6 protocol. 
Value Description \"IPv4\" IPv4 internet protocol version \"IPv6\" IPv6 internet protocol version KafkaEndpointSpec ( Appears on: TopicEndpointSpec ) KafkaEndpointSpec represent the spec of a Kafka endpoint of a Bucket Topic Field Description uri string The URI of the Kafka endpoint to push notification to useSSL bool (Optional) Indicate whether to use SSL when communicating with the broker disableVerifySSL bool (Optional) Indicate whether the server certificate is validated by the client or not ackLevel string (Optional) The ack level required for this topic (none/broker) KerberosConfigFiles ( Appears on: KerberosSpec ) KerberosConfigFiles represents the source(s) from which Kerberos configuration should come. Field Description volumeSource ConfigFileVolumeSource VolumeSource accepts a pared down version of the standard Kubernetes VolumeSource for Kerberos configuration files like what is normally used to configure Volumes for a Pod. For example, a ConfigMap, Secret, or HostPath. The volume may contain multiple files, all of which will be loaded. KerberosKeytabFile ( Appears on: KerberosSpec ) KerberosKeytabFile represents the source(s) from which the Kerberos keytab file should come. Field Description volumeSource ConfigFileVolumeSource VolumeSource accepts a pared down version of the standard Kubernetes VolumeSource for the Kerberos keytab file like what is normally used to configure Volumes for a Pod. For example, a Secret or HostPath. There are two requirements for the source\u2019s content: 1. The config file must be mountable via subPath: krb5.keytab . For example, in a Secret, the data item must be named krb5.keytab , or items must be defined to select the key and give it path krb5.keytab . A HostPath directory must have the krb5.keytab file. 2. The volume or config file must have mode 0600. KerberosSpec ( Appears on: NFSSecuritySpec ) KerberosSpec represents configuration for Kerberos. Field Description principalName string (Optional) PrincipalName corresponds directly to NFS-Ganesha\u2019s NFS_KRB5:PrincipalName config. In practice, this is the service prefix of the principal name. The default is \u201cnfs\u201d. This value is combined with (a) the namespace and name of the CephNFS (with a hyphen between) and (b) the Realm configured in the user-provided krb5.conf to determine the full principal name: / - @ . e.g., nfs/rook-ceph-my-nfs@example.net. See https://github.com/nfs-ganesha/nfs-ganesha/wiki/RPCSEC_GSS for more detail. domainName string (Optional) DomainName should be set to the Kerberos Realm. configFiles KerberosConfigFiles (Optional) ConfigFiles defines where the Kerberos configuration should be sourced from. Config files will be placed into the /etc/krb5.conf.rook/ directory. If this is left empty, Rook will not add any files. This allows you to manage the files yourself however you wish. For example, you may build them into your custom Ceph container image or use the Vault agent injector to securely add the files via annotations on the CephNFS spec (passed to the NFS server pods). Rook configures Kerberos to log to stderr. We suggest removing logging sections from config files to avoid consuming unnecessary disk space from logging to files. keytabFile KerberosKeytabFile (Optional) KeytabFile defines where the Kerberos keytab should be sourced from. The keytab file will be placed into /etc/krb5.keytab . If this is left empty, Rook will not add the file. This allows you to manage the krb5.keytab file yourself however you wish. 
For example, you may build it into your custom Ceph container image or use the Vault agent injector to securely add the file via annotations on the CephNFS spec (passed to the NFS server pods). KeyManagementServiceSpec ( Appears on: ObjectStoreSecuritySpec , SecuritySpec ) KeyManagementServiceSpec represent various details of the KMS server Field Description connectionDetails map[string]string (Optional) ConnectionDetails contains the KMS connection details (address, port etc) tokenSecretName string (Optional) TokenSecretName is the kubernetes secret containing the KMS token KeyRotationSpec ( Appears on: SecuritySpec ) KeyRotationSpec represents the settings for Key Rotation. Field Description enabled bool (Optional) Enabled represents whether the key rotation is enabled. schedule string (Optional) Schedule represents the cron schedule for key rotation. KeyType ( string alias) KeyType type safety Value Description \"exporter\" \"cleanup\" \"clusterMetadata\" \"crashcollector\" \"mds\" \"mgr\" \"mon\" \"arbiter\" \"monitoring\" \"osd\" \"prepareosd\" \"rgw\" \"keyrotation\" Labels ( map[string]string alias) ( Appears on: FilesystemMirroringSpec , GaneshaServerSpec , GatewaySpec , MetadataServerSpec , RBDMirroringSpec ) Labels are label for a given daemons LabelsSpec ( map[github.com/rook/rook/pkg/apis/ceph.rook.io/v1.KeyType]github.com/rook/rook/pkg/apis/ceph.rook.io/v1.Labels alias) ( Appears on: ClusterSpec ) LabelsSpec is the main spec label for all daemons LogCollectorSpec ( Appears on: ClusterSpec ) LogCollectorSpec is the logging spec Field Description enabled bool (Optional) Enabled represents whether the log collector is enabled periodicity string (Optional) Periodicity is the periodicity of the log rotation. maxLogSize k8s.io/apimachinery/pkg/api/resource.Quantity (Optional) MaxLogSize is the maximum size of the log per ceph daemons. Must be at least 1M. MetadataServerSpec ( Appears on: FilesystemSpec ) MetadataServerSpec represents the specification of a Ceph Metadata Server Field Description activeCount int32 The number of metadata servers that are active. The remaining servers in the cluster will be in standby mode. activeStandby bool (Optional) Whether each active MDS instance will have an active standby with a warm metadata cache for faster failover. If false, standbys will still be available, but will not have a warm metadata cache. placement Placement (Optional) The affinity to place the mds pods (default is to place on all available node) with a daemonset annotations Annotations (Optional) The annotations-related configuration to add/set on each Pod related object. labels Labels (Optional) The labels-related configuration to add/set on each Pod related object. 
resources Kubernetes core/v1.ResourceRequirements (Optional) The resource requirements for the mds pods priorityClassName string (Optional) PriorityClassName sets priority classes on components livenessProbe ProbeSpec (Optional) startupProbe ProbeSpec (Optional) MgrSpec ( Appears on: ClusterSpec ) MgrSpec represents options to configure a ceph mgr Field Description count int (Optional) Count is the number of managers to run allowMultiplePerNode bool (Optional) AllowMultiplePerNode allows running multiple managers on the same node (not recommended) modules []Module (Optional) Modules is the list of ceph manager modules to enable/disable MirrorHealthCheckSpec ( Appears on: FilesystemSpec , PoolSpec ) MirrorHealthCheckSpec represents the health specification of a Ceph Storage Pool mirror Field Description mirror HealthCheckSpec (Optional) MirroringInfoSpec ( Appears on: CephBlockPoolStatus ) MirroringInfoSpec is the status of the pool mirroring Field Description PoolMirroringInfo PoolMirroringInfo (Members of PoolMirroringInfo are embedded into this type.) (Optional) lastChecked string (Optional) lastChanged string (Optional) details string (Optional) MirroringPeerSpec ( Appears on: FSMirroringSpec , MirroringSpec , RBDMirroringSpec ) MirroringPeerSpec represents the specification of a mirror peer Field Description secretNames []string (Optional) SecretNames represents the Kubernetes Secret names to add rbd-mirror or cephfs-mirror peers MirroringSpec ( Appears on: PoolSpec ) MirroringSpec represents the settings for a mirrored pool Field Description enabled bool (Optional) Enabled defines whether this pool is mirrored or not mode string (Optional) Mode is the mirroring mode: either pool or image snapshotSchedules []SnapshotScheduleSpec (Optional) SnapshotSchedules is the scheduling of snapshots for mirrored images/pools peers MirroringPeerSpec (Optional) Peers represents the peers spec MirroringStatusSpec ( Appears on: CephBlockPoolStatus ) MirroringStatusSpec is the status of the pool mirroring Field Description PoolMirroringStatus PoolMirroringStatus (Members of PoolMirroringStatus are embedded into this type.)
(Optional) PoolMirroringStatus is the mirroring status of a pool lastChecked string (Optional) LastChecked is the last time time the status was checked lastChanged string (Optional) LastChanged is the last time time the status last changed details string (Optional) Details contains potential status errors Module ( Appears on: MgrSpec ) Module represents mgr modules that the user wants to enable or disable Field Description name string (Optional) Name is the name of the ceph manager module enabled bool (Optional) Enabled determines whether a module should be enabled or not MonSpec ( Appears on: ClusterSpec ) MonSpec represents the specification of the monitor Field Description count int (Optional) Count is the number of Ceph monitors allowMultiplePerNode bool (Optional) AllowMultiplePerNode determines if we can run multiple monitors on the same node (not recommended) failureDomainLabel string (Optional) zones []MonZoneSpec (Optional) Zones are specified when we want to provide zonal awareness to mons stretchCluster StretchClusterSpec (Optional) StretchCluster is the stretch cluster specification volumeClaimTemplate Kubernetes core/v1.PersistentVolumeClaim (Optional) VolumeClaimTemplate is the PVC definition MonZoneSpec ( Appears on: MonSpec , StretchClusterSpec ) MonZoneSpec represents the specification of a zone in a Ceph Cluster Field Description name string (Optional) Name is the name of the zone arbiter bool (Optional) Arbiter determines if the zone contains the arbiter used for stretch cluster mode volumeClaimTemplate Kubernetes core/v1.PersistentVolumeClaim (Optional) VolumeClaimTemplate is the PVC template MonitoringSpec ( Appears on: ClusterSpec ) MonitoringSpec represents the settings for Prometheus based Ceph monitoring Field Description enabled bool (Optional) Enabled determines whether to create the prometheus rules for the ceph cluster. If true, the prometheus types must exist or the creation will fail. Default is false. metricsDisabled bool (Optional) Whether to disable the metrics reported by Ceph. If false, the prometheus mgr module and Ceph exporter are enabled. If true, the prometheus mgr module and Ceph exporter are both disabled. Default is false. externalMgrEndpoints []Kubernetes core/v1.EndpointAddress (Optional) ExternalMgrEndpoints points to an existing Ceph prometheus exporter endpoint externalMgrPrometheusPort uint16 (Optional) ExternalMgrPrometheusPort Prometheus exporter port port int (Optional) Port is the prometheus server port interval Kubernetes meta/v1.Duration (Optional) Interval determines prometheus scrape interval MultiClusterServiceSpec ( Appears on: NetworkSpec ) Field Description enabled bool (Optional) Enable multiClusterService to export the mon and OSD services to peer cluster. Ensure that peer clusters are connected using an MCS API compatible application, like Globalnet Submariner. clusterID string ClusterID uniquely identifies a cluster. It is used as a prefix to nslookup exported services. For example: . . 
.svc.clusterset.local NFSGaneshaSpec ( Appears on: CephNFS ) NFSGaneshaSpec represents the spec of an NFS Ganesha server Field Description rados GaneshaRADOSSpec (Optional) RADOS is the Ganesha RADOS specification server GaneshaServerSpec Server is the Ganesha Server specification security NFSSecuritySpec (Optional) Security allows specifying security configurations for the NFS cluster NFSSecuritySpec ( Appears on: NFSGaneshaSpec ) NFSSecuritySpec represents security configurations for an NFS server pod Field Description sssd SSSDSpec (Optional) SSSD enables integration with System Security Services Daemon (SSSD). SSSD can be used to provide user ID mapping from a number of sources. See https://sssd.io for more information about the SSSD project. kerberos KerberosSpec (Optional) Kerberos configures NFS-Ganesha to secure NFS client connections with Kerberos. NamedBlockPoolSpec ( Appears on: CephBlockPool ) NamedBlockPoolSpec allows a block pool to be created with a non-default name. This is more specific than the NamedPoolSpec so we get schema validation on the allowed pool names that can be specified. Field Description name string (Optional) The desired name of the pool if different from the CephBlockPool CR name. PoolSpec PoolSpec (Members of PoolSpec are embedded into this type.) The core pool configuration NamedPoolSpec ( Appears on: FilesystemSpec ) NamedPoolSpec represents the named ceph pool spec Field Description name string Name of the pool PoolSpec PoolSpec (Members of PoolSpec are embedded into this type.) PoolSpec represents the spec of a ceph pool NetworkSpec ( Appears on: ClusterSpec ) NetworkSpec for Ceph includes backward compatibility code Field Description provider string (Optional) Provider is what provides network connectivity to the cluster, e.g. "host" or "multus" selectors map[string]string (Optional) Selectors string values describe what networks will be used to connect the cluster. Meanwhile, the keys describe each network's respective responsibilities or any metadata the storage provider decides. connections ConnectionsSpec (Optional) Settings for network connections such as compression and encryption across the wire. hostNetwork bool (Optional) HostNetwork enables host networking ipFamily IPFamilyType (Optional) IPFamily is the single stack IPv6 or IPv4 protocol dualStack bool (Optional) DualStack determines whether Ceph daemons should listen on both IPv4 and IPv6 multiClusterService MultiClusterServiceSpec (Optional) Enable multiClusterService to export the Services between peer clusters Node ( Appears on: StorageScopeSpec ) Node is a storage node Field Description name string (Optional) resources Kubernetes core/v1.ResourceRequirements (Optional) config map[string]string (Optional) Selection Selection (Members of Selection are embedded into this type.)
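For the NFSGaneshaSpec shown above, a minimal CephNFS manifest could look like this sketch; the name my-nfs is a placeholder, and the rados block is omitted because, per the GaneshaRADOSSpec notes above, recent Ceph versions manage the shared configuration in the internal ".nfs" pool.

```yaml
apiVersion: ceph.rook.io/v1
kind: CephNFS
metadata:
  # Placeholder name; also used to derive the Ganesha daemon names
  name: my-nfs
  namespace: rook-ceph
spec:
  server:
    # Number of active NFS-Ganesha servers
    active: 1
```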
NodesByName ( []github.com/rook/rook/pkg/apis/ceph.rook.io/v1.Node alias) NodesByName implements an interface to sort nodes by name NotificationFilterRule ( Appears on: NotificationFilterSpec ) NotificationFilterRule represent a single rule in the Notification Filter spec Field Description name string Name of the metadata or tag value string Value to filter on NotificationFilterSpec ( Appears on: BucketNotificationSpec ) NotificationFilterSpec represent the spec of a Bucket Notification filter Field Description keyFilters []NotificationKeyFilterRule (Optional) Filters based on the object\u2019s key metadataFilters []NotificationFilterRule (Optional) Filters based on the object\u2019s metadata tagFilters []NotificationFilterRule (Optional) Filters based on the object\u2019s tags NotificationKeyFilterRule ( Appears on: NotificationFilterSpec ) NotificationKeyFilterRule represent a single key rule in the Notification Filter spec Field Description name string Name of the filter - prefix/suffix/regex value string Value to filter on OSDStatus ( Appears on: CephStorage ) OSDStatus represents OSD status of the ceph Cluster Field Description storeType map[string]int StoreType is a mapping between the OSD backend stores and number of OSDs using these stores OSDStore ( Appears on: StorageScopeSpec ) OSDStore is the backend storage type used for creating the OSDs Field Description type string (Optional) Type of backend storage to be used while creating OSDs. If empty, then bluestore will be used updateStore string (Optional) UpdateStore updates the backend store for existing OSDs. It destroys each OSD one at a time, cleans up the backing disk and prepares same OSD on that disk ObjectEndpoints ( Appears on: ObjectStoreStatus ) Field Description insecure []string (Optional) secure []string (Optional) ObjectHealthCheckSpec ( Appears on: ObjectStoreSpec ) ObjectHealthCheckSpec represents the health check of an object store Field Description readinessProbe ProbeSpec (Optional) startupProbe ProbeSpec (Optional) ObjectRealmSpec ( Appears on: CephObjectRealm ) ObjectRealmSpec represent the spec of an ObjectRealm Field Description pull PullSpec ObjectStoreSecuritySpec ( Appears on: ObjectStoreSpec ) ObjectStoreSecuritySpec is spec to define security features like encryption Field Description SecuritySpec SecuritySpec (Optional) s3 KeyManagementServiceSpec (Optional) The settings for supporting AWS-SSE:S3 with RGW ObjectStoreSpec ( Appears on: CephObjectStore ) ObjectStoreSpec represent the spec of a pool Field Description metadataPool PoolSpec (Optional) The metadata pool settings dataPool PoolSpec (Optional) The data pool settings preservePoolsOnDelete bool (Optional) Preserve pools on object store deletion gateway GatewaySpec (Optional) The rgw pod info zone ZoneSpec (Optional) The multisite info healthCheck ObjectHealthCheckSpec (Optional) The RGW health probes security ObjectStoreSecuritySpec (Optional) Security represents security settings ObjectStoreStatus ( Appears on: CephObjectStore ) ObjectStoreStatus represents the status of a Ceph Object Store resource Field Description phase ConditionType (Optional) message string (Optional) endpoints ObjectEndpoints (Optional) info map[string]string (Optional) conditions []Condition observedGeneration int64 (Optional) ObservedGeneration is the latest generation observed by the controller. 
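As an illustration of ObjectStoreSpec and GatewaySpec, the following CephObjectStore sketch pairs a replicated metadata pool with an erasure-coded data pool; the store name, the replication size, and the erasure-coding parameters are example values only, and the replicated/erasureCoded blocks correspond to PoolSpec's replication and erasure-coding settings.

```yaml
apiVersion: ceph.rook.io/v1
kind: CephObjectStore
metadata:
  # Placeholder name for the object store
  name: my-store
  namespace: rook-ceph
spec:
  metadataPool:
    failureDomain: host
    replicated:
      size: 3
  dataPool:
    failureDomain: host
    # ErasureCodedSpec: 2 data chunks + 1 coding chunk (example values)
    erasureCoded:
      dataChunks: 2
      codingChunks: 1
  preservePoolsOnDelete: false
  gateway:
    # RGW service port (http) and number of RGW pods
    port: 80
    instances: 1
```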
ObjectStoreUserSpec ( Appears on: CephObjectStoreUser ) ObjectStoreUserSpec represent the spec of an Objectstoreuser Field Description store string (Optional) The store the user will be created in displayName string (Optional) The display name for the ceph users capabilities ObjectUserCapSpec (Optional) quotas ObjectUserQuotaSpec (Optional) ObjectStoreUserStatus ( Appears on: CephObjectStoreUser ) ObjectStoreUserStatus represents the status Ceph Object Store Gateway User Field Description phase string (Optional) info map[string]string (Optional) observedGeneration int64 (Optional) ObservedGeneration is the latest generation observed by the controller. ObjectUserCapSpec ( Appears on: ObjectStoreUserSpec ) Additional admin-level capabilities for the Ceph object store user Field Description user string (Optional) Admin capabilities to read/write Ceph object store users. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities users string (Optional) Admin capabilities to read/write Ceph object store users. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities bucket string (Optional) Admin capabilities to read/write Ceph object store buckets. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities buckets string (Optional) Admin capabilities to read/write Ceph object store buckets. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities metadata string (Optional) Admin capabilities to read/write Ceph object store metadata. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities usage string (Optional) Admin capabilities to read/write Ceph object store usage. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities zone string (Optional) Admin capabilities to read/write Ceph object store zones. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities roles string (Optional) Admin capabilities to read/write roles for user. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities info string (Optional) Admin capabilities to read/write information about the user. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities amz-cache string (Optional) Add capabilities for user to send request to RGW Cache API header. Documented in https://docs.ceph.com/en/quincy/radosgw/rgw-cache/#cache-api bilog string (Optional) Add capabilities for user to change bucket index logging. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities mdlog string (Optional) Add capabilities for user to change metadata logging. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities datalog string (Optional) Add capabilities for user to change data logging. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities user-policy string (Optional) Add capabilities for user to change user policies. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities oidc-provider string (Optional) Add capabilities for user to change oidc provider. Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities ratelimit string (Optional) Add capabilities for user to set rate limiter for user and bucket. 
Documented in https://docs.ceph.com/en/latest/radosgw/admin/?#add-remove-admin-capabilities ObjectUserQuotaSpec ( Appears on: ObjectStoreUserSpec ) ObjectUserQuotaSpec can be used to set quotas for the object store user to limit their usage. See the Ceph docs for more Field Description maxBuckets int (Optional) Maximum bucket limit for the ceph user maxSize k8s.io/apimachinery/pkg/api/resource.Quantity (Optional) Maximum size limit of all objects across all the user\u2019s buckets See https://pkg.go.dev/k8s.io/apimachinery/pkg/api/resource#Quantity for more info. maxObjects int64 (Optional) Maximum number of objects across all the user\u2019s buckets ObjectZoneGroupSpec ( Appears on: CephObjectZoneGroup ) ObjectZoneGroupSpec represent the spec of an ObjectZoneGroup Field Description realm string The display name for the ceph users ObjectZoneSpec ( Appears on: CephObjectZone ) ObjectZoneSpec represent the spec of an ObjectZone Field Description zoneGroup string The display name for the ceph users metadataPool PoolSpec The metadata pool settings dataPool PoolSpec The data pool settings customEndpoints []string (Optional) If this zone cannot be accessed from other peer Ceph clusters via the ClusterIP Service endpoint created by Rook, you must set this to the externally reachable endpoint(s). You may include the port in the definition. For example: \u201c https://my-object-store.my-domain.net:443\u201d . In many cases, you should set this to the endpoint of the ingress resource that makes the CephObjectStore associated with this CephObjectStoreZone reachable to peer clusters. The list can have one or more endpoints pointing to different RGW servers in the zone. If a CephObjectStore endpoint is omitted from this list, that object store\u2019s gateways will not receive multisite replication data (see CephObjectStore.spec.gateway.disableMultisiteSyncTraffic). 
preservePoolsOnDelete bool (Optional) Preserve pools on object zone deletion PeerRemoteSpec ( Appears on: FilesystemMirrorInfoPeerSpec ) Field Description client_name string (Optional) ClientName is cephx name cluster_name string (Optional) ClusterName is the name of the cluster fs_name string (Optional) FsName is the filesystem name PeerStatSpec ( Appears on: FilesystemMirrorInfoPeerSpec ) PeerStatSpec are the mirror stat with a given peer Field Description failure_count int (Optional) FailureCount is the number of mirroring failure recovery_count int (Optional) RecoveryCount is the number of recovery attempted after failures PeersSpec ( Appears on: PoolMirroringInfo ) PeersSpec contains peer details Field Description uuid string (Optional) UUID is the peer UUID direction string (Optional) Direction is the peer mirroring direction site_name string (Optional) SiteName is the current site name mirror_uuid string (Optional) MirrorUUID is the mirror UUID client_name string (Optional) ClientName is the CephX user used to connect to the peer Placement ( Appears on: CephCOSIDriverSpec , FilesystemMirroringSpec , GaneshaServerSpec , GatewaySpec , MetadataServerSpec , RBDMirroringSpec , StorageClassDeviceSet ) Placement is the placement for an object Field Description nodeAffinity Kubernetes core/v1.NodeAffinity (Optional) NodeAffinity is a group of node affinity scheduling rules podAffinity Kubernetes core/v1.PodAffinity (Optional) PodAffinity is a group of inter pod affinity scheduling rules podAntiAffinity Kubernetes core/v1.PodAntiAffinity (Optional) PodAntiAffinity is a group of inter pod anti affinity scheduling rules tolerations []Kubernetes core/v1.Toleration (Optional) The pod this Toleration is attached to tolerates any taint that matches the triple using the matching operator topologySpreadConstraints []Kubernetes core/v1.TopologySpreadConstraint (Optional) TopologySpreadConstraint specifies how to spread matching pods among the given topology PlacementSpec ( map[github.com/rook/rook/pkg/apis/ceph.rook.io/v1.KeyType]github.com/rook/rook/pkg/apis/ceph.rook.io/v1.Placement alias) ( Appears on: ClusterSpec ) PlacementSpec is the placement for core ceph daemons part of the CephCluster CRD PoolMirroringInfo ( Appears on: MirroringInfoSpec ) PoolMirroringInfo is the mirroring info of a given pool Field Description mode string (Optional) Mode is the mirroring mode site_name string (Optional) SiteName is the current site name peers []PeersSpec (Optional) Peers are the list of peer sites connected to that cluster PoolMirroringStatus ( Appears on: MirroringStatusSpec ) PoolMirroringStatus is the pool mirror status Field Description summary PoolMirroringStatusSummarySpec (Optional) Summary is the mirroring status summary PoolMirroringStatusSummarySpec ( Appears on: PoolMirroringStatus ) PoolMirroringStatusSummarySpec is the summary output of the command Field Description health string (Optional) Health is the mirroring health daemon_health string (Optional) DaemonHealth is the health of the mirroring daemon image_health string (Optional) ImageHealth is the health of the mirrored image states StatesSpec (Optional) States is the various state for all mirrored images PoolSpec ( Appears on: FilesystemSpec , NamedBlockPoolSpec , NamedPoolSpec , ObjectStoreSpec , ObjectZoneSpec ) PoolSpec represents the spec of ceph pool Field Description failureDomain string (Optional) The failure domain: osd/host/(region or zone if available) - technically also any type in the crush map crushRoot string (Optional) The 
root of the crush hierarchy utilized by the pool deviceClass string (Optional) The device class the OSD should set to for use in the pool compressionMode string (Optional) DEPRECATED: use Parameters instead, e.g., Parameters[\u201ccompression_mode\u201d] = \u201cforce\u201d The inline compression mode in Bluestore OSD to set to (options are: none, passive, aggressive, force) Do NOT set a default value for kubebuilder as this will override the Parameters replicated ReplicatedSpec (Optional) The replication settings erasureCoded ErasureCodedSpec (Optional) The erasure code settings parameters map[string]string (Optional) Parameters is a list of properties to enable on a given pool enableRBDStats bool EnableRBDStats is used to enable gathering of statistics for all RBD images in the pool mirroring MirroringSpec The mirroring settings statusCheck MirrorHealthCheckSpec The mirroring statusCheck quotas QuotaSpec (Optional) The quota settings PriorityClassNamesSpec ( map[github.com/rook/rook/pkg/apis/ceph.rook.io/v1.KeyType]string alias) ( Appears on: ClusterSpec ) PriorityClassNamesSpec is a map of priority class names to be assigned to components ProbeSpec ( Appears on: MetadataServerSpec , ObjectHealthCheckSpec ) ProbeSpec is a wrapper around Probe so it can be enabled or disabled for a Ceph daemon Field Description disabled bool (Optional) Disabled determines whether probe is disable or not probe Kubernetes core/v1.Probe (Optional) Probe describes a health check to be performed against a container to determine whether it is alive or ready to receive traffic. PullSpec ( Appears on: ObjectRealmSpec ) PullSpec represents the pulling specification of a Ceph Object Storage Gateway Realm Field Description endpoint string QuotaSpec ( Appears on: PoolSpec ) QuotaSpec represents the spec for quotas in a pool Field Description maxBytes uint64 (Optional) MaxBytes represents the quota in bytes Deprecated in favor of MaxSize maxSize string (Optional) MaxSize represents the quota in bytes as a string maxObjects uint64 (Optional) MaxObjects represents the quota in objects RBDMirroringSpec ( Appears on: CephRBDMirror ) RBDMirroringSpec represents the specification of an RBD mirror daemon Field Description count int Count represents the number of rbd mirror instance to run peers MirroringPeerSpec (Optional) Peers represents the peers spec placement Placement (Optional) The affinity to place the rgw pods (default is to place on any available node) annotations Annotations (Optional) The annotations-related configuration to add/set on each Pod related object. labels Labels (Optional) The labels-related configuration to add/set on each Pod related object. resources Kubernetes core/v1.ResourceRequirements (Optional) The resource requirements for the rbd mirror pods priorityClassName string (Optional) PriorityClassName sets priority class on the rbd mirror pods RGWServiceSpec ( Appears on: GatewaySpec ) RGWServiceSpec represent the spec for RGW service Field Description annotations Annotations The annotations-related configuration to add/set on each rgw service. 
nullable optional ReplicatedSpec ( Appears on: PoolSpec ) ReplicatedSpec represents the spec for replication in a pool Field Description size uint Size - Number of copies per object in a replicated storage pool, including the object itself (required for replicated pool type) targetSizeRatio float64 (Optional) TargetSizeRatio gives a hint (%) to Ceph in terms of expected consumption of the total cluster capacity requireSafeReplicaSize bool (Optional) RequireSafeReplicaSize if false allows you to set replica 1 replicasPerFailureDomain uint (Optional) ReplicasPerFailureDomain the number of replicas in the specified failure domain subFailureDomain string (Optional) SubFailureDomain the name of the sub-failure domain hybridStorage HybridStorageSpec (Optional) HybridStorage represents hybrid storage tier settings ResourceSpec ( map[string]k8s.io/api/core/v1.ResourceRequirements alias) ( Appears on: ClusterSpec ) ResourceSpec is a collection of ResourceRequirements that describes the compute resource requirements SSSDSidecar ( Appears on: SSSDSpec ) SSSDSidecar represents configuration when SSSD is run in a sidecar. Field Description image string Image defines the container image that should be used for the SSSD sidecar. sssdConfigFile SSSDSidecarConfigFile (Optional) SSSDConfigFile defines where the SSSD configuration should be sourced from. The config file will be placed into /etc/sssd/sssd.conf . If this is left empty, Rook will not add the file. This allows you to manage the sssd.conf file yourself however you wish. For example, you may build it into your custom Ceph container image or use the Vault agent injector to securely add the file via annotations on the CephNFS spec (passed to the NFS server pods). additionalFiles []SSSDSidecarAdditionalFile (Optional) AdditionalFiles defines any number of additional files that should be mounted into the SSSD sidecar. These files may be referenced by the sssd.conf config file. resources Kubernetes core/v1.ResourceRequirements (Optional) Resources allow specifying resource requests/limits on the SSSD sidecar container. debugLevel int (Optional) DebugLevel sets the debug level for SSSD. If unset or set to 0, Rook does nothing. Otherwise, this may be a value between 1 and 10. See SSSD docs for more info: https://sssd.io/troubleshooting/basics.html#sssd-debug-logs SSSDSidecarAdditionalFile ( Appears on: SSSDSidecar ) SSSDSidecarAdditionalFile represents the source from which additional files for the SSSD configuration come and are made available. Field Description subPath string SubPath defines the sub-path in /etc/sssd/rook-additional/ where the additional file(s) will be placed. Each subPath definition must be unique and must not contain \u2018:\u2019. volumeSource ConfigFileVolumeSource VolumeSource accepts a pared down version of the standard Kubernetes VolumeSource for the additional file(s) like what is normally used to configure Volumes for a Pod. For example, a ConfigMap, Secret, or HostPath. Each VolumeSource adds one or more additional files to the SSSD sidecar container in the /etc/sssd/rook-additional/ directory. Be aware that some files may need to have a specific file mode like 0600 due to requirements by SSSD for some files. For example, CA or TLS certificates. SSSDSidecarConfigFile ( Appears on: SSSDSidecar ) SSSDSidecarConfigFile represents the source(s) from which the SSSD configuration should come. 
Field Description volumeSource ConfigFileVolumeSource VolumeSource accepts a pared down version of the standard Kubernetes VolumeSource for the SSSD configuration file like what is normally used to configure Volumes for a Pod. For example, a ConfigMap, Secret, or HostPath. There are two requirements for the source\u2019s content: 1. The config file must be mountable via subPath: sssd.conf . For example, in a ConfigMap, the data item must be named sssd.conf , or items must be defined to select the key and give it path sssd.conf . A HostPath directory must have the sssd.conf file. 2. The volume or config file must have mode 0600. SSSDSpec ( Appears on: NFSSecuritySpec ) SSSDSpec represents configuration for System Security Services Daemon (SSSD). Field Description sidecar SSSDSidecar (Optional) Sidecar tells Rook to run SSSD in a sidecar alongside the NFS-Ganesha server in each NFS pod. SanitizeDataSourceProperty ( string alias) ( Appears on: SanitizeDisksSpec ) SanitizeDataSourceProperty represents a sanitizing data source Value Description \"random\" SanitizeDataSourceRandom uses `shred\u2019s default entropy source \"zero\" SanitizeDataSourceZero uses /dev/zero as sanitize source SanitizeDisksSpec ( Appears on: CleanupPolicySpec ) SanitizeDisksSpec represents a disk sanitizing specification Field Description method SanitizeMethodProperty (Optional) Method is the method we use to sanitize disks dataSource SanitizeDataSourceProperty (Optional) DataSource is the data source to use to sanitize the disk with iteration int32 (Optional) Iteration is the number of pass to apply the sanitizing SanitizeMethodProperty ( string alias) ( Appears on: SanitizeDisksSpec ) SanitizeMethodProperty represents a disk sanitizing method Value Description \"complete\" SanitizeMethodComplete will sanitize everything on the disk \"quick\" SanitizeMethodQuick will sanitize metadata only on the disk SecuritySpec ( Appears on: ClusterSpec , ObjectStoreSecuritySpec ) SecuritySpec is security spec to include various security items such as kms Field Description kms KeyManagementServiceSpec (Optional) KeyManagementService is the main Key Management option keyRotation KeyRotationSpec (Optional) KeyRotation defines options for Key Rotation. 
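As a sketch only (the field names follow the SSSDSidecar and SSSDSidecarConfigFile descriptions above; the ConfigMap name and the sidecar image are assumptions, not defaults), SSSD could be wired into a CephNFS resource like this:
apiVersion: ceph.rook.io/v1
kind: CephNFS
metadata:
  name: my-nfs               # placeholder name
  namespace: rook-ceph
spec:
  server:
    active: 1
  security:
    sssd:
      sidecar:
        image: registry.access.redhat.com/rhel8/sssd:latest   # assumed sidecar image
        sssdConfigFile:
          volumeSource:
            configMap:
              name: my-nfs-sssd-config   # assumed ConfigMap; must expose a key named sssd.conf
              defaultMode: 0600          # the mounted sssd.conf must have mode 0600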
Selection ( Appears on: Node , StorageScopeSpec ) Field Description useAllDevices bool (Optional) Whether to consume all the storage devices found on a machine deviceFilter string (Optional) A regular expression to allow more fine-grained selection of devices on nodes across the cluster devicePathFilter string (Optional) A regular expression to allow more fine-grained selection of devices with path names devices []Device (Optional) List of devices to use as storage devices volumeClaimTemplates []Kubernetes core/v1.PersistentVolumeClaim (Optional) PersistentVolumeClaims to use as storage SnapshotSchedule ( Appears on: SnapshotSchedulesSpec ) SnapshotSchedule is a schedule Field Description interval string (Optional) Interval is the interval in which snapshots will be taken start_time string (Optional) StartTime is the snapshot starting time SnapshotScheduleRetentionSpec ( Appears on: FSMirroringSpec ) SnapshotScheduleRetentionSpec is a retention policy Field Description path string (Optional) Path is the path to snapshot duration string (Optional) Duration represents the retention duration for a snapshot SnapshotScheduleSpec ( Appears on: FSMirroringSpec , MirroringSpec ) SnapshotScheduleSpec represents the snapshot scheduling settings of a mirrored pool Field Description path string (Optional) Path is the path to snapshot, only valid for CephFS interval string (Optional) Interval represents the periodicity of the snapshot. startTime string (Optional) StartTime indicates when to start the snapshot SnapshotScheduleStatusSpec ( Appears on: CephBlockPoolStatus ) SnapshotScheduleStatusSpec is the status of the snapshot schedule Field Description snapshotSchedules []SnapshotSchedulesSpec (Optional) SnapshotSchedules is the list of snapshots scheduled lastChecked string (Optional) LastChecked is the last time the status was checked lastChanged string (Optional) LastChanged is the last time the status changed details string (Optional) Details contains potential status errors SnapshotSchedulesSpec ( Appears on: SnapshotScheduleStatusSpec ) SnapshotSchedulesSpec is the list of snapshots scheduled for images in a pool Field Description pool string (Optional) Pool is the pool name namespace string (Optional) Namespace is the RADOS namespace the image is part of image string (Optional) Image is the mirrored image items []SnapshotSchedule (Optional) Items is the list of schedule times for a given snapshot StatesSpec ( Appears on: PoolMirroringStatusSummarySpec ) StatesSpec is the rbd image mirroring state Field Description starting_replay int (Optional) StartingReplay is when the replay of the mirroring journal starts replaying int (Optional) Replaying is when the replay of the mirroring journal is on-going syncing int (Optional) Syncing is when the image is syncing stopping_replay int (Optional) StopReplaying is when the replay of the mirroring journal stops stopped int (Optional) Stopped is when the mirroring state is stopped unknown int (Optional) Unknown is when the mirroring state is unknown error int (Optional) Error is when the mirroring state is errored Status ( Appears on: CephBucketNotification , CephFilesystemMirror , CephNFS , CephObjectRealm , CephObjectZone , CephObjectZoneGroup , CephRBDMirror ) Status represents the status of an object Field Description phase string (Optional) observedGeneration int64 (Optional) ObservedGeneration is the latest generation observed by the controller. 
conditions []Condition StatusConditionGetter A StatusConditionGetter allows getting a pointer to an object\u2019s conditions. StorageClassDeviceSet ( Appears on: StorageScopeSpec ) StorageClassDeviceSet is a storage class device set Field Description name string Name is a unique identifier for the set count int Count is the number of devices in this set resources Kubernetes core/v1.ResourceRequirements (Optional) placement Placement (Optional) preparePlacement Placement (Optional) config map[string]string (Optional) Provider-specific device configuration volumeClaimTemplates []Kubernetes core/v1.PersistentVolumeClaim VolumeClaimTemplates is a list of PVC templates for the underlying storage devices portable bool (Optional) Portable represents OSD portability across the hosts tuneDeviceClass bool (Optional) TuneSlowDeviceClass Tune the OSD when running on a slow Device Class tuneFastDeviceClass bool (Optional) TuneFastDeviceClass Tune the OSD when running on a fast Device Class schedulerName string (Optional) Scheduler name for OSD pod placement encrypted bool (Optional) Whether to encrypt the deviceSet StorageScopeSpec ( Appears on: ClusterSpec ) Field Description nodes []Node (Optional) useAllNodes bool (Optional) onlyApplyOSDPlacement bool (Optional) config map[string]string (Optional) Selection Selection (Members of Selection are embedded into this type.) storageClassDeviceSets []StorageClassDeviceSet (Optional) store OSDStore (Optional) StoreType ( string alias) Value Description \"bluestore\" StoreTypeBlueStore is the bluestore backend storage for OSDs \"bluestore-rdr\" StoreTypeBlueStoreRDR is the bluestore-rdr backed storage for OSDs StretchClusterSpec ( Appears on: MonSpec ) StretchClusterSpec represents the specification of a stretched Ceph Cluster Field Description failureDomainLabel string (Optional) FailureDomainLabel the failure domain name (e,g: zone) subFailureDomain string (Optional) SubFailureDomain is the failure domain within a zone zones []MonZoneSpec (Optional) Zones is the list of zones TopicEndpointSpec ( Appears on: BucketTopicSpec ) TopicEndpointSpec contains exactly one of the endpoint specs of a Bucket Topic Field Description http HTTPEndpointSpec (Optional) Spec of HTTP endpoint amqp AMQPEndpointSpec (Optional) Spec of AMQP endpoint kafka KafkaEndpointSpec (Optional) Spec of Kafka endpoint ZoneSpec ( Appears on: ObjectStoreSpec ) ZoneSpec represents a Ceph Object Store Gateway Zone specification Field Description name string RGW Zone the Object Store is in Generated with gen-crd-api-reference-docs .","title":"Specification"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/","text":"Rook allows creation and customization of storage pools through the custom resource definitions (CRDs). The following settings are available for pools. Examples \u00b6 Replicated \u00b6 For optimal performance, while also adding redundancy, this sample will configure Ceph to make three full copies of the data on multiple nodes. Note This sample requires at least 1 OSD per node , with each OSD located on 3 different nodes . Each OSD must be located on a different node, because the failureDomain is set to host and the replicated.size is set to 3 . 1 2 3 4 5 6 7 8 9 10 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : failureDomain : host replicated : size : 3 deviceClass : hdd Hybrid Storage Pools \u00b6 Hybrid storage is a combination of two different storage tiers. For example, SSD and HDD. 
This helps to improve the read performance of cluster by placing, say, 1st copy of data on the higher performance tier (SSD or NVME) and remaining replicated copies on lower cost tier (HDDs). WARNING Hybrid storage pools are likely to suffer from lower availability if a node goes down. The data across the two tiers may actually end up on the same node, instead of being spread across unique nodes (or failure domains) as expected. Instead of using hybrid pools, consider configuring primary affinity from the toolbox. 1 2 3 4 5 6 7 8 9 10 11 12 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : failureDomain : host replicated : size : 3 hybridStorage : primaryDeviceClass : ssd secondaryDeviceClass : hdd Important The device classes primaryDeviceClass and secondaryDeviceClass must have at least one OSD associated with them or else the pool creation will fail. Erasure Coded \u00b6 This sample will lower the overall storage capacity requirement, while also adding redundancy by using erasure coding . Note This sample requires at least 3 bluestore OSDs . The OSDs can be located on a single Ceph node or spread across multiple nodes, because the failureDomain is set to osd and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : ecpool namespace : rook-ceph spec : failureDomain : osd erasureCoded : dataChunks : 2 codingChunks : 1 deviceClass : hdd High performance applications typically will not use erasure coding due to the performance overhead of creating and distributing the chunks in the cluster. When creating an erasure-coded pool, it is highly recommended to create the pool when you have bluestore OSDs in your cluster (see the OSD configuration settings . Filestore OSDs have limitations that are unsafe and lower performance. Mirroring \u00b6 RADOS Block Device (RBD) mirroring is a process of asynchronous replication of Ceph block device images between two or more Ceph clusters. Mirroring ensures point-in-time consistent replicas of all changes to an image, including reads and writes, block device resizing, snapshots, clones and flattening. It is generally useful when planning for Disaster Recovery. Mirroring is for clusters that are geographically distributed and stretching a single cluster is not possible due to high latencies. The following will enable mirroring of the pool at the image level: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : replicated : size : 3 mirroring : enabled : true mode : image # schedule(s) of snapshot snapshotSchedules : - interval : 24h # daily snapshots startTime : 14:00:00-05:00 Once mirroring is enabled, Rook will by default create its own bootstrap peer token so that it can be used by another cluster. The bootstrap peer token can be found in a Kubernetes Secret. 
The name of the Secret is present in the Status field of the CephBlockPool CR: 1 2 3 status : info : rbdMirrorBootstrapPeerSecretName : pool-peer-token-replicapool This secret can then be fetched like so: 1 2 kubectl get secret -n rook-ceph pool-peer-token-replicapool -o jsonpath='{.data.token}'|base64 -d eyJmc2lkIjoiOTFlYWUwZGQtMDZiMS00ZDJjLTkxZjMtMTMxMWM5ZGYzODJiIiwiY2xpZW50X2lkIjoicmJkLW1pcnJvci1wZWVyIiwia2V5IjoiQVFEN1psOWZ3V1VGRHhBQWdmY0gyZi8xeUhYeGZDUTU5L1N0NEE9PSIsIm1vbl9ob3N0IjoiW3YyOjEwLjEwMS4xOC4yMjM6MzMwMCx2MToxMC4xMDEuMTguMjIzOjY3ODldIn0= The secret must be decoded. The result will be another base64 encoded blob that you will import in the destination cluster: 1 external-cluster-console # rbd mirror pool peer bootstrap import  See the official rbd mirror documentation on how to add a bootstrap peer . Data spread across subdomains \u00b6 Imagine the following topology with datacenters containing racks and then hosts: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 . \u251c\u2500\u2500 datacenter-1 \u2502 \u251c\u2500\u2500 rack-1 \u2502 \u2502 \u251c\u2500\u2500 host-1 \u2502 \u2502 \u251c\u2500\u2500 host-2 \u2502 \u2514\u2500\u2500 rack-2 \u2502 \u251c\u2500\u2500 host-3 \u2502 \u251c\u2500\u2500 host-4 \u2514\u2500\u2500 datacenter-2 \u251c\u2500\u2500 rack-3 \u2502 \u251c\u2500\u2500 host-5 \u2502 \u251c\u2500\u2500 host-6 \u2514\u2500\u2500 rack-4 \u251c\u2500\u2500 host-7 \u2514\u2500\u2500 host-8 As an administrator I would like to place 4 copies across both datacenter where each copy inside a datacenter is across a rack. This can be achieved by the following: 1 2 3 4 5 6 7 8 9 10 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : replicated : size : 4 replicasPerFailureDomain : 2 subFailureDomain : rack Pool Settings \u00b6 Metadata \u00b6 name : The name of the pool to create. namespace : The namespace of the Rook cluster where the pool is created. Spec \u00b6 replicated : Settings for a replicated pool. If specified, erasureCoded settings must not be specified. size : The desired number of copies to make of the data in the pool. requireSafeReplicaSize : set to false if you want to create a pool with size 1, setting pool size 1 could lead to data loss without recovery. Make sure you are ABSOLUTELY CERTAIN that is what you want. replicasPerFailureDomain : Sets up the number of replicas to place in a given failure domain. For instance, if the failure domain is a datacenter (cluster is stretched) then you will have 2 replicas per datacenter where each replica ends up on a different host. This gives you a total of 4 replicas and for this, the size must be set to 4. The default is 1. subFailureDomain : Name of the CRUSH bucket representing a sub-failure domain. In a stretched configuration this option represent the \"last\" bucket where replicas will end up being written. Imagine the cluster is stretched across two datacenters, you can then have 2 copies per datacenter and each copy on a different CRUSH bucket. The default is \"host\". erasureCoded : Settings for an erasure-coded pool. If specified, replicated settings must not be specified. See below for more details on erasure coding . dataChunks : Number of chunks to divide the original object into codingChunks : Number of coding chunks to generate failureDomain : The failure domain across which the data will be spread. This can be set to a value of either osd or host , with host being the default setting. A failure domain can also be set to a different type (e.g. 
rack ), if the OSDs are created on nodes with the supported topology labels . If the failureDomain is changed on the pool, the operator will create a new CRUSH rule and update the pool. If a replicated pool of size 3 is configured and the failureDomain is set to host , all three copies of the replicated data will be placed on OSDs located on 3 different Ceph hosts. This case is guaranteed to tolerate a failure of two hosts without a loss of data. Similarly, a failure domain set to osd can tolerate a loss of two OSD devices. If erasure coding is used, the data and coding chunks are spread across the configured failure domain. Caution Neither Rook, nor Ceph, prevent the creation of a cluster where the replicated data (or Erasure Coded chunks) cannot be written safely. By design, Ceph will delay checking for suitable OSDs until a write request is made and this write can hang if there are not sufficient OSDs to satisfy the request. deviceClass : Sets up the CRUSH rule for the pool to distribute data only on the specified device class. If left empty or unspecified, the pool will use the cluster's default CRUSH root, which usually distributes data over all OSDs, regardless of their class. If deviceClass is specified on any pool, ensure that it is added to every pool in the cluster, otherwise Ceph will warn about pools with overlapping roots. crushRoot : The root in the crush map to be used by the pool. If left empty or unspecified, the default root will be used. Creating a crush hierarchy for the OSDs currently requires the Rook toolbox to run the Ceph tools described here . enableRBDStats : Enables collecting RBD per-image IO statistics by enabling dynamic OSD performance counters. Defaults to false. For more info see the ceph documentation . name : The name of Ceph pools is based on the metadata.name of the CephBlockPool CR. Some built-in Ceph pools require names that are incompatible with K8s resource names. These special pools can be configured by setting this name to override the name of the Ceph pool that is created instead of using the metadata.name for the pool. Only the following pool names are supported: device_health_metrics , .nfs , and .mgr . See the example builtin mgr pool . parameters : Sets any parameters listed to the given pool target_size_ratio: gives a hint (%) to Ceph in terms of expected consumption of the total cluster capacity of a given pool, for more info see the ceph documentation compression_mode : Sets up the pool for inline compression when using a Bluestore OSD. If left unspecified, it does not set up any compression mode for the pool. Values supported are the same as Bluestore inline compression modes , such as none , passive , aggressive , and force . mirroring : Sets up mirroring of the pool enabled : whether mirroring is enabled on that pool (default: false) mode : mirroring mode to run, possible values are \"pool\" or \"image\" (required). Refer to the mirroring modes Ceph documentation for more details. snapshotSchedules : schedule(s) snapshot at the pool level. One or more schedules are supported. interval : frequency of the snapshots. The interval can be specified in days, hours, or minutes using d, h, m suffix respectively. startTime : optional, determines at what time the snapshot process starts, specified using the ISO 8601 time format. peers : to configure mirroring peers. See the prerequisite RBD Mirror documentation first. secretNames : a list of peers to connect to. Currently only a single peer is supported where a peer represents a Ceph cluster. 
statusCheck : Sets up pool mirroring status mirror : displays the mirroring status disabled : whether to enable or disable pool mirroring status interval : time interval to refresh the mirroring status (default 60s) quotas : Set byte and object quotas. See the ceph documentation for more info. maxSize : quota in bytes as a string with quantity suffixes (e.g. \"10Gi\") maxObjects : quota in objects as an integer Note A value of 0 disables the quota. Add specific pool properties \u00b6 With the parameters field you can set any pool property: 1 2 3 spec : parameters : <property> : <value> For instance: 1 2 3 spec : parameters : min_size : 1 Erasure Coding \u00b6 Erasure coding allows you to keep your data safe while reducing the storage overhead. Instead of creating multiple replicas of the data, erasure coding divides the original data into chunks of equal size, then generates extra chunks of that same size for redundancy. For example, if you have an object of size 2MB, the simplest erasure coding with two data chunks would divide the object into two chunks of size 1MB each (data chunks). One more chunk (coding chunk) of size 1MB will be generated. In total, 3MB will be stored in the cluster. The object will be able to suffer the loss of any one of the chunks and still be able to reconstruct the original object. The number of data and coding chunks you choose will depend on your resiliency to loss and how much storage overhead is acceptable in your storage cluster. Here are some examples to illustrate how the number of chunks affects the storage and loss toleration. Data chunks (k) Coding chunks (m) Total storage Losses Tolerated OSDs required 2 1 1.5x 1 3 2 2 2x 2 4 4 2 1.5x 2 6 16 4 1.25x 4 20 The failureDomain must also be taken into account when determining the number of chunks. The failure domain determines the level in the Ceph CRUSH hierarchy where the chunks must be uniquely distributed. This decision will impact whether node losses or disk losses are tolerated. There could also be performance differences when placing the data across nodes or OSDs. host : All chunks will be placed on unique hosts osd : All chunks will be placed on unique OSDs If you do not have a sufficient number of hosts or OSDs for unique placement, the pool can still be created, but writing to the pool will hang. Rook currently only configures two levels in the CRUSH map. It is also possible to configure other levels such as rack by adding topology labels to the nodes.","title":"CephBlockPool CRD"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#examples","text":"","title":"Examples"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#replicated","text":"For optimal performance, while also adding redundancy, this sample will configure Ceph to make three full copies of the data on multiple nodes. Note This sample requires at least 1 OSD per node , with each OSD located on 3 different nodes . Each OSD must be located on a different node, because the failureDomain is set to host and the replicated.size is set to 3 . 1 2 3 4 5 6 7 8 9 10 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : failureDomain : host replicated : size : 3 deviceClass : hdd","title":"Replicated"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#hybrid-storage-pools","text":"Hybrid storage is a combination of two different storage tiers. For example, SSD and HDD. 
This helps to improve the read performance of cluster by placing, say, 1st copy of data on the higher performance tier (SSD or NVME) and remaining replicated copies on lower cost tier (HDDs). WARNING Hybrid storage pools are likely to suffer from lower availability if a node goes down. The data across the two tiers may actually end up on the same node, instead of being spread across unique nodes (or failure domains) as expected. Instead of using hybrid pools, consider configuring primary affinity from the toolbox. 1 2 3 4 5 6 7 8 9 10 11 12 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : failureDomain : host replicated : size : 3 hybridStorage : primaryDeviceClass : ssd secondaryDeviceClass : hdd Important The device classes primaryDeviceClass and secondaryDeviceClass must have at least one OSD associated with them or else the pool creation will fail.","title":"Hybrid Storage Pools"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#erasure-coded","text":"This sample will lower the overall storage capacity requirement, while also adding redundancy by using erasure coding . Note This sample requires at least 3 bluestore OSDs . The OSDs can be located on a single Ceph node or spread across multiple nodes, because the failureDomain is set to osd and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : ecpool namespace : rook-ceph spec : failureDomain : osd erasureCoded : dataChunks : 2 codingChunks : 1 deviceClass : hdd High performance applications typically will not use erasure coding due to the performance overhead of creating and distributing the chunks in the cluster. When creating an erasure-coded pool, it is highly recommended to create the pool when you have bluestore OSDs in your cluster (see the OSD configuration settings . Filestore OSDs have limitations that are unsafe and lower performance.","title":"Erasure Coded"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#mirroring","text":"RADOS Block Device (RBD) mirroring is a process of asynchronous replication of Ceph block device images between two or more Ceph clusters. Mirroring ensures point-in-time consistent replicas of all changes to an image, including reads and writes, block device resizing, snapshots, clones and flattening. It is generally useful when planning for Disaster Recovery. Mirroring is for clusters that are geographically distributed and stretching a single cluster is not possible due to high latencies. The following will enable mirroring of the pool at the image level: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : replicated : size : 3 mirroring : enabled : true mode : image # schedule(s) of snapshot snapshotSchedules : - interval : 24h # daily snapshots startTime : 14:00:00-05:00 Once mirroring is enabled, Rook will by default create its own bootstrap peer token so that it can be used by another cluster. The bootstrap peer token can be found in a Kubernetes Secret. 
The name of the Secret is present in the Status field of the CephBlockPool CR: 1 2 3 status : info : rbdMirrorBootstrapPeerSecretName : pool-peer-token-replicapool This secret can then be fetched like so: 1 2 kubectl get secret -n rook-ceph pool-peer-token-replicapool -o jsonpath='{.data.token}'|base64 -d eyJmc2lkIjoiOTFlYWUwZGQtMDZiMS00ZDJjLTkxZjMtMTMxMWM5ZGYzODJiIiwiY2xpZW50X2lkIjoicmJkLW1pcnJvci1wZWVyIiwia2V5IjoiQVFEN1psOWZ3V1VGRHhBQWdmY0gyZi8xeUhYeGZDUTU5L1N0NEE9PSIsIm1vbl9ob3N0IjoiW3YyOjEwLjEwMS4xOC4yMjM6MzMwMCx2MToxMC4xMDEuMTguMjIzOjY3ODldIn0= The secret must be decoded. The result will be another base64 encoded blob that you will import in the destination cluster: 1 external-cluster-console # rbd mirror pool peer bootstrap import  See the official rbd mirror documentation on how to add a bootstrap peer .","title":"Mirroring"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#data-spread-across-subdomains","text":"Imagine the following topology with datacenters containing racks and then hosts: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 . \u251c\u2500\u2500 datacenter-1 \u2502 \u251c\u2500\u2500 rack-1 \u2502 \u2502 \u251c\u2500\u2500 host-1 \u2502 \u2502 \u251c\u2500\u2500 host-2 \u2502 \u2514\u2500\u2500 rack-2 \u2502 \u251c\u2500\u2500 host-3 \u2502 \u251c\u2500\u2500 host-4 \u2514\u2500\u2500 datacenter-2 \u251c\u2500\u2500 rack-3 \u2502 \u251c\u2500\u2500 host-5 \u2502 \u251c\u2500\u2500 host-6 \u2514\u2500\u2500 rack-4 \u251c\u2500\u2500 host-7 \u2514\u2500\u2500 host-8 As an administrator I would like to place 4 copies across both datacenter where each copy inside a datacenter is across a rack. This can be achieved by the following: 1 2 3 4 5 6 7 8 9 10 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : replicated : size : 4 replicasPerFailureDomain : 2 subFailureDomain : rack","title":"Data spread across subdomains"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#pool-settings","text":"","title":"Pool Settings"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#metadata","text":"name : The name of the pool to create. namespace : The namespace of the Rook cluster where the pool is created.","title":"Metadata"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#spec","text":"replicated : Settings for a replicated pool. If specified, erasureCoded settings must not be specified. size : The desired number of copies to make of the data in the pool. requireSafeReplicaSize : set to false if you want to create a pool with size 1, setting pool size 1 could lead to data loss without recovery. Make sure you are ABSOLUTELY CERTAIN that is what you want. replicasPerFailureDomain : Sets up the number of replicas to place in a given failure domain. For instance, if the failure domain is a datacenter (cluster is stretched) then you will have 2 replicas per datacenter where each replica ends up on a different host. This gives you a total of 4 replicas and for this, the size must be set to 4. The default is 1. subFailureDomain : Name of the CRUSH bucket representing a sub-failure domain. In a stretched configuration this option represent the \"last\" bucket where replicas will end up being written. Imagine the cluster is stretched across two datacenters, you can then have 2 copies per datacenter and each copy on a different CRUSH bucket. The default is \"host\". erasureCoded : Settings for an erasure-coded pool. If specified, replicated settings must not be specified. See below for more details on erasure coding . 
dataChunks : Number of chunks to divide the original object into codingChunks : Number of coding chunks to generate failureDomain : The failure domain across which the data will be spread. This can be set to a value of either osd or host , with host being the default setting. A failure domain can also be set to a different type (e.g. rack ), if the OSDs are created on nodes with the supported topology labels . If the failureDomain is changed on the pool, the operator will create a new CRUSH rule and update the pool. If a replicated pool of size 3 is configured and the failureDomain is set to host , all three copies of the replicated data will be placed on OSDs located on 3 different Ceph hosts. This case is guaranteed to tolerate a failure of two hosts without a loss of data. Similarly, a failure domain set to osd can tolerate a loss of two OSD devices. If erasure coding is used, the data and coding chunks are spread across the configured failure domain. Caution Neither Rook, nor Ceph, prevent the creation of a cluster where the replicated data (or Erasure Coded chunks) cannot be written safely. By design, Ceph will delay checking for suitable OSDs until a write request is made and this write can hang if there are not sufficient OSDs to satisfy the request. deviceClass : Sets up the CRUSH rule for the pool to distribute data only on the specified device class. If left empty or unspecified, the pool will use the cluster's default CRUSH root, which usually distributes data over all OSDs, regardless of their class. If deviceClass is specified on any pool, ensure that it is added to every pool in the cluster, otherwise Ceph will warn about pools with overlapping roots. crushRoot : The root in the crush map to be used by the pool. If left empty or unspecified, the default root will be used. Creating a crush hierarchy for the OSDs currently requires the Rook toolbox to run the Ceph tools described here . enableRBDStats : Enables collecting RBD per-image IO statistics by enabling dynamic OSD performance counters. Defaults to false. For more info see the ceph documentation . name : The name of Ceph pools is based on the metadata.name of the CephBlockPool CR. Some built-in Ceph pools require names that are incompatible with K8s resource names. These special pools can be configured by setting this name to override the name of the Ceph pool that is created instead of using the metadata.name for the pool. Only the following pool names are supported: device_health_metrics , .nfs , and .mgr . See the example builtin mgr pool . parameters : Sets any parameters listed to the given pool target_size_ratio: gives a hint (%) to Ceph in terms of expected consumption of the total cluster capacity of a given pool, for more info see the ceph documentation compression_mode : Sets up the pool for inline compression when using a Bluestore OSD. If left unspecified, it does not set up any compression mode for the pool. Values supported are the same as Bluestore inline compression modes , such as none , passive , aggressive , and force . mirroring : Sets up mirroring of the pool enabled : whether mirroring is enabled on that pool (default: false) mode : mirroring mode to run, possible values are \"pool\" or \"image\" (required). Refer to the mirroring modes Ceph documentation for more details. snapshotSchedules : schedule(s) snapshot at the pool level. One or more schedules are supported. interval : frequency of the snapshots. The interval can be specified in days, hours, or minutes using d, h, m suffix respectively. 
startTime : optional, determines at what time the snapshot process starts, specified using the ISO 8601 time format. peers : to configure mirroring peers. See the prerequisite RBD Mirror documentation first. secretNames : a list of peers to connect to. Currently only a single peer is supported where a peer represents a Ceph cluster. statusCheck : Sets up pool mirroring status mirror : displays the mirroring status disabled : whether to enable or disable pool mirroring status interval : time interval to refresh the mirroring status (default 60s) quotas : Set byte and object quotas. See the ceph documentation for more info. maxSize : quota in bytes as a string with quantity suffixes (e.g. \"10Gi\") maxObjects : quota in objects as an integer Note A value of 0 disables the quota.","title":"Spec"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#add-specific-pool-properties","text":"With the parameters field you can set any pool property: 1 2 3 spec : parameters : <property> : <value> For instance: 1 2 3 spec : parameters : min_size : 1","title":"Add specific pool properties"},{"location":"CRDs/Block-Storage/ceph-block-pool-crd/#erasure-coding","text":"Erasure coding allows you to keep your data safe while reducing the storage overhead. Instead of creating multiple replicas of the data, erasure coding divides the original data into chunks of equal size, then generates extra chunks of that same size for redundancy. For example, if you have an object of size 2MB, the simplest erasure coding with two data chunks would divide the object into two chunks of size 1MB each (data chunks). One more chunk (coding chunk) of size 1MB will be generated. In total, 3MB will be stored in the cluster. The object will be able to suffer the loss of any one of the chunks and still be able to reconstruct the original object. The number of data and coding chunks you choose will depend on your resiliency to loss and how much storage overhead is acceptable in your storage cluster. Here are some examples to illustrate how the number of chunks affects the storage and loss toleration. Data chunks (k) Coding chunks (m) Total storage Losses Tolerated OSDs required 2 1 1.5x 1 3 2 2 2x 2 4 4 2 1.5x 2 6 16 4 1.25x 4 20 The failureDomain must also be taken into account when determining the number of chunks. The failure domain determines the level in the Ceph CRUSH hierarchy where the chunks must be uniquely distributed. This decision will impact whether node losses or disk losses are tolerated. There could also be performance differences when placing the data across nodes or OSDs. host : All chunks will be placed on unique hosts osd : All chunks will be placed on unique OSDs If you do not have a sufficient number of hosts or OSDs for unique placement, the pool can still be created, but writing to the pool will hang. Rook currently only configures two levels in the CRUSH map. It is also possible to configure other levels such as rack by adding topology labels to the nodes.","title":"Erasure Coding"},{"location":"CRDs/Block-Storage/ceph-block-pool-rados-namespace-crd/","text":"This guide assumes you have created a Rook cluster as explained in the main Quickstart guide RADOS currently uses pools both for data distribution (pools are sharded into PGs, which map to OSDs) and as the granularity for security (capabilities can restrict access by pool). Overloading pools for both purposes makes it hard to do multi-tenancy because it is not a good idea to have a very large number of pools. A namespace would be a division of a pool into separate logical namespaces. 
For more information about BlockPool and namespace refer to the Ceph docs Having multiple namespaces in a pool would allow multiple Kubernetes clusters to share one unique ceph cluster without creating a pool per kubernetes cluster and it will also allow to have tenant isolation between multiple tenants in a single Kubernetes cluster without creating multiple pools for tenants. Rook allows creation of Ceph BlockPool RadosNamespaces through the custom resource definitions (CRDs). Example \u00b6 To get you started, here is a simple example of a CR to create a CephBlockPoolRadosNamespace on the CephBlockPool \"replicapool\". 1 2 3 4 5 6 7 8 apiVersion : ceph.rook.io/v1 kind : CephBlockPoolRadosNamespace metadata : name : namespace-a namespace : rook-ceph # namespace:cluster spec : # The name of the CephBlockPool CR where the namespace is created. blockPoolName : replicapool Settings \u00b6 If any setting is unspecified, a suitable default will be used automatically. Metadata \u00b6 name : The name that will be used for the Ceph BlockPool rados namespace. Spec \u00b6 blockPoolName : The metadata name of the CephBlockPool CR where the rados namespace will be created.","title":"CephBlockPoolRados Namespace CRD"},{"location":"CRDs/Block-Storage/ceph-block-pool-rados-namespace-crd/#example","text":"To get you started, here is a simple example of a CR to create a CephBlockPoolRadosNamespace on the CephBlockPool \"replicapool\". 1 2 3 4 5 6 7 8 apiVersion : ceph.rook.io/v1 kind : CephBlockPoolRadosNamespace metadata : name : namespace-a namespace : rook-ceph # namespace:cluster spec : # The name of the CephBlockPool CR where the namespace is created. blockPoolName : replicapool","title":"Example"},{"location":"CRDs/Block-Storage/ceph-block-pool-rados-namespace-crd/#settings","text":"If any setting is unspecified, a suitable default will be used automatically.","title":"Settings"},{"location":"CRDs/Block-Storage/ceph-block-pool-rados-namespace-crd/#metadata","text":"name : The name that will be used for the Ceph BlockPool rados namespace.","title":"Metadata"},{"location":"CRDs/Block-Storage/ceph-block-pool-rados-namespace-crd/#spec","text":"blockPoolName : The metadata name of the CephBlockPool CR where the rados namespace will be created.","title":"Spec"},{"location":"CRDs/Block-Storage/ceph-rbd-mirror-crd/","text":"Rook allows creation and updating rbd-mirror daemon(s) through the custom resource definitions (CRDs). RBD images can be asynchronously mirrored between two Ceph clusters. For more information about user management and capabilities see the Ceph docs . Creating daemons \u00b6 To get you started, here is a simple example of a CRD to deploy an rbd-mirror daemon. 1 2 3 4 5 6 7 apiVersion : ceph.rook.io/v1 kind : CephRBDMirror metadata : name : my-rbd-mirror namespace : rook-ceph spec : count : 1 Prerequisites \u00b6 This guide assumes you have created a Rook cluster as explained in the main Quickstart guide Settings \u00b6 If any setting is unspecified, a suitable default will be used automatically. RBDMirror metadata \u00b6 name : The name that will be used for the Ceph RBD Mirror daemon. namespace : The Kubernetes namespace that will be created for the Rook cluster. The services, pods, and other resources created by the operator will be added to this namespace. RBDMirror Settings \u00b6 count : The number of rbd mirror instance to run. 
placement : The rbd mirror pods can be given standard Kubernetes placement restrictions with nodeAffinity , tolerations , podAffinity , and podAntiAffinity similar to placement defined for daemons configured by the cluster CRD .. annotations : Key value pair list of annotations to add. labels : Key value pair list of labels to add. resources : The resource requirements for the rbd mirror pods. priorityClassName : The priority class to set on the rbd mirror pods. Configuring mirroring peers \u00b6 Configure mirroring peers individually for each CephBlockPool. Refer to the CephBlockPool documentation for more detail.","title":"CephRBDMirror CRD"},{"location":"CRDs/Block-Storage/ceph-rbd-mirror-crd/#creating-daemons","text":"To get you started, here is a simple example of a CRD to deploy an rbd-mirror daemon. 1 2 3 4 5 6 7 apiVersion : ceph.rook.io/v1 kind : CephRBDMirror metadata : name : my-rbd-mirror namespace : rook-ceph spec : count : 1","title":"Creating daemons"},{"location":"CRDs/Block-Storage/ceph-rbd-mirror-crd/#prerequisites","text":"This guide assumes you have created a Rook cluster as explained in the main Quickstart guide","title":"Prerequisites"},{"location":"CRDs/Block-Storage/ceph-rbd-mirror-crd/#settings","text":"If any setting is unspecified, a suitable default will be used automatically.","title":"Settings"},{"location":"CRDs/Block-Storage/ceph-rbd-mirror-crd/#rbdmirror-metadata","text":"name : The name that will be used for the Ceph RBD Mirror daemon. namespace : The Kubernetes namespace that will be created for the Rook cluster. The services, pods, and other resources created by the operator will be added to this namespace.","title":"RBDMirror metadata"},{"location":"CRDs/Block-Storage/ceph-rbd-mirror-crd/#rbdmirror-settings","text":"count : The number of rbd mirror instance to run. placement : The rbd mirror pods can be given standard Kubernetes placement restrictions with nodeAffinity , tolerations , podAffinity , and podAntiAffinity similar to placement defined for daemons configured by the cluster CRD .. annotations : Key value pair list of annotations to add. labels : Key value pair list of labels to add. resources : The resource requirements for the rbd mirror pods. priorityClassName : The priority class to set on the rbd mirror pods.","title":"RBDMirror Settings"},{"location":"CRDs/Block-Storage/ceph-rbd-mirror-crd/#configuring-mirroring-peers","text":"Configure mirroring peers individually for each CephBlockPool. Refer to the CephBlockPool documentation for more detail.","title":"Configuring mirroring peers"},{"location":"CRDs/Cluster/ceph-cluster-crd/","text":"Rook allows creation and customization of storage clusters through the custom resource definitions (CRDs). There are primarily four different modes in which to create your cluster. Host Storage Cluster : Consume storage from host paths and raw devices PVC Storage Cluster : Dynamically provision storage underneath Rook by specifying the storage class Rook should use to consume storage (via PVCs) Stretched Storage Cluster : Distribute Ceph mons across three zones, while storage (OSDs) is only configured in two zones External Ceph Cluster : Connect your K8s applications to an external Ceph cluster See the separate topics for a description and examples of each of these scenarios. Settings \u00b6 Settings can be specified at the global level to apply to the cluster as a whole, while other settings can be specified at more fine-grained levels. 
If any setting is unspecified, a suitable default will be used automatically. Cluster metadata \u00b6 name : The name that will be used internally for the Ceph cluster. Most commonly the name is the same as the namespace since multiple clusters are not supported in the same namespace. namespace : The Kubernetes namespace that will be created for the Rook cluster. The services, pods, and other resources created by the operator will be added to this namespace. The common scenario is to create a single Rook cluster. If multiple clusters are created, they must not have conflicting devices or host paths. Cluster Settings \u00b6 external : enable : if true , the cluster will not be managed by Rook but via an external entity. This mode is intended to connect to an existing cluster. In this case, Rook will only consume the external cluster. However, Rook will be able to deploy various daemons in Kubernetes such as object gateways, mds and nfs if an image is provided and will refuse otherwise. If this setting is enabled all the other options will be ignored except cephVersion.image and dataDirHostPath . See external cluster configuration . If cephVersion.image is left blank, Rook will refuse the creation of extra CRs like object, file and nfs. cephVersion : The version information for launching the ceph daemons. image : The image used for running the ceph daemons. For example, quay.io/ceph/ceph:v16.2.11 or v17.2.6 . For more details read the container images section . For the latest ceph images, see the Ceph DockerHub . To ensure a consistent version of the image is running across all nodes in the cluster, it is recommended to use a very specific image version. Tags also exist that would give the latest version, but they are only recommended for test environments. For example, the tag v17 will be updated each time a new Quincy build is released. Using the v17 tag is not recommended in production because it may lead to inconsistent versions of the image running across different nodes in the cluster. allowUnsupported : If true , allow an unsupported major version of the Ceph release. Currently pacific and quincy are supported. Future versions such as reef (v18) would require this to be set to true . Should be set to false in production. imagePullPolicy : The image pull policy for the ceph daemon pods. Possible values are Always , IfNotPresent , and Never . The default is IfNotPresent . dataDirHostPath : The path on the host ( hostPath ) where config and data should be stored for each of the services. If the directory does not exist, it will be created. Because this directory persists on the host, it will remain after pods are deleted. Following paths and any of their subpaths must not be used : /etc/ceph , /rook or /var/log/ceph . WARNING : For test scenarios, if you delete a cluster and start a new cluster on the same hosts, the path used by dataDirHostPath must be deleted. Otherwise, stale keys and other config will remain from the previous cluster and the new mons will fail to start. If this value is empty, each pod will get an ephemeral directory to store their config files that is tied to the lifetime of the pod running on that node. More details can be found in the Kubernetes empty dir docs . skipUpgradeChecks : if set to true Rook won't perform any upgrade checks on Ceph daemons during an upgrade. Use this at YOUR OWN RISK , only if you know what you're doing. To understand Rook's upgrade process of Ceph, read the upgrade doc . 
continueUpgradeAfterChecksEvenIfNotHealthy : if set to true Rook will continue the OSD daemon upgrade process even if the PGs are not clean, or continue with the MDS upgrade even if the file system is not healthy. dashboard : Settings for the Ceph dashboard. To view the dashboard in your browser see the dashboard guide . enabled : Whether to enable the dashboard to view cluster status urlPrefix : Allows serving the dashboard under a subpath (useful when you are accessing the dashboard via a reverse proxy) port : Allows changing the default port where the dashboard is served ssl : Whether to serve the dashboard via SSL, ignored on Ceph versions older than 13.2.2 monitoring : Settings for monitoring Ceph using Prometheus. To enable monitoring on your cluster see the monitoring guide . enabled : Whether to enable the prometheus service monitor for an internal cluster. For an external cluster, whether to create an endpoint port for the metrics. Default is false. metricsDisabled : Whether to disable the metrics reported by Ceph. If false, the prometheus mgr module and Ceph exporter are enabled. If true, the prometheus mgr module and Ceph exporter are both disabled. Default is false. externalMgrEndpoints : external cluster manager endpoints externalMgrPrometheusPort : external prometheus manager module port. See external cluster configuration for more details. port : The internal prometheus manager module port where the prometheus mgr module listens. The port may need to be configured when host networking is enabled. interval : The interval for the prometheus module to scrape targets. network : For the network settings for the cluster, refer to the network configuration settings mon : contains mon related options mon settings For more details on the mons and when to choose a number other than 3 , see the mon health doc . mgr : manager top level section count : set the number of ceph managers, between 1 and 2 . The default value is 2. If there are two managers, it is important that all mgr services point to the active mgr and not the standby mgr. Rook automatically updates the label mgr_role on the mgr pods to be either active or standby . Therefore, services only need to add the label mgr_role=active to their selector to point to the active mgr. This applies to all services that rely on the ceph mgr such as the dashboard or the prometheus metrics collector. modules : the list of Ceph manager modules to enable crashCollector : The settings for crash collector daemon(s). disable : if set to true , the crash collector will not run on any node where a Ceph daemon runs daysToRetain : specifies the number of days to keep crash entries in the Ceph cluster. By default the entries are kept indefinitely. logCollector : The settings for the log collector daemon. enabled : if set to true , the log collector will run as a side-car next to each Ceph daemon. The Ceph configuration option log_to_file will be turned on, meaning Ceph daemons will log to files in addition to still logging to the container's stdout. These logs will be rotated. In case a daemon terminates with a segfault, the coredump files will commonly be generated in the /var/lib/systemd/coredump directory on the host, depending on the underlying OS location. (default: true ) periodicity : how often to rotate the daemon's log. (default: 24h). Specified with a time suffix which may be h for hours or d for days. Rotating too often will slightly impact the daemon's performance since the signal briefly interrupts the program.
annotations : annotations configuration settings labels : labels configuration settings placement : placement configuration settings resources : resources configuration settings priorityClassNames : priority class names configuration settings storage : Storage selection and configuration that will be used across the cluster. Note that these settings can be overridden for specific nodes. useAllNodes : true or false , indicating if all nodes in the cluster should be used for storage according to the cluster level storage selection and configuration values. If individual nodes are specified under the nodes field, then useAllNodes must be set to false . nodes : Names of individual nodes in the cluster that should have their storage included in accordance with either the cluster level configuration specified above or any node specific overrides described in the next section below. useAllNodes must be set to false to use specific nodes and their config. See node settings below. config : Config settings applied to all OSDs on the node unless overridden by devices . See the config settings below. storage selection settings Storage Class Device Sets onlyApplyOSDPlacement : Whether the placement specific for OSDs is merged with the all placement. If false , the OSD placement will be merged with the all placement. If true, the OSD placement will be applied and the all placement will be ignored. The placement for OSDs is computed from several different places depending on the type of OSD: For non-PVCs: placement.all and placement.osd For PVCs: placement.all and inside the storageClassDeviceSets from the placement or preparePlacement disruptionManagement : The section for configuring management of daemon disruptions managePodBudgets : if true , the operator will create and manage PodDisruptionBudgets for OSD, Mon, RGW, and MDS daemons. OSD PDBs are managed dynamically via the strategy outlined in the design . The operator will block eviction of OSDs by default and unblock them safely when drains are detected. osdMaintenanceTimeout : is a duration in minutes that determines how long an entire failureDomain like region/zone/host will be held in noout (in addition to the default DOWN/OUT interval) when it is draining. This is only relevant when managePodBudgets is true . The default value is 30 minutes. removeOSDsIfOutAndSafeToRemove : If true the operator will remove the OSDs that are down and whose data has been restored to other OSDs. In Ceph terms, the OSDs are out and safe-to-destroy when they are removed. cleanupPolicy : cleanup policy settings security : security page for key management configuration Ceph container images \u00b6 Official releases of Ceph Container images are available from Docker Hub . These are general purpose Ceph container with all necessary daemons and dependencies installed. TAG MEANING vRELNUM Latest release in this series (e.g., v17 = Quincy) vRELNUM.Y Latest stable release in this stable series (e.g., v17.2) vRELNUM.Y.Z A specific release (e.g., v17.2.6) vRELNUM.Y.Z-YYYYMMDD A specific build (e.g., v17.2.6-20230410) A specific will contain a specific release of Ceph as well as security fixes from the Operating System. Mon Settings \u00b6 count : Set the number of mons to be started. The number must be between 1 and 9 . The recommended value is most commonly 3 . For highest availability, an odd number of mons should be specified. For higher durability in case of mon loss, an even number can be specified although availability may be lower. 
To maintain quorum a majority of mons must be up. For example, if there are three mons, two must be up. If there are four mons, three must be up. If there are two mons, both must be up. If quorum is lost, see the disaster recovery guide to restore quorum from a single mon. allowMultiplePerNode : Whether to allow the placement of multiple mons on a single node. Default is false for production. Should only be set to true in test environments. volumeClaimTemplate : A PersistentVolumeSpec used by Rook to create PVCs for monitor storage. This field is optional, and when not provided, HostPath volume mounts are used. The current set of fields from template that are used are storageClassName and the storage resource request and limit. The default storage size request for new PVCs is 10Gi . Ensure that associated storage class is configured to use volumeBindingMode: WaitForFirstConsumer . This setting only applies to new monitors that are created when the requested number of monitors increases, or when a monitor fails and is recreated. An example CRD configuration is provided below . failureDomainLabel : The label that is expected on each node where the mons are expected to be deployed. The labels must be found in the list of well-known topology labels . zones : The failure domain names where the Mons are expected to be deployed. There must be at least three zones specified in the list. Each zone can be backed by a different storage class by specifying the volumeClaimTemplate . name : The name of the zone, which is the value of the domain label. volumeClaimTemplate : A PersistentVolumeSpec used by Rook to create PVCs for monitor storage. This field is optional, and when not provided, HostPath volume mounts are used. The current set of fields from template that are used are storageClassName and the storage resource request and limit. The default storage size request for new PVCs is 10Gi . Ensure that associated storage class is configured to use volumeBindingMode: WaitForFirstConsumer . This setting only applies to new monitors that are created when the requested number of monitors increases, or when a monitor fails and is recreated. An example CRD configuration is provided below . stretchCluster : The stretch cluster settings that define the zones (or other failure domain labels) across which to configure the cluster. failureDomainLabel : The label that is expected on each node where the cluster is expected to be deployed. The labels must be found in the list of well-known topology labels . subFailureDomain : With a zone, the data replicas must be spread across OSDs in the subFailureDomain. The default is host . zones : The failure domain names where the Mons and OSDs are expected to be deployed. There must be three zones specified in the list. This element is always named zone even if a non-default failureDomainLabel is specified. The elements have two values: name : The name of the zone, which is the value of the domain label. arbiter : Whether the zone is expected to be the arbiter zone which only runs a single mon. Exactly one zone must be labeled true . volumeClaimTemplate : A PersistentVolumeSpec used by Rook to create PVCs for monitor storage. This field is optional, and when not provided, HostPath volume mounts are used. The current set of fields from template that are used are storageClassName and the storage resource request and limit. The default storage size request for new PVCs is 10Gi . 
Ensure that associated storage class is configured to use volumeBindingMode: WaitForFirstConsumer . This setting only applies to new monitors that are created when the requested number of monitors increases, or when a monitor fails and is recreated. An example CRD configuration is provided below . The two zones that are not the arbiter zone are expected to have OSDs deployed. If these settings are changed in the CRD the operator will update the number of mons during a periodic check of the mon health, which by default is every 45 seconds. To change the defaults that the operator uses to determine the mon health and whether to failover a mon, refer to the health settings . The intervals should be small enough that you have confidence the mons will maintain quorum, while also being long enough to ignore network blips where mons are failed over too often. Mgr Settings \u00b6 You can use the cluster CR to enable or disable any manager module. This can be configured like so: 1 2 3 4 mgr : modules : - name :  enabled : true Some modules will have special configuration to ensure the module is fully functional after being enabled. Specifically: pg_autoscaler : Rook will configure all new pools with PG autoscaling by setting: osd_pool_default_pg_autoscale_mode = on Network Configuration Settings \u00b6 If not specified, the default SDN will be used. Configure the network that will be enabled for the cluster and services. provider : Specifies the network provider that will be used to connect the network interface. You can choose between host , and multus . selectors : List the network selector(s) that will be used associated by a key. ipFamily : Specifies the network stack Ceph daemons should listen on. dualStack : Specifies that Ceph daemon should listen on both IPv4 and IPv6 network stacks. connections : Settings for network connections using Ceph's msgr2 protocol requireMsgr2 : Whether to require communication over msgr2. If true, the msgr v1 port (6789) will be disabled and clients will be required to connect to the Ceph cluster with the v2 port (3300). Requires a kernel that supports msgr2 (kernel 5.11 or CentOS 8.4 or newer). Default is false. encryption : Settings for encryption on the wire to Ceph daemons enabled : Whether to encrypt the data in transit across the wire to prevent eavesdropping the data on the network. The default is false. When encryption is enabled, all communication between clients and Ceph daemons, or between Ceph daemons will be encrypted. When encryption is not enabled, clients still establish a strong initial authentication and data integrity is still validated with a crc check. IMPORTANT : Encryption requires the 5.11 kernel for the latest nbd and cephfs drivers. Alternatively for testing only, set \"mounter: rbd-nbd\" in the rbd storage class, or \"mounter: fuse\" in the cephfs storage class. The nbd and fuse drivers are not recommended in production since restarting the csi driver pod will disconnect the volumes. If this setting is enabled, CephFS volumes also require setting CSI_CEPHFS_KERNEL_MOUNT_OPTIONS to \"ms_mode=secure\" in operator.yaml. compression : enabled : Whether to compress the data in transit across the wire. The default is false. Requires Ceph Quincy (v17) or newer. Also see the kernel requirements above for encryption. Caution Changing networking configuration after a Ceph cluster has been deployed is NOT supported and will result in a non-functioning cluster. Host Networking \u00b6 To use host networking, set provider: host . 
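For example, a minimal sketch of the network section of the CephCluster spec with host networking enabled (all other cluster settings omitted for brevity):
network:
  provider: host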
If the host networking setting is changed in a cluster where mons are already running, the existing mons will remain running with the same network settings with which they were created. To complete the conversion to or from host networking after you update this setting, you will need to failover the mons in order to have mons on the desired network configuration. Multus \u00b6 Rook supports the addition of public and cluster networks for Ceph using Multus. The selector keys are required to be public and cluster , where each represents: public : client communications with the cluster (reads/writes) cluster : internal Ceph replication network If you want to learn more, please read: Ceph Networking reference . Multus documentation Based on the configuration, the operator will do the following: If only the public selector is specified, all communication will happen on that network. 1 2 3 4 network : provider : multus selectors : public : rook-ceph/rook-public-nw If only the cluster selector is specified, the internal cluster traffic* will happen on that network. All other traffic to mons, OSDs, and other daemons will be on the default network. 1 2 3 4 network : provider : multus selectors : cluster : rook-ceph/rook-cluster-nw If both public and cluster selectors are specified, the first one will carry all the communication and the second the internal cluster traffic*. 1 2 3 4 5 network : provider : multus selectors : public : rook-ceph/rook-public-nw cluster : rook-ceph/rook-cluster-nw * Internal cluster traffic includes OSD heartbeats, data replication, and data recovery. Only OSD pods will have both Public and Cluster networks attached. The rest of the Ceph component pods and CSI pods will only have the Public network attached. The Rook Ceph operator will not have any networks attached as it proxies the required commands via a sidecar container in the mgr pod. In order to work, each selector value must match a NetworkAttachmentDefinition object name in Multus. For the multus network provider, an already working cluster with Multus networking is required. The network attachment definition that will later be attached to the cluster needs to be created before the Cluster CRD. The network attachment definitions should use the whereabouts CNI. If Rook cannot find the provided network attachment definition, it will fail to run the Ceph OSD pods. You can add the Multus network attachment selection annotation selecting the created network attachment definition on selectors . A valid NetworkAttachmentDefinition will look like the following: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 apiVersion : \"k8s.cni.cncf.io/v1\" kind : NetworkAttachmentDefinition metadata : name : rook-public-nw spec : config : '{ \"cniVersion\": \"0.3.0\", \"name\": \"public-nad\", \"type\": \"macvlan\", \"master\": \"ens5\", \"mode\": \"bridge\", \"ipam\": { \"type\": \"whereabouts\", \"range\": \"192.168.1.0/24\" } }' Ensure that master matches the network interface of the host that you want to use. The IPAM type whereabouts is required because it makes sure that all the pods get a unique IP address from the multus network. The NetworkAttachmentDefinition should be referenced along with the namespace in which it is present like public: / . e.g., if the network attachment definitions are in the default namespace: 1 2 public : default/rook-public-nw cluster : default/rook-cluster-nw 1 2 * This format is required in order to use the NetworkAttachmentDefinition across namespaces.
* In Openshift, to use a NetworkAttachmentDefinition (NAD) across namespaces, the NAD must be deployed in the `default` namespace. The NAD is then referenced with the namespace: `default/rook-public-nw` Validating Multus configuration \u00b6 We highly recommend validating your Multus configuration before you install Rook. A tool exists to facilitate validating the Multus configuration. After installing the Rook operator and before installing any Custom Resources, run the tool from the operator pod. The tool's CLI is designed to be as helpful as possible. Get help text for the multus validation tool like so: 1 kubectl --namespace rook-ceph exec -it deploy/rook-ceph-operator -- rook multus validation run --help Then, update the args in the multus-validation job template. Minimally, add the NAD name(s) for public and/or cluster as needed and then create the job to validate the Multus configuration. If the tool fails, it will suggest what things may be preventing Multus networks from working properly, and it will request the logs and outputs that will help debug issues. Check the logs of the pod created by the job to know the status of the validation test. Known limitations with Multus \u00b6 Daemons leveraging Kubernetes service IPs (Monitors, Managers, Rados Gateways) are not listening on the NAD specified in the selectors . Instead, the daemon listens on the default network; however, the NAD is attached to the container, allowing the daemon to communicate with the rest of the cluster. There is work in progress to fix this issue in the multus-service repository. At the time of writing it's unclear when this will be supported. IPFamily \u00b6 Provide single-stack IPv4 or IPv6 protocol to assign corresponding addresses to pods and services. This field is optional. Possible inputs are IPv6 and IPv4. An empty value will be treated as IPv4. The Kubernetes version should be at least v1.13 to run IPv6. Dual-stack is supported as of Ceph Pacific. To turn on dual stack see the network configuration section . Node Settings \u00b6 In addition to the cluster level settings specified above, each individual node can also specify configuration to override the cluster level settings and defaults. If a node does not specify any configuration then it will inherit the cluster level settings. name : The name of the node, which should match its kubernetes.io/hostname label. config : Config settings applied to all OSDs on the node unless overridden by devices . See the config settings below. storage selection settings When useAllNodes is set to true , Rook attempts to make Ceph cluster management as hands-off as possible while still maintaining reasonable data safety. If a usable node comes online, Rook will begin to use it automatically. To maintain a balance between hands-off usability and data safety, nodes are removed from Ceph as OSD hosts only (1) if the node is deleted from Kubernetes itself or (2) if the node has its taints or affinities modified in such a way that the node is no longer usable by Rook. Any changes to taints or affinities, intentional or unintentional, may affect the data reliability of the Ceph cluster. In order to help protect against this somewhat, deletion of nodes by taint or affinity modifications must be \"confirmed\" by deleting the Rook Ceph operator pod and allowing the operator deployment to restart the pod.
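As a sketch only (the node and device names below are placeholders), explicitly listing nodes in the storage section might look like:
storage:
  useAllNodes: false
  nodes:
    - name: "k8s-node-1"   # must match the node's kubernetes.io/hostname label
    - name: "k8s-node-2"
      devices:
        - name: "sdb"      # placeholder: an individual device on this node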
For production clusters, we recommend that useAllNodes is set to false to prevent the Ceph cluster from suffering reduced data reliability unintentionally due to a user mistake. When useAllNodes is set to false , Rook relies on the user to be explicit about when nodes are added to or removed from the Ceph cluster. Nodes are only added to the Ceph cluster if the node is added to the Ceph cluster resource. Similarly, nodes are only removed if the node is removed from the Ceph cluster resource. Node Updates \u00b6 Nodes can be added and removed over time by updating the Cluster CRD, for example with kubectl -n rook-ceph edit cephcluster rook-ceph . This will bring up your default text editor and allow you to add and remove storage nodes from the cluster. This feature is only available when useAllNodes has been set to false . Storage Selection Settings \u00b6 Below are the settings for host-based cluster. This type of cluster can specify devices for OSDs, both at the cluster and individual node level, for selecting which storage resources will be included in the cluster. useAllDevices : true or false , indicating whether all devices found on nodes in the cluster should be automatically consumed by OSDs. Not recommended unless you have a very controlled environment where you will not risk formatting of devices with existing data. When true , all devices and partitions will be used. Is overridden by deviceFilter if specified. LVM logical volumes are not picked by useAllDevices . deviceFilter : A regular expression for short kernel names of devices (e.g. sda ) that allows selection of devices and partitions to be consumed by OSDs. LVM logical volumes are not picked by deviceFilter .If individual devices have been specified for a node then this filter will be ignored. This field uses golang regular expression syntax . For example: sdb : Only selects the sdb device if found ^sd. : Selects all devices starting with sd ^sd[a-d] : Selects devices starting with sda , sdb , sdc , and sdd if found ^s : Selects all devices that start with s ^[^r] : Selects all devices that do not start with r devicePathFilter : A regular expression for device paths (e.g. /dev/disk/by-path/pci-0:1:2:3-scsi-1 ) that allows selection of devices and partitions to be consumed by OSDs. LVM logical volumes are not picked by devicePathFilter .If individual devices or deviceFilter have been specified for a node then this filter will be ignored. This field uses golang regular expression syntax . For example: ^/dev/sd. : Selects all devices starting with sd ^/dev/disk/by-path/pci-.* : Selects all devices which are connected to PCI bus devices : A list of individual device names belonging to this node to include in the storage cluster. name : The name of the devices and partitions (e.g., sda ). The full udev path can also be specified for devices, partitions, and logical volumes (e.g. /dev/disk/by-id/ata-ST4000DM004-XXXX - this will not change after reboots). config : Device-specific config settings. See the config settings below Host-based cluster supports raw device, partition, and logical volume. Be sure to see the quickstart doc prerequisites for additional considerations. Below are the settings for a PVC-based cluster. storageClassDeviceSets : Explained in Storage Class Device Sets Storage Class Device Sets \u00b6 The following are the settings for Storage Class Device Sets which can be configured to create OSDs that are backed by block mode PVs. name : A name for the set. count : The number of devices in the set. 
resources : The CPU and RAM requests/limits for the devices. (Optional) placement : The placement criteria for the devices. (Optional) Default is no placement criteria. The syntax is the same as for other placement configuration . It supports nodeAffinity , podAffinity , podAntiAffinity , and tolerations keys. It is recommended to configure the placement such that the OSDs will be as evenly spread across nodes as possible. At a minimum, anti-affinity should be added so at least one OSD will be placed on each available node. However, if there are more OSDs than nodes, this anti-affinity will not be effective. Another placement scheme to consider is to add labels to the nodes in such a way that the OSDs can be grouped on those nodes, create multiple storageClassDeviceSets, and add node affinity to each of the device sets that will place the OSDs in those sets of nodes. Rook will automatically add the required nodeAffinity to the OSD daemons to match the topology labels that are found on the nodes where the OSD prepare jobs ran. To ensure data durability, the OSDs are required to run in the same topology that the Ceph CRUSH map expects. For example, if the nodes are labeled with rack topology labels, the OSDs will be constrained to a certain rack. Without the topology labels, Rook will not constrain the OSDs beyond what is required by the PVs, for example to run in the zone where provisioned. See the OSD Topology section for the related labels. preparePlacement : The placement criteria for the preparation of the OSD devices. Creating OSDs is a two-step process and the prepare job may require different placement than the OSD daemons. If the preparePlacement is not specified, the placement will instead be applied to both the OSD prepare jobs and the OSD deployments for consistent placement. The preparePlacement is only useful for portable OSDs in the device sets. OSDs that are not portable will be tied to the host where the OSD prepare job initially runs. For example, provisioning may require topology spread constraints across zones, but the OSD daemons may require constraints across hosts within the zones. portable : If true , the OSDs will be allowed to move between nodes during failover. This requires a storage class that supports portability (e.g. aws-ebs , but not the local storage provisioner). If false , the OSDs will be assigned to a node permanently. Rook will configure Ceph's CRUSH map to support the portability. tuneDeviceClass : For example, Ceph cannot detect AWS volumes as HDDs from the storage class \"gp2\", so you can improve Ceph performance by setting this to true. tuneFastDeviceClass : For example, Ceph cannot detect Azure disks as SSDs from the storage class \"managed-premium\", so you can improve Ceph performance by setting this to true. volumeClaimTemplates : A list of PVC templates to use for provisioning the underlying storage devices. resources.requests.storage : The desired capacity for the underlying storage devices. storageClassName : The StorageClass to provision PVCs from. The default is to use the cluster-default StorageClass. This StorageClass should provide a raw block device, multipath device, or logical volume. Other types are not supported. If you want to use a logical volume, please see the known issue of OSD on LV-backed PVC . volumeMode : The volume mode to be set for the PVC, which should be Block . accessModes : The access mode for the PVC to be bound by the OSD. schedulerName : Scheduler name for OSD pod placement.
(Optional) encrypted : whether to encrypt all the OSDs in a given storageClassDeviceSet OSD Configuration Settings \u00b6 The following storage selection settings are specific to Ceph and do not apply to other backends. All variables are key-value pairs represented as strings. metadataDevice : Name of a device or lvm to use for the metadata of OSDs on each node. Performance can be improved by using a low latency device (such as SSD or NVMe) as the metadata device, while other spinning platter (HDD) devices on a node are used to store data. Provisioning will fail if the user specifies a metadataDevice but that device is not used as a metadata device by Ceph. Notably, ceph-volume will not use a device of the same device class (HDD, SSD, NVMe) as OSD devices for metadata, resulting in this failure. databaseSizeMB : The size in MB of a bluestore database. Include quotes around the size. walSizeMB : The size in MB of a bluestore write ahead log (WAL). Include quotes around the size. deviceClass : The CRUSH device class to use for this selection of storage devices. (By default, if a device's class has not already been set, OSDs will automatically set a device's class to either hdd , ssd , or nvme based on the hardware properties exposed by the Linux kernel.) These storage classes can then be used to select the devices backing a storage pool by specifying them as the value of the pool spec's deviceClass field . initialWeight : The initial OSD weight in TiB units. By default, this value is derived from OSD's capacity. primaryAffinity : The primary-affinity value of an OSD, within range [0, 1] (default: 1 ). osdsPerDevice **: The number of OSDs to create on each device. High performance devices such as NVMe can handle running multiple OSDs. If desired, this can be overridden for each node and each device. encryptedDevice **: Encrypt OSD volumes using dmcrypt (\"true\" or \"false\"). By default this option is disabled. See encryption for more information on encryption in Ceph. crushRoot : The value of the root CRUSH map label. The default is default . Generally, you should not need to change this. However, if any of your topology labels may have the value default , you need to change crushRoot to avoid conflicts, since CRUSH map values need to be unique. Annotations and Labels \u00b6 Annotations and Labels can be specified so that the Rook components will have those annotations / labels added to them. You can set annotations / labels for Rook components for the list of key value pairs: all : Set annotations / labels for all components except clusterMetadata . mgr : Set annotations / labels for MGRs mon : Set annotations / labels for mons osd : Set annotations / labels for OSDs prepareosd : Set annotations / labels for OSD Prepare Jobs monitoring : Set annotations / labels for service monitor crashcollector : Set annotations / labels for crash collectors clusterMetadata : Set annotations only to rook-ceph-mon-endpoints configmap and the rook-ceph-mon and rook-ceph-admin-keyring secrets. These annotations will not be merged with the all annotations. The common usage is for backing up these critical resources with kubed . Note the clusterMetadata annotation will not be merged with the all annotation. When other keys are set, all will be merged together with the specific component. Placement Configuration Settings \u00b6 Placement configuration for the cluster services. It includes the following keys: mgr , mon , arbiter , osd , prepareosd , cleanup , and all . 
Each service will have its placement configuration generated by merging the generic configuration under all with the most specific one (which will override any attributes). In stretch clusters, if the arbiter placement is specified, that placement will only be applied to the arbiter. Neither will the arbiter placement be merged with the all placement to allow the arbiter to be fully independent of other daemon placement. The remaining mons will still use the mon and/or all sections. Note Placement of OSD pods is controlled using the Storage Class Device Set , not the general placement configuration. A Placement configuration is specified (according to the kubernetes PodSpec) as: nodeAffinity : kubernetes NodeAffinity podAffinity : kubernetes PodAffinity podAntiAffinity : kubernetes PodAntiAffinity tolerations : list of kubernetes Toleration topologySpreadConstraints : kubernetes TopologySpreadConstraints If you use labelSelector for osd pods, you must write two rules both for rook-ceph-osd and rook-ceph-osd-prepare like the example configuration . It comes from the design that there are these two pods for an OSD. For more detail, see the osd design doc and the related issue . The Rook Ceph operator creates a Job called rook-ceph-detect-version to detect the full Ceph version used by the given cephVersion.image . The placement from the mon section is used for the Job except for the PodAntiAffinity field. Placement Example \u00b6 To control where various services will be scheduled by kubernetes, use the placement configuration sections below. The example under 'all' would have all services scheduled on kubernetes nodes labeled with 'role=storage-node . Specific node affinity and tolerations that only apply to the mon daemons in this example require the label role=storage-mon-node` and also tolerate the control plane taint. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false # enable the ceph dashboard for viewing cluster status dashboard : enabled : true placement : all : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : role operator : In values : - storage-node mon : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : role operator : In values : - storage-mon-node tolerations : - effect : NoSchedule key : node-role.kubernetes.io/control-plane operator : Exists Cluster-wide Resources Configuration Settings \u00b6 Resources should be specified so that the Rook components are handled after Kubernetes Pod Quality of Service classes . This allows to keep Rook components running when for example a node runs out of memory and the Rook components are not killed depending on their Quality of Service class. You can set resource requests/limits for Rook components through the Resource Requirements/Limits structure in the following keys: mon : Set resource requests/limits for mons osd : Set resource requests/limits for OSDs. This key applies for all OSDs regardless of their device classes. In case of need to apply resource requests/limits for OSDs with particular device class use specific osd keys below. 
If the memory resource is declared, Rook will automatically set the OSD configuration osd_memory_target to the same value. This aims to ensure that the actual OSD memory consumption is consistent with the OSD pods' resource declaration. osd- : Set resource requests/limits for OSDs on a specific device class. Rook will automatically detect hdd , ssd , or nvme device classes. Custom device classes can also be set. mgr : Set resource requests/limits for MGRs mgr-sidecar : Set resource requests/limits for the MGR sidecar, which is only created when mgr.count: 2 . The sidecar requires very few resources since it only executes every 15 seconds to query Ceph for the active mgr and update the mgr services if the active mgr changed. prepareosd : Set resource requests/limits for the OSD prepare job crashcollector : Set resource requests/limits for the crash collector. This pod runs wherever there is a Ceph pod running. It scrapes for Ceph daemon core dumps and sends them to the Ceph manager crash module so that core dumps are centralized and can be easily listed/accessed. You can read more about the Ceph Crash module . logcollector : Set resource requests/limits for the log collector. When enabled, this container runs as a side-car to each Ceph daemon. cleanup : Set resource requests/limits for the cleanup job, responsible for wiping the cluster's data after uninstall exporter : Set resource requests/limits for the Ceph exporter. In order to provide the best possible experience running Ceph in containers, Rook internally recommends minimum memory limits if resource limits are passed. If a user configures a limit or request value that is too low, Rook will still run the pod(s) and print a warning to the operator log. mon : 1024MB mgr : 512MB osd : 2048MB crashcollector : 60MB mgr-sidecar : 100MB limit, 40MB requests prepareosd : no limits (see the note) exporter : 128MB limit, 50MB requests Note We recommend not setting memory limits on the OSD prepare job to prevent OSD provisioning failure due to memory constraints. The OSD prepare job bursts memory usage during the OSD provisioning depending on the size of the device, typically 1-2Gi for large disks. The OSD prepare job only bursts a single time per OSD. All future runs of the OSD prepare job will detect the OSD is already provisioned and skip the provisioning. Hint The resources for MDS daemons are not configured in the Cluster. Refer to the Ceph Filesystem CRD instead. Resource Requirements/Limits \u00b6 For more information on resource requests/limits see the official Kubernetes documentation: Kubernetes - Managing Compute Resources for Containers requests : Requests for cpu or memory. cpu : Request for CPU (example: one CPU core 1 , 50% of one CPU core 500m ). memory : Request for Memory (example: one gigabyte of memory 1Gi , half a gigabyte of memory 512Mi ). limits : Limits for cpu or memory. cpu : Limit for CPU (example: one CPU core 1 , 50% of one CPU core 500m ). memory : Limit for Memory (example: one gigabyte of memory 1Gi , half a gigabyte of memory 512Mi ). Warning Before setting resource requests/limits, please take a look at the Ceph documentation for recommendations for each component: Ceph - Hardware Recommendations . Node Specific Resources for OSDs \u00b6 This example shows that you can override these requests/limits for OSDs per node when using useAllNodes: false in the node item in the nodes list.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false storage : useAllNodes : false nodes : - name : \"172.17.4.201\" resources : limits : cpu : \"2\" memory : \"4096Mi\" requests : cpu : \"2\" memory : \"4096Mi\" Priority Class Names \u00b6 Priority class names can be specified so that the Rook components will have those priority class names added to them. You can set priority class names for Rook components for the list of key value pairs: all : Set priority class names for MGRs, Mons, OSDs, and crashcollectors. mgr : Set priority class names for MGRs. Examples default to system-cluster-critical. mon : Set priority class names for Mons. Examples default to system-node-critical. osd : Set priority class names for OSDs. Examples default to system-node-critical. crashcollector : Set priority class names for crashcollectors. The specific component keys will act as overrides to all . Health settings \u00b6 The Rook Ceph operator will monitor the state of the CephCluster on various components by default. The following CRD settings are available: healthCheck : main ceph cluster health monitoring section Currently three health checks are implemented: mon : health check on the ceph monitors, basically check whether monitors are members of the quorum. If after a certain timeout a given monitor has not joined the quorum back it will be failed over and replace by a new monitor. osd : health check on the ceph osds status : ceph health status check, periodically check the Ceph health state and reflects it in the CephCluster CR status field. The liveness probe and startup probe of each daemon can also be controlled via livenessProbe and startupProbe respectively. The settings are valid for mon , mgr and osd . Here is a complete example for both daemonHealth , livenessProbe , and startupProbe : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 healthCheck : daemonHealth : mon : disabled : false interval : 45s timeout : 600s osd : disabled : false interval : 60s status : disabled : false livenessProbe : mon : disabled : false mgr : disabled : false osd : disabled : false startupProbe : mon : disabled : false mgr : disabled : false osd : disabled : false The probe's timing values and thresholds (but not the probe itself) can also be overridden. For more info, refer to the Kubernetes documentation . For example, you could change the mgr probe by applying: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 healthCheck : startupProbe : mgr : disabled : false probe : initialDelaySeconds : 3 periodSeconds : 3 failureThreshold : 30 livenessProbe : mgr : disabled : false probe : initialDelaySeconds : 3 periodSeconds : 3 Changing the liveness probe is an advanced operation and should rarely be necessary. If you want to change these settings then modify the desired settings. Status \u00b6 The operator is regularly configuring and checking the health of the cluster. The results of the configuration and health checks can be seen in the status section of the CephCluster CR. 1 kubectl -n rook-ceph get CephCluster -o yaml 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 [ ... 
] status : ceph : health : HEALTH_OK lastChecked : \"2021-03-02T21:22:11Z\" capacity : bytesAvailable : 22530293760 bytesTotal : 25757220864 bytesUsed : 3226927104 lastUpdated : \"2021-03-02T21:22:11Z\" message : Cluster created successfully phase : Ready state : Created storage : deviceClasses : - name : hdd version : image : quay.io/ceph/ceph:v17.2.6 version : 17.2.6-0 conditions : - lastHeartbeatTime : \"2021-03-02T21:22:11Z\" lastTransitionTime : \"2021-03-02T21:21:09Z\" message : Cluster created successfully reason : ClusterCreated status : \"True\" type : Ready Ceph Status \u00b6 Ceph is constantly monitoring the health of the data plane and reporting back if there are any warnings or errors. If everything is healthy from Ceph's perspective, you will see HEALTH_OK . If Ceph reports any warnings or errors, the details will be printed to the status. If further troubleshooting is needed to resolve these issues, the toolbox will likely be needed where you can run ceph commands to find more details. The capacity of the cluster is reported, including bytes available, total, and used. The available space will be less than you may expect due to overhead in the OSDs. Conditions \u00b6 The conditions represent the status of the Rook operator. If the cluster is fully configured and the operator is stable, the Ready condition is raised with the ClusterCreated reason and no other conditions. The cluster will remain in the Ready condition after the first successful configuration since it is expected the storage is consumable from this point on. If there are issues preventing the storage layer from working, they are expected to show as Ceph health errors. If the cluster is externally connected successfully, the Ready condition will have the reason ClusterConnected . If the operator is currently being configured or the operator is checking for updates, there will be a Progressing condition. If there was a failure, the condition(s) status will be false and the message will give a summary of the error. See the operator log for more details. Other Status \u00b6 There are several other properties for the overall status including: message , phase , and state : A summary of the overall current state of the cluster, which is somewhat duplicated from the conditions for backward compatibility. storage.deviceClasses : The names of the types of storage devices that Ceph discovered in the cluster. These types will be ssd or hdd unless they have been overridden with the crushDeviceClass in the storageClassDeviceSets . version : The version of the Ceph image currently deployed. OSD Topology \u00b6 The topology of the cluster is important in production environments where you want your data spread across failure domains. The topology can be controlled by adding labels to the nodes. When the labels are found on a node at first OSD deployment, Rook will add them to the desired level in the CRUSH map .
The complete list of labels in hierarchy order from highest to lowest is: 1 2 3 4 5 6 7 8 9 topology.kubernetes.io/region topology.kubernetes.io/zone topology.rook.io/datacenter topology.rook.io/room topology.rook.io/pod topology.rook.io/pdu topology.rook.io/row topology.rook.io/rack topology.rook.io/chassis For example, if the following labels were added to a node: 1 2 kubectl label node mynode topology.kubernetes.io/zone=zone1 kubectl label node mynode topology.rook.io/rack=zone1-rack1 These labels would result in the following hierarchy for OSDs on that node (this command can be run in the Rook toolbox): 1 2 3 4 5 6 7 8 $ ceph osd tree ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF -1 0.01358 root default -5 0.01358 zone zone1 -4 0.01358 rack rack1 -3 0.01358 host mynode 0 hdd 0.00679 osd.0 up 1.00000 1.00000 1 hdd 0.00679 osd.1 up 1.00000 1.00000 Ceph requires unique names at every level in the hierarchy (CRUSH map). For example, you cannot have two racks with the same name that are in different zones. Racks in different zones must be named uniquely. Note that the host is added automatically to the hierarchy by Rook. The host cannot be specified with a topology label. All topology labels are optional. Hint When setting the node labels prior to CephCluster creation, these settings take immediate effect. However, applying this to an already deployed CephCluster requires removing each node from the cluster first and then re-adding it with new configuration to take effect. Do this node by node to keep your data safe! Check the result with ceph osd tree from the Rook Toolbox . The OSD tree should display the hierarchy for the nodes that already have been re-added. To utilize the failureDomain based on the node labels, specify the corresponding option in the CephBlockPool 1 2 3 4 5 6 7 8 9 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : failureDomain : rack # this matches the topology labels on nodes replicated : size : 3 This configuration will split the replication of volumes across unique racks in the data center setup. Deleting a CephCluster \u00b6 During deletion of a CephCluster resource, Rook protects against accidental or premature destruction of user data by blocking deletion if there are any other Rook Ceph Custom Resources that reference the CephCluster being deleted. Rook will warn about which other resources are blocking deletion in three ways until all blocking resources are deleted: An event will be registered on the CephCluster resource A status condition will be added to the CephCluster resource An error will be added to the Rook Ceph operator log Cleanup policy \u00b6 Rook has the ability to cleanup resources and data that were deployed when a CephCluster is removed. The policy settings indicate which data should be forcibly deleted and in what way the data should be wiped. The cleanupPolicy has several fields: confirmation : Only an empty string and yes-really-destroy-data are valid values for this field. If this setting is empty, the cleanupPolicy settings will be ignored and Rook will not cleanup any resources during cluster removal. To reinstall the cluster, the admin would then be required to follow the cleanup guide to delete the data on hosts. If this setting is yes-really-destroy-data , the operator will automatically delete the data on hosts. 
Because this cleanup policy is destructive, after the confirmation is set to yes-really-destroy-data Rook will stop configuring the cluster as if the cluster is about to be destroyed. sanitizeDisks : sanitizeDisks represents advanced settings that can be used to delete data on drives. method : indicates if the entire disk should be sanitized or simply ceph's metadata. Possible choices are quick (default) or complete dataSource : indicate where to get random bytes from to write on the disk. Possible choices are zero (default) or random . Using random sources will consume entropy from the system and will take much more time then the zero source iteration : overwrite N times instead of the default (1). Takes an integer value allowUninstallWithVolumes : If set to true, then the cephCluster deletion doesn't wait for the PVCs to be deleted. Default is false . To automate activation of the cleanup, you can use the following command. WARNING: DATA WILL BE PERMANENTLY DELETED : 1 kubectl -n rook-ceph patch cephcluster rook-ceph --type merge -p '{\"spec\":{\"cleanupPolicy\":{\"confirmation\":\"yes-really-destroy-data\"}}}' Nothing will happen until the deletion of the CR is requested, so this can still be reverted. However, all new configuration by the operator will be blocked with this cleanup policy enabled. Rook waits for the deletion of PVs provisioned using the cephCluster before proceeding to delete the cephCluster. To force deletion of the cephCluster without waiting for the PVs to be deleted, you can set the allowUninstallWithVolumes to true under spec.CleanupPolicy .","title":"CephCluster CRD"},{"location":"CRDs/Cluster/ceph-cluster-crd/#settings","text":"Settings can be specified at the global level to apply to the cluster as a whole, while other settings can be specified at more fine-grained levels. If any setting is unspecified, a suitable default will be used automatically.","title":"Settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#cluster-metadata","text":"name : The name that will be used internally for the Ceph cluster. Most commonly the name is the same as the namespace since multiple clusters are not supported in the same namespace. namespace : The Kubernetes namespace that will be created for the Rook cluster. The services, pods, and other resources created by the operator will be added to this namespace. The common scenario is to create a single Rook cluster. If multiple clusters are created, they must not have conflicting devices or host paths.","title":"Cluster metadata"},{"location":"CRDs/Cluster/ceph-cluster-crd/#cluster-settings","text":"external : enable : if true , the cluster will not be managed by Rook but via an external entity. This mode is intended to connect to an existing cluster. In this case, Rook will only consume the external cluster. However, Rook will be able to deploy various daemons in Kubernetes such as object gateways, mds and nfs if an image is provided and will refuse otherwise. If this setting is enabled all the other options will be ignored except cephVersion.image and dataDirHostPath . See external cluster configuration . If cephVersion.image is left blank, Rook will refuse the creation of extra CRs like object, file and nfs. cephVersion : The version information for launching the ceph daemons. image : The image used for running the ceph daemons. For example, quay.io/ceph/ceph:v16.2.11 or v17.2.6 . For more details read the container images section . For the latest ceph images, see the Ceph DockerHub . 
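For instance, a brief sketch of pinning a specific Ceph release in the cluster spec (the tag shown is simply the example version referenced in these docs):
cephVersion:
  image: quay.io/ceph/ceph:v17.2.6
  allowUnsupported: false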
To ensure a consistent version of the image is running across all nodes in the cluster, it is recommended to use a very specific image version. Tags also exist that would give the latest version, but they are only recommended for test environments. For example, the tag v17 will be updated each time a new Quincy build is released. Using the v17 tag is not recommended in production because it may lead to inconsistent versions of the image running across different nodes in the cluster. allowUnsupported : If true , allow an unsupported major version of the Ceph release. Currently pacific and quincy are supported. Future versions such as reef (v18) would require this to be set to true . Should be set to false in production. imagePullPolicy : The image pull policy for the ceph daemon pods. Possible values are Always , IfNotPresent , and Never . The default is IfNotPresent . dataDirHostPath : The path on the host ( hostPath ) where config and data should be stored for each of the services. If the directory does not exist, it will be created. Because this directory persists on the host, it will remain after pods are deleted. Following paths and any of their subpaths must not be used : /etc/ceph , /rook or /var/log/ceph . WARNING : For test scenarios, if you delete a cluster and start a new cluster on the same hosts, the path used by dataDirHostPath must be deleted. Otherwise, stale keys and other config will remain from the previous cluster and the new mons will fail to start. If this value is empty, each pod will get an ephemeral directory to store their config files that is tied to the lifetime of the pod running on that node. More details can be found in the Kubernetes empty dir docs . skipUpgradeChecks : if set to true Rook won't perform any upgrade checks on Ceph daemons during an upgrade. Use this at YOUR OWN RISK , only if you know what you're doing. To understand Rook's upgrade process of Ceph, read the upgrade doc . continueUpgradeAfterChecksEvenIfNotHealthy : if set to true Rook will continue the OSD daemon upgrade process even if the PGs are not clean, or continue with the MDS upgrade even the file system is not healthy. dashboard : Settings for the Ceph dashboard. To view the dashboard in your browser see the dashboard guide . enabled : Whether to enable the dashboard to view cluster status urlPrefix : Allows to serve the dashboard under a subpath (useful when you are accessing the dashboard via a reverse proxy) port : Allows to change the default port where the dashboard is served ssl : Whether to serve the dashboard via SSL, ignored on Ceph versions older than 13.2.2 monitoring : Settings for monitoring Ceph using Prometheus. To enable monitoring on your cluster see the monitoring guide . enabled : Whether to enable the prometheus service monitor for an internal cluster. For an external cluster, whether to create an endpoint port for the metrics. Default is false. metricsDisabled : Whether to disable the metrics reported by Ceph. If false, the prometheus mgr module and Ceph exporter are enabled. If true, the prometheus mgr module and Ceph exporter are both disabled. Default is false. externalMgrEndpoints : external cluster manager endpoints externalMgrPrometheusPort : external prometheus manager module port. See external cluster configuration for more details. port : The internal prometheus manager module port where the prometheus mgr module listens. The port may need to be configured when host networking is enabled. 
interval : The interval for the prometheus module to to scrape targets. network : For the network settings for the cluster, refer to the network configuration settings mon : contains mon related options mon settings For more details on the mons and when to choose a number other than 3 , see the mon health doc . mgr : manager top level section count : set number of ceph managers between 1 to 2 . The default value is 2. If there are two managers, it is important for all mgr services point to the active mgr and not the standby mgr. Rook automatically updates the label mgr_role on the mgr pods to be either active or standby . Therefore, services need just to add the label mgr_role=active to their selector to point to the active mgr. This applies to all services that rely on the ceph mgr such as the dashboard or the prometheus metrics collector. modules : is the list of Ceph manager modules to enable crashCollector : The settings for crash collector daemon(s). disable : is set to true , the crash collector will not run on any node where a Ceph daemon runs daysToRetain : specifies the number of days to keep crash entries in the Ceph cluster. By default the entries are kept indefinitely. logCollector : The settings for log collector daemon. enabled : if set to true , the log collector will run as a side-car next to each Ceph daemon. The Ceph configuration option log_to_file will be turned on, meaning Ceph daemons will log on files in addition to still logging to container's stdout. These logs will be rotated. In case a daemon terminates with a segfault, the coredump files will be commonly be generated in /var/lib/systemd/coredump directory on the host, depending on the underlying OS location. (default: true ) periodicity : how often to rotate daemon's log. (default: 24h). Specified with a time suffix which may be h for hours or d for days. Rotating too often will slightly impact the daemon's performance since the signal briefly interrupts the program. annotations : annotations configuration settings labels : labels configuration settings placement : placement configuration settings resources : resources configuration settings priorityClassNames : priority class names configuration settings storage : Storage selection and configuration that will be used across the cluster. Note that these settings can be overridden for specific nodes. useAllNodes : true or false , indicating if all nodes in the cluster should be used for storage according to the cluster level storage selection and configuration values. If individual nodes are specified under the nodes field, then useAllNodes must be set to false . nodes : Names of individual nodes in the cluster that should have their storage included in accordance with either the cluster level configuration specified above or any node specific overrides described in the next section below. useAllNodes must be set to false to use specific nodes and their config. See node settings below. config : Config settings applied to all OSDs on the node unless overridden by devices . See the config settings below. storage selection settings Storage Class Device Sets onlyApplyOSDPlacement : Whether the placement specific for OSDs is merged with the all placement. If false , the OSD placement will be merged with the all placement. If true, the OSD placement will be applied and the all placement will be ignored. 
The placement for OSDs is computed from several different places depending on the type of OSD: For non-PVCs: placement.all and placement.osd For PVCs: placement.all and inside the storageClassDeviceSets from the placement or preparePlacement disruptionManagement : The section for configuring management of daemon disruptions managePodBudgets : if true , the operator will create and manage PodDisruptionBudgets for OSD, Mon, RGW, and MDS daemons. OSD PDBs are managed dynamically via the strategy outlined in the design . The operator will block eviction of OSDs by default and unblock them safely when drains are detected. osdMaintenanceTimeout : is a duration in minutes that determines how long an entire failureDomain like region/zone/host will be held in noout (in addition to the default DOWN/OUT interval) when it is draining. This is only relevant when managePodBudgets is true . The default value is 30 minutes. removeOSDsIfOutAndSafeToRemove : If true , the operator will remove the OSDs that are down and whose data has been restored to other OSDs. In Ceph terms, the OSDs are out and safe-to-destroy when they are removed. cleanupPolicy : cleanup policy settings security : security page for key management configuration","title":"Cluster Settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#ceph-container-images","text":"Official releases of Ceph Container images are available from Docker Hub . These are general purpose Ceph containers with all necessary daemons and dependencies installed. TAG MEANING vRELNUM Latest release in this series (e.g., v17 = Quincy) vRELNUM.Y Latest stable release in this stable series (e.g., v17.2) vRELNUM.Y.Z A specific release (e.g., v17.2.6) vRELNUM.Y.Z-YYYYMMDD A specific build (e.g., v17.2.6-20230410) A specific build will contain a specific release of Ceph as well as security fixes from the Operating System.","title":"Ceph container images"},{"location":"CRDs/Cluster/ceph-cluster-crd/#mon-settings","text":"count : Set the number of mons to be started. The number must be between 1 and 9 . The recommended value is most commonly 3 . For highest availability, an odd number of mons should be specified. For higher durability in case of mon loss, an even number can be specified although availability may be lower. To maintain quorum a majority of mons must be up. For example, if there are three mons, two must be up. If there are four mons, three must be up. If there are two mons, both must be up. If quorum is lost, see the disaster recovery guide to restore quorum from a single mon. allowMultiplePerNode : Whether to allow the placement of multiple mons on a single node. Default is false for production. Should only be set to true in test environments. volumeClaimTemplate : A PersistentVolumeSpec used by Rook to create PVCs for monitor storage. This field is optional, and when not provided, HostPath volume mounts are used. The current set of fields from template that are used are storageClassName and the storage resource request and limit. The default storage size request for new PVCs is 10Gi . Ensure that the associated storage class is configured to use volumeBindingMode: WaitForFirstConsumer . This setting only applies to new monitors that are created when the requested number of monitors increases, or when a monitor fails and is recreated. An example CRD configuration is provided below . failureDomainLabel : The label that is expected on each node where the mons are expected to be deployed. The labels must be found in the list of well-known topology labels . A brief mon storage sketch is shown below; the zones setting continues after it.
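As a rough sketch, a mon section backed by PVCs might look like the following; the gp2 storage class name is only an assumption, and any class that uses volumeBindingMode: WaitForFirstConsumer works:

mon:
  count: 3
  allowMultiplePerNode: false
  volumeClaimTemplate:
    spec:
      storageClassName: gp2    # assumed storage class name
      resources:
        requests:
          storage: 10Gi        # the default request size, shown explicitly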
zones : The failure domain names where the Mons are expected to be deployed. There must be at least three zones specified in the list. Each zone can be backed by a different storage class by specifying the volumeClaimTemplate . name : The name of the zone, which is the value of the domain label. volumeClaimTemplate : A PersistentVolumeSpec used by Rook to create PVCs for monitor storage. This field is optional, and when not provided, HostPath volume mounts are used. The current set of fields from template that are used are storageClassName and the storage resource request and limit. The default storage size request for new PVCs is 10Gi . Ensure that associated storage class is configured to use volumeBindingMode: WaitForFirstConsumer . This setting only applies to new monitors that are created when the requested number of monitors increases, or when a monitor fails and is recreated. An example CRD configuration is provided below . stretchCluster : The stretch cluster settings that define the zones (or other failure domain labels) across which to configure the cluster. failureDomainLabel : The label that is expected on each node where the cluster is expected to be deployed. The labels must be found in the list of well-known topology labels . subFailureDomain : With a zone, the data replicas must be spread across OSDs in the subFailureDomain. The default is host . zones : The failure domain names where the Mons and OSDs are expected to be deployed. There must be three zones specified in the list. This element is always named zone even if a non-default failureDomainLabel is specified. The elements have two values: name : The name of the zone, which is the value of the domain label. arbiter : Whether the zone is expected to be the arbiter zone which only runs a single mon. Exactly one zone must be labeled true . volumeClaimTemplate : A PersistentVolumeSpec used by Rook to create PVCs for monitor storage. This field is optional, and when not provided, HostPath volume mounts are used. The current set of fields from template that are used are storageClassName and the storage resource request and limit. The default storage size request for new PVCs is 10Gi . Ensure that associated storage class is configured to use volumeBindingMode: WaitForFirstConsumer . This setting only applies to new monitors that are created when the requested number of monitors increases, or when a monitor fails and is recreated. An example CRD configuration is provided below . The two zones that are not the arbiter zone are expected to have OSDs deployed. If these settings are changed in the CRD the operator will update the number of mons during a periodic check of the mon health, which by default is every 45 seconds. To change the defaults that the operator uses to determine the mon health and whether to failover a mon, refer to the health settings . The intervals should be small enough that you have confidence the mons will maintain quorum, while also being long enough to ignore network blips where mons are failed over too often.","title":"Mon Settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#mgr-settings","text":"You can use the cluster CR to enable or disable any manager module. This can be configured like so: 1 2 3 4 mgr : modules : - name :  enabled : true Some modules will have special configuration to ensure the module is fully functional after being enabled. 
Specifically: pg_autoscaler : Rook will configure all new pools with PG autoscaling by setting: osd_pool_default_pg_autoscale_mode = on","title":"Mgr Settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#network-configuration-settings","text":"If not specified, the default SDN will be used. Configure the network that will be enabled for the cluster and services. provider : Specifies the network provider that will be used to connect the network interface. You can choose between host , and multus . selectors : List the network selector(s) that will be used associated by a key. ipFamily : Specifies the network stack Ceph daemons should listen on. dualStack : Specifies that Ceph daemon should listen on both IPv4 and IPv6 network stacks. connections : Settings for network connections using Ceph's msgr2 protocol requireMsgr2 : Whether to require communication over msgr2. If true, the msgr v1 port (6789) will be disabled and clients will be required to connect to the Ceph cluster with the v2 port (3300). Requires a kernel that supports msgr2 (kernel 5.11 or CentOS 8.4 or newer). Default is false. encryption : Settings for encryption on the wire to Ceph daemons enabled : Whether to encrypt the data in transit across the wire to prevent eavesdropping the data on the network. The default is false. When encryption is enabled, all communication between clients and Ceph daemons, or between Ceph daemons will be encrypted. When encryption is not enabled, clients still establish a strong initial authentication and data integrity is still validated with a crc check. IMPORTANT : Encryption requires the 5.11 kernel for the latest nbd and cephfs drivers. Alternatively for testing only, set \"mounter: rbd-nbd\" in the rbd storage class, or \"mounter: fuse\" in the cephfs storage class. The nbd and fuse drivers are not recommended in production since restarting the csi driver pod will disconnect the volumes. If this setting is enabled, CephFS volumes also require setting CSI_CEPHFS_KERNEL_MOUNT_OPTIONS to \"ms_mode=secure\" in operator.yaml. compression : enabled : Whether to compress the data in transit across the wire. The default is false. Requires Ceph Quincy (v17) or newer. Also see the kernel requirements above for encryption. Caution Changing networking configuration after a Ceph cluster has been deployed is NOT supported and will result in a non-functioning cluster.","title":"Network Configuration Settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#host-networking","text":"To use host networking, set provider: host . If the host networking setting is changed in a cluster where mons are already running, the existing mons will remain running with the same network settings with which they were created. To complete the conversion to or from host networking after you update this setting, you will need to failover the mons in order to have mons on the desired network configuration.","title":"Host Networking"},{"location":"CRDs/Cluster/ceph-cluster-crd/#multus","text":"Rook supports addition of public and cluster network for ceph using Multus The selector keys are required to be public and cluster where each represent: public : client communications with the cluster (reads/writes) cluster : internal Ceph replication network If you want to learn more, please read: Ceph Networking reference . 
Multus documentation Based on the configuration, the operator will do the following: If only the public selector is specified, all communication will happen on that network 1 2 3 4 network : provider : multus selectors : public : rook-ceph/rook-public-nw If only the cluster selector is specified, the internal cluster traffic* will happen on that network. All other traffic to mons, OSDs, and other daemons will be on the default network. 1 2 3 4 network : provider : multus selectors : cluster : rook-ceph/rook-cluster-nw If both public and cluster selectors are specified the first one will run all the communication network and the second the internal cluster network* 1 2 3 4 5 network : provider : multus selectors : public : rook-ceph/rook-public-nw cluster : rook-ceph/rook-cluster-nw * Internal cluster traffic includes OSD heartbeats, data replication, and data recovery Only OSD pods will have both Public and Cluster networks attached. The rest of the Ceph component pods and CSI pods will only have the Public network attached. Rook Ceph operator will not have any networks attached as it proxies the required commands via a sidecar container in the mgr pod. In order to work, each selector value must match a NetworkAttachmentDefinition object name in Multus. For multus network provider, an already working cluster with Multus networking is required. Network attachment definition that later will be attached to the cluster needs to be created before the Cluster CRD. The Network attachment definitions should be using whereabouts cni. If Rook cannot find the provided Network attachment definition it will fail running the Ceph OSD pods. You can add the Multus network attachment selection annotation selecting the created network attachment definition on selectors . A valid NetworkAttachmentDefinition will look like following: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 apiVersion : \"k8s.cni.cncf.io/v1\" kind : NetworkAttachmentDefinition metadata : name : rook-public-nw spec : config : '{ \"cniVersion\": \"0.3.0\", \"name\": \"public-nad\", \"type\": \"macvlan\", \"master\": \"ens5\", \"mode\": \"bridge\", \"ipam\": { \"type\": \"whereabouts\", \"range\": \"192.168.1.0/24\" } }' Ensure that master matches the network interface of the host that you want to use. IPAM type whereabouts is required because it makes sure that all the pods get a unique IP address from the multus network. The NetworkAttachmentDefinition should be referenced along with the namespace in which it is present like public: / . e.g., the network attachment definition are in default namespace: 1 2 public : default/rook-public-nw cluster : default/rook-cluster-nw 1 2 * This format is required in order to use the NetworkAttachmentDefinition across namespaces. * In Openshift, to use a NetworkAttachmentDefinition (NAD) across namespaces, the NAD must be deployed in the `default` namespace. The NAD is then referenced with the namespace: `default/rook-public-nw`","title":"Multus"},{"location":"CRDs/Cluster/ceph-cluster-crd/#validating-multus-configuration","text":"We highly recommend validating your Multus configuration before you install Rook. A tool exists to facilitate validating the Multus configuration. After installing the Rook operator and before installing any Custom Resources, run the tool from the operator pod. The tool's CLI is designed to be as helpful as possible. 
Get help text for the multus validation tool like so: 1 kubectl --namespace rook-ceph exec -it deploy/rook-ceph-operator -- rook multus validation run --help Then, update the args in the multus-validation job template. Minimally, add the NAD name(s) for public and/or cluster as needed, and then create the job to validate the Multus configuration. If the tool fails, it will suggest what things may be preventing Multus networks from working properly, and it will request the logs and outputs that will help debug issues. Check the logs of the pod created by the job to know the status of the validation test.","title":"Validating Multus configuration"},{"location":"CRDs/Cluster/ceph-cluster-crd/#known-limitations-with-multus","text":"Daemons leveraging Kubernetes service IPs (Monitors, Managers, Rados Gateways) are not listening on the NAD specified in the selectors . Instead, the daemon listens on the default network; however, the NAD is attached to the container, allowing the daemon to communicate with the rest of the cluster. There is work in progress to fix this issue in the multus-service repository. At the time of writing, it's unclear when this will be supported.","title":"Known limitations with Multus"},{"location":"CRDs/Cluster/ceph-cluster-crd/#ipfamily","text":"Provide single-stack IPv4 or IPv6 protocol to assign corresponding addresses to pods and services. This field is optional. Possible inputs are IPv6 and IPv4. An empty value will be treated as IPv4. Kubernetes version should be at least v1.13 to run IPv6. Dual-stack is supported as of Ceph Pacific. To turn on dual stack see the network configuration section .","title":"IPFamily"},{"location":"CRDs/Cluster/ceph-cluster-crd/#node-settings","text":"In addition to the cluster level settings specified above, each individual node can also specify configuration to override the cluster level settings and defaults. If a node does not specify any configuration then it will inherit the cluster level settings. name : The name of the node, which should match its kubernetes.io/hostname label. config : Config settings applied to all OSDs on the node unless overridden by devices . See the config settings below. storage selection settings When useAllNodes is set to true , Rook attempts to make Ceph cluster management as hands-off as possible while still maintaining reasonable data safety. If a usable node comes online, Rook will begin to use it automatically. To maintain a balance between hands-off usability and data safety, nodes are removed from Ceph as OSD hosts only (1) if the node is deleted from Kubernetes itself or (2) if the node has its taints or affinities modified in such a way that the node is no longer usable by Rook. Any changes to taints or affinities, intentional or unintentional, may affect the data reliability of the Ceph cluster. In order to help protect against this somewhat, deletion of nodes by taint or affinity modifications must be \"confirmed\" by deleting the Rook Ceph operator pod and allowing the operator deployment to restart the pod. For production clusters, we recommend that useAllNodes is set to false to prevent the Ceph cluster from suffering reduced data reliability unintentionally due to a user mistake. When useAllNodes is set to false , Rook relies on the user to be explicit about when nodes are added to or removed from the Ceph cluster. Nodes are only added to the Ceph cluster if the node is added to the Ceph cluster resource.
Similarly, nodes are only removed if the node is removed from the Ceph cluster resource.","title":"Node Settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#node-updates","text":"Nodes can be added and removed over time by updating the Cluster CRD, for example with kubectl -n rook-ceph edit cephcluster rook-ceph . This will bring up your default text editor and allow you to add and remove storage nodes from the cluster. This feature is only available when useAllNodes has been set to false .","title":"Node Updates"},{"location":"CRDs/Cluster/ceph-cluster-crd/#storage-selection-settings","text":"Below are the settings for host-based cluster. This type of cluster can specify devices for OSDs, both at the cluster and individual node level, for selecting which storage resources will be included in the cluster. useAllDevices : true or false , indicating whether all devices found on nodes in the cluster should be automatically consumed by OSDs. Not recommended unless you have a very controlled environment where you will not risk formatting of devices with existing data. When true , all devices and partitions will be used. Is overridden by deviceFilter if specified. LVM logical volumes are not picked by useAllDevices . deviceFilter : A regular expression for short kernel names of devices (e.g. sda ) that allows selection of devices and partitions to be consumed by OSDs. LVM logical volumes are not picked by deviceFilter .If individual devices have been specified for a node then this filter will be ignored. This field uses golang regular expression syntax . For example: sdb : Only selects the sdb device if found ^sd. : Selects all devices starting with sd ^sd[a-d] : Selects devices starting with sda , sdb , sdc , and sdd if found ^s : Selects all devices that start with s ^[^r] : Selects all devices that do not start with r devicePathFilter : A regular expression for device paths (e.g. /dev/disk/by-path/pci-0:1:2:3-scsi-1 ) that allows selection of devices and partitions to be consumed by OSDs. LVM logical volumes are not picked by devicePathFilter .If individual devices or deviceFilter have been specified for a node then this filter will be ignored. This field uses golang regular expression syntax . For example: ^/dev/sd. : Selects all devices starting with sd ^/dev/disk/by-path/pci-.* : Selects all devices which are connected to PCI bus devices : A list of individual device names belonging to this node to include in the storage cluster. name : The name of the devices and partitions (e.g., sda ). The full udev path can also be specified for devices, partitions, and logical volumes (e.g. /dev/disk/by-id/ata-ST4000DM004-XXXX - this will not change after reboots). config : Device-specific config settings. See the config settings below Host-based cluster supports raw device, partition, and logical volume. Be sure to see the quickstart doc prerequisites for additional considerations. Below are the settings for a PVC-based cluster. storageClassDeviceSets : Explained in Storage Class Device Sets","title":"Storage Selection Settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#storage-class-device-sets","text":"The following are the settings for Storage Class Device Sets which can be configured to create OSDs that are backed by block mode PVs. name : A name for the set. count : The number of devices in the set. resources : The CPU and RAM requests/limits for the devices. (Optional) placement : The placement criteria for the devices. (Optional) Default is no placement criteria. 
The syntax is the same as for other placement configuration . It supports nodeAffinity , podAffinity , podAntiAffinity and tolerations keys. It is recommended to configure the placement such that the OSDs will be as evenly spread across nodes as possible. At a minimum, anti-affinity should be added so at least one OSD will be placed on each available node. However, if there are more OSDs than nodes, this anti-affinity will not be effective. Another placement scheme to consider is to add labels to the nodes in such a way that the OSDs can be grouped on those nodes, create multiple storageClassDeviceSets, and add node affinity to each of the device sets that will place the OSDs in those sets of nodes. Rook will automatically add required nodeAffinity to the OSD daemons to match the topology labels that are found on the nodes where the OSD prepare jobs ran. To ensure data durability, the OSDs are required to run in the same topology that the Ceph CRUSH map expects. For example, if the nodes are labeled with rack topology labels, the OSDs will be constrained to a certain rack. Without the topology labels, Rook will not constrain the OSDs beyond what is required by the PVs, for example to run in the zone where provisioned. See the OSD Topology section for the related labels. preparePlacement : The placement criteria for the preparation of the OSD devices. Creating OSDs is a two-step process and the prepare job may require different placement than the OSD daemons. If the preparePlacement is not specified, the placement will instead be applied for consistent placement for the OSD prepare jobs and OSD deployments. The preparePlacement is only useful for portable OSDs in the device sets. OSDs that are not portable will be tied to the host where the OSD prepare job initially runs. For example, provisioning may require topology spread constraints across zones, but the OSD daemons may require constraints across hosts within the zones. portable : If true , the OSDs will be allowed to move between nodes during failover. This requires a storage class that supports portability (e.g. aws-ebs , but not the local storage provisioner). If false , the OSDs will be assigned to a node permanently. Rook will configure Ceph's CRUSH map to support the portability. tuneDeviceClass : For example, Ceph cannot detect AWS volumes as HDDs from the storage class "gp2", so you can improve Ceph performance by setting this to true. tuneFastDeviceClass : For example, Ceph cannot detect Azure disks as SSDs from the storage class "managed-premium", so you can improve Ceph performance by setting this to true. volumeClaimTemplates : A list of PVC templates to use for provisioning the underlying storage devices. resources.requests.storage : The desired capacity for the underlying storage devices. storageClassName : The StorageClass to provision PVCs from. Default would be to use the cluster-default StorageClass. This StorageClass should provide a raw block device, multipath device, or logical volume. Other types are not supported. If you want to use a logical volume, please see the known issue of OSD on LV-backed PVC . volumeMode : The volume mode to be set for the PVC, which should be Block . accessModes : The access mode for the PVC to be bound by OSD. schedulerName : Scheduler name for OSD pod placement. An illustrative device set sketch is shown below; the remaining optional fields continue after it.
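As an illustrative sketch only (the set name, storage class, and size are assumptions, not recommendations), a single device set entry under storage could look like:

storage:
  storageClassDeviceSets:
    - name: set1                    # assumed name
      count: 3
      portable: true
      encrypted: false
      placement:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - rook-ceph-osd
                topologyKey: kubernetes.io/hostname
      volumeClaimTemplates:
        - metadata:
            name: data
          spec:
            resources:
              requests:
                storage: 100Gi      # assumed size
            storageClassName: gp2   # assumed class providing raw block devices
            volumeMode: Block
            accessModes:
              - ReadWriteOnce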
(Optional) encrypted : whether to encrypt all the OSDs in a given storageClassDeviceSet","title":"Storage Class Device Sets"},{"location":"CRDs/Cluster/ceph-cluster-crd/#osd-configuration-settings","text":"The following storage selection settings are specific to Ceph and do not apply to other backends. All variables are key-value pairs represented as strings. metadataDevice : Name of a device or lvm to use for the metadata of OSDs on each node. Performance can be improved by using a low latency device (such as SSD or NVMe) as the metadata device, while other spinning platter (HDD) devices on a node are used to store data. Provisioning will fail if the user specifies a metadataDevice but that device is not used as a metadata device by Ceph. Notably, ceph-volume will not use a device of the same device class (HDD, SSD, NVMe) as OSD devices for metadata, resulting in this failure. databaseSizeMB : The size in MB of a bluestore database. Include quotes around the size. walSizeMB : The size in MB of a bluestore write ahead log (WAL). Include quotes around the size. deviceClass : The CRUSH device class to use for this selection of storage devices. (By default, if a device's class has not already been set, OSDs will automatically set a device's class to either hdd , ssd , or nvme based on the hardware properties exposed by the Linux kernel.) These storage classes can then be used to select the devices backing a storage pool by specifying them as the value of the pool spec's deviceClass field . initialWeight : The initial OSD weight in TiB units. By default, this value is derived from OSD's capacity. primaryAffinity : The primary-affinity value of an OSD, within range [0, 1] (default: 1 ). osdsPerDevice **: The number of OSDs to create on each device. High performance devices such as NVMe can handle running multiple OSDs. If desired, this can be overridden for each node and each device. encryptedDevice **: Encrypt OSD volumes using dmcrypt (\"true\" or \"false\"). By default this option is disabled. See encryption for more information on encryption in Ceph. crushRoot : The value of the root CRUSH map label. The default is default . Generally, you should not need to change this. However, if any of your topology labels may have the value default , you need to change crushRoot to avoid conflicts, since CRUSH map values need to be unique.","title":"OSD Configuration Settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#annotations-and-labels","text":"Annotations and Labels can be specified so that the Rook components will have those annotations / labels added to them. You can set annotations / labels for Rook components for the list of key value pairs: all : Set annotations / labels for all components except clusterMetadata . mgr : Set annotations / labels for MGRs mon : Set annotations / labels for mons osd : Set annotations / labels for OSDs prepareosd : Set annotations / labels for OSD Prepare Jobs monitoring : Set annotations / labels for service monitor crashcollector : Set annotations / labels for crash collectors clusterMetadata : Set annotations only to rook-ceph-mon-endpoints configmap and the rook-ceph-mon and rook-ceph-admin-keyring secrets. These annotations will not be merged with the all annotations. The common usage is for backing up these critical resources with kubed . Note the clusterMetadata annotation will not be merged with the all annotation. 
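The annotation and label keys accept arbitrary key/value pairs. As a small illustrative sketch (the keys shown are hypothetical, and the kubed sync annotation is just one possible backup approach):

annotations:
  all:
    example.com/owner: storage-team      # hypothetical annotation applied to all components
  mon:
    example.com/tier: critical           # merged with the all annotations for mons
  clusterMetadata:
    kubed.appscode.com/sync: "true"      # applied only to the mon endpoints configmap and keyring secrets; not merged with all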
When other keys are set, all will be merged together with the specific component.","title":"Annotations and Labels"},{"location":"CRDs/Cluster/ceph-cluster-crd/#placement-configuration-settings","text":"Placement configuration for the cluster services. It includes the following keys: mgr , mon , arbiter , osd , prepareosd , cleanup , and all . Each service will have its placement configuration generated by merging the generic configuration under all with the most specific one (which will override any attributes). In stretch clusters, if the arbiter placement is specified, that placement will only be applied to the arbiter. Neither will the arbiter placement be merged with the all placement to allow the arbiter to be fully independent of other daemon placement. The remaining mons will still use the mon and/or all sections. Note Placement of OSD pods is controlled using the Storage Class Device Set , not the general placement configuration. A Placement configuration is specified (according to the kubernetes PodSpec) as: nodeAffinity : kubernetes NodeAffinity podAffinity : kubernetes PodAffinity podAntiAffinity : kubernetes PodAntiAffinity tolerations : list of kubernetes Toleration topologySpreadConstraints : kubernetes TopologySpreadConstraints If you use labelSelector for osd pods, you must write two rules both for rook-ceph-osd and rook-ceph-osd-prepare like the example configuration . It comes from the design that there are these two pods for an OSD. For more detail, see the osd design doc and the related issue . The Rook Ceph operator creates a Job called rook-ceph-detect-version to detect the full Ceph version used by the given cephVersion.image . The placement from the mon section is used for the Job except for the PodAntiAffinity field.","title":"Placement Configuration Settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#placement-example","text":"To control where various services will be scheduled by kubernetes, use the placement configuration sections below. The example under 'all' would have all services scheduled on kubernetes nodes labeled with 'role=storage-node . Specific node affinity and tolerations that only apply to the mon daemons in this example require the label role=storage-mon-node` and also tolerate the control plane taint. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false # enable the ceph dashboard for viewing cluster status dashboard : enabled : true placement : all : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : role operator : In values : - storage-node mon : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : role operator : In values : - storage-mon-node tolerations : - effect : NoSchedule key : node-role.kubernetes.io/control-plane operator : Exists","title":"Placement Example"},{"location":"CRDs/Cluster/ceph-cluster-crd/#cluster-wide-resources-configuration-settings","text":"Resources should be specified so that the Rook components are handled after Kubernetes Pod Quality of Service classes . 
This helps keep Rook components running when, for example, a node runs out of memory, since pods are killed according to their Quality of Service class. You can set resource requests/limits for Rook components through the Resource Requirements/Limits structure in the following keys: mon : Set resource requests/limits for mons osd : Set resource requests/limits for OSDs. This key applies for all OSDs regardless of their device classes. If you need to apply resource requests/limits for OSDs with a particular device class, use the specific osd keys below. If the memory resource is declared, Rook will automatically set the OSD configuration osd_memory_target to the same value. This aims to ensure that the actual OSD memory consumption is consistent with the OSD pods' resource declaration. osd- : Set resource requests/limits for OSDs on a specific device class. Rook will automatically detect hdd , ssd , or nvme device classes. Custom device classes can also be set. mgr : Set resource requests/limits for MGRs mgr-sidecar : Set resource requests/limits for the MGR sidecar, which is only created when mgr.count: 2 . The sidecar requires very few resources since it only executes every 15 seconds to query Ceph for the active mgr and update the mgr services if the active mgr changed. prepareosd : Set resource requests/limits for the OSD prepare job crashcollector : Set resource requests/limits for the crash collector. This pod runs wherever there is a Ceph pod running. It scrapes for Ceph daemon core dumps and sends them to the Ceph manager crash module so that core dumps are centralized and can be easily listed/accessed. You can read more about the Ceph Crash module . logcollector : Set resource requests/limits for the log collector. When enabled, this container runs as a side-car to each Ceph daemon. cleanup : Set resource requests/limits for the cleanup job, responsible for wiping the cluster's data after uninstall exporter : Set resource requests/limits for the Ceph exporter. In order to provide the best possible experience running Ceph in containers, Rook internally recommends minimum memory limits if resource limits are passed. If a user configures a limit or request value that is too low, Rook will still run the pod(s) and print a warning to the operator log. mon : 1024MB mgr : 512MB osd : 2048MB crashcollector : 60MB mgr-sidecar : 100MB limit, 40MB requests prepareosd : no limits (see the note) exporter : 128MB limit, 50MB requests Note We recommend not setting memory limits on the OSD prepare job to prevent OSD provisioning failure due to memory constraints. The OSD prepare job bursts memory usage during the OSD provisioning depending on the size of the device, typically 1-2Gi for large disks. The OSD prepare job only bursts a single time per OSD. All future runs of the OSD prepare job will detect the OSD is already provisioned and skip the provisioning. Hint The resources for MDS daemons are not configured in the Cluster. Refer to the Ceph Filesystem CRD instead.","title":"Cluster-wide Resources Configuration Settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#resource-requirementslimits","text":"For more information on resource requests/limits see the official Kubernetes documentation: Kubernetes - Managing Compute Resources for Containers requests : Requests for cpu or memory. cpu : Request for CPU (example: one CPU core 1 , 50% of one CPU core 500m ). memory : Request for Memory (example: one gigabyte of memory 1Gi , half a gigabyte of memory 512Mi ). limits : Limits for cpu or memory. A short sketch of cluster-wide resource settings is shown below; the cpu and memory limit fields are described after it.
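A brief sketch of the resources section under cephClusterSpec; the values are placeholders for illustration, not sizing recommendations:

resources:
  mgr:
    requests:
      cpu: "500m"
      memory: "512Mi"
    limits:
      memory: "1Gi"
  mon:
    requests:
      cpu: "1"
      memory: "1Gi"
    limits:
      memory: "2Gi"
  osd:
    requests:
      cpu: "1"
      memory: "4Gi"
    limits:
      memory: "4Gi"    # with a memory declaration, Rook also sets osd_memory_target accordingly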
cpu : Limit for CPU (example: one CPU core 1 , 50% of one CPU core 500m ). memory : Limit for Memory (example: one gigabyte of memory 1Gi , half a gigabyte of memory 512Mi ). Warning Before setting resource requests/limits, please take a look at the Ceph documentation for recommendations for each component: Ceph - Hardware Recommendations .","title":"Resource Requirements/Limits"},{"location":"CRDs/Cluster/ceph-cluster-crd/#node-specific-resources-for-osds","text":"This example shows that you can override these requests/limits for OSDs per node when using useAllNodes: false in the node item in the nodes list. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false storage : useAllNodes : false nodes : - name : \"172.17.4.201\" resources : limits : cpu : \"2\" memory : \"4096Mi\" requests : cpu : \"2\" memory : \"4096Mi\"","title":"Node Specific Resources for OSDs"},{"location":"CRDs/Cluster/ceph-cluster-crd/#priority-class-names","text":"Priority class names can be specified so that the Rook components will have those priority class names added to them. You can set priority class names for Rook components for the list of key value pairs: all : Set priority class names for MGRs, Mons, OSDs, and crashcollectors. mgr : Set priority class names for MGRs. Examples default to system-cluster-critical. mon : Set priority class names for Mons. Examples default to system-node-critical. osd : Set priority class names for OSDs. Examples default to system-node-critical. crashcollector : Set priority class names for crashcollectors. The specific component keys will act as overrides to all .","title":"Priority Class Names"},{"location":"CRDs/Cluster/ceph-cluster-crd/#health-settings","text":"The Rook Ceph operator will monitor the state of the CephCluster on various components by default. The following CRD settings are available: healthCheck : main ceph cluster health monitoring section Currently three health checks are implemented: mon : health check on the ceph monitors, basically check whether monitors are members of the quorum. If after a certain timeout a given monitor has not joined the quorum back it will be failed over and replace by a new monitor. osd : health check on the ceph osds status : ceph health status check, periodically check the Ceph health state and reflects it in the CephCluster CR status field. The liveness probe and startup probe of each daemon can also be controlled via livenessProbe and startupProbe respectively. The settings are valid for mon , mgr and osd . Here is a complete example for both daemonHealth , livenessProbe , and startupProbe : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 healthCheck : daemonHealth : mon : disabled : false interval : 45s timeout : 600s osd : disabled : false interval : 60s status : disabled : false livenessProbe : mon : disabled : false mgr : disabled : false osd : disabled : false startupProbe : mon : disabled : false mgr : disabled : false osd : disabled : false The probe's timing values and thresholds (but not the probe itself) can also be overridden. For more info, refer to the Kubernetes documentation . 
For example, you could change the mgr probe by applying: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 healthCheck : startupProbe : mgr : disabled : false probe : initialDelaySeconds : 3 periodSeconds : 3 failureThreshold : 30 livenessProbe : mgr : disabled : false probe : initialDelaySeconds : 3 periodSeconds : 3 Changing the liveness probe is an advanced operation and should rarely be necessary. If you want to change these settings then modify the desired settings.","title":"Health settings"},{"location":"CRDs/Cluster/ceph-cluster-crd/#status","text":"The operator is regularly configuring and checking the health of the cluster. The results of the configuration and health checks can be seen in the status section of the CephCluster CR. 1 kubectl -n rook-ceph get CephCluster -o yaml 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 [ ... ] status : ceph : health : HEALTH_OK lastChecked : \"2021-03-02T21:22:11Z\" capacity : bytesAvailable : 22530293760 bytesTotal : 25757220864 bytesUsed : 3226927104 lastUpdated : \"2021-03-02T21:22:11Z\" message : Cluster created successfully phase : Ready state : Created storage : deviceClasses : - name : hdd version : image : quay.io/ceph/ceph:v17.2.6 version : 16.2.6-0 conditions : - lastHeartbeatTime : \"2021-03-02T21:22:11Z\" lastTransitionTime : \"2021-03-02T21:21:09Z\" message : Cluster created successfully reason : ClusterCreated status : \"True\" type : Ready","title":"Status"},{"location":"CRDs/Cluster/ceph-cluster-crd/#ceph-status","text":"Ceph is constantly monitoring the health of the data plane and reporting back if there are any warnings or errors. If everything is healthy from Ceph's perspective, you will see HEALTH_OK . If Ceph reports any warnings or errors, the details will be printed to the status. If further troubleshooting is needed to resolve these issues, the toolbox will likely be needed where you can run ceph commands to find more details. The capacity of the cluster is reported, including bytes available, total, and used. The available space will be less that you may expect due to overhead in the OSDs.","title":"Ceph Status"},{"location":"CRDs/Cluster/ceph-cluster-crd/#conditions","text":"The conditions represent the status of the Rook operator. If the cluster is fully configured and the operator is stable, the Ready condition is raised with ClusterCreated reason and no other conditions. The cluster will remain in the Ready condition after the first successful configuration since it is expected the storage is consumable from this point on. If there are issues preventing the storage layer from working, they are expected to show as Ceph health errors. If the cluster is externally connected successfully, the Ready condition will have the reason ClusterConnected . If the operator is currently being configured or the operator is checking for update, there will be a Progressing condition. If there was a failure, the condition(s) status will be false and the message will give a summary of the error. See the operator log for more details.","title":"Conditions"},{"location":"CRDs/Cluster/ceph-cluster-crd/#other-status","text":"There are several other properties for the overall status including: message , phase , and state : A summary of the overall current state of the cluster, which is somewhat duplicated from the conditions for backward compatibility. storage.deviceClasses : The names of the types of storage devices that Ceph discovered in the cluster. 
These types will be ssd or hdd unless they have been overridden with the crushDeviceClass in the storageClassDeviceSets . version : The version of the Ceph image currently deployed.","title":"Other Status"},{"location":"CRDs/Cluster/ceph-cluster-crd/#osd-topology","text":"The topology of the cluster is important in production environments where you want your data spread across failure domains. The topology can be controlled by adding labels to the nodes. When the labels are found on a node at first OSD deployment, Rook will add them to the desired level in the CRUSH map . The complete list of labels in hierarchy order from highest to lowest is: 1 2 3 4 5 6 7 8 9 topology.kubernetes.io/region topology.kubernetes.io/zone topology.rook.io/datacenter topology.rook.io/room topology.rook.io/pod topology.rook.io/pdu topology.rook.io/row topology.rook.io/rack topology.rook.io/chassis For example, if the following labels were added to a node: 1 2 kubectl label node mynode topology.kubernetes.io/zone=zone1 kubectl label node mynode topology.rook.io/rack=zone1-rack1 These labels would result in the following hierarchy for OSDs on that node (this command can be run in the Rook toolbox): 1 2 3 4 5 6 7 8 $ ceph osd tree ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF -1 0.01358 root default -5 0.01358 zone zone1 -4 0.01358 rack rack1 -3 0.01358 host mynode 0 hdd 0.00679 osd.0 up 1.00000 1.00000 1 hdd 0.00679 osd.1 up 1.00000 1.00000 Ceph requires unique names at every level in the hierarchy (CRUSH map). For example, you cannot have two racks with the same name that are in different zones. Racks in different zones must be named uniquely. Note that the host is added automatically to the hierarchy by Rook. The host cannot be specified with a topology label. All topology labels are optional. Hint When setting the node labels prior to CephCluster creation, these settings take immediate effect. However, applying this to an already deployed CephCluster requires removing each node from the cluster first and then re-adding it with new configuration to take effect. Do this node by node to keep your data safe! Check the result with ceph osd tree from the Rook Toolbox . The OSD tree should display the hierarchy for the nodes that already have been re-added. To utilize the failureDomain based on the node labels, specify the corresponding option in the CephBlockPool 1 2 3 4 5 6 7 8 9 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : failureDomain : rack # this matches the topology labels on nodes replicated : size : 3 This configuration will split the replication of volumes across unique racks in the data center setup.","title":"OSD Topology"},{"location":"CRDs/Cluster/ceph-cluster-crd/#deleting-a-cephcluster","text":"During deletion of a CephCluster resource, Rook protects against accidental or premature destruction of user data by blocking deletion if there are any other Rook Ceph Custom Resources that reference the CephCluster being deleted. Rook will warn about which other resources are blocking deletion in three ways until all blocking resources are deleted: An event will be registered on the CephCluster resource A status condition will be added to the CephCluster resource An error will be added to the Rook Ceph operator log","title":"Deleting a CephCluster"},{"location":"CRDs/Cluster/ceph-cluster-crd/#cleanup-policy","text":"Rook has the ability to cleanup resources and data that were deployed when a CephCluster is removed. 
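For reference, a hedged sketch of the cleanupPolicy section, using the safe defaults; the individual fields are explained next:

cleanupPolicy:
  confirmation: ""              # leave empty unless you really intend to wipe the hosts
  sanitizeDisks:
    method: quick               # quick (metadata only) or complete
    dataSource: zero            # zero or random
    iteration: 1
  allowUninstallWithVolumes: false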
The policy settings indicate which data should be forcibly deleted and in what way the data should be wiped. The cleanupPolicy has several fields: confirmation : Only an empty string and yes-really-destroy-data are valid values for this field. If this setting is empty, the cleanupPolicy settings will be ignored and Rook will not clean up any resources during cluster removal. To reinstall the cluster, the admin would then be required to follow the cleanup guide to delete the data on hosts. If this setting is yes-really-destroy-data , the operator will automatically delete the data on hosts. Because this cleanup policy is destructive, after the confirmation is set to yes-really-destroy-data Rook will stop configuring the cluster as if the cluster is about to be destroyed. sanitizeDisks : sanitizeDisks represents advanced settings that can be used to delete data on drives. method : indicates if the entire disk should be sanitized or simply ceph's metadata. Possible choices are quick (default) or complete dataSource : indicates where to get random bytes from to write on the disk. Possible choices are zero (default) or random . Using random sources will consume entropy from the system and will take much more time than the zero source iteration : overwrite N times instead of the default (1). Takes an integer value allowUninstallWithVolumes : If set to true, then the cephCluster deletion doesn't wait for the PVCs to be deleted. Default is false . To automate activation of the cleanup, you can use the following command. WARNING: DATA WILL BE PERMANENTLY DELETED : 1 kubectl -n rook-ceph patch cephcluster rook-ceph --type merge -p '{\"spec\":{\"cleanupPolicy\":{\"confirmation\":\"yes-really-destroy-data\"}}}' Nothing will happen until the deletion of the CR is requested, so this can still be reverted. However, all new configuration by the operator will be blocked with this cleanup policy enabled. Rook waits for the deletion of PVs provisioned using the cephCluster before proceeding to delete the cephCluster. To force deletion of the cephCluster without waiting for the PVs to be deleted, you can set the allowUninstallWithVolumes to true under spec.cleanupPolicy .","title":"Cleanup policy"},{"location":"CRDs/Cluster/external-cluster/","text":"An external cluster is a Ceph configuration that is managed outside of the local K8s cluster. The external cluster could be managed by cephadm, or it could be another Rook cluster that is configured to allow access (usually configured with host networking). In external mode, Rook will provide the configuration for the CSI driver and other basic resources that allow your applications to connect to Ceph in the external cluster. External configuration \u00b6 Source cluster: The cluster providing the data, usually configured by cephadm Consumer cluster: The K8s cluster that will be consuming the external source cluster Prerequisites \u00b6 Create the desired types of storage in the provider Ceph cluster: RBD pools CephFS filesystem Commands on the source Ceph cluster \u00b6 In order to configure an external Ceph cluster with Rook, we need to extract some information in order to connect to that cluster. 1. Create all users and keys \u00b6 Run the python script create-external-cluster-resources.py for creating all users and keys. 
1 python3 create-external-cluster-resources.py --rbd-data-pool-name  --cephfs-filesystem-name  --rgw-endpoint  --namespace  --format bash --namespace : Namespace where CephCluster will run, for example rook-ceph-external --format bash : The format of the output --rbd-data-pool-name : The name of the RBD data pool --alias-rbd-data-pool-name : Provides an alias for the RBD data pool name, necessary if a special character is present in the pool name such as a period or underscore --rgw-endpoint : (optional) The RADOS Gateway endpoint in the format : or : . --rgw-pool-prefix : (optional) The prefix of the RGW pools. If not specified, the default prefix is default --rgw-tls-cert-path : (optional) RADOS Gateway endpoint TLS certificate file path --rgw-skip-tls : (optional) Ignore TLS certification validation when a self-signed certificate is provided (NOT RECOMMENDED) --rbd-metadata-ec-pool-name : (optional) Provides the name of erasure coded RBD metadata pool, used for creating ECRBDStorageClass. --monitoring-endpoint : (optional) Ceph Manager prometheus exporter endpoints (comma separated list of entries of active and standby mgrs) --monitoring-endpoint-port : (optional) Ceph Manager prometheus exporter port --skip-monitoring-endpoint : (optional) Skip prometheus exporter endpoints, even if they are available. Useful if the prometheus module is not enabled --ceph-conf : (optional) Provide a Ceph conf file --keyring : (optional) Path to Ceph keyring file, to be used with --ceph-conf --cluster-name : (optional) Ceph cluster name --output : (optional) Output will be stored into the provided file --dry-run : (optional) Prints the executed commands without running them --run-as-user : (optional) Provides a user name to check the cluster's health status, must be prefixed by client . --cephfs-metadata-pool-name : (optional) Provides the name of the cephfs metadata pool --cephfs-filesystem-name : (optional) The name of the filesystem, used for creating CephFS StorageClass --cephfs-data-pool-name : (optional) Provides the name of the CephFS data pool, used for creating CephFS StorageClass --rados-namespace : (optional) Divides a pool into separate logical namespaces, used for creating RBD PVC in a RadosNamespaces --subvolume-group : (optional) Provides the name of the subvolume group, used for creating CephFS PVC in a subvolumeGroup --rgw-realm-name : (optional) Provides the name of the rgw-realm --rgw-zone-name : (optional) Provides the name of the rgw-zone --rgw-zonegroup-name : (optional) Provides the name of the rgw-zone-group --upgrade : (optional) Upgrades the 'Ceph CSI keyrings (For example: client.csi-cephfs-provisioner) with new permissions needed for the new cluster version and older permission will still be applied. --restricted-auth-permission : (optional) Restrict cephCSIKeyrings auth permissions to specific pools, and cluster. Mandatory flags that need to be set are --rbd-data-pool-name , and --cluster-name . --cephfs-filesystem-name flag can also be passed in case of CephFS user restriction, so it can restrict users to particular CephFS filesystem. --v2-port-enable : (optional) Enables the v2 mon port (3300) for mons. Multi-tenancy \u00b6 To enable multi-tenancy, run the script with the --restricted-auth-permission flag and pass the mandatory flags with it, It will generate the secrets which you can use for creating new Consumer cluster deployment using the same Source cluster (ceph cluster). So you would be running different isolated consumer clusters on top of single Source cluster . 
Note Restricting the csi-users per pool, and per cluster will require creating new csi-users and new secrets for that csi-users. So apply these secrets only to new Consumer cluster deployment while using the same Source cluster . 1 python3 create-external-cluster-resources.py --cephfs-filesystem-name  --rbd-data-pool-name  --cluster-name  --restricted-auth-permission true --format  --rgw-endpoint  --namespace  RGW Multisite \u00b6 Pass the --rgw-realm-name , --rgw-zonegroup-name and --rgw-zone-name flags to create the admin ops user in a master zone, zonegroup and realm. See the Multisite doc for creating a zone, zonegroup and realm. 1 python3 create-external-cluster-resources.py --rbd-data-pool-name  --format bash --rgw-endpoint  --rgw-realm-name > --rgw-zonegroup-name  --rgw-zone-name > Upgrade Example \u00b6 1) If consumer cluster doesn't have restricted caps, this will upgrade all the default csi-users (non-restricted): 1 python3 create-external-cluster-resources.py --upgrade 2) If the consumer cluster has restricted caps: Restricted users created using --restricted-auth-permission flag need to pass mandatory flags: ' --rbd-data-pool-name (if it is a rbd user), --cluster-name and --run-as-user ' flags while upgrading, in case of cephfs users if you have passed --cephfs-filesystem-name flag while creating csi-users then while upgrading it will be mandatory too. In this example the user would be client.csi-rbd-node-rookstorage-replicapool (following the pattern csi-user-clusterName-poolName ) 1 python3 create-external-cluster-resources.py --upgrade --rbd-data-pool-name replicapool --cluster-name rookstorage --run-as-user client.csi-rbd-node-rookstorage-replicapool Note An existing non-restricted user cannot be converted to a restricted user by upgrading. The upgrade flag should only be used to append new permissions to users. It shouldn't be used for changing a csi user already applied permissions. For example, you shouldn't change the pool(s) a user has access to. 2. Copy the bash output \u00b6 Example Output: 1 2 3 4 5 6 7 8 9 10 11 export ROOK_EXTERNAL_FSID=797f411a-aafe-11ec-a254-fa163e1539f5 export ROOK_EXTERNAL_USERNAME=client.healthchecker export ROOK_EXTERNAL_CEPH_MON_DATA=ceph-rados-upstream-w4pdvq-node1-installer=10.0.210.83:6789 export ROOK_EXTERNAL_USER_SECRET=AQAdm0FilZDSJxAAMucfuu/j0ZYYP4Bia8Us+w== export ROOK_EXTERNAL_DASHBOARD_LINK=https://10.0.210.83:8443/ export CSI_RBD_NODE_SECRET=AQC1iDxip45JDRAAVahaBhKz1z0WW98+ACLqMQ== export CSI_RBD_PROVISIONER_SECRET=AQC1iDxiMM+LLhAA0PucjNZI8sG9Eh+pcvnWhQ== export MONITORING_ENDPOINT=10.0.210.83 export MONITORING_ENDPOINT_PORT=9283 export RBD_POOL_NAME=replicated_2g export RGW_POOL_PREFIX=default Commands on the K8s consumer cluster \u00b6 Import the Source Data \u00b6 Paste the above output from create-external-cluster-resources.py into your current shell to allow importing the source data. Run the import script. !!! note If your Rook cluster nodes are running a kernel earlier than or equivalent to 5.4, remove fast-diff,object-map,deep-flatten,exclusive-lock from the imageFeatures line. 1 2 3 ```console . import-external-cluster.sh ``` Helm Installation \u00b6 To install with Helm, the rook cluster helm chart will configure the necessary resources for the external cluster with the example values-external.yaml . 
1 2 3 4 5 6 7 clusterNamespace=rook-ceph operatorNamespace=rook-ceph cd deploy/examples/charts/rook-ceph-cluster helm repo add rook-release https://charts.rook.io/release helm install --create-namespace --namespace $clusterNamespace rook-ceph rook-release/rook-ceph -f values.yaml helm install --create-namespace --namespace $clusterNamespace rook-ceph-cluster \\ --set operatorNamespace=$operatorNamespace rook-release/rook-ceph-cluster -f values-external.yaml Skip the manifest installation section and continue with Cluster Verification . Manifest Installation \u00b6 If not installing with Helm, here are the steps to install with manifests. Deploy Rook, create common.yaml , crds.yaml and operator.yaml manifests. Create common-external.yaml and cluster-external.yaml Cluster Verification \u00b6 Verify the consumer cluster is connected to the source ceph cluster: 1 2 3 $ kubectl -n rook-ceph-external get CephCluster NAME DATADIRHOSTPATH MONCOUNT AGE STATE HEALTH rook-ceph-external /var/lib/rook 162m Connected HEALTH_OK Verify the creation of the storage class depending on the rbd pools and filesystem provided. ceph-rbd and cephfs would be the respective names for the RBD and CephFS storage classes. 1 kubectl -n rook-ceph-external get sc Then you can now create a persistent volume based on these StorageClass. Connect to an External Object Store \u00b6 Create the object store resources: Create the external object store CR to configure connection to external gateways. Create an Object store user for credentials to access the S3 endpoint. Create a bucket storage class where a client can request creating buckets. Create the Object Bucket Claim , which will create an individual bucket for reading and writing objects. 1 2 3 4 5 cd deploy/examples kubectl create -f object-external.yaml kubectl create -f object-user.yaml kubectl create -f storageclass-bucket-delete.yaml kubectl create -f object-bucket-claim-delete.yaml Hint For more details see the Object Store topic Connect to v2 mon port \u00b6 If encryption or compression on the wire is needed, specify the --v2-port-enable flag. If the v2 address type is present in the ceph quorum_status , then the output of 'ceph mon data' i.e, ROOK_EXTERNAL_CEPH_MON_DATA will use the v2 port( 3300 ). Exporting Rook to another cluster \u00b6 If you have multiple K8s clusters running, and want to use the local rook-ceph cluster as the central storage, you can export the settings from this cluster with the following steps. 1) Copy create-external-cluster-resources.py into the directory /etc/ceph/ of the toolbox. 1 2 toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}') kubectl -n rook-ceph cp deploy/examples/create-external-cluster-resources.py $toolbox:/etc/ceph 2) Exec to the toolbox pod and execute create-external-cluster-resources.py with needed options to create required users and keys . Important For other clusters to connect to storage in this cluster, Rook must be configured with a networking configuration that is accessible from other clusters. 
Most commonly this is done by enabling host networking in the CephCluster CR so the Ceph daemons will be addressable by their host IPs.","title":"External Storage Cluster"},{"location":"CRDs/Cluster/external-cluster/#external-configuration","text":"Source cluster: The cluster providing the data, usually configured by cephadm Consumer cluster: The K8s cluster that will be consuming the external source cluster","title":"External configuration"},{"location":"CRDs/Cluster/external-cluster/#prerequisites","text":"Create the desired types of storage in the provider Ceph cluster: RBD pools CephFS filesystem","title":"Prerequisites"},{"location":"CRDs/Cluster/external-cluster/#commands-on-the-source-ceph-cluster","text":"In order to configure an external Ceph cluster with Rook, we need to extract some information in order to connect to that cluster.","title":"Commands on the source Ceph cluster"},{"location":"CRDs/Cluster/external-cluster/#1-create-all-users-and-keys","text":"Run the python script create-external-cluster-resources.py for creating all users and keys. 1 python3 create-external-cluster-resources.py --rbd-data-pool-name  --cephfs-filesystem-name  --rgw-endpoint  --namespace  --format bash --namespace : Namespace where CephCluster will run, for example rook-ceph-external --format bash : The format of the output --rbd-data-pool-name : The name of the RBD data pool --alias-rbd-data-pool-name : Provides an alias for the RBD data pool name, necessary if a special character is present in the pool name such as a period or underscore --rgw-endpoint : (optional) The RADOS Gateway endpoint in the format : or : . --rgw-pool-prefix : (optional) The prefix of the RGW pools. If not specified, the default prefix is default --rgw-tls-cert-path : (optional) RADOS Gateway endpoint TLS certificate file path --rgw-skip-tls : (optional) Ignore TLS certification validation when a self-signed certificate is provided (NOT RECOMMENDED) --rbd-metadata-ec-pool-name : (optional) Provides the name of erasure coded RBD metadata pool, used for creating ECRBDStorageClass. --monitoring-endpoint : (optional) Ceph Manager prometheus exporter endpoints (comma separated list of entries of active and standby mgrs) --monitoring-endpoint-port : (optional) Ceph Manager prometheus exporter port --skip-monitoring-endpoint : (optional) Skip prometheus exporter endpoints, even if they are available. Useful if the prometheus module is not enabled --ceph-conf : (optional) Provide a Ceph conf file --keyring : (optional) Path to Ceph keyring file, to be used with --ceph-conf --cluster-name : (optional) Ceph cluster name --output : (optional) Output will be stored into the provided file --dry-run : (optional) Prints the executed commands without running them --run-as-user : (optional) Provides a user name to check the cluster's health status, must be prefixed by client . 
--cephfs-metadata-pool-name : (optional) Provides the name of the cephfs metadata pool --cephfs-filesystem-name : (optional) The name of the filesystem, used for creating CephFS StorageClass --cephfs-data-pool-name : (optional) Provides the name of the CephFS data pool, used for creating CephFS StorageClass --rados-namespace : (optional) Divides a pool into separate logical namespaces, used for creating RBD PVC in a RadosNamespaces --subvolume-group : (optional) Provides the name of the subvolume group, used for creating CephFS PVC in a subvolumeGroup --rgw-realm-name : (optional) Provides the name of the rgw-realm --rgw-zone-name : (optional) Provides the name of the rgw-zone --rgw-zonegroup-name : (optional) Provides the name of the rgw-zone-group --upgrade : (optional) Upgrades the 'Ceph CSI keyrings (For example: client.csi-cephfs-provisioner) with new permissions needed for the new cluster version and older permission will still be applied. --restricted-auth-permission : (optional) Restrict cephCSIKeyrings auth permissions to specific pools, and cluster. Mandatory flags that need to be set are --rbd-data-pool-name , and --cluster-name . --cephfs-filesystem-name flag can also be passed in case of CephFS user restriction, so it can restrict users to particular CephFS filesystem. --v2-port-enable : (optional) Enables the v2 mon port (3300) for mons.","title":"1. Create all users and keys"},{"location":"CRDs/Cluster/external-cluster/#multi-tenancy","text":"To enable multi-tenancy, run the script with the --restricted-auth-permission flag and pass the mandatory flags with it, It will generate the secrets which you can use for creating new Consumer cluster deployment using the same Source cluster (ceph cluster). So you would be running different isolated consumer clusters on top of single Source cluster . Note Restricting the csi-users per pool, and per cluster will require creating new csi-users and new secrets for that csi-users. So apply these secrets only to new Consumer cluster deployment while using the same Source cluster . 1 python3 create-external-cluster-resources.py --cephfs-filesystem-name  --rbd-data-pool-name  --cluster-name  --restricted-auth-permission true --format  --rgw-endpoint  --namespace ","title":"Multi-tenancy"},{"location":"CRDs/Cluster/external-cluster/#rgw-multisite","text":"Pass the --rgw-realm-name , --rgw-zonegroup-name and --rgw-zone-name flags to create the admin ops user in a master zone, zonegroup and realm. See the Multisite doc for creating a zone, zonegroup and realm. 1 python3 create-external-cluster-resources.py --rbd-data-pool-name  --format bash --rgw-endpoint  --rgw-realm-name > --rgw-zonegroup-name  --rgw-zone-name >","title":"RGW Multisite"},{"location":"CRDs/Cluster/external-cluster/#upgrade-example","text":"1) If consumer cluster doesn't have restricted caps, this will upgrade all the default csi-users (non-restricted): 1 python3 create-external-cluster-resources.py --upgrade 2) If the consumer cluster has restricted caps: Restricted users created using --restricted-auth-permission flag need to pass mandatory flags: ' --rbd-data-pool-name (if it is a rbd user), --cluster-name and --run-as-user ' flags while upgrading, in case of cephfs users if you have passed --cephfs-filesystem-name flag while creating csi-users then while upgrading it will be mandatory too. 
In this example the user would be client.csi-rbd-node-rookstorage-replicapool (following the pattern csi-user-clusterName-poolName ) 1 python3 create-external-cluster-resources.py --upgrade --rbd-data-pool-name replicapool --cluster-name rookstorage --run-as-user client.csi-rbd-node-rookstorage-replicapool Note An existing non-restricted user cannot be converted to a restricted user by upgrading. The upgrade flag should only be used to append new permissions to users. It shouldn't be used for changing a csi user already applied permissions. For example, you shouldn't change the pool(s) a user has access to.","title":"Upgrade Example"},{"location":"CRDs/Cluster/external-cluster/#2-copy-the-bash-output","text":"Example Output: 1 2 3 4 5 6 7 8 9 10 11 export ROOK_EXTERNAL_FSID=797f411a-aafe-11ec-a254-fa163e1539f5 export ROOK_EXTERNAL_USERNAME=client.healthchecker export ROOK_EXTERNAL_CEPH_MON_DATA=ceph-rados-upstream-w4pdvq-node1-installer=10.0.210.83:6789 export ROOK_EXTERNAL_USER_SECRET=AQAdm0FilZDSJxAAMucfuu/j0ZYYP4Bia8Us+w== export ROOK_EXTERNAL_DASHBOARD_LINK=https://10.0.210.83:8443/ export CSI_RBD_NODE_SECRET=AQC1iDxip45JDRAAVahaBhKz1z0WW98+ACLqMQ== export CSI_RBD_PROVISIONER_SECRET=AQC1iDxiMM+LLhAA0PucjNZI8sG9Eh+pcvnWhQ== export MONITORING_ENDPOINT=10.0.210.83 export MONITORING_ENDPOINT_PORT=9283 export RBD_POOL_NAME=replicated_2g export RGW_POOL_PREFIX=default","title":"2. Copy the bash output"},{"location":"CRDs/Cluster/external-cluster/#commands-on-the-k8s-consumer-cluster","text":"","title":"Commands on the K8s consumer cluster"},{"location":"CRDs/Cluster/external-cluster/#import-the-source-data","text":"Paste the above output from create-external-cluster-resources.py into your current shell to allow importing the source data. Run the import script. !!! note If your Rook cluster nodes are running a kernel earlier than or equivalent to 5.4, remove fast-diff,object-map,deep-flatten,exclusive-lock from the imageFeatures line. 1 2 3 ```console . import-external-cluster.sh ```","title":"Import the Source Data"},{"location":"CRDs/Cluster/external-cluster/#helm-installation","text":"To install with Helm, the rook cluster helm chart will configure the necessary resources for the external cluster with the example values-external.yaml . 1 2 3 4 5 6 7 clusterNamespace=rook-ceph operatorNamespace=rook-ceph cd deploy/examples/charts/rook-ceph-cluster helm repo add rook-release https://charts.rook.io/release helm install --create-namespace --namespace $clusterNamespace rook-ceph rook-release/rook-ceph -f values.yaml helm install --create-namespace --namespace $clusterNamespace rook-ceph-cluster \\ --set operatorNamespace=$operatorNamespace rook-release/rook-ceph-cluster -f values-external.yaml Skip the manifest installation section and continue with Cluster Verification .","title":"Helm Installation"},{"location":"CRDs/Cluster/external-cluster/#manifest-installation","text":"If not installing with Helm, here are the steps to install with manifests. Deploy Rook, create common.yaml , crds.yaml and operator.yaml manifests. Create common-external.yaml and cluster-external.yaml","title":"Manifest Installation"},{"location":"CRDs/Cluster/external-cluster/#cluster-verification","text":"Verify the consumer cluster is connected to the source ceph cluster: 1 2 3 $ kubectl -n rook-ceph-external get CephCluster NAME DATADIRHOSTPATH MONCOUNT AGE STATE HEALTH rook-ceph-external /var/lib/rook 162m Connected HEALTH_OK Verify the creation of the storage class depending on the rbd pools and filesystem provided. 
ceph-rbd and cephfs would be the respective names for the RBD and CephFS storage classes. 1 kubectl -n rook-ceph-external get sc Then you can now create a persistent volume based on these StorageClass.","title":"Cluster Verification"},{"location":"CRDs/Cluster/external-cluster/#connect-to-an-external-object-store","text":"Create the object store resources: Create the external object store CR to configure connection to external gateways. Create an Object store user for credentials to access the S3 endpoint. Create a bucket storage class where a client can request creating buckets. Create the Object Bucket Claim , which will create an individual bucket for reading and writing objects. 1 2 3 4 5 cd deploy/examples kubectl create -f object-external.yaml kubectl create -f object-user.yaml kubectl create -f storageclass-bucket-delete.yaml kubectl create -f object-bucket-claim-delete.yaml Hint For more details see the Object Store topic","title":"Connect to an External Object Store"},{"location":"CRDs/Cluster/external-cluster/#connect-to-v2-mon-port","text":"If encryption or compression on the wire is needed, specify the --v2-port-enable flag. If the v2 address type is present in the ceph quorum_status , then the output of 'ceph mon data' i.e, ROOK_EXTERNAL_CEPH_MON_DATA will use the v2 port( 3300 ).","title":"Connect to v2 mon port"},{"location":"CRDs/Cluster/external-cluster/#exporting-rook-to-another-cluster","text":"If you have multiple K8s clusters running, and want to use the local rook-ceph cluster as the central storage, you can export the settings from this cluster with the following steps. 1) Copy create-external-cluster-resources.py into the directory /etc/ceph/ of the toolbox. 1 2 toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}') kubectl -n rook-ceph cp deploy/examples/create-external-cluster-resources.py $toolbox:/etc/ceph 2) Exec to the toolbox pod and execute create-external-cluster-resources.py with needed options to create required users and keys . Important For other clusters to connect to storage in this cluster, Rook must be configured with a networking configuration that is accessible from other clusters. Most commonly this is done by enabling host networking in the CephCluster CR so the Ceph daemons will be addressable by their host IPs.","title":"Exporting Rook to another cluster"},{"location":"CRDs/Cluster/host-cluster/","text":"A host storage cluster is one where Rook configures Ceph to store data directly on the host. The Ceph mons will store the metadata on the host (at a path defined by the dataDirHostPath ), and the OSDs will consume raw devices or partitions. The Ceph persistent data is stored directly on a host path (Ceph Mons) and on raw devices (Ceph OSDs). To get you started, here are several example of the Cluster CR to configure the host. All Devices \u00b6 For the simplest possible configuration, this example shows that all devices or partitions should be consumed by Ceph. The mons will store the metadata on the host node under /var/lib/rook . 
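After a cluster such as the one below has been applied, one way to see which devices Rook actually selected is to read the OSD prepare job logs. This is only a sketch and assumes the app=rook-ceph-osd-prepare label that Rook applies to those pods:

```console
kubectl -n rook-ceph get pods -l app=rook-ceph-osd-prepare
kubectl -n rook-ceph logs -l app=rook-ceph-osd-prepare --tail=50
```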
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : # see the \"Cluster Settings\" section below for more details on which image of ceph to run image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false storage : useAllNodes : true useAllDevices : true Node and Device Filters \u00b6 More commonly, you will want to be more specific about which nodes and devices where Rook should configure the storage. The placement settings are very flexible to add node affinity, anti-affinity, or tolerations. For more options, see the placement documentation . In this example, Rook will only configure Ceph daemons to run on nodes that are labeled with role=rook-node , and more specifically the OSDs will only be created on nodes labeled with role=rook-osd-node . 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false dashboard : enabled : true # cluster level storage configuration and selection storage : useAllNodes : true useAllDevices : true # Only create OSDs on devices that match the regular expression filter, \"sdb\" in this example deviceFilter : sdb # To control where various services will be scheduled by kubernetes, use the placement configuration sections below. # The example under 'all' would have all services scheduled on kubernetes nodes labeled with 'role=rook-node' and # the OSDs would specifically only be created on nodes labeled with roke=rook-osd-node. placement : all : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : role operator : In values : - rook-node osd : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : role operator : In values : - rook-osd-node Specific Nodes and Devices \u00b6 If you need fine-grained control for every node and every device that is being configured, individual nodes and their config can be specified. In this example, we see that specific node names and devices can be specified. Hint Each node's 'name' field should match their 'kubernetes.io/hostname' label. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false dashboard : enabled : true # cluster level storage configuration and selection storage : useAllNodes : false useAllDevices : false deviceFilter : config : metadataDevice : databaseSizeMB : \"1024\" # this value can be removed for environments with normal sized disks (100 GB or larger) nodes : - name : \"172.17.4.201\" devices : # specific devices to use for storage can be specified for each node - name : \"sdb\" # Whole storage device - name : \"sdc1\" # One specific partition. Should not have a file system on it. 
- name : \"/dev/disk/by-id/ata-ST4000DM004-XXXX\" # both device name and explicit udev links are supported config : # configuration can be specified at the node level which overrides the cluster level config - name : \"172.17.4.301\" deviceFilter : \"^sd.\"","title":"Host Storage Cluster"},{"location":"CRDs/Cluster/host-cluster/#all-devices","text":"For the simplest possible configuration, this example shows that all devices or partitions should be consumed by Ceph. The mons will store the metadata on the host node under /var/lib/rook . 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : # see the \"Cluster Settings\" section below for more details on which image of ceph to run image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false storage : useAllNodes : true useAllDevices : true","title":"All Devices"},{"location":"CRDs/Cluster/host-cluster/#node-and-device-filters","text":"More commonly, you will want to be more specific about which nodes and devices where Rook should configure the storage. The placement settings are very flexible to add node affinity, anti-affinity, or tolerations. For more options, see the placement documentation . In this example, Rook will only configure Ceph daemons to run on nodes that are labeled with role=rook-node , and more specifically the OSDs will only be created on nodes labeled with role=rook-osd-node . 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false dashboard : enabled : true # cluster level storage configuration and selection storage : useAllNodes : true useAllDevices : true # Only create OSDs on devices that match the regular expression filter, \"sdb\" in this example deviceFilter : sdb # To control where various services will be scheduled by kubernetes, use the placement configuration sections below. # The example under 'all' would have all services scheduled on kubernetes nodes labeled with 'role=rook-node' and # the OSDs would specifically only be created on nodes labeled with roke=rook-osd-node. placement : all : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : role operator : In values : - rook-node osd : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : role operator : In values : - rook-osd-node","title":"Node and Device Filters"},{"location":"CRDs/Cluster/host-cluster/#specific-nodes-and-devices","text":"If you need fine-grained control for every node and every device that is being configured, individual nodes and their config can be specified. In this example, we see that specific node names and devices can be specified. Hint Each node's 'name' field should match their 'kubernetes.io/hostname' label. 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false dashboard : enabled : true # cluster level storage configuration and selection storage : useAllNodes : false useAllDevices : false deviceFilter : config : metadataDevice : databaseSizeMB : \"1024\" # this value can be removed for environments with normal sized disks (100 GB or larger) nodes : - name : \"172.17.4.201\" devices : # specific devices to use for storage can be specified for each node - name : \"sdb\" # Whole storage device - name : \"sdc1\" # One specific partition. Should not have a file system on it. - name : \"/dev/disk/by-id/ata-ST4000DM004-XXXX\" # both device name and explicit udev links are supported config : # configuration can be specified at the node level which overrides the cluster level config - name : \"172.17.4.301\" deviceFilter : \"^sd.\"","title":"Specific Nodes and Devices"},{"location":"CRDs/Cluster/pvc-cluster/","text":"In a \"PVC-based cluster\", the Ceph persistent data is stored on volumes requested from a storage class of your choice. This type of cluster is recommended in a cloud environment where volumes can be dynamically created and also in clusters where a local PV provisioner is available. AWS Storage Example \u00b6 In this example, the mon and OSD volumes are provisioned from the AWS gp2 storage class. This storage class can be replaced by any storage class that provides file mode (for mons) and block mode (for OSDs). 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false volumeClaimTemplate : spec : storageClassName : gp2 resources : requests : storage : 10Gi storage : storageClassDeviceSets : - name : set1 count : 3 portable : false encrypted : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce onlyApplyOSDPlacement : false Local Storage Example \u00b6 In the CRD specification below, 3 OSDs (having specific placement and resource values) and 3 mons with each using a 10Gi PVC, are created by Rook using the local-storage storage class. 
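Because a local-storage class is typically backed by statically created PVs rather than a dynamic provisioner, matching PVs must already exist before the specification below is applied. The following is a minimal, hedged sketch of one block-mode PV for an OSD; the device path and node name are placeholders, and the mons additionally need Filesystem-mode volumes:

```yaml
apiVersion: v1
kind: PersistentVolume
metadata:
  name: local-osd-pv-0              # placeholder name
spec:
  storageClassName: local-storage
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteOnce
  volumeMode: Block                 # OSDs consume raw block volumes
  persistentVolumeReclaimPolicy: Retain
  local:
    path: /dev/sdb                  # placeholder device present on the node
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - node1             # placeholder node name
```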
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false volumeClaimTemplate : spec : storageClassName : local-storage resources : requests : storage : 10Gi cephVersion : image : quay.io/ceph/ceph:v17.2.6 allowUnsupported : false dashboard : enabled : true network : hostNetwork : false storage : storageClassDeviceSets : - name : set1 count : 3 portable : false resources : limits : cpu : \"500m\" memory : \"4Gi\" requests : cpu : \"500m\" memory : \"4Gi\" placement : podAntiAffinity : preferredDuringSchedulingIgnoredDuringExecution : - weight : 100 podAffinityTerm : labelSelector : matchExpressions : - key : \"rook.io/cluster\" operator : In values : - cluster1 topologyKey : \"topology.kubernetes.io/zone\" volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi storageClassName : local-storage volumeMode : Block accessModes : - ReadWriteOnce PVC storage only for monitors \u00b6 In the CRD specification below three monitors are created each using a 10Gi PVC created by Rook using the local-storage storage class. Even while the mons consume PVCs, the OSDs in this example will still consume raw devices on the host. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false volumeClaimTemplate : spec : storageClassName : local-storage resources : requests : storage : 10Gi dashboard : enabled : true storage : useAllNodes : true useAllDevices : true Dedicated metadata and wal device for OSD on PVC \u00b6 In the simplest case, Ceph OSD BlueStore consumes a single (primary) storage device. BlueStore is the engine used by the OSD to store data. The storage device is normally used as a whole, occupying the full device that is managed directly by BlueStore. It is also possible to deploy BlueStore across additional devices such as a DB device. This device can be used for storing BlueStore\u2019s internal metadata. BlueStore (or rather, the embedded RocksDB) will put as much metadata as it can on the DB device to improve performance. If the DB device fills up, metadata will spill back onto the primary device (where it would have been otherwise). Again, it is only helpful to provision a DB device if it is faster than the primary device. You can have multiple volumeClaimTemplates where each might either represent a device or a metadata device. An example of the storage section when specifying the metadata device is: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 storage : storageClassDeviceSets : - name : set1 count : 3 portable : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. 
local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce - metadata : name : metadata spec : resources : requests : # Find the right size https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#sizing storage : 5Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, io1) storageClassName : io1 volumeMode : Block accessModes : - ReadWriteOnce Note Note that Rook only supports three naming convention for a given template: \"data\": represents the main OSD block device, where your data is being stored. \"metadata\": represents the metadata (including block.db and block.wal) device used to store the Ceph Bluestore database for an OSD. \"wal\": represents the block.wal device used to store the Ceph Bluestore database for an OSD. If this device is set, \"metadata\" device will refer specifically to block.db device. It is recommended to use a faster storage class for the metadata or wal device, with a slower device for the data. Otherwise, having a separate metadata device will not improve the performance. The bluestore partition has the following reference combinations supported by the ceph-volume utility: A single \"data\" device. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 storage : storageClassDeviceSets : - name : set1 count : 3 portable : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce A \"data\" device and a \"metadata\" device. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 storage : storageClassDeviceSets : - name : set1 count : 3 portable : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce - metadata : name : metadata spec : resources : requests : # Find the right size https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#sizing storage : 5Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, io1) storageClassName : io1 volumeMode : Block accessModes : - ReadWriteOnce A \"data\" device and a \"wal\" device. A WAL device can be used for BlueStore\u2019s internal journal or write-ahead log (block.wal), it is only useful to use a WAL device if the device is faster than the primary device (data device). There is no separate \"metadata\" device in this case, the data of main OSD block and block.db located in \"data\" device. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 storage : storageClassDeviceSets : - name : set1 count : 3 portable : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce - metadata : name : wal spec : resources : requests : # Find the right size https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#sizing storage : 5Gi # IMPORTANT: Change the storage class depending on your environment (e.g. 
local-storage, io1) storageClassName : io1 volumeMode : Block accessModes : - ReadWriteOnce A \"data\" device, a \"metadata\" device and a \"wal\" device. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 storage : storageClassDeviceSets : - name : set1 count : 3 portable : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce - metadata : name : metadata spec : resources : requests : # Find the right size https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#sizing storage : 5Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, io1) storageClassName : io1 volumeMode : Block accessModes : - ReadWriteOnce - metadata : name : wal spec : resources : requests : # Find the right size https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#sizing storage : 5Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, io1) storageClassName : io1 volumeMode : Block accessModes : - ReadWriteOnce To determine the size of the metadata block follow the official Ceph sizing guide . With the present configuration, each OSD will have its main block allocated a 10GB device as well a 5GB device to act as a bluestore database.","title":"PVC Storage Cluster"},{"location":"CRDs/Cluster/pvc-cluster/#aws-storage-example","text":"In this example, the mon and OSD volumes are provisioned from the AWS gp2 storage class. This storage class can be replaced by any storage class that provides file mode (for mons) and block mode (for OSDs). 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false volumeClaimTemplate : spec : storageClassName : gp2 resources : requests : storage : 10Gi storage : storageClassDeviceSets : - name : set1 count : 3 portable : false encrypted : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce onlyApplyOSDPlacement : false","title":"AWS Storage Example"},{"location":"CRDs/Cluster/pvc-cluster/#local-storage-example","text":"In the CRD specification below, 3 OSDs (having specific placement and resource values) and 3 mons with each using a 10Gi PVC, are created by Rook using the local-storage storage class. 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false volumeClaimTemplate : spec : storageClassName : local-storage resources : requests : storage : 10Gi cephVersion : image : quay.io/ceph/ceph:v17.2.6 allowUnsupported : false dashboard : enabled : true network : hostNetwork : false storage : storageClassDeviceSets : - name : set1 count : 3 portable : false resources : limits : cpu : \"500m\" memory : \"4Gi\" requests : cpu : \"500m\" memory : \"4Gi\" placement : podAntiAffinity : preferredDuringSchedulingIgnoredDuringExecution : - weight : 100 podAffinityTerm : labelSelector : matchExpressions : - key : \"rook.io/cluster\" operator : In values : - cluster1 topologyKey : \"topology.kubernetes.io/zone\" volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi storageClassName : local-storage volumeMode : Block accessModes : - ReadWriteOnce","title":"Local Storage Example"},{"location":"CRDs/Cluster/pvc-cluster/#pvc-storage-only-for-monitors","text":"In the CRD specification below three monitors are created each using a 10Gi PVC created by Rook using the local-storage storage class. Even while the mons consume PVCs, the OSDs in this example will still consume raw devices on the host. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : cephVersion : image : quay.io/ceph/ceph:v17.2.6 dataDirHostPath : /var/lib/rook mon : count : 3 allowMultiplePerNode : false volumeClaimTemplate : spec : storageClassName : local-storage resources : requests : storage : 10Gi dashboard : enabled : true storage : useAllNodes : true useAllDevices : true","title":"PVC storage only for monitors"},{"location":"CRDs/Cluster/pvc-cluster/#dedicated-metadata-and-wal-device-for-osd-on-pvc","text":"In the simplest case, Ceph OSD BlueStore consumes a single (primary) storage device. BlueStore is the engine used by the OSD to store data. The storage device is normally used as a whole, occupying the full device that is managed directly by BlueStore. It is also possible to deploy BlueStore across additional devices such as a DB device. This device can be used for storing BlueStore\u2019s internal metadata. BlueStore (or rather, the embedded RocksDB) will put as much metadata as it can on the DB device to improve performance. If the DB device fills up, metadata will spill back onto the primary device (where it would have been otherwise). Again, it is only helpful to provision a DB device if it is faster than the primary device. You can have multiple volumeClaimTemplates where each might either represent a device or a metadata device. An example of the storage section when specifying the metadata device is: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 storage : storageClassDeviceSets : - name : set1 count : 3 portable : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. 
local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce - metadata : name : metadata spec : resources : requests : # Find the right size https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#sizing storage : 5Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, io1) storageClassName : io1 volumeMode : Block accessModes : - ReadWriteOnce Note Note that Rook only supports three naming convention for a given template: \"data\": represents the main OSD block device, where your data is being stored. \"metadata\": represents the metadata (including block.db and block.wal) device used to store the Ceph Bluestore database for an OSD. \"wal\": represents the block.wal device used to store the Ceph Bluestore database for an OSD. If this device is set, \"metadata\" device will refer specifically to block.db device. It is recommended to use a faster storage class for the metadata or wal device, with a slower device for the data. Otherwise, having a separate metadata device will not improve the performance. The bluestore partition has the following reference combinations supported by the ceph-volume utility: A single \"data\" device. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 storage : storageClassDeviceSets : - name : set1 count : 3 portable : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce A \"data\" device and a \"metadata\" device. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 storage : storageClassDeviceSets : - name : set1 count : 3 portable : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce - metadata : name : metadata spec : resources : requests : # Find the right size https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#sizing storage : 5Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, io1) storageClassName : io1 volumeMode : Block accessModes : - ReadWriteOnce A \"data\" device and a \"wal\" device. A WAL device can be used for BlueStore\u2019s internal journal or write-ahead log (block.wal), it is only useful to use a WAL device if the device is faster than the primary device (data device). There is no separate \"metadata\" device in this case, the data of main OSD block and block.db located in \"data\" device. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 storage : storageClassDeviceSets : - name : set1 count : 3 portable : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce - metadata : name : wal spec : resources : requests : # Find the right size https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#sizing storage : 5Gi # IMPORTANT: Change the storage class depending on your environment (e.g. 
local-storage, io1) storageClassName : io1 volumeMode : Block accessModes : - ReadWriteOnce A \"data\" device, a \"metadata\" device and a \"wal\" device. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 storage : storageClassDeviceSets : - name : set1 count : 3 portable : false volumeClaimTemplates : - metadata : name : data spec : resources : requests : storage : 10Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, gp2) storageClassName : gp2 volumeMode : Block accessModes : - ReadWriteOnce - metadata : name : metadata spec : resources : requests : # Find the right size https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#sizing storage : 5Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, io1) storageClassName : io1 volumeMode : Block accessModes : - ReadWriteOnce - metadata : name : wal spec : resources : requests : # Find the right size https://docs.ceph.com/docs/master/rados/configuration/bluestore-config-ref/#sizing storage : 5Gi # IMPORTANT: Change the storage class depending on your environment (e.g. local-storage, io1) storageClassName : io1 volumeMode : Block accessModes : - ReadWriteOnce To determine the size of the metadata block follow the official Ceph sizing guide . With the present configuration, each OSD will have its main block allocated a 10GB device as well a 5GB device to act as a bluestore database.","title":"Dedicated metadata and wal device for OSD on PVC"},{"location":"CRDs/Cluster/stretch-cluster/","text":"For environments that only have two failure domains available where data can be replicated, consider the case where one failure domain is down and the data is still fully available in the remaining failure domain. To support this scenario, Ceph has integrated support for \"stretch\" clusters. Rook requires three zones. Two zones (A and B) will each run all types of Rook pods, which we call the \"data\" zones. Two mons run in each of the two data zones, while two replicas of the data are in each zone for a total of four data replicas. The third zone (arbiter) runs a single mon. No other Rook or Ceph daemons need to be run in the arbiter zone. For this example, we assume the desired failure domain is a zone. Another failure domain can also be specified with a known topology node label which is already being used for OSD failure domains. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph spec : dataDirHostPath : /var/lib/rook mon : # Five mons must be created for stretch mode count : 5 allowMultiplePerNode : false stretchCluster : failureDomainLabel : topology.kubernetes.io/zone subFailureDomain : host zones : - name : a arbiter : true - name : b - name : c cephVersion : # Stretch cluster is supported in Ceph Pacific or newer. image : quay.io/ceph/ceph:v17.2.6 allowUnsupported : true # Either storageClassDeviceSets or the storage section can be specified for creating OSDs. # This example uses all devices for simplicity. 
storage : useAllNodes : true useAllDevices : true deviceFilter : \"\" # OSD placement is expected to include the non-arbiter zones placement : osd : nodeAffinity : requiredDuringSchedulingIgnoredDuringExecution : nodeSelectorTerms : - matchExpressions : - key : topology.kubernetes.io/zone operator : In values : - b - c For more details, see the Stretch Cluster design doc .","title":"Stretch Storage Cluster"},{"location":"CRDs/Object-Storage/ceph-object-realm-crd/","text":"Rook allows creation of a realm in a Ceph Object Multisite configuration through a CRD. The following settings are available for Ceph object store realms. Example \u00b6 1 2 3 4 5 6 7 8 9 apiVersion : ceph.rook.io/v1 kind : CephObjectRealm metadata : name : realm-a namespace : rook-ceph # The endpoint in this section must be an endpoint from the master zone in the master zone group of realm-a. See object-multisite.md for more details. spec : pull : endpoint : http://10.2.105.133:80 Settings \u00b6 Metadata \u00b6 name : The name of the object realm to create namespace : The namespace of the Rook cluster where the object realm is created. Spec \u00b6 pull : This optional section is for pulling the realm from another ceph cluster. endpoint : The endpoint in the realm from another ceph cluster you want to pull from. This endpoint must be in the master zone of the master zone group of the realm.","title":"CephObjectRealm CRD"},{"location":"CRDs/Object-Storage/ceph-object-realm-crd/#example","text":"1 2 3 4 5 6 7 8 9 apiVersion : ceph.rook.io/v1 kind : CephObjectRealm metadata : name : realm-a namespace : rook-ceph # The endpoint in this section must be an endpoint from the master zone in the master zone group of realm-a. See object-multisite.md for more details. spec : pull : endpoint : http://10.2.105.133:80","title":"Example"},{"location":"CRDs/Object-Storage/ceph-object-realm-crd/#settings","text":"","title":"Settings"},{"location":"CRDs/Object-Storage/ceph-object-realm-crd/#metadata","text":"name : The name of the object realm to create namespace : The namespace of the Rook cluster where the object realm is created.","title":"Metadata"},{"location":"CRDs/Object-Storage/ceph-object-realm-crd/#spec","text":"pull : This optional section is for pulling the realm from another ceph cluster. endpoint : The endpoint in the realm from another ceph cluster you want to pull from. This endpoint must be in the master zone of the master zone group of the realm.","title":"Spec"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/","text":"Rook allows creation and customization of object stores through the custom resource definitions (CRDs). The following settings are available for Ceph object stores. Example \u00b6 Erasure Coded \u00b6 Erasure coded pools can only be used with dataPools . The metadataPool must use a replicated pool. Note This sample requires at least 3 bluestore OSDs , with each OSD located on a different node . The OSDs must be located on different nodes, because the failureDomain is set to host and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). 
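One way to verify that requirement before creating the object store is to list the OSD tree from the toolbox pod (a sketch; it assumes the rook-ceph-tools deployment described in the toolbox documentation is running):

```console
kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph osd tree
```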
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : my-store namespace : rook-ceph spec : metadataPool : failureDomain : host replicated : size : 3 dataPool : failureDomain : host erasureCoded : dataChunks : 2 codingChunks : 1 preservePoolsOnDelete : true gateway : # sslCertificateRef: # caBundleRef: port : 80 # securePort: 443 instances : 1 # A key/value list of annotations annotations : # key: value placement : # nodeAffinity: # requiredDuringSchedulingIgnoredDuringExecution: # nodeSelectorTerms: # - matchExpressions: # - key: role # operator: In # values: # - rgw-node # tolerations: # - key: rgw-node # operator: Exists # podAffinity: # podAntiAffinity: # topologySpreadConstraints: resources : # limits: # cpu: \"500m\" # memory: \"1024Mi\" # requests: # cpu: \"500m\" # memory: \"1024Mi\" #zone: #name: zone-a Object Store Settings \u00b6 Metadata \u00b6 name : The name of the object store to create, which will be reflected in the pool and other resource names. namespace : The namespace of the Rook cluster where the object store is created. Pools \u00b6 The pools allow all of the settings defined in the Block Pool CRD spec. For more details, see the Block Pool CRD settings. In the example above, there must be at least three hosts (size 3) and at least three devices (2 data + 1 coding chunks) in the cluster. When the zone section is set pools with the object stores name will not be created since the object-store will the using the pools created by the ceph-object-zone. metadataPool : The settings used to create all of the object store metadata pools. Must use replication. dataPool : The settings to create the object store data pool. Can use replication or erasure coding. preservePoolsOnDelete : If it is set to 'true' the pools used to support the object store will remain when the object store will be deleted. This is a security measure to avoid accidental loss of data. It is set to 'false' by default. If not specified is also deemed as 'false'. Gateway Settings \u00b6 The gateway settings correspond to the RGW daemon settings. type : S3 is supported sslCertificateRef : If specified, this is the name of the Kubernetes secret( opaque or tls type) that contains the TLS certificate to be used for secure connections to the object store. If it is an opaque Kubernetes Secret, Rook will look in the secret provided at the cert key name. The value of the cert key must be in the format expected by the RGW service : \"The server key, server certificate, and any other CA or intermediate certificates be supplied in one file. Each of these items must be in PEM form.\" They are scenarios where the certificate DNS is set for a particular domain that does not include the local Kubernetes DNS, namely the object store DNS service endpoint. If adding the service DNS name to the certificate is not empty another key can be specified in the secret's data: insecureSkipVerify: true to skip the certificate verification. It is not recommended to enable this option since TLS is susceptible to machine-in-the-middle attacks unless custom verification is used. caBundleRef : If specified, this is the name of the Kubernetes secret (type opaque ) that contains additional custom ca-bundle to use. The secret must be in the same namespace as the Rook cluster. Rook will look in the secret provided at the cabundle key name. 
hostNetwork : Whether host networking is enabled for the rgw daemon. If not set, the network settings from the cluster CR will be applied. port : The port on which the Object service will be reachable. If host networking is enabled, the RGW daemons will also listen on that port. If running on SDN, the RGW daemon listening port will be 8080 internally. securePort : The secure port on which RGW pods will be listening. A TLS certificate must be specified either via sslCerticateRef or service.annotations instances : The number of pods that will be started to load balance this object store. externalRgwEndpoints : A list of IP addresses to connect to external existing Rados Gateways (works with external mode). This setting will be ignored if the CephCluster does not have external spec enabled. Refer to the external cluster section for more details. Multiple endpoints can be given, but for stability of ObjectBucketClaims, we highly recommend that users give only a single external RGW endpoint that is a load balancer that sends requests to the multiple RGWs. annotations : Key value pair list of annotations to add. labels : Key value pair list of labels to add. placement : The Kubernetes placement settings to determine where the RGW pods should be started in the cluster. resources : Set resource requests/limits for the Gateway Pod(s), see Resource Requirements/Limits . priorityClassName : Set priority class name for the Gateway Pod(s) service : The annotations to set on to the Kubernetes Service of RGW. The service serving cert feature supported in Openshift is enabled by the following example: 1 2 3 4 gateway : service : annotations : service.beta.openshift.io/serving-cert-secret-name :  Example of external rgw endpoints to connect to: 1 2 3 4 5 gateway : port : 80 externalRgwEndpoints : - ip : 192.168.39.182 # hostname: example.com This will create a service with the endpoint 192.168.39.182 on port 80 , pointing to the Ceph object external gateway. All the other settings from the gateway section will be ignored, except for securePort . Zone Settings \u00b6 The zone settings allow the object store to join custom created ceph-object-zone . name : the name of the ceph-object-zone the object store will be in. Runtime settings \u00b6 MIME types \u00b6 Rook provides a default mime.types file for each Ceph object store. This file is stored in a Kubernetes ConfigMap with the name rook-ceph-rgw--mime-types . For most users, the default file should suffice, however, the option is available to users to edit the mime.types file in the ConfigMap as they desire. Users may have their own special file types, and particularly security conscious users may wish to pare down the file to reduce the possibility of a file type execution attack. Rook will not overwrite an existing mime.types ConfigMap so that user modifications will not be destroyed. If the object store is destroyed and recreated, the ConfigMap will also be destroyed and created anew. Health settings \u00b6 Rook will be default monitor the state of the object store endpoints. The following CRD settings are available: healthCheck : main object store health monitoring section startupProbe : Disable, or override timing and threshold values of the object gateway startup probe. readinessProbe : Disable, or override timing and threshold values of the object gateway readiness probe. 
Here is a complete example: 1 2 3 4 5 6 7 healthCheck : startupProbe : disabled : false readinessProbe : disabled : false periodSeconds : 5 failureThreshold : 2 You can monitor the health of a CephObjectStore by monitoring the gateway deployments it creates. The primary deployment created is named rook-ceph-rgw--a where store-name is the name of the CephObjectStore (don't forget the -a at the end). Security settings \u00b6 Ceph RGW supports Server Side Encryption as defined in AWS S3 protocol with three different modes: AWS-SSE:C, AWS-SSE:KMS and AWS-SSE:S3. The last two modes require a Key Management System (KMS) like HashiCorp Vault. Currently, Vault is the only supported KMS backend for CephObjectStore. Refer to the Vault KMS section for details about Vault. If these settings are defined, then RGW will establish a connection between Vault and whenever S3 client sends request with Server Side Encryption. Ceph's Vault documentation has more details. The security section contains settings related to KMS encryption of the RGW. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 security : kms : connectionDetails : KMS_PROVIDER : vault VAULT_ADDR : http://vault.default.svc.cluster.local:8200 VAULT_BACKEND_PATH : rgw VAULT_SECRET_ENGINE : kv VAULT_BACKEND : v2 # name of the k8s secret containing the kms authentication token tokenSecretName : rgw-vault-kms-token s3 : connectionDetails : KMS_PROVIDER : vault VAULT_ADDR : http://vault.default.svc.cluster.local:8200 VAULT_BACKEND_PATH : rgw VAULT_SECRET_ENGINE : transit # name of the k8s secret containing the kms authentication token tokenSecretName : rgw-vault-s3-token For RGW, please note the following: VAULT_SECRET_ENGINE : the secret engine which Vault should use. Currently supports kv and transit . AWS-SSE:KMS supports transit engine and kv engine version 2. AWS-SSE:S3 only supports transit engine. The Storage administrator needs to create a secret in the Vault server so that S3 clients use that key for encryption for AWS-SSE:KMS 1 2 vault kv put rook/ key=$(openssl rand -base64 32) # kv engine vault write -f transit/keys/ exportable=true # transit engine TLS authentication with custom certificates between Vault and CephObjectStore RGWs are supported from ceph v16.2.6 onwards tokenSecretName can be (and often will be) the same for both kms and s3 configurations. AWS-SSE:S3 requires Ceph Quincy (v17.2.3) and later. Deleting a CephObjectStore \u00b6 During deletion of a CephObjectStore resource, Rook protects against accidental or premature destruction of user data by blocking deletion if there are any object buckets in the object store being deleted. Buckets may have been created by users or by ObjectBucketClaims. For deletion to be successful, all buckets in the object store must be removed. This may require manual deletion or removal of all ObjectBucketClaims. Alternately, the cephobjectstore.ceph.rook.io finalizer on the CephObjectStore can be removed to remove the Kubernetes Custom Resource, but the Ceph pools which store the data will not be removed in this case. Rook will warn about which buckets are blocking deletion in three ways: An event will be registered on the CephObjectStore resource A status condition will be added to the CephObjectStore resource An error will be added to the Rook Ceph Operator log If the CephObjectStore is configured in a multisite setup the above conditions are applicable only to stores that belong to a single master zone. Otherwise the conditions are ignored. 
Even if the store is removed the user can access the data from a peer object store.","title":"CephObjectStore CRD"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#example","text":"","title":"Example"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#erasure-coded","text":"Erasure coded pools can only be used with dataPools . The metadataPool must use a replicated pool. Note This sample requires at least 3 bluestore OSDs , with each OSD located on a different node . The OSDs must be located on different nodes, because the failureDomain is set to host and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : my-store namespace : rook-ceph spec : metadataPool : failureDomain : host replicated : size : 3 dataPool : failureDomain : host erasureCoded : dataChunks : 2 codingChunks : 1 preservePoolsOnDelete : true gateway : # sslCertificateRef: # caBundleRef: port : 80 # securePort: 443 instances : 1 # A key/value list of annotations annotations : # key: value placement : # nodeAffinity: # requiredDuringSchedulingIgnoredDuringExecution: # nodeSelectorTerms: # - matchExpressions: # - key: role # operator: In # values: # - rgw-node # tolerations: # - key: rgw-node # operator: Exists # podAffinity: # podAntiAffinity: # topologySpreadConstraints: resources : # limits: # cpu: \"500m\" # memory: \"1024Mi\" # requests: # cpu: \"500m\" # memory: \"1024Mi\" #zone: #name: zone-a","title":"Erasure Coded"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#object-store-settings","text":"","title":"Object Store Settings"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#metadata","text":"name : The name of the object store to create, which will be reflected in the pool and other resource names. namespace : The namespace of the Rook cluster where the object store is created.","title":"Metadata"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#pools","text":"The pools allow all of the settings defined in the Block Pool CRD spec. For more details, see the Block Pool CRD settings. In the example above, there must be at least three hosts (size 3) and at least three devices (2 data + 1 coding chunks) in the cluster. When the zone section is set pools with the object stores name will not be created since the object-store will the using the pools created by the ceph-object-zone. metadataPool : The settings used to create all of the object store metadata pools. Must use replication. dataPool : The settings to create the object store data pool. Can use replication or erasure coding. preservePoolsOnDelete : If it is set to 'true' the pools used to support the object store will remain when the object store will be deleted. This is a security measure to avoid accidental loss of data. It is set to 'false' by default. If not specified is also deemed as 'false'.","title":"Pools"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#gateway-settings","text":"The gateway settings correspond to the RGW daemon settings. type : S3 is supported sslCertificateRef : If specified, this is the name of the Kubernetes secret( opaque or tls type) that contains the TLS certificate to be used for secure connections to the object store. If it is an opaque Kubernetes Secret, Rook will look in the secret provided at the cert key name. 
The value of the cert key must be in the format expected by the RGW service : \"The server key, server certificate, and any other CA or intermediate certificates must be supplied in one file. Each of these items must be in PEM form.\" There are scenarios where the certificate DNS is set for a particular domain that does not include the local Kubernetes DNS, namely the object store DNS service endpoint. If adding the service DNS name to the certificate is not possible, another key can be specified in the secret's data: insecureSkipVerify: true to skip the certificate verification. It is not recommended to enable this option since TLS is susceptible to machine-in-the-middle attacks unless custom verification is used. caBundleRef : If specified, this is the name of the Kubernetes secret (type opaque ) that contains an additional custom ca-bundle to use. The secret must be in the same namespace as the Rook cluster. Rook will look in the secret provided at the cabundle key name. hostNetwork : Whether host networking is enabled for the rgw daemon. If not set, the network settings from the cluster CR will be applied. port : The port on which the Object service will be reachable. If host networking is enabled, the RGW daemons will also listen on that port. If running on SDN, the RGW daemon listening port will be 8080 internally. securePort : The secure port on which RGW pods will be listening. A TLS certificate must be specified either via sslCertificateRef or service.annotations . instances : The number of pods that will be started to load balance this object store. externalRgwEndpoints : A list of IP addresses to connect to external existing Rados Gateways (works with external mode). This setting will be ignored if the CephCluster does not have external spec enabled. Refer to the external cluster section for more details. Multiple endpoints can be given, but for stability of ObjectBucketClaims, we highly recommend that users give only a single external RGW endpoint that is a load balancer that sends requests to the multiple RGWs. annotations : Key value pair list of annotations to add. labels : Key value pair list of labels to add. placement : The Kubernetes placement settings to determine where the RGW pods should be started in the cluster. resources : Set resource requests/limits for the Gateway Pod(s), see Resource Requirements/Limits . priorityClassName : Set priority class name for the Gateway Pod(s) service : The annotations to set on the Kubernetes Service of RGW. The service serving cert feature supported in OpenShift is enabled by the following example: 1 2 3 4 gateway : service : annotations : service.beta.openshift.io/serving-cert-secret-name :  Example of external rgw endpoints to connect to: 1 2 3 4 5 gateway : port : 80 externalRgwEndpoints : - ip : 192.168.39.182 # hostname: example.com This will create a service with the endpoint 192.168.39.182 on port 80 , pointing to the Ceph object external gateway. All the other settings from the gateway section will be ignored, except for securePort .","title":"Gateway Settings"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#zone-settings","text":"The zone settings allow the object store to join a custom created ceph-object-zone .
name : the name of the ceph-object-zone the object store will be in.","title":"Zone Settings"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#runtime-settings","text":"","title":"Runtime settings"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#mime-types","text":"Rook provides a default mime.types file for each Ceph object store. This file is stored in a Kubernetes ConfigMap with the name rook-ceph-rgw-<store-name>-mime-types . For most users, the default file should suffice, however, the option is available to users to edit the mime.types file in the ConfigMap as they desire. Users may have their own special file types, and particularly security conscious users may wish to pare down the file to reduce the possibility of a file type execution attack. Rook will not overwrite an existing mime.types ConfigMap so that user modifications will not be destroyed. If the object store is destroyed and recreated, the ConfigMap will also be destroyed and created anew.","title":"MIME types"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#health-settings","text":"Rook will by default monitor the state of the object store endpoints. The following CRD settings are available: healthCheck : main object store health monitoring section startupProbe : Disable, or override timing and threshold values of the object gateway startup probe. readinessProbe : Disable, or override timing and threshold values of the object gateway readiness probe. Here is a complete example: 1 2 3 4 5 6 7 healthCheck : startupProbe : disabled : false readinessProbe : disabled : false periodSeconds : 5 failureThreshold : 2 You can monitor the health of a CephObjectStore by monitoring the gateway deployments it creates. The primary deployment created is named rook-ceph-rgw-<store-name>-a where <store-name> is the name of the CephObjectStore (don't forget the -a at the end).","title":"Health settings"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#security-settings","text":"Ceph RGW supports Server Side Encryption as defined in the AWS S3 protocol with three different modes: AWS-SSE:C, AWS-SSE:KMS and AWS-SSE:S3. The last two modes require a Key Management System (KMS) like HashiCorp Vault. Currently, Vault is the only supported KMS backend for CephObjectStore. Refer to the Vault KMS section for details about Vault. If these settings are defined, then RGW will establish a connection with Vault whenever an S3 client sends a request with Server Side Encryption. Ceph's Vault documentation has more details. The security section contains settings related to KMS encryption of the RGW. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 security : kms : connectionDetails : KMS_PROVIDER : vault VAULT_ADDR : http://vault.default.svc.cluster.local:8200 VAULT_BACKEND_PATH : rgw VAULT_SECRET_ENGINE : kv VAULT_BACKEND : v2 # name of the k8s secret containing the kms authentication token tokenSecretName : rgw-vault-kms-token s3 : connectionDetails : KMS_PROVIDER : vault VAULT_ADDR : http://vault.default.svc.cluster.local:8200 VAULT_BACKEND_PATH : rgw VAULT_SECRET_ENGINE : transit # name of the k8s secret containing the kms authentication token tokenSecretName : rgw-vault-s3-token For RGW, please note the following: VAULT_SECRET_ENGINE : the secret engine which Vault should use. Currently supports kv and transit . AWS-SSE:KMS supports transit engine and kv engine version 2. AWS-SSE:S3 only supports transit engine.
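As an illustration of the client side of this flow, here is a minimal sketch of an S3 request that asks RGW for AWS-SSE:KMS encryption. It assumes a hypothetical store named my-store, a bucket my-bucket, and a Vault key my-bucket-key (created as shown in the next paragraph); the endpoint URL and all names are illustrative and not part of the Rook documentation.

aws --endpoint-url http://rook-ceph-rgw-my-store.rook-ceph.svc:80 \
    s3 cp ./file.txt s3://my-bucket/file.txt \
    --sse aws:kms --sse-kms-key-id my-bucket-key   # RGW resolves my-bucket-key through the configured Vault backend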
The Storage administrator needs to create a secret in the Vault server so that S3 clients use that key for encryption with AWS-SSE:KMS: 1 2 vault kv put rook/<mybucketkey> key=$(openssl rand -base64 32) # kv engine vault write -f transit/keys/<mybucketkey> exportable=true # transit engine TLS authentication with custom certificates between Vault and CephObjectStore RGWs is supported from Ceph v16.2.6 onwards. tokenSecretName can be (and often will be) the same for both kms and s3 configurations. AWS-SSE:S3 requires Ceph Quincy (v17.2.3) and later.","title":"Security settings"},{"location":"CRDs/Object-Storage/ceph-object-store-crd/#deleting-a-cephobjectstore","text":"During deletion of a CephObjectStore resource, Rook protects against accidental or premature destruction of user data by blocking deletion if there are any object buckets in the object store being deleted. Buckets may have been created by users or by ObjectBucketClaims. For deletion to be successful, all buckets in the object store must be removed. This may require manual deletion or removal of all ObjectBucketClaims. Alternately, the cephobjectstore.ceph.rook.io finalizer on the CephObjectStore can be removed to remove the Kubernetes Custom Resource, but the Ceph pools which store the data will not be removed in this case. Rook will warn about which buckets are blocking deletion in three ways: An event will be registered on the CephObjectStore resource A status condition will be added to the CephObjectStore resource An error will be added to the Rook Ceph Operator log If the CephObjectStore is configured in a multisite setup, the above conditions are applicable only to stores that belong to a single master zone. Otherwise the conditions are ignored. Even if the store is removed the user can access the data from a peer object store.","title":"Deleting a CephObjectStore"},{"location":"CRDs/Object-Storage/ceph-object-store-user-crd/","text":"Rook allows creation and customization of object store users through the custom resource definitions (CRDs). The following settings are available for Ceph object store users. Example \u00b6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 apiVersion : ceph.rook.io/v1 kind : CephObjectStoreUser metadata : name : my-user namespace : rook-ceph spec : store : my-store displayName : my-display-name quotas : maxBuckets : 100 maxSize : 10G maxObjects : 10000 capabilities : user : \"*\" bucket : \"*\" Object Store User Settings \u00b6 Metadata \u00b6 name : The name of the object store user to create, which will be reflected in the secret and other resource names. namespace : The namespace of the Rook cluster where the object store user is created. Spec \u00b6 store : The object store in which the user will be created. This matches the name of the objectstore CRD. displayName : The display name which will be passed to the radosgw-admin user create command. quotas : This represents the quota limitations that can be set on the user. Please refer here for details. maxBuckets : The maximum bucket limit for the user. maxSize : Maximum size limit of all objects across all the user's buckets. maxObjects : Maximum number of objects across all the user's buckets. capabilities : Ceph allows users to be given additional permissions. Due to missing APIs in go-ceph for updating the user capabilities, this setting can currently only be used during the creation of the object store user. If a user's capabilities need to be modified, the user must be deleted and re-created. See the Ceph docs for more info.
Rook supports adding read , write , read, write , or * permissions for the following resources: users buckets usage metadata zone roles info amz-cache bilog mdlog datalog user-policy odic-provider ratelimit","title":"CephObjectStoreUser CRD"},{"location":"CRDs/Object-Storage/ceph-object-store-user-crd/#example","text":"1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 apiVersion : ceph.rook.io/v1 kind : CephObjectStoreUser metadata : name : my-user namespace : rook-ceph spec : store : my-store displayName : my-display-name quotas : maxBuckets : 100 maxSize : 10G maxObjects : 10000 capabilities : user : \"*\" bucket : \"*\"","title":"Example"},{"location":"CRDs/Object-Storage/ceph-object-store-user-crd/#object-store-user-settings","text":"","title":"Object Store User Settings"},{"location":"CRDs/Object-Storage/ceph-object-store-user-crd/#metadata","text":"name : The name of the object store user to create, which will be reflected in the secret and other resource names. namespace : The namespace of the Rook cluster where the object store user is created.","title":"Metadata"},{"location":"CRDs/Object-Storage/ceph-object-store-user-crd/#spec","text":"store : The object store in which the user will be created. This matches the name of the objectstore CRD. displayName : The display name which will be passed to the radosgw-admin user create command. quotas : This represents quota limitation can be set on the user. Please refer here for details. maxBuckets : The maximum bucket limit for the user. maxSize : Maximum size limit of all objects across all the user's buckets. maxObjects : Maximum number of objects across all the user's buckets. capabilities : Ceph allows users to be given additional permissions. Due to missing APIs in go-ceph for updating the user capabilities, this setting can currently only be used during the creation of the object store user. If a user's capabilities need modified, the user must be deleted and re-created. See the Ceph docs for more info. Rook supports adding read , write , read, write , or * permissions for the following resources: users buckets usage metadata zone roles info amz-cache bilog mdlog datalog user-policy odic-provider ratelimit","title":"Spec"},{"location":"CRDs/Object-Storage/ceph-object-zone-crd/","text":"Rook allows creation of zones in a ceph cluster for a Ceph Object Multisite configuration through a CRD. The following settings are available for Ceph object store zones. Example \u00b6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 apiVersion : ceph.rook.io/v1 kind : CephObjectZone metadata : name : zone-a namespace : rook-ceph spec : zoneGroup : zonegroup-a metadataPool : failureDomain : host replicated : size : 3 dataPool : failureDomain : osd erasureCoded : dataChunks : 2 codingChunks : 1 customEndpoints : - \"http://rgw-a.fqdn\" preservePoolsOnDelete : true Settings \u00b6 Metadata \u00b6 name : The name of the object zone to create namespace : The namespace of the Rook cluster where the object zone is created. Pools \u00b6 The pools allow all of the settings defined in the Pool CRD spec. For more details, see the Pool CRD settings. In the example above, there must be at least three hosts (size 3) and at least three devices (2 data + 1 coding chunks) in the cluster. Spec \u00b6 zonegroup : The object zonegroup in which the zone will be created. This matches the name of the object zone group CRD. metadataPool : The settings used to create all of the object store metadata pools. Must use replication. dataPool : The settings to create the object store data pool. 
Can use replication or erasure coding. customEndpoints : Specify the endpoint(s) that will accept multisite replication traffic for this zone. You may include the port in the definition if necessary. For example: \" https://my-object-store.my-domain.net:443 \". By default, Rook will set this to the DNS name of the ClusterIP Service created for the CephObjectStore that corresponds to this zone. Most multisite configurations will not exist within the same Kubernetes cluster, meaning the default value will not be useful. In these cases, you will be required to create your own custom ingress resource for the CephObjectStore in order to make the zone available for replication. You must add the endpoint for your custom ingress resource to this list to allow the store to accept replication traffic. In the case of multiple stores (or multiple endpoints for a single store), you are not required to put all endpoints in this list. Only specify the endpoints that should be used for replication traffic. If you update customEndpoints to return to an empty list, you must restart the Rook operator so that it will automatically add the CephObjectStore service endpoint to Ceph's internal configuration. preservePoolsOnDelete : If it is set to 'true', the pools used to support the CephObjectZone will remain when it is deleted. This is a security measure to avoid accidental loss of data. It is set to 'true' by default. It is better to check whether the data has synced with other peer zones before triggering the deletion, to avoid accidental loss of data, via the steps mentioned here . When deleting a CephObjectZone, deletion will be blocked until all CephObjectStores belonging to the zone are removed.","title":"CephObjectZone CRD"},{"location":"CRDs/Object-Storage/ceph-object-zone-crd/#example","text":"1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 apiVersion : ceph.rook.io/v1 kind : CephObjectZone metadata : name : zone-a namespace : rook-ceph spec : zoneGroup : zonegroup-a metadataPool : failureDomain : host replicated : size : 3 dataPool : failureDomain : osd erasureCoded : dataChunks : 2 codingChunks : 1 customEndpoints : - \"http://rgw-a.fqdn\" preservePoolsOnDelete : true","title":"Example"},{"location":"CRDs/Object-Storage/ceph-object-zone-crd/#settings","text":"","title":"Settings"},{"location":"CRDs/Object-Storage/ceph-object-zone-crd/#metadata","text":"name : The name of the object zone to create namespace : The namespace of the Rook cluster where the object zone is created.","title":"Metadata"},{"location":"CRDs/Object-Storage/ceph-object-zone-crd/#pools","text":"The pools allow all of the settings defined in the Pool CRD spec. For more details, see the Pool CRD settings. In the example above, there must be at least three hosts (size 3) and at least three devices (2 data + 1 coding chunks) in the cluster.","title":"Pools"},{"location":"CRDs/Object-Storage/ceph-object-zone-crd/#spec","text":"zonegroup : The object zonegroup in which the zone will be created. This matches the name of the object zone group CRD. metadataPool : The settings used to create all of the object store metadata pools. Must use replication. dataPool : The settings to create the object store data pool.
By default, Rook will set this to the DNS name of the ClusterIP Service created for the CephObjectStore that corresponds to this zone. Most multisite configurations will not exist within the same Kubernetes cluster, meaning the default value will not be useful. In these cases, you will be required to create your own custom ingress resource for the CephObjectStore in order to make the zone available for replication. You must add the endpoint for your custom ingress resource to this list to allow the store to accept replication traffic. In the case of multiple stores (or multiple endpoints for a single store), you are not required to put all endpoints in this list. Only specify the endpoints that should be used for replication traffic. If you update customEndpoints to return to an empty list, you must the Rook operator to automatically add the CephObjectStore service endpoint to Ceph's internal configuration. preservePoolsOnDelete : If it is set to 'true' the pools used to support the CephObjectZone will remain when it is deleted. This is a security measure to avoid accidental loss of data. It is set to 'true' by default. It is better to check whether data synced with other peer zones before triggering the deletion to avoid accidental loss of data via steps mentioned here When deleting a CephObjectZone, deletion will be blocked until all CephObjectStores belonging to the zone are removed.","title":"Spec"},{"location":"CRDs/Object-Storage/ceph-object-zonegroup-crd/","text":"Rook allows creation of zone groups in a Ceph Object Multisite configuration through a CRD. The following settings are available for Ceph object store zone groups. Example \u00b6 1 2 3 4 5 6 7 apiVersion : ceph.rook.io/v1 kind : CephObjectZoneGroup metadata : name : zonegroup-a namespace : rook-ceph spec : realm : realm-a Settings \u00b6 Metadata \u00b6 name : The name of the object zone group to create namespace : The namespace of the Rook cluster where the object zone group is created. Spec \u00b6 realm : The object realm in which the zone group will be created. This matches the name of the object realm CRD.","title":"CephObjectZoneGroup CRD"},{"location":"CRDs/Object-Storage/ceph-object-zonegroup-crd/#example","text":"1 2 3 4 5 6 7 apiVersion : ceph.rook.io/v1 kind : CephObjectZoneGroup metadata : name : zonegroup-a namespace : rook-ceph spec : realm : realm-a","title":"Example"},{"location":"CRDs/Object-Storage/ceph-object-zonegroup-crd/#settings","text":"","title":"Settings"},{"location":"CRDs/Object-Storage/ceph-object-zonegroup-crd/#metadata","text":"name : The name of the object zone group to create namespace : The namespace of the Rook cluster where the object zone group is created.","title":"Metadata"},{"location":"CRDs/Object-Storage/ceph-object-zonegroup-crd/#spec","text":"realm : The object realm in which the zone group will be created. This matches the name of the object realm CRD.","title":"Spec"},{"location":"CRDs/Shared-Filesystem/ceph-filesystem-crd/","text":"Rook allows creation and customization of shared filesystems through the custom resource definitions (CRDs). The following settings are available for Ceph filesystems. Examples \u00b6 Replicated \u00b6 Note This sample requires at least 1 OSD per node , with each OSD located on 3 different nodes . Each OSD must be located on a different node, because both of the defined pools set the failureDomain to host and the replicated.size to 3 . The failureDomain can also be set to another location type (e.g. 
rack ), if it has been added as a location in the Storage Selection Settings . 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 apiVersion : ceph.rook.io/v1 kind : CephFilesystem metadata : name : myfs namespace : rook-ceph spec : metadataPool : failureDomain : host replicated : size : 3 dataPools : - name : replicated failureDomain : host replicated : size : 3 preserveFilesystemOnDelete : true metadataServer : activeCount : 1 activeStandby : true # A key/value list of annotations annotations : # key: value placement : # nodeAffinity: # requiredDuringSchedulingIgnoredDuringExecution: # nodeSelectorTerms: # - matchExpressions: # - key: role # operator: In # values: # - mds-node # tolerations: # - key: mds-node # operator: Exists # podAffinity: # podAntiAffinity: # topologySpreadConstraints: resources : # limits: # cpu: \"500m\" # memory: \"1024Mi\" # requests: # cpu: \"500m\" # memory: \"1024Mi\" (These definitions can also be found in the filesystem.yaml file) Erasure Coded \u00b6 Erasure coded pools require the OSDs to use bluestore for the configured storeType . Additionally, erasure coded pools can only be used with dataPools . The metadataPool must use a replicated pool. Note This sample requires at least 3 bluestore OSDs , with each OSD located on a different node . The OSDs must be located on different nodes, because the failureDomain will be set to host by default, and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 apiVersion : ceph.rook.io/v1 kind : CephFilesystem metadata : name : myfs-ec namespace : rook-ceph spec : metadataPool : replicated : size : 3 dataPools : - name : default replicated : size : 3 - name : erasurecoded erasureCoded : dataChunks : 2 codingChunks : 1 metadataServer : activeCount : 1 activeStandby : true IMPORTANT : For erasure coded pools, we have to create a replicated pool as the default data pool and an erasure-coded pool as a secondary pool. (These definitions can also be found in the filesystem-ec.yaml file. Also see an example in the storageclass-ec.yaml for how to configure the volume.) Filesystem Settings \u00b6 Metadata \u00b6 name : The name of the filesystem to create, which will be reflected in the pool and other resource names. namespace : The namespace of the Rook cluster where the filesystem is created. Pools \u00b6 The pools allow all of the settings defined in the Pool CRD spec. For more details, see the Pool CRD settings. In the example above, there must be at least three hosts (size 3) and at least eight devices (6 data + 2 coding chunks) in the cluster. metadataPool : The settings used to create the filesystem metadata pool. Must use replication. dataPools : The settings to create the filesystem data pools. Optionally (and we highly recommend), a pool name can be specified with the name field to override the default generated name; see more below. If multiple pools are specified, Rook will add the pools to the filesystem. Assigning users or files to a pool is left as an exercise for the reader with the CephFS documentation . The data pools can use replication or erasure coding. If erasure coding pools are specified, the cluster must be running with bluestore enabled on the OSDs. name : (optional, and highly recommended) Override the default generated name of the pool. The final pool name will consist of the filesystem name and pool name, e.g., - . 
We highly recommend specifying name to prevent issues that can arise from modifying the spec in a way that causes Rook to lose the original pool ordering. preserveFilesystemOnDelete : If it is set to 'true', the filesystem will remain when the CephFilesystem resource is deleted. This is a security measure to avoid loss of data if the CephFilesystem resource is deleted accidentally. The default value is 'false'. This option replaces preservePoolsOnDelete which should no longer be set. (deprecated) preservePoolsOnDelete : This option is replaced by the above preserveFilesystemOnDelete . For backwards compatibility and upgradeability, if this is set to 'true', Rook will treat preserveFilesystemOnDelete as being set to 'true'. Metadata Server Settings \u00b6 The metadata server settings correspond to the MDS daemon settings. activeCount : The number of active MDS instances. As load increases, CephFS will automatically partition the filesystem across the MDS instances. Rook will create double the number of MDS instances as requested by the active count. The extra instances will be in standby mode for failover. activeStandby : If true, the extra MDS instances will be in active standby mode and will keep a warm cache of the filesystem metadata for faster failover. The instances will be assigned by CephFS in failover pairs. If false, the extra MDS instances will all be in passive standby mode and will not maintain a warm cache of the metadata. mirroring : Sets up mirroring of the filesystem. enabled : whether mirroring is enabled on that filesystem (default: false) peers : to configure mirroring peers secretNames : a list of peers to connect to. Currently (Ceph Pacific release) only a single peer is supported where a peer represents a Ceph cluster. snapshotSchedules : schedule(s) for snapshots. One or more schedules are supported. path : filesystem source path to take the snapshot on interval : frequency of the snapshots. The interval can be specified in days, hours, or minutes using the d, h, or m suffix respectively. startTime : optional, determines at what time the snapshot process starts, specified using the ISO 8601 time format. snapshotRetention : allows managing retention policies: path : filesystem source path to apply the retention on duration : annotations : Key value pair list of annotations to add. labels : Key value pair list of labels to add. placement : The mds pods can be given standard Kubernetes placement restrictions with nodeAffinity , tolerations , podAffinity , and podAntiAffinity similar to placement defined for daemons configured by the cluster CRD . resources : Set resource requests/limits for the Filesystem MDS Pod(s), see MDS Resources Configuration Settings priorityClassName : Set priority class name for the Filesystem MDS Pod(s) startupProbe : Disable, or override timing and threshold values of the Filesystem MDS startup probe livenessProbe : Disable, or override timing and threshold values of the Filesystem MDS livenessProbe. MDS Resources Configuration Settings \u00b6 The format of the resource requests/limits structure is the same as described in the Ceph Cluster CRD documentation . If the memory resource limit is declared, Rook will automatically set the MDS configuration mds_cache_memory_limit . The configuration value is calculated with the aim that the actual MDS memory consumption remains consistent with the MDS pods' resource declaration.
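As a minimal sketch (the values are illustrative, not a recommendation), a metadataServer resource declaration such as the following would cause Rook to derive mds_cache_memory_limit from the declared memory limit:

metadataServer:
  activeCount: 1
  activeStandby: true
  resources:
    requests:
      cpu: "1"
      memory: "4Gi"
    limits:
      memory: "4Gi"   # Rook derives mds_cache_memory_limit from this limit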
In order to provide the best possible experience running Ceph in containers, Rook internally recommends the memory for MDS daemons to be at least 4096MB. If a user configures a limit or request value that is too low, Rook will still run the pod(s) and print a warning to the operator log.","title":"CephFilesystem CRD"},{"location":"CRDs/Shared-Filesystem/ceph-filesystem-crd/#examples","text":"","title":"Examples"},{"location":"CRDs/Shared-Filesystem/ceph-filesystem-crd/#replicated","text":"Note This sample requires at least 1 OSD per node , with each OSD located on 3 different nodes . Each OSD must be located on a different node, because both of the defined pools set the failureDomain to host and the replicated.size to 3 . The failureDomain can also be set to another location type (e.g. rack ), if it has been added as a location in the Storage Selection Settings . 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 apiVersion : ceph.rook.io/v1 kind : CephFilesystem metadata : name : myfs namespace : rook-ceph spec : metadataPool : failureDomain : host replicated : size : 3 dataPools : - name : replicated failureDomain : host replicated : size : 3 preserveFilesystemOnDelete : true metadataServer : activeCount : 1 activeStandby : true # A key/value list of annotations annotations : # key: value placement : # nodeAffinity: # requiredDuringSchedulingIgnoredDuringExecution: # nodeSelectorTerms: # - matchExpressions: # - key: role # operator: In # values: # - mds-node # tolerations: # - key: mds-node # operator: Exists # podAffinity: # podAntiAffinity: # topologySpreadConstraints: resources : # limits: # cpu: \"500m\" # memory: \"1024Mi\" # requests: # cpu: \"500m\" # memory: \"1024Mi\" (These definitions can also be found in the filesystem.yaml file)","title":"Replicated"},{"location":"CRDs/Shared-Filesystem/ceph-filesystem-crd/#erasure-coded","text":"Erasure coded pools require the OSDs to use bluestore for the configured storeType . Additionally, erasure coded pools can only be used with dataPools . The metadataPool must use a replicated pool. Note This sample requires at least 3 bluestore OSDs , with each OSD located on a different node . The OSDs must be located on different nodes, because the failureDomain will be set to host by default, and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 apiVersion : ceph.rook.io/v1 kind : CephFilesystem metadata : name : myfs-ec namespace : rook-ceph spec : metadataPool : replicated : size : 3 dataPools : - name : default replicated : size : 3 - name : erasurecoded erasureCoded : dataChunks : 2 codingChunks : 1 metadataServer : activeCount : 1 activeStandby : true IMPORTANT : For erasure coded pools, we have to create a replicated pool as the default data pool and an erasure-coded pool as a secondary pool. (These definitions can also be found in the filesystem-ec.yaml file. Also see an example in the storageclass-ec.yaml for how to configure the volume.)","title":"Erasure Coded"},{"location":"CRDs/Shared-Filesystem/ceph-filesystem-crd/#filesystem-settings","text":"","title":"Filesystem Settings"},{"location":"CRDs/Shared-Filesystem/ceph-filesystem-crd/#metadata","text":"name : The name of the filesystem to create, which will be reflected in the pool and other resource names. 
namespace : The namespace of the Rook cluster where the filesystem is created.","title":"Metadata"},{"location":"CRDs/Shared-Filesystem/ceph-filesystem-crd/#pools","text":"The pools allow all of the settings defined in the Pool CRD spec. For more details, see the Pool CRD settings. In the example above, there must be at least three hosts (size 3) and at least eight devices (6 data + 2 coding chunks) in the cluster. metadataPool : The settings used to create the filesystem metadata pool. Must use replication. dataPools : The settings to create the filesystem data pools. Optionally (and we highly recommend), a pool name can be specified with the name field to override the default generated name; see more below. If multiple pools are specified, Rook will add the pools to the filesystem. Assigning users or files to a pool is left as an exercise for the reader with the CephFS documentation . The data pools can use replication or erasure coding. If erasure coding pools are specified, the cluster must be running with bluestore enabled on the OSDs. name : (optional, and highly recommended) Override the default generated name of the pool. The final pool name will consist of the filesystem name and pool name, e.g., - . We highly recommend to specify name to prevent issues that can arise from modifying the spec in a way that causes Rook to lose the original pool ordering. preserveFilesystemOnDelete : If it is set to 'true' the filesystem will remain when the CephFilesystem resource is deleted. This is a security measure to avoid loss of data if the CephFilesystem resource is deleted accidentally. The default value is 'false'. This option replaces preservePoolsOnDelete which should no longer be set. (deprecated) preservePoolsOnDelete : This option is replaced by the above preserveFilesystemOnDelete . For backwards compatibility and upgradeability, if this is set to 'true', Rook will treat preserveFilesystemOnDelete as being set to 'true'.","title":"Pools"},{"location":"CRDs/Shared-Filesystem/ceph-filesystem-crd/#metadata-server-settings","text":"The metadata server settings correspond to the MDS daemon settings. activeCount : The number of active MDS instances. As load increases, CephFS will automatically partition the filesystem across the MDS instances. Rook will create double the number of MDS instances as requested by the active count. The extra instances will be in standby mode for failover. activeStandby : If true, the extra MDS instances will be in active standby mode and will keep a warm cache of the filesystem metadata for faster failover. The instances will be assigned by CephFS in failover pairs. If false, the extra MDS instances will all be on passive standby mode and will not maintain a warm cache of the metadata. mirroring : Sets up mirroring of the filesystem enabled : whether mirroring is enabled on that filesystem (default: false) peers : to configure mirroring peers secretNames : a list of peers to connect to. Currently (Ceph Pacific release) only a single peer is supported where a peer represents a Ceph cluster. snapshotSchedules : schedule(s) snapshot.One or more schedules are supported. path : filesystem source path to take the snapshot on interval : frequency of the snapshots. The interval can be specified in days, hours, or minutes using d, h, m suffix respectively. startTime : optional, determines at what time the snapshot process starts, specified using the ISO 8601 time format. 
snapshotRetention : allow to manage retention policies: path : filesystem source path to apply the retention on duration : annotations : Key value pair list of annotations to add. labels : Key value pair list of labels to add. placement : The mds pods can be given standard Kubernetes placement restrictions with nodeAffinity , tolerations , podAffinity , and podAntiAffinity similar to placement defined for daemons configured by the cluster CRD . resources : Set resource requests/limits for the Filesystem MDS Pod(s), see MDS Resources Configuration Settings priorityClassName : Set priority class name for the Filesystem MDS Pod(s) startupProbe : Disable, or override timing and threshold values of the Filesystem MDS startup probe livenessProbe : Disable, or override timing and threshold values of the Filesystem MDS livenessProbe.","title":"Metadata Server Settings"},{"location":"CRDs/Shared-Filesystem/ceph-filesystem-crd/#mds-resources-configuration-settings","text":"The format of the resource requests/limits structure is the same as described in the Ceph Cluster CRD documentation . If the memory resource limit is declared Rook will automatically set the MDS configuration mds_cache_memory_limit . The configuration value is calculated with the aim that the actual MDS memory consumption remains consistent with the MDS pods' resource declaration. In order to provide the best possible experience running Ceph in containers, Rook internally recommends the memory for MDS daemons to be at least 4096MB. If a user configures a limit or request value that is too low, Rook will still run the pod(s) and print a warning to the operator log.","title":"MDS Resources Configuration Settings"},{"location":"CRDs/Shared-Filesystem/ceph-fs-mirror-crd/","text":"This guide assumes you have created a Rook cluster as explained in the main Quickstart guide Rook allows creation and updating the fs-mirror daemon through the custom resource definitions (CRDs). CephFS will support asynchronous replication of snapshots to a remote (different Ceph cluster) CephFS file system via cephfs-mirror tool. Snapshots are synchronized by mirroring snapshot data followed by creating a snapshot with the same name (for a given directory on the remote file system) as the snapshot being synchronized. For more information about user management and capabilities see the Ceph docs . Creating daemon \u00b6 To get you started, here is a simple example of a CRD to deploy an cephfs-mirror daemon. 1 2 3 4 5 6 apiVersion : ceph.rook.io/v1 kind : CephFilesystemMirror metadata : name : my-fs-mirror namespace : rook-ceph spec : {} Settings \u00b6 If any setting is unspecified, a suitable default will be used automatically. FilesystemMirror metadata \u00b6 name : The name that will be used for the Ceph cephfs-mirror daemon. namespace : The Kubernetes namespace that will be created for the Rook cluster. The services, pods, and other resources created by the operator will be added to this namespace. FilesystemMirror Settings \u00b6 placement : The cephfs-mirror pods can be given standard Kubernetes placement restrictions with nodeAffinity , tolerations , podAffinity , and podAntiAffinity similar to placement defined for daemons configured by the cluster CRD . annotations : Key value pair list of annotations to add. labels : Key value pair list of labels to add. resources : The resource requirements for the cephfs-mirror pods. priorityClassName : The priority class to set on the cephfs-mirror pods. 
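Putting a few of these settings together, here is a minimal sketch of a CephFilesystemMirror that sets resources and a priority class; the resource values and the priority class name are illustrative assumptions, and only the fields documented above are used.

apiVersion: ceph.rook.io/v1
kind: CephFilesystemMirror
metadata:
  name: my-fs-mirror
  namespace: rook-ceph
spec:
  priorityClassName: system-cluster-critical   # illustrative choice of priority class
  resources:
    requests:
      cpu: "100m"
      memory: "128Mi"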
Configuring mirroring peers \u00b6 In order to configure mirroring peers, please refer to the CephFilesystem documentation .","title":"CephFilesystemMirror CRD"},{"location":"CRDs/Shared-Filesystem/ceph-fs-mirror-crd/#creating-daemon","text":"To get you started, here is a simple example of a CRD to deploy an cephfs-mirror daemon. 1 2 3 4 5 6 apiVersion : ceph.rook.io/v1 kind : CephFilesystemMirror metadata : name : my-fs-mirror namespace : rook-ceph spec : {}","title":"Creating daemon"},{"location":"CRDs/Shared-Filesystem/ceph-fs-mirror-crd/#settings","text":"If any setting is unspecified, a suitable default will be used automatically.","title":"Settings"},{"location":"CRDs/Shared-Filesystem/ceph-fs-mirror-crd/#filesystemmirror-metadata","text":"name : The name that will be used for the Ceph cephfs-mirror daemon. namespace : The Kubernetes namespace that will be created for the Rook cluster. The services, pods, and other resources created by the operator will be added to this namespace.","title":"FilesystemMirror metadata"},{"location":"CRDs/Shared-Filesystem/ceph-fs-mirror-crd/#filesystemmirror-settings","text":"placement : The cephfs-mirror pods can be given standard Kubernetes placement restrictions with nodeAffinity , tolerations , podAffinity , and podAntiAffinity similar to placement defined for daemons configured by the cluster CRD . annotations : Key value pair list of annotations to add. labels : Key value pair list of labels to add. resources : The resource requirements for the cephfs-mirror pods. priorityClassName : The priority class to set on the cephfs-mirror pods.","title":"FilesystemMirror Settings"},{"location":"CRDs/Shared-Filesystem/ceph-fs-mirror-crd/#configuring-mirroring-peers","text":"In order to configure mirroring peers, please refer to the CephFilesystem documentation .","title":"Configuring mirroring peers"},{"location":"CRDs/Shared-Filesystem/ceph-fs-subvolumegroup-crd/","text":"Info This guide assumes you have created a Rook cluster as explained in the main Quickstart guide Rook allows creation of Ceph Filesystem SubVolumeGroups through the custom resource definitions (CRDs). Filesystem subvolume groups are an abstraction for a directory level higher than Filesystem subvolumes to effect policies (e.g., File layouts) across a set of subvolumes. For more information about CephFS volume, subvolumegroup and subvolume refer to the Ceph docs . Creating daemon \u00b6 To get you started, here is a simple example of a CRD to create a subvolumegroup on the CephFilesystem \"myfs\". 1 2 3 4 5 6 7 8 apiVersion : ceph.rook.io/v1 kind : CephFilesystemSubVolumeGroup metadata : name : group-a namespace : rook-ceph # namespace:cluster spec : # filesystemName is the metadata name of the CephFilesystem CR where the subvolume group will be created filesystemName : myfs Settings \u00b6 If any setting is unspecified, a suitable default will be used automatically. CephFilesystemSubVolumeGroup metadata \u00b6 name : The name that will be used for the Ceph Filesystem subvolume group. CephFilesystemSubVolumeGroup spec \u00b6 filesystemName : The metadata name of the CephFilesystem CR where the subvolume group will be created.","title":"FilesystemSubVolumeGroup CRD"},{"location":"CRDs/Shared-Filesystem/ceph-fs-subvolumegroup-crd/#creating-daemon","text":"To get you started, here is a simple example of a CRD to create a subvolumegroup on the CephFilesystem \"myfs\". 
1 2 3 4 5 6 7 8 apiVersion : ceph.rook.io/v1 kind : CephFilesystemSubVolumeGroup metadata : name : group-a namespace : rook-ceph # namespace:cluster spec : # filesystemName is the metadata name of the CephFilesystem CR where the subvolume group will be created filesystemName : myfs","title":"Creating daemon"},{"location":"CRDs/Shared-Filesystem/ceph-fs-subvolumegroup-crd/#settings","text":"If any setting is unspecified, a suitable default will be used automatically.","title":"Settings"},{"location":"CRDs/Shared-Filesystem/ceph-fs-subvolumegroup-crd/#cephfilesystemsubvolumegroup-metadata","text":"name : The name that will be used for the Ceph Filesystem subvolume group.","title":"CephFilesystemSubVolumeGroup metadata"},{"location":"CRDs/Shared-Filesystem/ceph-fs-subvolumegroup-crd/#cephfilesystemsubvolumegroup-spec","text":"filesystemName : The metadata name of the CephFilesystem CR where the subvolume group will be created.","title":"CephFilesystemSubVolumeGroup spec"},{"location":"Contributing/ci-configuration/","text":"This page contains information regarding the CI configuration used for the Rook project to test, build and release the project. Secrets \u00b6 Snyk (Security Scan): SNYK_TOKEN - API Token for the snyk security scanner (workflow file: synk.yaml ). Testing: IBM_INSTANCE_ID : Used for KMS (Key Management System) IBM Key Protect access (see .github/workflows/encryption-pvc-kms-ibm-kp/action.yml ). IBM_SERVICE_API_KEY : Used for KMS (Key Management System) IBM Key Protect access (see .github/workflows/encryption-pvc-kms-ibm-kp/action.yml ). Publishing: DOCKER_USERNAME + DOCKER_PASSWORD : Username and password of registry. DOCKER_REGISTRY : Target registry namespace (e.g., rook ) AWS_USR + AWS_PSW : AWS credentials with access to S3 for Helm chart publishing. GIT_API_TOKEN : GitHub access token, used to push docs changes to the docs repositories gh-pages branch.","title":"CI Configuration"},{"location":"Contributing/ci-configuration/#secrets","text":"Snyk (Security Scan): SNYK_TOKEN - API Token for the snyk security scanner (workflow file: synk.yaml ). Testing: IBM_INSTANCE_ID : Used for KMS (Key Management System) IBM Key Protect access (see .github/workflows/encryption-pvc-kms-ibm-kp/action.yml ). IBM_SERVICE_API_KEY : Used for KMS (Key Management System) IBM Key Protect access (see .github/workflows/encryption-pvc-kms-ibm-kp/action.yml ). Publishing: DOCKER_USERNAME + DOCKER_PASSWORD : Username and password of registry. DOCKER_REGISTRY : Target registry namespace (e.g., rook ) AWS_USR + AWS_PSW : AWS credentials with access to S3 for Helm chart publishing. GIT_API_TOKEN : GitHub access token, used to push docs changes to the docs repositories gh-pages branch.","title":"Secrets"},{"location":"Contributing/development-environment/","text":"Install Kubernetes \u00b6 You can choose any Kubernetes install of your choice. The test framework only depends on kubectl being configured. To install kubectl , please see the official guide . Minikube \u00b6 The developers of Rook are working on Minikube and thus it is the recommended way to quickly get Rook up and running. Minikube should not be used for production but the Rook authors consider it great for development. While other tools such as k3d/kind are great, users have faced issues deploying Rook. Always use a virtual machine when testing Rook. Never use your host system where local devices may mistakenly be consumed. To install Minikube follow the official guide . 
It is recommended to use the kvm2 driver when running on a Linux machine and the hyperkit driver when running on a MacOS. Both allow to create and attach additional disks to the virtual machine. This is required for the Ceph OSD to consume one drive. We don't recommend any other drivers for Rook. You will need a Minikube version 1.23 or higher. Starting the cluster on Minikube is as simple as running: 1 2 3 4 5 6 7 8 # On Linux minikube start --disk-size=40g --extra-disks=1 --driver kvm2 # On MacOS with Intel processor minikube start --disk-size=40g --extra-disks=1 --driver hyperkit # On MacOS with Apple silicon minikube start --disk-size=40g --extra-disks 1 --driver qemu It is recommended to install a Docker client on your host system too. Depending on your operating system follow the official guide . Stopping the cluster and destroying the Minikube virtual machine can be done with: 1 minikube delete Install Helm \u00b6 Use helm.sh to install Helm and set up Rook charts defined under _output/charts (generated by build): To install and set up Helm charts for Rook run tests/scripts/helm.sh up . To clean up tests/scripts/helm.sh clean . Note These helper scripts depend on some artifacts under the _output/ directory generated during build time. These scripts should be run from the project root. Note If Helm is not available in your PATH , Helm will be downloaded to a temporary directory ( /tmp/rook-tests-scripts-helm ) and used from that directory.","title":"Developer Environment"},{"location":"Contributing/development-environment/#install-kubernetes","text":"You can choose any Kubernetes install of your choice. The test framework only depends on kubectl being configured. To install kubectl , please see the official guide .","title":"Install Kubernetes"},{"location":"Contributing/development-environment/#minikube","text":"The developers of Rook are working on Minikube and thus it is the recommended way to quickly get Rook up and running. Minikube should not be used for production but the Rook authors consider it great for development. While other tools such as k3d/kind are great, users have faced issues deploying Rook. Always use a virtual machine when testing Rook. Never use your host system where local devices may mistakenly be consumed. To install Minikube follow the official guide . It is recommended to use the kvm2 driver when running on a Linux machine and the hyperkit driver when running on a MacOS. Both allow to create and attach additional disks to the virtual machine. This is required for the Ceph OSD to consume one drive. We don't recommend any other drivers for Rook. You will need a Minikube version 1.23 or higher. Starting the cluster on Minikube is as simple as running: 1 2 3 4 5 6 7 8 # On Linux minikube start --disk-size=40g --extra-disks=1 --driver kvm2 # On MacOS with Intel processor minikube start --disk-size=40g --extra-disks=1 --driver hyperkit # On MacOS with Apple silicon minikube start --disk-size=40g --extra-disks 1 --driver qemu It is recommended to install a Docker client on your host system too. Depending on your operating system follow the official guide . Stopping the cluster and destroying the Minikube virtual machine can be done with: 1 minikube delete","title":"Minikube"},{"location":"Contributing/development-environment/#install-helm","text":"Use helm.sh to install Helm and set up Rook charts defined under _output/charts (generated by build): To install and set up Helm charts for Rook run tests/scripts/helm.sh up . To clean up tests/scripts/helm.sh clean . 
Note These helper scripts depend on some artifacts under the _output/ directory generated during build time. These scripts should be run from the project root. Note If Helm is not available in your PATH , Helm will be downloaded to a temporary directory ( /tmp/rook-tests-scripts-helm ) and used from that directory.","title":"Install Helm"},{"location":"Contributing/development-flow/","text":"Thank you for your time and effort to help us improve Rook! Here are a few steps to get started. If you have any questions, don't hesitate to reach out to us on our Slack dev channel. Prerequisites \u00b6 GO 1.20 or greater installed Git client installed GitHub account Initial Setup \u00b6 Create a Fork \u00b6 Navigate to http://github.com/rook/rook and click the \"Fork\" button. Clone Your Fork \u00b6 In a console window: 1 2 3 4 5 6 7 8 # Create the rook repo path mkdir -p $GOPATH/src/github.com/rook # Navigate to the local repo path cd $GOPATH/src/github.com/rook # Clone your fork, where  is your GitHub account name git clone https://github.com//rook.git Add Upstream Remote \u00b6 Add the upstream remote to your local git: 1 2 3 4 5 6 # Add 'upstream' to the list of remotes cd rook git remote add upstream https://github.com/rook/rook.git # Verify the remote was added git remote -v Two remotes should be available: origin and upstream . Build \u00b6 Before building the project, fetch the remotes to synchronize tags. 1 2 3 # Fetch all remotes git fetch -a make build Tip If in a Linux environment and make build command throws an error like unknown revision for some imports, add export GOPROXY=https://proxy.golang.org,direct to ~/.bashrc . Reload your environment and confirm with go env that GOPROXY is set. Hint Make will automatically pick up podman if docker packages are not available on your machine. Development Settings \u00b6 For consistent whitespace and other formatting in .go and other source files, apply the following settings in your IDE: Format with the goreturns tool Trim trailing whitespace Markdown Table of Contents is correctly updated automatically VS Code \u00b6 Tip VS Code will prompt you automatically with some recommended extensions to install, such as Markdown, Go, YAML validator, and ShellCheck. VS Code will automatically use the recommended settings in the .vscode/settings.json file. Self assign Issue \u00b6 To self-assign an issue that is not yet assigned to anyone else, add a comment in the issue with /assign in the body. 
Layout \u00b6 The overall source code layout is summarized: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 rook \u251c\u2500\u2500 build # build makefiles and logic to build, publish and release all Rook artifacts \u251c\u2500\u2500 cluster \u2502 \u251c\u2500\u2500 charts # Helm charts \u2502 \u2502 \u2514\u2500\u2500 rook-ceph \u2502 \u2502 \u2514\u2500\u2500 rook-ceph-cluster \u2502 \u2514\u2500\u2500 examples # Sample manifestes to configure the cluster \u2502 \u251c\u2500\u2500 cmd \u2502 \u251c\u2500\u2500 rook # Main command entry points for operators and daemons \u2502 \u251c\u2500\u2500 design # Design documents \u251c\u2500\u2500 Documentation # Documentation that is published to rook.io \u251c\u2500\u2500 images # Rook docker image sources \u2502 \u251c\u2500\u2500 pkg \u2502 \u251c\u2500\u2500 apis \u2502 \u2502 \u251c\u2500\u2500 ceph.rook.io # ceph specs used in the CRDs \u2502 \u2502 \u2502 \u251c\u2500\u2500 v1 \u2502 \u251c\u2500\u2500 client # auto-generated strongly typed client code to access Rook APIs \u2502 \u251c\u2500\u2500 clusterd \u2502 \u251c\u2500\u2500 daemon # daemons for configuring ceph \u2502 \u2502 \u251c\u2500\u2500 ceph \u2502 \u2502 \u2514\u2500\u2500 discover \u2502 \u251c\u2500\u2500 operator # all reconcile logic and custom controllers \u2502 \u2502 \u251c\u2500\u2500 ceph \u2502 \u2502 \u251c\u2500\u2500 discover \u2502 \u2502 \u251c\u2500\u2500 k8sutil \u2502 \u251c\u2500\u2500 util \u2502 \u2514\u2500\u2500 version \u2514\u2500\u2500 tests \u251c\u2500\u2500 framework # integration test framework \u2502 \u251c\u2500\u2500 clients \u2502 \u251c\u2500\u2500 installer \u2502 \u2514\u2500\u2500 utils \u251c\u2500\u2500 integration # integration test cases that will be invoked during golang testing \u2514\u2500\u2500 scripts # scripts for setting up integration and manual testing environments Development \u00b6 To submit a change, create a branch in your fork and then submit a pull request (PR) from the branch. Design Document \u00b6 For new features of significant scope and complexity, a design document is recommended before work begins on the implementation. Create a design document if: Adding a new CRD Adding a significant feature. For smaller, straightforward features and bug fixes, there is no need for a design document. Authoring a design document has many advantages: Forces the author to think critically about the feature and identify potential issues early in the design Obtain agreement amongst the community before code is written to avoid wasted effort in the wrong direction Newcomers may more quickly understand the feature Note Writing code to prototype the feature while working on the design may be very useful to help flesh out the approach. A design document should be written as a markdown file in the design folder . Follow the process outlined in the design template . There are many examples of previous design documents in that folder. Submit a pull request for the design to be discussed and approved by the community, just like any other change to the repository. Create a Branch \u00b6 From a console, create a new branch based on your fork where changes will be developed: 1 2 3 4 5 6 # Update the remotes git fetch --all # Create a new branch that is based off upstream master. Give it a simple, but descriptive name. # Generally it will be two to three words separated by dashes and without numbers. 
git checkout -b feature-name upstream/master Updating Your Fork \u00b6 During the development lifecycle, keep your branch(es) updated with the latest upstream master. As others on the team push changes, rebase your commits on top of the latest. This avoids unnecessary merge commits and keeps the commit history clean. Whenever an update is needed to the local repository, never perform a merge, always rebase. This will avoid merge commits in the git history. If there are any modified files, first stash them with git stash . 1 2 git fetch --all git rebase upstream/master Rebasing is a very powerful feature of Git. You need to understand how it works to avoid risking losing your work. Read about it in the Git documentation . Briefly, rebasing does the following: \"Unwinds\" the local commits. The local commits are removed temporarily from the history. The latest changes from upstream are added to the history The local commits are re-applied one by one If there are merge conflicts, there will be a prompt to fix them before continuing. Read the output closely. It will instruct how to complete the rebase. When rebasing is completed, all of the commits are restored in the history. Submitting a Pull Request \u00b6 After a feature or bug fix is completed in your branch, open a Pull Request (PR) to the upstream Rook repository . Before opening the PR: If there are code changes, add unit tests and verify that all unit tests are passing. See Unit Tests below on running unit tests. Rebase on the latest upstream changes Regression Testing \u00b6 All pull requests must pass all continuous integration (CI) tests before they can be merged. These tests automatically run against every pull request. The results of these tests along with code review feedback determine whether your request will be merged. Unit Tests \u00b6 From the root of your local Rook repo execute the following to run all of the unit tests: 1 make test Unit tests for individual packages can be run with the standard go test command. To see code coverage on the packages that you changed, view the coverage.html in a browser to inspect your new code. 1 2 go test -coverprofile=coverage.out go tool cover -html=coverage.out -o coverage.html Writing unit tests \u00b6 Good unit tests start with easily testable code. Small chunks (\"units\") of code can be easily tested for every possible input. Higher-level code units that are built from smaller, already-tested units can more easily verify that the units are combined together correctly. Common cases that may need tests: the feature is enabled the feature is disabled the feature is only partially enabled, for every possible way it can be partially enabled every error that can be encountered during execution of the feature the feature can be disabled (including partially) after it was enabled the feature can be modified (including partially) after it was enabled if there is a slice/array involved, test length = 0, length = 1, length = 3, length == max, length > max an input is not specified, for each input an input is specified incorrectly, for each input a resource the code relies on doesn't exist, for each dependency Integration Tests \u00b6 Rook's upstream continuous integration (CI) tests will run integration tests against your changes automatically. Integration tests will be run in Github actions. If an integration test fails, a tmate session will be available for troubleshooting for a short time. See the action details for an ssh connection to the Github runner. 
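If you want to reproduce an integration suite locally against your own disposable test cluster, a rough sketch looks like the following; the suite name, timeout, and path are illustrative, so check the tests/integration package for the suites that actually exist.

# from the project root, with kubectl pointing at a disposable test cluster
go test -v -timeout 7200s -run TestCephSmokeSuite ./tests/integration/...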
Commit structure \u00b6 Rook maintainers value clear, lengthy and explanatory commit messages. Requirements for commits: A commit prefix from the list of known prefixes At least one paragraph that explains the original issue and the changes in the commit The Signed-off-by tag is at the end of the commit message, achieved by committing with git commit -s An example acceptable commit message: 1 2 3 4 5 6 component: commit title This is the commit message. Here I'm explaining what the bug was along with its root cause. Then I'm explaining how I fixed it. Signed-off-by: FirstName LastName  Commit History \u00b6 To prepare your branch to open a PR, the minimal number of logical commits is preferred to maintain a clean commit history. Most commonly a PR will include a single commit where all changes are squashed, although sometimes there will be multiple logical commits. 1 2 # Inspect your commit history to determine if you need to squash commits git log To squash multiple commits or make other changes to the commit history, use git rebase : 1 2 3 # # In this example, the last 5 commits will be opened in the git rebase tool. git rebase -i HEAD~5 Once your commit history is clean, ensure the branch is rebased on the latest upstream before opening the PR. Submitting \u00b6 Go to the Rook github to open the PR. If you have pushed recently to a branch, you will see an obvious link to open the PR. If you have not pushed recently, go to the Pull Request tab and select your fork and branch for the PR. After the PR is open, make changes simply by pushing new commits. The PR will track the changes in your fork and rerun the CI automatically. Always open a pull request against master. Never open a pull request against a released branch (e.g. release-1.2) unless working directly with a maintainer. Backporting to a Release Branch \u00b6 The flow for getting a fix into a release branch is: Open a PR to merge changes to master following the process outlined above Add the backport label to that PR such as backport-release-1.11 After the PR is merged to master, the mergify bot will automatically open a PR with the commits backported to the release branch After the CI is green and a maintainer has approved the PR, the bot will automatically merge the backport PR Debugging issues in Ceph manager modules \u00b6 The Ceph manager modules are written in Python and can be individually and dynamically loaded from the manager. We can take advantage of this feature in order to test changes and to debug issues in the modules. This is just a hack to debug any modification in the manager modules. The dashboard and the rook orchestrator modules are the two modules that most commonly have modifications that need to be tested. Make modifications directly in the manager module and reload: Update the cluster so only a single mgr pod is running. Set the mgr.count: 1 in the CephCluster CR if it is not already. Shell into the manager container: 1 kubectl exec -n rook-ceph --stdin --tty $(kubectl get pod -n rook-ceph -l ceph_daemon_type=mgr,instance=a -o jsonpath='{.items[0].metadata.name}') -c mgr -- /bin/bash Make the modifications needed in the required manager module. The manager module source code is found in /usr/share/ceph/mgr/ . 
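For example, to confirm where a module's sources live before editing it, list its directory (the rook subdirectory name here is an assumption based on the module name): 1 ls /usr/share/ceph/mgr/rook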
Note If the manager pod is restarted, all modifications made in the mgr container will be lost Restart the modified manager module to test the modifications: Example for restarting the rook manager module with the krew plugin : 1 2 kubectl rook-ceph ceph mgr module disable rook kubectl rook-ceph ceph mgr module enable rook Once the module is restarted the modifications will be running in the active manager. View the manager pod log or other changed behavior to validate the changes.","title":"Development Flow"},{"location":"Contributing/development-flow/#prerequisites","text":"GO 1.20 or greater installed Git client installed GitHub account","title":"Prerequisites"},{"location":"Contributing/development-flow/#initial-setup","text":"","title":"Initial Setup"},{"location":"Contributing/development-flow/#create-a-fork","text":"Navigate to http://github.com/rook/rook and click the \"Fork\" button.","title":"Create a Fork"},{"location":"Contributing/development-flow/#clone-your-fork","text":"In a console window: 1 2 3 4 5 6 7 8 # Create the rook repo path mkdir -p $GOPATH/src/github.com/rook # Navigate to the local repo path cd $GOPATH/src/github.com/rook # Clone your fork, where  is your GitHub account name git clone https://github.com//rook.git","title":"Clone Your Fork"},{"location":"Contributing/development-flow/#add-upstream-remote","text":"Add the upstream remote to your local git: 1 2 3 4 5 6 # Add 'upstream' to the list of remotes cd rook git remote add upstream https://github.com/rook/rook.git # Verify the remote was added git remote -v Two remotes should be available: origin and upstream .","title":"Add Upstream Remote"},{"location":"Contributing/development-flow/#build","text":"Before building the project, fetch the remotes to synchronize tags. 1 2 3 # Fetch all remotes git fetch -a make build Tip If in a Linux environment and make build command throws an error like unknown revision for some imports, add export GOPROXY=https://proxy.golang.org,direct to ~/.bashrc . Reload your environment and confirm with go env that GOPROXY is set. Hint Make will automatically pick up podman if docker packages are not available on your machine.","title":"Build"},{"location":"Contributing/development-flow/#development-settings","text":"For consistent whitespace and other formatting in .go and other source files, apply the following settings in your IDE: Format with the goreturns tool Trim trailing whitespace Markdown Table of Contents is correctly updated automatically","title":"Development Settings"},{"location":"Contributing/development-flow/#vs-code","text":"Tip VS Code will prompt you automatically with some recommended extensions to install, such as Markdown, Go, YAML validator, and ShellCheck. 
VS Code will automatically use the recommended settings in the .vscode/settings.json file.","title":"VS Code"},{"location":"Contributing/development-flow/#self-assign-issue","text":"To self-assign an issue that is not yet assigned to anyone else, add a comment in the issue with /assign in the body.","title":"Self assign Issue"},{"location":"Contributing/development-flow/#layout","text":"The overall source code layout is summarized: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 rook \u251c\u2500\u2500 build # build makefiles and logic to build, publish and release all Rook artifacts \u251c\u2500\u2500 cluster \u2502 \u251c\u2500\u2500 charts # Helm charts \u2502 \u2502 \u2514\u2500\u2500 rook-ceph \u2502 \u2502 \u2514\u2500\u2500 rook-ceph-cluster \u2502 \u2514\u2500\u2500 examples # Sample manifestes to configure the cluster \u2502 \u251c\u2500\u2500 cmd \u2502 \u251c\u2500\u2500 rook # Main command entry points for operators and daemons \u2502 \u251c\u2500\u2500 design # Design documents \u251c\u2500\u2500 Documentation # Documentation that is published to rook.io \u251c\u2500\u2500 images # Rook docker image sources \u2502 \u251c\u2500\u2500 pkg \u2502 \u251c\u2500\u2500 apis \u2502 \u2502 \u251c\u2500\u2500 ceph.rook.io # ceph specs used in the CRDs \u2502 \u2502 \u2502 \u251c\u2500\u2500 v1 \u2502 \u251c\u2500\u2500 client # auto-generated strongly typed client code to access Rook APIs \u2502 \u251c\u2500\u2500 clusterd \u2502 \u251c\u2500\u2500 daemon # daemons for configuring ceph \u2502 \u2502 \u251c\u2500\u2500 ceph \u2502 \u2502 \u2514\u2500\u2500 discover \u2502 \u251c\u2500\u2500 operator # all reconcile logic and custom controllers \u2502 \u2502 \u251c\u2500\u2500 ceph \u2502 \u2502 \u251c\u2500\u2500 discover \u2502 \u2502 \u251c\u2500\u2500 k8sutil \u2502 \u251c\u2500\u2500 util \u2502 \u2514\u2500\u2500 version \u2514\u2500\u2500 tests \u251c\u2500\u2500 framework # integration test framework \u2502 \u251c\u2500\u2500 clients \u2502 \u251c\u2500\u2500 installer \u2502 \u2514\u2500\u2500 utils \u251c\u2500\u2500 integration # integration test cases that will be invoked during golang testing \u2514\u2500\u2500 scripts # scripts for setting up integration and manual testing environments","title":"Layout"},{"location":"Contributing/development-flow/#development","text":"To submit a change, create a branch in your fork and then submit a pull request (PR) from the branch.","title":"Development"},{"location":"Contributing/development-flow/#design-document","text":"For new features of significant scope and complexity, a design document is recommended before work begins on the implementation. Create a design document if: Adding a new CRD Adding a significant feature. For smaller, straightforward features and bug fixes, there is no need for a design document. Authoring a design document has many advantages: Forces the author to think critically about the feature and identify potential issues early in the design Obtain agreement amongst the community before code is written to avoid wasted effort in the wrong direction Newcomers may more quickly understand the feature Note Writing code to prototype the feature while working on the design may be very useful to help flesh out the approach. A design document should be written as a markdown file in the design folder . Follow the process outlined in the design template . There are many examples of previous design documents in that folder. 
Submit a pull request for the design to be discussed and approved by the community, just like any other change to the repository.","title":"Design Document"},{"location":"Contributing/development-flow/#create-a-branch","text":"From a console, create a new branch based on your fork where changes will be developed: 1 2 3 4 5 6 # Update the remotes git fetch --all # Create a new branch that is based off upstream master. Give it a simple, but descriptive name. # Generally it will be two to three words separated by dashes and without numbers. git checkout -b feature-name upstream/master","title":"Create a Branch"},{"location":"Contributing/development-flow/#updating-your-fork","text":"During the development lifecycle, keep your branch(es) updated with the latest upstream master. As others on the team push changes, rebase your commits on top of the latest. This avoids unnecessary merge commits and keeps the commit history clean. Whenever an update is needed to the local repository, never perform a merge, always rebase. This will avoid merge commits in the git history. If there are any modified files, first stash them with git stash . 1 2 git fetch --all git rebase upstream/master Rebasing is a very powerful feature of Git. You need to understand how it works to avoid risking losing your work. Read about it in the Git documentation . Briefly, rebasing does the following: \"Unwinds\" the local commits. The local commits are removed temporarily from the history. The latest changes from upstream are added to the history The local commits are re-applied one by one If there are merge conflicts, there will be a prompt to fix them before continuing. Read the output closely. It will instruct how to complete the rebase. When rebasing is completed, all of the commits are restored in the history.","title":"Updating Your Fork"},{"location":"Contributing/development-flow/#submitting-a-pull-request","text":"After a feature or bug fix is completed in your branch, open a Pull Request (PR) to the upstream Rook repository . Before opening the PR: If there are code changes, add unit tests and verify that all unit tests are passing. See Unit Tests below on running unit tests. Rebase on the latest upstream changes","title":"Submitting a Pull Request"},{"location":"Contributing/development-flow/#regression-testing","text":"All pull requests must pass all continuous integration (CI) tests before they can be merged. These tests automatically run against every pull request. The results of these tests along with code review feedback determine whether your request will be merged.","title":"Regression Testing"},{"location":"Contributing/development-flow/#unit-tests","text":"From the root of your local Rook repo execute the following to run all of the unit tests: 1 make test Unit tests for individual packages can be run with the standard go test command. To see code coverage on the packages that you changed, view the coverage.html in a browser to inspect your new code. 1 2 go test -coverprofile=coverage.out go tool cover -html=coverage.out -o coverage.html","title":"Unit Tests"},{"location":"Contributing/development-flow/#writing-unit-tests","text":"Good unit tests start with easily testable code. Small chunks (\"units\") of code can be easily tested for every possible input. Higher-level code units that are built from smaller, already-tested units can more easily verify that the units are combined together correctly. 
Common cases that may need tests: the feature is enabled the feature is disabled the feature is only partially enabled, for every possible way it can be partially enabled every error that can be encountered during execution of the feature the feature can be disabled (including partially) after it was enabled the feature can be modified (including partially) after it was enabled if there is a slice/array involved, test length = 0, length = 1, length = 3, length == max, length > max an input is not specified, for each input an input is specified incorrectly, for each input a resource the code relies on doesn't exist, for each dependency","title":"Writing unit tests"},{"location":"Contributing/development-flow/#integration-tests","text":"Rook's upstream continuous integration (CI) tests will run integration tests against your changes automatically. Integration tests will be run in Github actions. If an integration test fails, a tmate session will be available for troubleshooting for a short time. See the action details for an ssh connection to the Github runner.","title":"Integration Tests"},{"location":"Contributing/development-flow/#commit-structure","text":"Rook maintainers value clear, lengthy and explanatory commit messages. Requirements for commits: A commit prefix from the list of known prefixes At least one paragraph that explains the original issue and the changes in the commit The Signed-off-by tag is at the end of the commit message, achieved by committing with git commit -s An example acceptable commit message: 1 2 3 4 5 6 component: commit title This is the commit message. Here I'm explaining what the bug was along with its root cause. Then I'm explaining how I fixed it. Signed-off-by: FirstName LastName ","title":"Commit structure"},{"location":"Contributing/development-flow/#commit-history","text":"To prepare your branch to open a PR, the minimal number of logical commits is preferred to maintain a clean commit history. Most commonly a PR will include a single commit where all changes are squashed, although sometimes there will be multiple logical commits. 1 2 # Inspect your commit history to determine if you need to squash commits git log To squash multiple commits or make other changes to the commit history, use git rebase : 1 2 3 # # In this example, the last 5 commits will be opened in the git rebase tool. git rebase -i HEAD~5 Once your commit history is clean, ensure the branch is rebased on the latest upstream before opening the PR.","title":"Commit History"},{"location":"Contributing/development-flow/#submitting","text":"Go to the Rook github to open the PR. If you have pushed recently to a branch, you will see an obvious link to open the PR. If you have not pushed recently, go to the Pull Request tab and select your fork and branch for the PR. After the PR is open, make changes simply by pushing new commits. The PR will track the changes in your fork and rerun the CI automatically. Always open a pull request against master. Never open a pull request against a released branch (e.g. 
release-1.2) unless working directly with a maintainer.","title":"Submitting"},{"location":"Contributing/development-flow/#backporting-to-a-release-branch","text":"The flow for getting a fix into a release branch is: Open a PR to merge changes to master following the process outlined above Add the backport label to that PR such as backport-release-1.11 After the PR is merged to master, the mergify bot will automatically open a PR with the commits backported to the release branch After the CI is green and a maintainer has approved the PR, the bot will automatically merge the backport PR","title":"Backporting to a Release Branch"},{"location":"Contributing/development-flow/#debugging-issues-in-ceph-manager-modules","text":"The Ceph manager modules are written in Python and can be individually and dynamically loaded from the manager. We can take advantage of this feature in order to test changes and to debug issues in the modules. This is just a hack to debug any modification in the manager modules. The dashboard and the rook orchestrator modules are the two modules that most commonly have modifications that need to be tested. Make modifications directly in the manager module and reload: Update the cluster so only a single mgr pod is running. Set the mgr.count: 1 in the CephCluster CR if it is not already. Shell into the manager container: 1 kubectl exec -n rook-ceph --stdin --tty $(kubectl get pod -n rook-ceph -l ceph_daemon_type=mgr,instance=a -o jsonpath='{.items[0].metadata.name}') -c mgr -- /bin/bash Make the modifications needed in the required manager module. The manager module source code is found in /usr/share/ceph/mgr/ . Note If the manager pod is restarted, all modifications made in the mgr container will be lost Restart the modified manager module to test the modifications: Example for restarting the rook manager module with the krew plugin : 1 2 kubectl rook-ceph ceph mgr module disable rook kubectl rook-ceph ceph mgr module enable rook Once the module is restarted the modifications will be running in the active manager. View the manager pod log or other changed behavior to validate the changes.","title":"Debugging issues in Ceph manager modules"},{"location":"Contributing/documentation/","text":"We are using MkDocs with the Material for MkDocs theme . Markdown Extensions \u00b6 Thanks to the MkDocs Material theme we have certain \"markdown syntax extensions\" available: Admonitions Footnotes Icons, Emojis Task lists And more .. For a whole list of features, see Reference - Material for MkDocs . Local Preview \u00b6 To locally preview the documentation, you can run the following command (in the root of the repository): 1 make docs-preview When the preview is running, navigate your browser to http://127.0.0.1:8000/ to open the preview of the documentation. Hint Should you encounter a command not found error while trying to preview the docs for the first time on a machine, you probably need to install the dependencies for MkDocs and extensions used. 1 pip3 install -r build/release/requirements_docs.txt Please make sure that your Python binary path is included in your PATH . Running helm-docs \u00b6 helm-docs is a tool that generates the documentation for a helm chart automatically. 
If there are changes in the helm chart, you need to run helm-docs manually, and check in the resulting autogenerated md files at the path /Documentation/Helm-Charts 1 make helm-docs","title":"Documentation"},{"location":"Contributing/documentation/#markdown-extensions","text":"Thanks to the MkDocs Material theme we have certain \"markdown syntax extensions\" available: Admonitions Footnotes Icons, Emojis Task lists And more .. For a whole list of features Reference - Material for MkDocs .","title":"Markdown Extensions"},{"location":"Contributing/documentation/#local-preview","text":"To locally preview the documentation, you can run the following command (in the root of the repository): 1 make docs-preview When previewing, now you can navigate your browser to http://127.0.0.1:8000/ to open the preview of the documentation. Hint Should you encounter a command not found error while trying to preview the docs for the first time on a machine, you probably need to install the dependencies for MkDocs and extensions used. 1 pip3 install -r build/release/requirements_docs.txt Please make sure that your Python binary path is included in your PATH .","title":"Local Preview"},{"location":"Contributing/documentation/#running-helm-docs","text":"helm-docs is a tool that generates the documentation for a helm chart automatically. If there are changes in the helm chart, you need to run helm-docs manually, and check in the resulting autogenerated md files at the path /Documentation/Helm-Charts 1 make helm-docs","title":"Running helm-docs"},{"location":"Contributing/rook-test-framework/","text":"Integration Tests \u00b6 The integration tests run end-to-end tests on Rook in a running instance of Kubernetes. The framework includes scripts for starting Kubernetes so users can quickly spin up a Kubernetes cluster. The tests are generally designed to install Rook, run tests, and uninstall Rook. The CI runs the integration tests with each PR and each master or release branch build. If the tests fail in a PR, a tmate session is started which will allow you to connect via ssh and troubleshoot the failure. The CI is the most efficient way to troubleshoot the tests since the environment is started automatically and you will only need to connect to investigate. This document will outline the steps to run the integration tests locally in a minikube environment, should the CI not be sufficient to troubleshoot. Hint The CI is generally much simpler to troubleshoot than running these tests locally. Running the tests locally is rarely necessary. Warning A risk of running the tests locally is that a local disk is required during the tests. If not running in a VM, your laptop or other test machine could be destroyed. Install Minikube \u00b6 Follow Rook's developer guide to install Minikube. Build Rook image \u00b6 Now that the Kubernetes cluster is running we need to populate the Docker registry to allow local image builds to be easily used inside Minikube. 1 eval $(minikube docker-env -p minikube) make build will now build and push the images to the Docker registry inside the Minikube virtual machine. 1 make build Tag the newly built images to rook/ceph:local-build for running tests, or rook/ceph:master if creating example manifests:: 1 2 docker tag $(docker images|awk '/build-/ {print $1}') rook/ceph:local-build docker tag rook/ceph:local-build rook/ceph:master Run integration tests \u00b6 Some settings are available to run the tests under different environments. The settings are all configured with environment variables. 
See environment.go for the available environment variables. Set the following variables: 1 2 3 export TEST_HELM_PATH=/tmp/rook-tests-scripts-helm/linux-amd64/helm export TEST_BASE_DIR=WORKING_DIR export TEST_SCRATCH_DEVICE=/dev/vdb Set TEST_SCRATCH_DEVICE to the correct block device name based on the driver that's being used. Hint If using the virtualbox minikube driver, the device should be /dev/sdb Warning The integration tests erase the contents of TEST_SCRATCH_DEVICE when the test is completed To run a specific suite, specify the suite name: 1 go test -v -timeout 1800s -run CephSmokeSuite github.com/rook/rook/tests/integration After running tests, see test logs under tests/integration/_output . To run specific tests inside a suite: 1 go test -v -timeout 1800s -run CephSmokeSuite github.com/rook/rook/tests/integration -testify.m TestARookClusterInstallation_SmokeTest Info Only the golang test suites are documented to run locally. Canary and other tests have only ever been supported in the CI. Running tests on OpenShift \u00b6 Setup OpenShift environment and export KUBECONFIG Make sure oc executable file is in the PATH. Only the CephSmokeSuite is currently supported on OpenShift. Set the following environment variables depending on the environment: 1 2 3 export TEST_ENV_NAME=openshift export TEST_STORAGE_CLASS=gp2 export TEST_BASE_DIR=/tmp Run the integration tests","title":"Rook Test Framework"},{"location":"Contributing/rook-test-framework/#integration-tests","text":"The integration tests run end-to-end tests on Rook in a running instance of Kubernetes. The framework includes scripts for starting Kubernetes so users can quickly spin up a Kubernetes cluster. The tests are generally designed to install Rook, run tests, and uninstall Rook. The CI runs the integration tests with each PR and each master or release branch build. If the tests fail in a PR, a tmate session is started which will allow you to connect via ssh and troubleshoot the failure. The CI is the most efficient way to troubleshoot the tests since the environment is started automatically and you will only need to connect to investigate. This document will outline the steps to run the integration tests locally in a minikube environment, should the CI not be sufficient to troubleshoot. Hint The CI is generally much simpler to troubleshoot than running these tests locally. Running the tests locally is rarely necessary. Warning A risk of running the tests locally is that a local disk is required during the tests. If not running in a VM, your laptop or other test machine could be destroyed.","title":"Integration Tests"},{"location":"Contributing/rook-test-framework/#install-minikube","text":"Follow Rook's developer guide to install Minikube.","title":"Install Minikube"},{"location":"Contributing/rook-test-framework/#build-rook-image","text":"Now that the Kubernetes cluster is running we need to populate the Docker registry to allow local image builds to be easily used inside Minikube. 1 eval $(minikube docker-env -p minikube) make build will now build and push the images to the Docker registry inside the Minikube virtual machine. 
1 make build Tag the newly built images to rook/ceph:local-build for running tests, or rook/ceph:master if creating example manifests:: 1 2 docker tag $(docker images|awk '/build-/ {print $1}') rook/ceph:local-build docker tag rook/ceph:local-build rook/ceph:master","title":"Build Rook image"},{"location":"Contributing/rook-test-framework/#run-integration-tests","text":"Some settings are available to run the tests under different environments. The settings are all configured with environment variables. See environment.go for the available environment variables. Set the following variables: 1 2 3 export TEST_HELM_PATH=/tmp/rook-tests-scripts-helm/linux-amd64/helm export TEST_BASE_DIR=WORKING_DIR export TEST_SCRATCH_DEVICE=/dev/vdb Set TEST_SCRATCH_DEVICE to the correct block device name based on the driver that's being used. Hint If using the virtualbox minikube driver, the device should be /dev/sdb Warning The integration tests erase the contents of TEST_SCRATCH_DEVICE when the test is completed To run a specific suite, specify the suite name: 1 go test -v -timeout 1800s -run CephSmokeSuite github.com/rook/rook/tests/integration After running tests, see test logs under tests/integration/_output . To run specific tests inside a suite: 1 go test -v -timeout 1800s -run CephSmokeSuite github.com/rook/rook/tests/integration -testify.m TestARookClusterInstallation_SmokeTest Info Only the golang test suites are documented to run locally. Canary and other tests have only ever been supported in the CI.","title":"Run integration tests"},{"location":"Contributing/rook-test-framework/#running-tests-on-openshift","text":"Setup OpenShift environment and export KUBECONFIG Make sure oc executable file is in the PATH. Only the CephSmokeSuite is currently supported on OpenShift. Set the following environment variables depending on the environment: 1 2 3 export TEST_ENV_NAME=openshift export TEST_STORAGE_CLASS=gp2 export TEST_BASE_DIR=/tmp Run the integration tests","title":"Running tests on OpenShift"},{"location":"Getting-Started/ceph-openshift/","text":"OpenShift \u00b6 OpenShift adds a number of security and other enhancements to Kubernetes. In particular, security context constraints allow the cluster admin to define exactly which permissions are allowed to pods running in the cluster. You will need to define those permissions that allow the Rook pods to run. The settings for Rook in OpenShift are described below, and are also included in the example yaml files : operator-openshift.yaml : Creates the security context constraints and starts the operator deployment object-openshift.yaml : Creates an object store with rgw listening on a valid port number for OpenShift TL;DR \u00b6 To create an OpenShift cluster, the commands basically include: 1 2 3 oc create -f crds.yaml -f common.yaml oc create -f operator-openshift.yaml oc create -f cluster.yaml Helm Installation \u00b6 Configuration required for Openshift is automatically created by the Helm charts, such as the SecurityContextConstraints. See the Rook Helm Charts . 
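After the Helm install, one way to confirm that the SecurityContextConstraints exist is to list them; the name filter below is only an assumption about how the resources are named: 1 oc get scc | grep rook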
Rook Privileges \u00b6 To orchestrate the storage platform, Rook requires the following access in the cluster: Create hostPath volumes, for persistence by the Ceph mon and osd pods Run pods in privileged mode, for access to /dev and hostPath volumes Host networking for the Rook agent and clusters that require host networking Ceph OSDs require host PIDs for communication on the same node Security Context Constraints \u00b6 Before starting the Rook operator or cluster, create the security context constraints needed by the Rook pods. The following yaml is found in operator-openshift.yaml under /deploy/examples . Hint Older versions of OpenShift may require apiVersion: v1 . Important to note is that if you plan on running Rook in namespaces other than the default rook-ceph , the example scc will need to be modified to accommodate for your namespaces where the Rook pods are running. To create the scc you will need a privileged account: 1 oc login -u system:admin We will create the security context constraints with the operator in the next section. Rook Settings \u00b6 There are some Rook settings that also need to be adjusted to work in OpenShift. Operator Settings \u00b6 There is an environment variable that needs to be set in the operator spec that will allow Rook to run in OpenShift clusters. ROOK_HOSTPATH_REQUIRES_PRIVILEGED : Must be set to true . Writing to the hostPath is required for the Ceph mon and osd pods. Given the restricted permissions in OpenShift with SELinux, the pod must be running privileged in order to write to the hostPath volume. 1 2 - name : ROOK_HOSTPATH_REQUIRES_PRIVILEGED value : \"true\" Now create the security context constraints and the operator: 1 oc create -f operator-openshift.yaml Cluster Settings \u00b6 The cluster settings in cluster.yaml are largely isolated from the differences in OpenShift. There is perhaps just one to take note of: dataDirHostPath : Ensure that it points to a valid, writable path on the host systems. Object Store Settings \u00b6 In OpenShift, ports less than 1024 cannot be bound. In the object store CRD , ensure the port is modified to meet this requirement. 1 2 gateway : port : 8080 You can expose a different port such as 80 by creating a service. A sample object store can be created with these settings: 1 oc create -f object-openshift.yaml","title":"OpenShift"},{"location":"Getting-Started/ceph-openshift/#openshift","text":"OpenShift adds a number of security and other enhancements to Kubernetes. In particular, security context constraints allow the cluster admin to define exactly which permissions are allowed to pods running in the cluster. You will need to define those permissions that allow the Rook pods to run. The settings for Rook in OpenShift are described below, and are also included in the example yaml files : operator-openshift.yaml : Creates the security context constraints and starts the operator deployment object-openshift.yaml : Creates an object store with rgw listening on a valid port number for OpenShift","title":"OpenShift"},{"location":"Getting-Started/ceph-openshift/#tldr","text":"To create an OpenShift cluster, the commands basically include: 1 2 3 oc create -f crds.yaml -f common.yaml oc create -f operator-openshift.yaml oc create -f cluster.yaml","title":"TL;DR"},{"location":"Getting-Started/ceph-openshift/#helm-installation","text":"Configuration required for Openshift is automatically created by the Helm charts, such as the SecurityContextConstraints. 
See the Rook Helm Charts .","title":"Helm Installation"},{"location":"Getting-Started/ceph-openshift/#rook-privileges","text":"To orchestrate the storage platform, Rook requires the following access in the cluster: Create hostPath volumes, for persistence by the Ceph mon and osd pods Run pods in privileged mode, for access to /dev and hostPath volumes Host networking for the Rook agent and clusters that require host networking Ceph OSDs require host PIDs for communication on the same node","title":"Rook Privileges"},{"location":"Getting-Started/ceph-openshift/#security-context-constraints","text":"Before starting the Rook operator or cluster, create the security context constraints needed by the Rook pods. The following yaml is found in operator-openshift.yaml under /deploy/examples . Hint Older versions of OpenShift may require apiVersion: v1 . Important to note is that if you plan on running Rook in namespaces other than the default rook-ceph , the example scc will need to be modified to accommodate for your namespaces where the Rook pods are running. To create the scc you will need a privileged account: 1 oc login -u system:admin We will create the security context constraints with the operator in the next section.","title":"Security Context Constraints"},{"location":"Getting-Started/ceph-openshift/#rook-settings","text":"There are some Rook settings that also need to be adjusted to work in OpenShift.","title":"Rook Settings"},{"location":"Getting-Started/ceph-openshift/#operator-settings","text":"There is an environment variable that needs to be set in the operator spec that will allow Rook to run in OpenShift clusters. ROOK_HOSTPATH_REQUIRES_PRIVILEGED : Must be set to true . Writing to the hostPath is required for the Ceph mon and osd pods. Given the restricted permissions in OpenShift with SELinux, the pod must be running privileged in order to write to the hostPath volume. 1 2 - name : ROOK_HOSTPATH_REQUIRES_PRIVILEGED value : \"true\" Now create the security context constraints and the operator: 1 oc create -f operator-openshift.yaml","title":"Operator Settings"},{"location":"Getting-Started/ceph-openshift/#cluster-settings","text":"The cluster settings in cluster.yaml are largely isolated from the differences in OpenShift. There is perhaps just one to take note of: dataDirHostPath : Ensure that it points to a valid, writable path on the host systems.","title":"Cluster Settings"},{"location":"Getting-Started/ceph-openshift/#object-store-settings","text":"In OpenShift, ports less than 1024 cannot be bound. In the object store CRD , ensure the port is modified to meet this requirement. 1 2 gateway : port : 8080 You can expose a different port such as 80 by creating a service. A sample object store can be created with these settings: 1 oc create -f object-openshift.yaml","title":"Object Store Settings"},{"location":"Getting-Started/ceph-teardown/","text":"Cleaning up a Cluster \u00b6 If you want to tear down the cluster and bring up a new one, be aware of the following resources that will need to be cleaned up: The resources created under Rook's namespace (assume rook-ceph here): The Rook operator and cluster created by operator.yaml and cluster.yaml (the cluster CRD) /var/lib/rook/rook-ceph : Path on each host in the cluster where configuration is cached by the ceph mons and osds Note that if you changed the default namespaces or paths such as dataDirHostPath in the sample yaml files, you will need to adjust these namespaces and paths throughout these instructions. 
If you see issues tearing down the cluster, see the Troubleshooting section below. If you are tearing down a cluster frequently for development purposes, it is instead recommended to use an environment such as Minikube that can easily be reset without worrying about any of these steps. Delete the Block and File artifacts \u00b6 First you will need to clean up the resources created on top of the Rook cluster. These commands will clean up the resources from the block and file walkthroughs (unmount volumes, delete volume claims, etc). If you did not complete those parts of the walkthrough, you can skip these instructions: 1 2 3 4 5 6 kubectl delete -f ../wordpress.yaml kubectl delete -f ../mysql.yaml kubectl delete -n rook-ceph cephblockpool replicapool kubectl delete storageclass rook-ceph-block kubectl delete -f csi/cephfs/kube-registry.yaml kubectl delete storageclass csi-cephfs After those block and file resources have been cleaned up, you can then delete your Rook cluster. This is important to delete before removing the Rook operator and agent or else resources may not be cleaned up properly . Delete the CephCluster CRD \u00b6 Edit the CephCluster and add the cleanupPolicy WARNING: DATA WILL BE PERMANENTLY DELETED AFTER DELETING THE CephCluster CR WITH cleanupPolicy . 1 kubectl -n rook-ceph patch cephcluster rook-ceph --type merge -p '{\"spec\":{\"cleanupPolicy\":{\"confirmation\":\"yes-really-destroy-data\"}}}' Once the cleanup policy is enabled, any new configuration changes in the CephCluster will be blocked. Nothing will happen until the deletion of the CR is requested, so this cleanupPolicy change can still be reverted if needed. Checkout more details about the cleanupPolicy here Delete the CephCluster CR. 1 kubectl -n rook-ceph delete cephcluster rook-ceph Verify that the cluster CR has been deleted before continuing to the next step. 1 kubectl -n rook-ceph get cephcluster If the cleanupPolicy was applied, then wait for the rook-ceph-cleanup jobs to be completed on all the nodes. These jobs will perform the following operations: Delete the namespace directory under dataDirHostPath , for example /var/lib/rook/rook-ceph , on all the nodes Wipe the data on the drives on all the nodes where OSDs were running in this cluster Note: The cleanup jobs might not start if the resources created on top of Rook Cluster are not deleted completely. See Delete the Operator and related Resources \u00b6 This will begin the process of the Rook Ceph operator and all other resources being cleaned up. This includes related resources such as the agent and discover daemonsets with the following commands: 1 2 3 kubectl delete -f operator.yaml kubectl delete -f common.yaml kubectl delete -f crds.yaml If the cleanupPolicy was applied and the cleanup jobs have completed on all the nodes, then the cluster tear down has been successful. If you skipped adding the cleanupPolicy then follow the manual steps mentioned below to tear down the cluster. Delete the data on hosts \u00b6 Attention The final cleanup step requires deleting files on each host in the cluster. All files under the dataDirHostPath property specified in the cluster CRD will need to be deleted. Otherwise, inconsistent state will remain when a new cluster is started. Connect to each machine and delete the namespace directory under dataDirHostPath , for example /var/lib/rook/rook-ceph . In the future this step will not be necessary when we build on the K8s local storage feature. 
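For example, a minimal sketch of that cleanup on one node, assuming the default path and that root privileges are required: 1 sudo rm -rf /var/lib/rook/rook-ceph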
If you modified the demo settings, additional cleanup is up to you for devices, host paths, etc. Zapping Devices \u00b6 Disks on nodes used by Rook for osds can be reset to a usable state with methods suggested below. Note that these scripts are not one-size-fits-all. Please use them with discretion to ensure you are not removing data unrelated to Rook and/or Ceph. Disks can be zapped fairly easily. A single disk can usually be cleared with some or all of the steps below. 1 2 3 4 5 6 7 8 9 10 11 12 13 DISK=\"/dev/sdX\" # Zap the disk to a fresh, usable state ( zap-all is important, b/c MBR has to be clean ) sgdisk --zap-all $DISK # Wipe a large portion of the beginning of the disk to remove more LVM metadata that may be present dd if=/dev/zero of=\"$DISK\" bs=1M count=100 oflag=direct,dsync # SSDs may be better cleaned with blkdiscard instead of dd blkdiscard $DISK # Inform the OS of partition table changes partprobe $DISK Ceph can leave LVM and device mapper data that can lock the disks, preventing the disks from being used again. These steps can help to free up old Ceph disks for re-use. Note that this only needs to be run once on each node. If you have only one Rook cluster and all Ceph disks are being wiped, run the following command. 1 2 3 4 5 6 # This command hangs on some systems: with caution, 'dmsetup remove_all --force' can be used ls /dev/mapper/ceph-* | xargs -I% -- dmsetup remove % # ceph-volume setup can leave ceph- directories in /dev and /dev/mapper ( unnecessary clutter ) rm -rf /dev/ceph-* rm -rf /dev/mapper/ceph--* If disks are still reported locked, rebooting the node often helps clear LVM-related holds on disks. If there are multiple Ceph clusters and some disks are not wiped yet, it is necessary to manually determine which disks map to which device mapper devices. Troubleshooting \u00b6 If the cleanup instructions are not executed in the order above, or you otherwise have difficulty cleaning up the cluster, here are a few things to try. The most common issue cleaning up the cluster is that the rook-ceph namespace or the cluster CRD remain indefinitely in the terminating state. A namespace cannot be removed until all of its resources are removed, so look at which resources are pending termination. Look at the pods: 1 kubectl -n rook-ceph get pod If a pod is still terminating, you will need to wait or else attempt to forcefully terminate it ( kubectl delete pod  ). Now look at the cluster CRD: 1 kubectl -n rook-ceph get cephcluster If the cluster CRD still exists even though you have executed the delete command earlier, see the next section on removing the finalizer. Removing the Cluster CRD Finalizer \u00b6 When a Cluster CRD is created, a finalizer is added automatically by the Rook operator. The finalizer will allow the operator to ensure that before the cluster CRD is deleted, all block and file mounts will be cleaned up. Without proper cleanup, pods consuming the storage will be hung indefinitely until a system reboot. The operator is responsible for removing the finalizer after the mounts have been cleaned up. 
If for some reason the operator is not able to remove the finalizer (i.e., the operator is not running anymore), you can delete the finalizer manually with the following command: 1 2 3 4 for CRD in $(kubectl get crd -n rook-ceph | awk '/ceph.rook.io/ {print $1}'); do kubectl get -n rook-ceph \"$CRD\" -o name | \\ xargs -I {} kubectl patch -n rook-ceph {} --type merge -p '{\"metadata\":{\"finalizers\": []}}' done This command will patch the following CRDs on v1.3: 1 2 3 4 5 6 cephblockpools.ceph.rook.io cephclients.ceph.rook.io cephfilesystems.ceph.rook.io cephnfses.ceph.rook.io cephobjectstores.ceph.rook.io cephobjectstoreusers.ceph.rook.io Within a few seconds you should see that the cluster CRD has been deleted and will no longer block other cleanup such as deleting the rook-ceph namespace. If the namespace is still stuck in Terminating state, you can check which resources are holding up the deletion and remove the finalizers and delete those 1 2 kubectl api-resources --verbs=list --namespaced -o name \\ | xargs -n 1 kubectl get --show-kind --ignore-not-found -n rook-ceph Remove critical resource finalizers \u00b6 Rook adds a finalizer ceph.rook.io/disaster-protection to resources critical to the Ceph cluster so that the resources will not be accidentally deleted. The operator is responsible for removing the finalizers when a CephCluster is deleted. If for some reason the operator is not able to remove the finalizers (i.e., the operator is not running anymore), you can remove the finalizers manually with the following commands: 1 2 kubectl -n rook-ceph patch configmap rook-ceph-mon-endpoints --type merge -p '{\"metadata\":{\"finalizers\": []}}' kubectl -n rook-ceph patch secrets rook-ceph-mon --type merge -p '{\"metadata\":{\"finalizers\": []}}'","title":"Cleanup"},{"location":"Getting-Started/ceph-teardown/#cleaning-up-a-cluster","text":"If you want to tear down the cluster and bring up a new one, be aware of the following resources that will need to be cleaned up: The resources created under Rook's namespace (assume rook-ceph here): The Rook operator and cluster created by operator.yaml and cluster.yaml (the cluster CRD) /var/lib/rook/rook-ceph : Path on each host in the cluster where configuration is cached by the ceph mons and osds Note that if you changed the default namespaces or paths such as dataDirHostPath in the sample yaml files, you will need to adjust these namespaces and paths throughout these instructions. If you see issues tearing down the cluster, see the Troubleshooting section below. If you are tearing down a cluster frequently for development purposes, it is instead recommended to use an environment such as Minikube that can easily be reset without worrying about any of these steps.","title":"Cleaning up a Cluster"},{"location":"Getting-Started/ceph-teardown/#delete-the-block-and-file-artifacts","text":"First you will need to clean up the resources created on top of the Rook cluster. These commands will clean up the resources from the block and file walkthroughs (unmount volumes, delete volume claims, etc). If you did not complete those parts of the walkthrough, you can skip these instructions: 1 2 3 4 5 6 kubectl delete -f ../wordpress.yaml kubectl delete -f ../mysql.yaml kubectl delete -n rook-ceph cephblockpool replicapool kubectl delete storageclass rook-ceph-block kubectl delete -f csi/cephfs/kube-registry.yaml kubectl delete storageclass csi-cephfs After those block and file resources have been cleaned up, you can then delete your Rook cluster. 
This is important to delete before removing the Rook operator and agent or else resources may not be cleaned up properly .","title":"Delete the Block and File artifacts"},{"location":"Getting-Started/ceph-teardown/#delete-the-cephcluster-crd","text":"Edit the CephCluster and add the cleanupPolicy WARNING: DATA WILL BE PERMANENTLY DELETED AFTER DELETING THE CephCluster CR WITH cleanupPolicy . 1 kubectl -n rook-ceph patch cephcluster rook-ceph --type merge -p '{\"spec\":{\"cleanupPolicy\":{\"confirmation\":\"yes-really-destroy-data\"}}}' Once the cleanup policy is enabled, any new configuration changes in the CephCluster will be blocked. Nothing will happen until the deletion of the CR is requested, so this cleanupPolicy change can still be reverted if needed. Checkout more details about the cleanupPolicy here Delete the CephCluster CR. 1 kubectl -n rook-ceph delete cephcluster rook-ceph Verify that the cluster CR has been deleted before continuing to the next step. 1 kubectl -n rook-ceph get cephcluster If the cleanupPolicy was applied, then wait for the rook-ceph-cleanup jobs to be completed on all the nodes. These jobs will perform the following operations: Delete the namespace directory under dataDirHostPath , for example /var/lib/rook/rook-ceph , on all the nodes Wipe the data on the drives on all the nodes where OSDs were running in this cluster Note: The cleanup jobs might not start if the resources created on top of Rook Cluster are not deleted completely. See","title":"Delete the CephCluster CRD"},{"location":"Getting-Started/ceph-teardown/#delete-the-operator-and-related-resources","text":"This will begin the process of the Rook Ceph operator and all other resources being cleaned up. This includes related resources such as the agent and discover daemonsets with the following commands: 1 2 3 kubectl delete -f operator.yaml kubectl delete -f common.yaml kubectl delete -f crds.yaml If the cleanupPolicy was applied and the cleanup jobs have completed on all the nodes, then the cluster tear down has been successful. If you skipped adding the cleanupPolicy then follow the manual steps mentioned below to tear down the cluster.","title":"Delete the Operator and related Resources"},{"location":"Getting-Started/ceph-teardown/#delete-the-data-on-hosts","text":"Attention The final cleanup step requires deleting files on each host in the cluster. All files under the dataDirHostPath property specified in the cluster CRD will need to be deleted. Otherwise, inconsistent state will remain when a new cluster is started. Connect to each machine and delete the namespace directory under dataDirHostPath , for example /var/lib/rook/rook-ceph . In the future this step will not be necessary when we build on the K8s local storage feature. If you modified the demo settings, additional cleanup is up to you for devices, host paths, etc.","title":"Delete the data on hosts"},{"location":"Getting-Started/ceph-teardown/#zapping-devices","text":"Disks on nodes used by Rook for osds can be reset to a usable state with methods suggested below. Note that these scripts are not one-size-fits-all. Please use them with discretion to ensure you are not removing data unrelated to Rook and/or Ceph. Disks can be zapped fairly easily. A single disk can usually be cleared with some or all of the steps below. 
1 2 3 4 5 6 7 8 9 10 11 12 13 DISK=\"/dev/sdX\" # Zap the disk to a fresh, usable state ( zap-all is important, b/c MBR has to be clean ) sgdisk --zap-all $DISK # Wipe a large portion of the beginning of the disk to remove more LVM metadata that may be present dd if=/dev/zero of=\"$DISK\" bs=1M count=100 oflag=direct,dsync # SSDs may be better cleaned with blkdiscard instead of dd blkdiscard $DISK # Inform the OS of partition table changes partprobe $DISK Ceph can leave LVM and device mapper data that can lock the disks, preventing the disks from being used again. These steps can help to free up old Ceph disks for re-use. Note that this only needs to be run once on each node. If you have only one Rook cluster and all Ceph disks are being wiped, run the following command. 1 2 3 4 5 6 # This command hangs on some systems: with caution, 'dmsetup remove_all --force' can be used ls /dev/mapper/ceph-* | xargs -I% -- dmsetup remove % # ceph-volume setup can leave ceph- directories in /dev and /dev/mapper ( unnecessary clutter ) rm -rf /dev/ceph-* rm -rf /dev/mapper/ceph--* If disks are still reported locked, rebooting the node often helps clear LVM-related holds on disks. If there are multiple Ceph clusters and some disks are not wiped yet, it is necessary to manually determine which disks map to which device mapper devices.","title":"Zapping Devices"},{"location":"Getting-Started/ceph-teardown/#troubleshooting","text":"If the cleanup instructions are not executed in the order above, or you otherwise have difficulty cleaning up the cluster, here are a few things to try. The most common issue cleaning up the cluster is that the rook-ceph namespace or the cluster CRD remain indefinitely in the terminating state. A namespace cannot be removed until all of its resources are removed, so look at which resources are pending termination. Look at the pods: 1 kubectl -n rook-ceph get pod If a pod is still terminating, you will need to wait or else attempt to forcefully terminate it ( kubectl delete pod  ). Now look at the cluster CRD: 1 kubectl -n rook-ceph get cephcluster If the cluster CRD still exists even though you have executed the delete command earlier, see the next section on removing the finalizer.","title":"Troubleshooting"},{"location":"Getting-Started/ceph-teardown/#removing-the-cluster-crd-finalizer","text":"When a Cluster CRD is created, a finalizer is added automatically by the Rook operator. The finalizer will allow the operator to ensure that before the cluster CRD is deleted, all block and file mounts will be cleaned up. Without proper cleanup, pods consuming the storage will be hung indefinitely until a system reboot. The operator is responsible for removing the finalizer after the mounts have been cleaned up. 
If for some reason the operator is not able to remove the finalizer (i.e., the operator is not running anymore), you can delete the finalizer manually with the following command: 1 2 3 4 for CRD in $(kubectl get crd -n rook-ceph | awk '/ceph.rook.io/ {print $1}'); do kubectl get -n rook-ceph \"$CRD\" -o name | \\ xargs -I {} kubectl patch -n rook-ceph {} --type merge -p '{\"metadata\":{\"finalizers\": []}}' done This command will patch the following CRDs on v1.3: 1 2 3 4 5 6 cephblockpools.ceph.rook.io cephclients.ceph.rook.io cephfilesystems.ceph.rook.io cephnfses.ceph.rook.io cephobjectstores.ceph.rook.io cephobjectstoreusers.ceph.rook.io Within a few seconds you should see that the cluster CRD has been deleted and will no longer block other cleanup such as deleting the rook-ceph namespace. If the namespace is still stuck in Terminating state, you can check which resources are holding up the deletion and remove the finalizers and delete those 1 2 kubectl api-resources --verbs=list --namespaced -o name \\ | xargs -n 1 kubectl get --show-kind --ignore-not-found -n rook-ceph","title":"Removing the Cluster CRD Finalizer"},{"location":"Getting-Started/ceph-teardown/#remove-critical-resource-finalizers","text":"Rook adds a finalizer ceph.rook.io/disaster-protection to resources critical to the Ceph cluster so that the resources will not be accidentally deleted. The operator is responsible for removing the finalizers when a CephCluster is deleted. If for some reason the operator is not able to remove the finalizers (i.e., the operator is not running anymore), you can remove the finalizers manually with the following commands: 1 2 kubectl -n rook-ceph patch configmap rook-ceph-mon-endpoints --type merge -p '{\"metadata\":{\"finalizers\": []}}' kubectl -n rook-ceph patch secrets rook-ceph-mon --type merge -p '{\"metadata\":{\"finalizers\": []}}'","title":"Remove critical resource finalizers"},{"location":"Getting-Started/example-configurations/","text":"Configuration for Rook and Ceph can be configured in multiple ways to provide block devices, shared filesystem volumes or object storage in a kubernetes namespace. While several examples are provided to simplify storage setup, settings are available to optimize various production environments. See the example yaml files folder for all the rook/ceph setup example spec files. Common Resources \u00b6 The first step to deploy Rook is to create the CRDs and other common resources. The configuration for these resources will be the same for most deployments. The crds.yaml and common.yaml sets these resources up. 1 kubectl create -f crds.yaml -f common.yaml The examples all assume the operator and all Ceph daemons will be started in the same namespace. If deploying the operator in a separate namespace, see the comments throughout common.yaml . Operator \u00b6 After the common resources are created, the next step is to create the Operator deployment. Several spec file examples are provided in this directory : operator.yaml : The most common settings for production deployments kubectl create -f operator.yaml operator-openshift.yaml : Includes all of the operator settings for running a basic Rook cluster in an OpenShift environment. You will also want to review the OpenShift Prerequisites to confirm the settings. oc create -f operator-openshift.yaml Settings for the operator are configured through environment variables on the operator deployment. The individual settings are documented in operator.yaml . 
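Before creating the cluster, it can help to verify that the operator pod has reached the Running state; a sketch, assuming the app=rook-ceph-operator label used by the example manifests: 1 kubectl -n rook-ceph get pod -l app=rook-ceph-operator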
Cluster CRD \u00b6 Now that the operator is running, create the Ceph storage cluster with the CephCluster CR. This CR contains the most critical settings that will influence how the operator configures the storage. It is important to understand the various ways to configure the cluster. These examples represent several different ways to configure the storage. cluster.yaml : Common settings for a production storage cluster. Requires at least three worker nodes. cluster-test.yaml : Settings for a test cluster where redundancy is not configured. Requires only a single node. cluster-on-pvc.yaml : Common settings for backing the Ceph Mons and OSDs by PVs. Useful when running in cloud environments or where local PVs have been created for Ceph to consume. cluster-external.yaml : Connect to an external Ceph cluster with minimal access to monitor the health of the cluster and connect to the storage. cluster-external-management.yaml : Connect to an external Ceph cluster with the admin key of the external cluster to enable remote creation of pools and configure services such as an Object Store or a Shared Filesystem . cluster-stretched.yaml : Create a cluster in \"stretched\" mode, with five mons stretched across three zones, and the OSDs across two zones. See the Stretch documentation . See the Cluster CRD topic for more details and more examples for the settings. Setting up consumable storage \u00b6 Now we are ready to setup Block, Shared Filesystem or Object storage in the Rook cluster. These storage types are respectively created with the CephBlockPool, CephFilesystem and CephObjectStore CRs. Block Devices \u00b6 Ceph provides raw block device volumes to pods. Each example below sets up a storage class which can then be used to provision a block device in application pods. The storage class is defined with a Ceph pool which defines the level of data redundancy in Ceph: storageclass.yaml : This example illustrates replication of 3 for production scenarios and requires at least three worker nodes. Data is replicated on three different kubernetes worker nodes. Intermittent or long-lasting single node failures will not result in data unavailability or loss. storageclass-ec.yaml : Configures erasure coding for data durability rather than replication. Ceph's erasure coding is more efficient than replication so you can get high reliability without the 3x replication cost of the preceding example (but at the cost of higher computational encoding and decoding costs on the worker nodes). Erasure coding requires at least three worker nodes. See the Erasure coding documentation. storageclass-test.yaml : Replication of 1 for test scenarios. Requires only a single node. Do not use this for production applications. A single node failure can result in full data loss. The block storage classes are found in the examples directory: csi/rbd : the CSI driver examples for block devices See the CephBlockPool CRD topic for more block storage settings. Shared Filesystem \u00b6 Ceph filesystem (CephFS) allows the user to mount a shared posix-compliant folder into one or more application pods. This storage is similar to NFS shared storage or CIFS shared folders, as explained here . Shared Filesystem storage contains configurable pools for different scenarios: filesystem.yaml : Replication of 3 for production scenarios. Requires at least three worker nodes. filesystem-ec.yaml : Erasure coding for production scenarios. Requires at least three worker nodes. filesystem-test.yaml : Replication of 1 for test scenarios. 
Requires only a single node. Dynamic provisioning is possible with the CSI driver. The storage class for shared filesystems is found in the csi/cephfs directory. See the Shared Filesystem CRD topic for more details on the settings. Object Storage \u00b6 Ceph supports storing blobs of data called objects that support HTTP(s)-type get/put/post and delete semantics. This storage is similar to AWS S3 storage, for example. Object storage contains multiple pools that can be configured for different scenarios: object.yaml : Replication of 3 for production scenarios. Requires at least three worker nodes. object-openshift.yaml : Replication of 3 with rgw in a port range valid for OpenShift. Requires at least three worker nodes. object-ec.yaml : Erasure coding rather than replication for production scenarios. Requires at least three worker nodes. object-test.yaml : Replication of 1 for test scenarios. Requires only a single node. See the Object Store CRD topic for more details on the settings. Object Storage User \u00b6 object-user.yaml : Creates a simple object storage user and generates credentials for the S3 API Object Storage Buckets \u00b6 The Ceph operator also runs an object store bucket provisioner which can grant access to existing buckets or dynamically provision new buckets. object-bucket-claim-retain.yaml Creates a request for a new bucket by referencing a StorageClass which saves the bucket when the initiating OBC is deleted. object-bucket-claim-delete.yaml Creates a request for a new bucket by referencing a StorageClass which deletes the bucket when the initiating OBC is deleted. storageclass-bucket-retain.yaml Creates a new StorageClass which defines the Ceph Object Store and retains the bucket after the initiating OBC is deleted. storageclass-bucket-delete.yaml Creates a new StorageClass which defines the Ceph Object Store and deletes the bucket after the initiating OBC is deleted.","title":"Example Configurations"},{"location":"Getting-Started/example-configurations/#common-resources","text":"The first step to deploy Rook is to create the CRDs and other common resources. The configuration for these resources will be the same for most deployments. The crds.yaml and common.yaml sets these resources up. 1 kubectl create -f crds.yaml -f common.yaml The examples all assume the operator and all Ceph daemons will be started in the same namespace. If deploying the operator in a separate namespace, see the comments throughout common.yaml .","title":"Common Resources"},{"location":"Getting-Started/example-configurations/#operator","text":"After the common resources are created, the next step is to create the Operator deployment. Several spec file examples are provided in this directory : operator.yaml : The most common settings for production deployments kubectl create -f operator.yaml operator-openshift.yaml : Includes all of the operator settings for running a basic Rook cluster in an OpenShift environment. You will also want to review the OpenShift Prerequisites to confirm the settings. oc create -f operator-openshift.yaml Settings for the operator are configured through environment variables on the operator deployment. The individual settings are documented in operator.yaml .","title":"Operator"},{"location":"Getting-Started/example-configurations/#cluster-crd","text":"Now that the operator is running, create the Ceph storage cluster with the CephCluster CR. This CR contains the most critical settings that will influence how the operator configures the storage. 
It is important to understand the various ways to configure the cluster. These examples represent several different ways to configure the storage. cluster.yaml : Common settings for a production storage cluster. Requires at least three worker nodes. cluster-test.yaml : Settings for a test cluster where redundancy is not configured. Requires only a single node. cluster-on-pvc.yaml : Common settings for backing the Ceph Mons and OSDs by PVs. Useful when running in cloud environments or where local PVs have been created for Ceph to consume. cluster-external.yaml : Connect to an external Ceph cluster with minimal access to monitor the health of the cluster and connect to the storage. cluster-external-management.yaml : Connect to an external Ceph cluster with the admin key of the external cluster to enable remote creation of pools and configure services such as an Object Store or a Shared Filesystem . cluster-stretched.yaml : Create a cluster in \"stretched\" mode, with five mons stretched across three zones, and the OSDs across two zones. See the Stretch documentation . See the Cluster CRD topic for more details and more examples for the settings.","title":"Cluster CRD"},{"location":"Getting-Started/example-configurations/#setting-up-consumable-storage","text":"Now we are ready to setup Block, Shared Filesystem or Object storage in the Rook cluster. These storage types are respectively created with the CephBlockPool, CephFilesystem and CephObjectStore CRs.","title":"Setting up consumable storage"},{"location":"Getting-Started/example-configurations/#block-devices","text":"Ceph provides raw block device volumes to pods. Each example below sets up a storage class which can then be used to provision a block device in application pods. The storage class is defined with a Ceph pool which defines the level of data redundancy in Ceph: storageclass.yaml : This example illustrates replication of 3 for production scenarios and requires at least three worker nodes. Data is replicated on three different kubernetes worker nodes. Intermittent or long-lasting single node failures will not result in data unavailability or loss. storageclass-ec.yaml : Configures erasure coding for data durability rather than replication. Ceph's erasure coding is more efficient than replication so you can get high reliability without the 3x replication cost of the preceding example (but at the cost of higher computational encoding and decoding costs on the worker nodes). Erasure coding requires at least three worker nodes. See the Erasure coding documentation. storageclass-test.yaml : Replication of 1 for test scenarios. Requires only a single node. Do not use this for production applications. A single node failure can result in full data loss. The block storage classes are found in the examples directory: csi/rbd : the CSI driver examples for block devices See the CephBlockPool CRD topic for more block storage settings.","title":"Block Devices"},{"location":"Getting-Started/example-configurations/#shared-filesystem","text":"Ceph filesystem (CephFS) allows the user to mount a shared posix-compliant folder into one or more application pods. This storage is similar to NFS shared storage or CIFS shared folders, as explained here . Shared Filesystem storage contains configurable pools for different scenarios: filesystem.yaml : Replication of 3 for production scenarios. Requires at least three worker nodes. filesystem-ec.yaml : Erasure coding for production scenarios. Requires at least three worker nodes. 
filesystem-test.yaml : Replication of 1 for test scenarios. Requires only a single node. Dynamic provisioning is possible with the CSI driver. The storage class for shared filesystems is found in the csi/cephfs directory. See the Shared Filesystem CRD topic for more details on the settings.","title":"Shared Filesystem"},{"location":"Getting-Started/example-configurations/#object-storage","text":"Ceph supports storing blobs of data called objects that support HTTP(s)-type get/put/post and delete semantics. This storage is similar to AWS S3 storage, for example. Object storage contains multiple pools that can be configured for different scenarios: object.yaml : Replication of 3 for production scenarios. Requires at least three worker nodes. object-openshift.yaml : Replication of 3 with rgw in a port range valid for OpenShift. Requires at least three worker nodes. object-ec.yaml : Erasure coding rather than replication for production scenarios. Requires at least three worker nodes. object-test.yaml : Replication of 1 for test scenarios. Requires only a single node. See the Object Store CRD topic for more details on the settings.","title":"Object Storage"},{"location":"Getting-Started/example-configurations/#object-storage-user","text":"object-user.yaml : Creates a simple object storage user and generates credentials for the S3 API","title":"Object Storage User"},{"location":"Getting-Started/example-configurations/#object-storage-buckets","text":"The Ceph operator also runs an object store bucket provisioner which can grant access to existing buckets or dynamically provision new buckets. object-bucket-claim-retain.yaml Creates a request for a new bucket by referencing a StorageClass which saves the bucket when the initiating OBC is deleted. object-bucket-claim-delete.yaml Creates a request for a new bucket by referencing a StorageClass which deletes the bucket when the initiating OBC is deleted. storageclass-bucket-retain.yaml Creates a new StorageClass which defines the Ceph Object Store and retains the bucket after the initiating OBC is deleted. storageclass-bucket-delete.yaml Creates a new StorageClass which defines the Ceph Object Store and deletes the bucket after the initiating OBC is deleted.","title":"Object Storage Buckets"},{"location":"Getting-Started/glossary/","text":"Glossary \u00b6 Rook \u00b6 CephBlockPool CRD \u00b6 The CephBlockPool CRD is used by Rook to allow creation and customization of storage pools. CephBlockPoolRadosNamespace CRD \u00b6 The CephBlockPoolRadosNamespace CRD is used by Rook to allow creation of Ceph RADOS Namespaces. CephClient CRD \u00b6 The CephClient CRD is used by Rook to allow creation and updating of clients. CephCluster CRD \u00b6 The CephCluster CRD is used by Rook to allow creation and customization of storage clusters through the custom resource definitions (CRDs). Ceph CSI \u00b6 The Ceph CSI plugins implement an interface between a CSI-enabled Container Orchestrator (CO) and Ceph clusters. CephFilesystem CRD \u00b6 The CephFilesystem CRD is used by Rook to allow creation and customization of shared filesystems through the custom resource definitions (CRDs). CephFilesystemMirror CRD \u00b6 The CephFilesystemMirror CRD is used by Rook to allow creation and updating of the Ceph fs-mirror daemon. CephFilesystemSubVolumeGroup CRD \u00b6 The CephFilesystemSubVolumeGroup CRD is used by Rook to allow creation of Ceph Filesystem SubVolumeGroups. 
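For illustration only, a CephFilesystemSubVolumeGroup CR is a small manifest that names the filesystem it belongs to. The names below are placeholders, not values taken from this documentation:

```yaml
apiVersion: ceph.rook.io/v1
kind: CephFilesystemSubVolumeGroup
metadata:
  name: group-a            # placeholder subvolume group name
  namespace: rook-ceph
spec:
  # the CephFilesystem CR (in the same namespace) that will contain the subvolume group
  filesystemName: myfs
```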
CephNFS CRD \u00b6 CephNFS CRD is used by Rook to allow exporting NFS shares of a CephFilesystem or CephObjectStore through the CephNFS custom resource definition. For further information please refer to the example here . CephObjectStore CRD \u00b6 CephObjectStore CRD is used by Rook to allow creation and customization of object stores. CephObjectStoreUser CRD \u00b6 CephObjectStoreUser CRD is used by Rook to allow creation and customization of object store users. For more information and examples refer to this documentation . CephObjectRealm CRD \u00b6 CephObjectRealm CRD is used by Rook to allow creation of a realm in a Ceph Object Multisite configuration. For more information and examples refer to this documentation . CephObjectZoneGroup CRD \u00b6 CephObjectZoneGroup CRD is used by Rook to allow creation of zone groups in a Ceph Object Multisite configuration. For more information and examples refer to this documentation . CephObjectZone CRD \u00b6 CephObjectZone CRD is used by Rook to allow creation of zones in a ceph cluster for a Ceph Object Multisite configuration. For more information and examples refer to this documentation . CephRBDMirror CRD \u00b6 CephRBDMirror CRD is used by Rook to allow creation and updating of rbd-mirror daemon(s) through the custom resource definitions (CRDs). For more information and examples refer to this documentation . External Storage Cluster \u00b6 An external cluster is a Ceph configuration that is managed outside of the local K8s cluster. Host Storage Cluster \u00b6 A host storage cluster is where Rook configures Ceph to store data directly on the host devices. Krew Plugin \u00b6 The Rook Krew plugin is a tool to help troubleshoot your Rook cluster. Object Bucket Claim (OBC) \u00b6 An Object Bucket Claim (OBC) is a custom resource which requests a bucket (new or existing) from a Ceph object store. For further reference please refer to OBC Custom Resource . Object Bucket (OB) \u00b6 An Object Bucket (OB) is a custom resource automatically generated when a bucket is provisioned. It is a global resource, typically not visible to non-admin users, and contains information specific to the bucket. OpenShift \u00b6 OpenShift Container Platform is a distribution of the Kubernetes container platform. PVC Storage Cluster \u00b6 In a PersistentVolumeClaim-based cluster , the Ceph persistent data is stored on volumes requested from a storage class of your choice. Stretch Storage Cluster \u00b6 A stretched cluster is a deployment model in which two datacenters with low latency are available for storage in the same K8s cluster, rather than three or more. To support this scenario, Rook has integrated support for stretch clusters . Toolbox \u00b6 The Rook toolbox is a container with common tools used for Rook debugging and testing. Ceph \u00b6 Ceph is a distributed network storage and file system with distributed metadata management and POSIX semantics. See also the Ceph Glossary . Here are a few of the important terms to understand: Ceph Monitor (MON) Ceph Manager (MGR) Ceph Metadata Server (MDS) Object Storage Device (OSD) RADOS Block Device (RBD) Ceph Object Gateway (RGW) Kubernetes \u00b6 Kubernetes, also known as K8s, is an open-source system for automating deployment, scaling, and management of containerized applications. For further information see also the Kubernetes Glossary for more definitions. 
Here are a few of the important terms to understand: Affinity Container Storage Interface (CSI) for Kubernetes CustomResourceDefinition (CRDs) DaemonSet Deployment Finalizer Node affinity Node Selector PersistentVolume (PV) PersistentVolumeClaim (PVC) Selector Storage Class Taint Toleration Volume","title":"Glossary"},{"location":"Getting-Started/glossary/#glossary","text":"","title":"Glossary"},{"location":"Getting-Started/glossary/#rook","text":"","title":"Rook"},{"location":"Getting-Started/glossary/#cephblockpool-crd","text":"The CephBlockPool CRD is used by Rook to allow creation and customization of storage pools.","title":"CephBlockPool CRD"},{"location":"Getting-Started/glossary/#cephblockpoolradosnamespace-crd","text":"The CephBlockPoolRadosNamespace CRD is used by Rook to allow creation of Ceph RADOS Namespaces.","title":"CephBlockPoolRadosNamespace CRD"},{"location":"Getting-Started/glossary/#cephclient-crd","text":"CephClient CRD is used by Rook to allow creation and updating clients.","title":"CephClient CRD"},{"location":"Getting-Started/glossary/#cephcluster-crd","text":"The CephCluster CRD is used by Rook to allow creation and customization of storage clusters through the custom resource definitions (CRDs).","title":"CephCluster CRD"},{"location":"Getting-Started/glossary/#ceph-csi","text":"The Ceph CSI plugins implement an interface between a CSI-enabled Container Orchestrator (CO) and Ceph clusters.","title":"Ceph CSI"},{"location":"Getting-Started/glossary/#cephfilesystem-crd","text":"The CephFilesystem CRD is used by Rook to allow creation and customization of shared filesystems through the custom resource definitions (CRDs).","title":"CephFilesystem CRD"},{"location":"Getting-Started/glossary/#cephfilesystemmirror-crd","text":"The CephFilesystemMirror CRD is used by Rook to allow creation and updating the Ceph fs-mirror daemon.","title":"CephFilesystemMirror CRD"},{"location":"Getting-Started/glossary/#cephfilesystemsubvolumegroup-crd","text":"CephFilesystemMirror CRD is used by Rook to allow creation of Ceph Filesystem SubVolumeGroups.","title":"CephFilesystemSubVolumeGroup CRD"},{"location":"Getting-Started/glossary/#cephnfs-crd","text":"CephNFS CRD is used by Rook to allow exporting NFS shares of a CephFilesystem or CephObjectStore through the CephNFS custom resource definition. For further information please refer to the example here .","title":"CephNFS CRD"},{"location":"Getting-Started/glossary/#cephobjectstore-crd","text":"CephObjectStore CRD is used by Rook to allow creation and customization of object stores.","title":"CephObjectStore CRD"},{"location":"Getting-Started/glossary/#cephobjectstoreuser-crd","text":"CephObjectStoreUser CRD is used by Rook to allow creation and customization of object store users. For more information and examples refer to this documentation .","title":"CephObjectStoreUser CRD"},{"location":"Getting-Started/glossary/#cephobjectrealm-crd","text":"CephObjectRealm CRD is used by Rook to allow creation of a realm in a Ceph Object Multisite configuration. For more information and examples refer to this documentation .","title":"CephObjectRealm CRD"},{"location":"Getting-Started/glossary/#cephobjectzonegroup-crd","text":"CephObjectZoneGroup CRD is used by Rook to allow creation of zone groups in a Ceph Object Multisite configuration. 
For more information and examples refer to this documentation .","title":"CephObjectZoneGroup CRD"},{"location":"Getting-Started/glossary/#cephobjectzone-crd","text":"CephObjectZone CRD is used by Rook to allow creation of zones in a ceph cluster for a Ceph Object Multisite configuration. For more information and examples refer to this documentation .","title":"CephObjectZone CRD"},{"location":"Getting-Started/glossary/#cephrbdmirror-crd","text":"CephRBDMirror CRD is used by Rook to allow creation and updating rbd-mirror daemon(s) through the custom resource definitions (CRDs). For more information and examples refer to this documentation .","title":"CephRBDMirror CRD"},{"location":"Getting-Started/glossary/#external-storage-cluster","text":"An external cluster is a Ceph configuration that is managed outside of the local K8s cluster.","title":"External Storage Cluster"},{"location":"Getting-Started/glossary/#host-storage-cluster","text":"A host storage cluster is where Rook configures Ceph to store data directly on the host devices.","title":"Host Storage Cluster"},{"location":"Getting-Started/glossary/#krew-plugin","text":"The Rook Krew plugin is a tool to help troubleshoot your Rook cluster.","title":"Krew Plugin"},{"location":"Getting-Started/glossary/#object-bucket-claim-obc","text":"An Object Bucket Claim (OBC) is custom resource which requests a bucket (new or existing) from a Ceph object store. For further reference please refer to OBC Custom Resource .","title":"Object Bucket Claim (OBC)"},{"location":"Getting-Started/glossary/#object-bucket-ob","text":"An Object Bucket (OB) is a custom resource automatically generated when a bucket is provisioned. It is a global resource, typically not visible to non-admin users, and contains information specific to the bucket.","title":"Object Bucket (OB)"},{"location":"Getting-Started/glossary/#openshift","text":"OpenShift Container Platform is a distribution of the Kubernetes container platform.","title":"OpenShift"},{"location":"Getting-Started/glossary/#pvc-storage-cluster","text":"In a PersistentVolumeClaim-based cluster , the Ceph persistent data is stored on volumes requested from a storage class of your choice.","title":"PVC Storage Cluster"},{"location":"Getting-Started/glossary/#stretch-storage-cluster","text":"A stretched cluster is a deployment model in which two datacenters with low latency are available for storage in the same K8s cluster, rather than three or more. To support this scenario, Rook has integrated support for stretch clusters .","title":"Stretch Storage Cluster"},{"location":"Getting-Started/glossary/#toolbox","text":"The Rook toolbox is a container with common tools used for rook debugging and testing.","title":"Toolbox"},{"location":"Getting-Started/glossary/#ceph","text":"Ceph is a distributed network storage and file system with distributed metadata management and POSIX semantics. See also the Ceph Glossary . Here are a few of the important terms to understand: Ceph Monitor (MON) Ceph Manager (MGR) Ceph Metadata Server (MDS) Object Storage Device (OSD) RADOS Block Device (RBD) Ceph Object Gateway (RGW)","title":"Ceph"},{"location":"Getting-Started/glossary/#kubernetes","text":"Kubernetes, also known as K8s, is an open-source system for automating deployment, scaling, and management of containerized applications. For further information see also the Kubernetes Glossary for more definitions. 
Here are a few of the important terms to understand: Affinity Container Storage Interface (CSI) for Kubernetes CustomResourceDefinition (CRDs) DaemonSet Deployment Finalizer Node affinity Node Selector PersistentVolume (PV) PersistentVolumeClaim (PVC) Selector Storage Class Taint Toleration Volume","title":"Kubernetes"},{"location":"Getting-Started/intro/","text":"Rook \u00b6 Rook is an open source cloud-native storage orchestrator , providing the platform, framework, and support for Ceph storage to natively integrate with cloud-native environments. Ceph is a distributed storage system that provides file, block and object storage and is deployed in large scale production clusters. Rook automates deployment and management of Ceph to provide self-managing, self-scaling, and self-healing storage services. The Rook operator does this by building on Kubernetes resources to deploy, configure, provision, scale, upgrade, and monitor Ceph. The Ceph operator was declared stable in December 2018 in the Rook v0.9 release, providing a production storage platform for many years. Rook is hosted by the Cloud Native Computing Foundation (CNCF) as a graduated level project. Quick Start Guide \u00b6 Starting Ceph in your cluster is as simple as a few kubectl commands. See our Quickstart guide to get started with the Ceph operator! Designs \u00b6 Ceph is a highly scalable distributed storage solution for block storage, object storage, and shared filesystems with years of production deployments. See the Ceph overview . For detailed design documentation, see also the design docs . Need help? Be sure to join the Rook Slack \u00b6 If you have any questions along the way, don't hesitate to ask in our Slack channel . Sign up for the Rook Slack here .","title":"Rook"},{"location":"Getting-Started/intro/#rook","text":"Rook is an open source cloud-native storage orchestrator , providing the platform, framework, and support for Ceph storage to natively integrate with cloud-native environments. Ceph is a distributed storage system that provides file, block and object storage and is deployed in large scale production clusters. Rook automates deployment and management of Ceph to provide self-managing, self-scaling, and self-healing storage services. The Rook operator does this by building on Kubernetes resources to deploy, configure, provision, scale, upgrade, and monitor Ceph. The Ceph operator was declared stable in December 2018 in the Rook v0.9 release, providing a production storage platform for many years. Rook is hosted by the Cloud Native Computing Foundation (CNCF) as a graduated level project.","title":"Rook"},{"location":"Getting-Started/intro/#quick-start-guide","text":"Starting Ceph in your cluster is as simple as a few kubectl commands. See our Quickstart guide to get started with the Ceph operator!","title":"Quick Start Guide"},{"location":"Getting-Started/intro/#designs","text":"Ceph is a highly scalable distributed storage solution for block storage, object storage, and shared filesystems with years of production deployments. See the Ceph overview . For detailed design documentation, see also the design docs .","title":"Designs"},{"location":"Getting-Started/intro/#need-help-be-sure-to-join-the-rook-slack","text":"If you have any questions along the way, don't hesitate to ask in our Slack channel . Sign up for the Rook Slack here .","title":"Need help? Be sure to join the Rook Slack"},{"location":"Getting-Started/quickstart/","text":"Welcome to Rook! 
We hope you have a great experience installing the Rook cloud-native storage orchestrator platform to enable highly available, durable Ceph storage in Kubernetes clusters. Don't hesitate to ask questions in our Slack channel . Sign up for the Rook Slack here . This guide will walk through the basic setup of a Ceph cluster and enable K8s applications to consume block, object, and file storage. Always use a virtual machine when testing Rook. Never use a host system where local devices may mistakenly be consumed. Minimum Version \u00b6 Kubernetes v1.22 or higher is supported by Rook. CPU Architecture \u00b6 Architectures released are amd64 / x86_64 and arm64 . Prerequisites \u00b6 To check if a Kubernetes cluster is ready for Rook , see the prerequisites . To configure the Ceph storage cluster, at least one of these local storage options are required: Raw devices (no partitions or formatted filesystems) Raw partitions (no formatted filesystem) LVM Logical Volumes (no formatted filesystem) Persistent Volumes available from a storage class in block mode TL;DR \u00b6 A simple Rook cluster is created for Kubernetes with the following kubectl commands and example manifests . 1 2 3 4 $ git clone --single-branch --branch master https://github.com/rook/rook.git cd rook/deploy/examples kubectl create -f crds.yaml -f common.yaml -f operator.yaml kubectl create -f cluster.yaml After the cluster is running, applications can consume block, object, or file storage. Deploy the Rook Operator \u00b6 The first step is to deploy the Rook operator. Important The Rook Helm Chart is available to deploy the operator instead of creating the below manifests. Note Check that the example yaml files are from a tagged release of Rook. Note These steps are for a standard production Rook deployment in Kubernetes. For Openshift, testing, or more options, see the example configurations documentation . 1 2 3 4 5 cd deploy/examples kubectl create -f crds.yaml -f common.yaml -f operator.yaml # verify the rook-ceph-operator is in the ` Running ` state before proceeding kubectl -n rook-ceph get pod Before starting the operator in production, consider these settings: Some Rook features are disabled by default. See the operator.yaml for these and other advanced settings. Device discovery: Rook will watch for new devices to configure if the ROOK_ENABLE_DISCOVERY_DAEMON setting is enabled, commonly used in bare metal clusters. Node affinity and tolerations: The CSI driver by default will run on any node in the cluster. To restrict the CSI driver affinity, several settings are available. If deploying Rook into a namespace other than the default rook-ceph , see the topic on using an alternative namespace . Cluster Environments \u00b6 The Rook documentation is focused around starting Rook in a variety of environments. While creating the cluster in this guide, consider these example cluster manifests: cluster.yaml : Cluster settings for a production cluster running on bare metal. Requires at least three worker nodes. cluster-on-pvc.yaml : Cluster settings for a production cluster running in a dynamic cloud environment. cluster-test.yaml : Cluster settings for a test environment such as minikube. See the Ceph example configurations for more details. Create a Ceph Cluster \u00b6 Now that the Rook operator is running we can create the Ceph cluster. Important The Rook Cluster Helm Chart is available to deploy the operator instead of creating the below manifests. 
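As a sketch of what cluster.yaml defines (not a substitute for it), a pared-down CephCluster CR looks roughly like the following; the image tag and host path are illustrative assumptions and should be taken from the example manifest for a real deployment:

```yaml
apiVersion: ceph.rook.io/v1
kind: CephCluster
metadata:
  name: rook-ceph
  namespace: rook-ceph
spec:
  cephVersion:
    image: quay.io/ceph/ceph:v17.2.6   # illustrative tag; use the version pinned in cluster.yaml
  dataDirHostPath: /var/lib/rook       # must be a host path that persists across reboots
  mon:
    count: 3
    allowMultiplePerNode: false
  storage:
    useAllNodes: true
    useAllDevices: true
```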
Important For the cluster to survive reboots, set the dataDirHostPath property that is valid for the hosts. For more settings, see the documentation on configuring the cluster . Create the cluster: 1 kubectl create -f cluster.yaml Verify the cluster is running by viewing the pods in the rook-ceph namespace. The number of osd pods will depend on the number of nodes in the cluster and the number of devices configured. For the default cluster.yaml above, one OSD will be created for each available device found on each node. Hint If the rook-ceph-mon , rook-ceph-mgr , or rook-ceph-osd pods are not created, please refer to the Ceph common issues for more details and potential solutions. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 $ kubectl -n rook-ceph get pod NAME READY STATUS RESTARTS AGE csi-cephfsplugin-provisioner-d77bb49c6-n5tgs 5/5 Running 0 140s csi-cephfsplugin-provisioner-d77bb49c6-v9rvn 5/5 Running 0 140s csi-cephfsplugin-rthrp 3/3 Running 0 140s csi-rbdplugin-hbsm7 3/3 Running 0 140s csi-rbdplugin-provisioner-5b5cd64fd-nvk6c 6/6 Running 0 140s csi-rbdplugin-provisioner-5b5cd64fd-q7bxl 6/6 Running 0 140s rook-ceph-crashcollector-minikube-5b57b7c5d4-hfldl 1/1 Running 0 105s rook-ceph-mgr-a-64cd7cdf54-j8b5p 2/2 Running 0 77s rook-ceph-mgr-b-657d54fc89-2xxw7 2/2 Running 0 56s rook-ceph-mon-a-694bb7987d-fp9w7 1/1 Running 0 105s rook-ceph-mon-b-856fdd5cb9-5h2qk 1/1 Running 0 94s rook-ceph-mon-c-57545897fc-j576h 1/1 Running 0 85s rook-ceph-operator-85f5b946bd-s8grz 1/1 Running 0 92m rook-ceph-osd-0-6bb747b6c5-lnvb6 1/1 Running 0 23s rook-ceph-osd-1-7f67f9646d-44p7v 1/1 Running 0 24s rook-ceph-osd-2-6cd4b776ff-v4d68 1/1 Running 0 25s rook-ceph-osd-prepare-node1-vx2rz 0/2 Completed 0 60s rook-ceph-osd-prepare-node2-ab3fd 0/2 Completed 0 60s rook-ceph-osd-prepare-node3-w4xyz 0/2 Completed 0 60s To verify that the cluster is in a healthy state, connect to the Rook toolbox and run the ceph status command. All mons should be in quorum A mgr should be active At least three OSDs should be up and in If the health is not HEALTH_OK , the warnings or errors should be investigated 1 2 3 4 5 6 7 8 9 10 $ ceph status cluster: id: a0452c76-30d9-4c1a-a948-5d8405f19a7c health: HEALTH_OK services: mon: 3 daemons, quorum a,b,c (age 3m) mgr:a(active, since 2m), standbys: b osd: 3 osds: 3 up (since 1m), 3 in (since 1m) []...] Hint If the cluster is not healthy, please refer to the Ceph common issues for potential solutions. Storage \u00b6 For a walkthrough of the three types of storage exposed by Rook, see the guides for: Block : Create block storage to be consumed by a pod (RWO) Shared Filesystem : Create a filesystem to be shared across multiple pods (RWX) Object : Create an object store that is accessible with an S3 endpoint inside or outside the Kubernetes cluster Ceph Dashboard \u00b6 Ceph has a dashboard to view the status of the cluster. See the dashboard guide . Tools \u00b6 Create a toolbox pod for full access to a ceph admin client for debugging and troubleshooting the Rook cluster. See the toolbox documentation for setup and usage information. The Rook Krew plugin provides commands to view status and troubleshoot issues. See the advanced configuration document for helpful maintenance and tuning examples. Monitoring \u00b6 Each Rook cluster has built-in metrics collectors/exporters for monitoring with Prometheus. To configure monitoring, see the monitoring guide . Telemetry \u00b6 The Rook maintainers would like to receive telemetry reports for Rook clusters. 
The data is anonymous and does not include any identifying information. Enable the telemetry reporting feature with the following command in the toolbox: 1 ceph telemetry on For more details on what is reported and how your privacy is protected, see the Ceph Telemetry Documentation . Teardown \u00b6 When finished with the test cluster, see the cleanup guide .","title":"Quickstart"},{"location":"Getting-Started/quickstart/#minimum-version","text":"Kubernetes v1.22 or higher is supported by Rook.","title":"Minimum Version"},{"location":"Getting-Started/quickstart/#cpu-architecture","text":"Architectures released are amd64 / x86_64 and arm64 .","title":"CPU Architecture"},{"location":"Getting-Started/quickstart/#prerequisites","text":"To check if a Kubernetes cluster is ready for Rook , see the prerequisites . To configure the Ceph storage cluster, at least one of these local storage options are required: Raw devices (no partitions or formatted filesystems) Raw partitions (no formatted filesystem) LVM Logical Volumes (no formatted filesystem) Persistent Volumes available from a storage class in block mode","title":"Prerequisites"},{"location":"Getting-Started/quickstart/#tldr","text":"A simple Rook cluster is created for Kubernetes with the following kubectl commands and example manifests . 1 2 3 4 $ git clone --single-branch --branch master https://github.com/rook/rook.git cd rook/deploy/examples kubectl create -f crds.yaml -f common.yaml -f operator.yaml kubectl create -f cluster.yaml After the cluster is running, applications can consume block, object, or file storage.","title":"TL;DR"},{"location":"Getting-Started/quickstart/#deploy-the-rook-operator","text":"The first step is to deploy the Rook operator. Important The Rook Helm Chart is available to deploy the operator instead of creating the below manifests. Note Check that the example yaml files are from a tagged release of Rook. Note These steps are for a standard production Rook deployment in Kubernetes. For Openshift, testing, or more options, see the example configurations documentation . 1 2 3 4 5 cd deploy/examples kubectl create -f crds.yaml -f common.yaml -f operator.yaml # verify the rook-ceph-operator is in the ` Running ` state before proceeding kubectl -n rook-ceph get pod Before starting the operator in production, consider these settings: Some Rook features are disabled by default. See the operator.yaml for these and other advanced settings. Device discovery: Rook will watch for new devices to configure if the ROOK_ENABLE_DISCOVERY_DAEMON setting is enabled, commonly used in bare metal clusters. Node affinity and tolerations: The CSI driver by default will run on any node in the cluster. To restrict the CSI driver affinity, several settings are available. If deploying Rook into a namespace other than the default rook-ceph , see the topic on using an alternative namespace .","title":"Deploy the Rook Operator"},{"location":"Getting-Started/quickstart/#cluster-environments","text":"The Rook documentation is focused around starting Rook in a variety of environments. While creating the cluster in this guide, consider these example cluster manifests: cluster.yaml : Cluster settings for a production cluster running on bare metal. Requires at least three worker nodes. cluster-on-pvc.yaml : Cluster settings for a production cluster running in a dynamic cloud environment. cluster-test.yaml : Cluster settings for a test environment such as minikube. 
See the Ceph example configurations for more details.","title":"Cluster Environments"},{"location":"Getting-Started/quickstart/#create-a-ceph-cluster","text":"Now that the Rook operator is running we can create the Ceph cluster. Important The Rook Cluster Helm Chart is available to deploy the operator instead of creating the below manifests. Important For the cluster to survive reboots, set the dataDirHostPath property that is valid for the hosts. For more settings, see the documentation on configuring the cluster . Create the cluster: 1 kubectl create -f cluster.yaml Verify the cluster is running by viewing the pods in the rook-ceph namespace. The number of osd pods will depend on the number of nodes in the cluster and the number of devices configured. For the default cluster.yaml above, one OSD will be created for each available device found on each node. Hint If the rook-ceph-mon , rook-ceph-mgr , or rook-ceph-osd pods are not created, please refer to the Ceph common issues for more details and potential solutions. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 $ kubectl -n rook-ceph get pod NAME READY STATUS RESTARTS AGE csi-cephfsplugin-provisioner-d77bb49c6-n5tgs 5/5 Running 0 140s csi-cephfsplugin-provisioner-d77bb49c6-v9rvn 5/5 Running 0 140s csi-cephfsplugin-rthrp 3/3 Running 0 140s csi-rbdplugin-hbsm7 3/3 Running 0 140s csi-rbdplugin-provisioner-5b5cd64fd-nvk6c 6/6 Running 0 140s csi-rbdplugin-provisioner-5b5cd64fd-q7bxl 6/6 Running 0 140s rook-ceph-crashcollector-minikube-5b57b7c5d4-hfldl 1/1 Running 0 105s rook-ceph-mgr-a-64cd7cdf54-j8b5p 2/2 Running 0 77s rook-ceph-mgr-b-657d54fc89-2xxw7 2/2 Running 0 56s rook-ceph-mon-a-694bb7987d-fp9w7 1/1 Running 0 105s rook-ceph-mon-b-856fdd5cb9-5h2qk 1/1 Running 0 94s rook-ceph-mon-c-57545897fc-j576h 1/1 Running 0 85s rook-ceph-operator-85f5b946bd-s8grz 1/1 Running 0 92m rook-ceph-osd-0-6bb747b6c5-lnvb6 1/1 Running 0 23s rook-ceph-osd-1-7f67f9646d-44p7v 1/1 Running 0 24s rook-ceph-osd-2-6cd4b776ff-v4d68 1/1 Running 0 25s rook-ceph-osd-prepare-node1-vx2rz 0/2 Completed 0 60s rook-ceph-osd-prepare-node2-ab3fd 0/2 Completed 0 60s rook-ceph-osd-prepare-node3-w4xyz 0/2 Completed 0 60s To verify that the cluster is in a healthy state, connect to the Rook toolbox and run the ceph status command. All mons should be in quorum A mgr should be active At least three OSDs should be up and in If the health is not HEALTH_OK , the warnings or errors should be investigated 1 2 3 4 5 6 7 8 9 10 $ ceph status cluster: id: a0452c76-30d9-4c1a-a948-5d8405f19a7c health: HEALTH_OK services: mon: 3 daemons, quorum a,b,c (age 3m) mgr:a(active, since 2m), standbys: b osd: 3 osds: 3 up (since 1m), 3 in (since 1m) []...] Hint If the cluster is not healthy, please refer to the Ceph common issues for potential solutions.","title":"Create a Ceph Cluster"},{"location":"Getting-Started/quickstart/#storage","text":"For a walkthrough of the three types of storage exposed by Rook, see the guides for: Block : Create block storage to be consumed by a pod (RWO) Shared Filesystem : Create a filesystem to be shared across multiple pods (RWX) Object : Create an object store that is accessible with an S3 endpoint inside or outside the Kubernetes cluster","title":"Storage"},{"location":"Getting-Started/quickstart/#ceph-dashboard","text":"Ceph has a dashboard to view the status of the cluster. 
See the dashboard guide .","title":"Ceph Dashboard"},{"location":"Getting-Started/quickstart/#tools","text":"Create a toolbox pod for full access to a ceph admin client for debugging and troubleshooting the Rook cluster. See the toolbox documentation for setup and usage information. The Rook Krew plugin provides commands to view status and troubleshoot issues. See the advanced configuration document for helpful maintenance and tuning examples.","title":"Tools"},{"location":"Getting-Started/quickstart/#monitoring","text":"Each Rook cluster has built-in metrics collectors/exporters for monitoring with Prometheus. To configure monitoring, see the monitoring guide .","title":"Monitoring"},{"location":"Getting-Started/quickstart/#telemetry","text":"The Rook maintainers would like to receive telemetry reports for Rook clusters. The data is anonymous and does not include any identifying information. Enable the telemetry reporting feature with the following command in the toolbox: 1 ceph telemetry on For more details on what is reported and how your privacy is protected, see the Ceph Telemetry Documentation .","title":"Telemetry"},{"location":"Getting-Started/quickstart/#teardown","text":"When finished with the test cluster, see the cleanup guide .","title":"Teardown"},{"location":"Getting-Started/release-cycle/","text":"Release Cycle \u00b6 Rook plans to release a new minor version three times a year, or about every four months. The most recent two minor Rook releases are actively maintained. Patch releases for the latest minor release are typically bi-weekly. Urgent patches may be released sooner. Patch releases for the previous minor release are commonly monthly, though will vary depending on the urgency of fixes. Definition of Maintenance \u00b6 The Rook community defines maintenance in that relevant bug fixes that are merged to the main development branch will be eligible to be back-ported to the release branch of any currently maintained version. Patches will be released as needed. It is also possible that a fix may be merged directly to the release branch if no longer applicable on the main development branch. While Rook maintainers make significant efforts to release urgent issues in a timely manner, maintenance does not indicate any SLA on response time. K8s Versions \u00b6 The minimum version supported by a Rook release is specified in the Quickstart Guide . Rook expects to support the most recent six versions of Kubernetes. While these K8s versions may not all be supported by the K8s release cycle, we understand that clusters may take time to update.","title":"Release Cycle"},{"location":"Getting-Started/release-cycle/#release-cycle","text":"Rook plans to release a new minor version three times a year, or about every four months. The most recent two minor Rook releases are actively maintained. Patch releases for the latest minor release are typically bi-weekly. Urgent patches may be released sooner. Patch releases for the previous minor release are commonly monthly, though will vary depending on the urgency of fixes.","title":"Release Cycle"},{"location":"Getting-Started/release-cycle/#definition-of-maintenance","text":"The Rook community defines maintenance in that relevant bug fixes that are merged to the main development branch will be eligible to be back-ported to the release branch of any currently maintained version. Patches will be released as needed. 
It is also possible that a fix may be merged directly to the release branch if no longer applicable on the main development branch. While Rook maintainers make significant efforts to release urgent issues in a timely manner, maintenance does not indicate any SLA on response time.","title":"Definition of Maintenance"},{"location":"Getting-Started/release-cycle/#k8s-versions","text":"The minimum version supported by a Rook release is specified in the Quickstart Guide . Rook expects to support the most recent six versions of Kubernetes. While these K8s versions may not all be supported by the K8s release cycle, we understand that clusters may take time to update.","title":"K8s Versions"},{"location":"Getting-Started/storage-architecture/","text":"Ceph is a highly scalable distributed storage solution for block storage , object storage , and shared filesystems with years of production deployments. Design \u00b6 Rook enables Ceph storage to run on Kubernetes using Kubernetes primitives. With Ceph running in the Kubernetes cluster, Kubernetes applications can mount block devices and filesystems managed by Rook, or can use the S3/Swift API for object storage. The Rook operator automates configuration of storage components and monitors the cluster to ensure the storage remains available and healthy. The Rook operator is a simple container that has all that is needed to bootstrap and monitor the storage cluster. The operator will start and monitor Ceph monitor pods , the Ceph OSD daemons to provide RADOS storage, as well as start and manage other Ceph daemons. The operator manages CRDs for pools, object stores (S3/Swift), and filesystems by initializing the pods and other resources necessary to run the services. The operator will monitor the storage daemons to ensure the cluster is healthy. Ceph mons will be started or failed over when necessary, and other adjustments are made as the cluster grows or shrinks. The operator will also watch for desired state changes specified in the Ceph custom resources (CRs) and apply the changes. Rook automatically configures the Ceph-CSI driver to mount the storage to your pods. The rook/ceph image includes all necessary tools to manage the cluster. Rook is not in the Ceph data path. Many of the Ceph concepts like placement groups and crush maps are hidden so you don't have to worry about them. Instead, Rook creates a simplified user experience for admins that is in terms of physical resources, pools, volumes, filesystems, and buckets. Advanced configuration can be applied when needed with the Ceph tools. Rook is implemented in golang. Ceph is implemented in C++ where the data path is highly optimized. We believe this combination offers the best of both worlds. Architecture \u00b6 Example applications are shown above for the three supported storage types: Block Storage is represented with a blue app, which has a ReadWriteOnce (RWO) volume mounted. The application can read and write to the RWO volume, while Ceph manages the IO. Shared Filesystem is represented by two purple apps that are sharing a ReadWriteMany (RWX) volume. Both applications can actively read or write simultaneously to the volume. Ceph will ensure the data is safely protected for multiple writers with the MDS daemon. Object storage is represented by an orange app that can read and write to a bucket with a standard S3 client. 
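Before walking through the per-component flows below, a short sketch shows how an application typically requests the block storage type. The claim name is a placeholder, and the rook-ceph-block storage class is assumed to have been created from the example manifests:

```yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: example-app-data            # placeholder claim name
spec:
  accessModes:
    - ReadWriteOnce                 # RWO: mounted by a single node at a time
  resources:
    requests:
      storage: 5Gi
  storageClassName: rook-ceph-block # assumed storage class backed by a Ceph RBD pool
```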
Below the dotted line in the above diagram, the components fall into three categories: Rook operator (blue layer): The operator automates configuration of Ceph CSI plugins and provisioners (orange layer): The Ceph-CSI driver provides the provisioning and mounting of volumes Ceph daemons (red layer): The Ceph daemons run the core storage architecture. See the Glossary to learn more about each daemon. Production clusters must have three or more nodes for a resilient storage platform. Block Storage \u00b6 In the diagram above, the flow to create an application with an RWO volume is: The (blue) app creates a PVC to request storage The PVC defines the Ceph RBD storage class (sc) for provisioning the storage K8s calls the Ceph-CSI RBD provisioner to create the Ceph RBD image. The kubelet calls the CSI RBD volume plugin to mount the volume in the app The volume is now available for reads and writes. A ReadWriteOnce volume can be mounted on one node at a time. Shared Filesystem \u00b6 In the diagram above, the flow to create an application with an RWX volume is: The (purple) app creates a PVC to request storage The PVC defines the CephFS storage class (sc) for provisioning the storage K8s calls the Ceph-CSI CephFS provisioner to create the CephFS subvolume The kubelet calls the CSI CephFS volume plugin to mount the volume in the app The volume is now available for reads and writes. A ReadWriteMany volume can be mounted on multiple nodes for your application to use. Object Storage S3 \u00b6 In the diagram above, the flow to create an application with access to an S3 bucket is: The (orange) app creates an ObjectBucketClaim (OBC) to request a bucket The Rook operator creates a Ceph RGW bucket (via the lib-bucket-provisioner) The Rook operator creates a secret with the credentials for accessing the bucket and a configmap with bucket information The app retrieves the credentials from the secret The app can now read and write to the bucket with an S3 client An S3-compatible client can use the S3 bucket right away using the credentials ( Secret ) and bucket info ( ConfigMap ).","title":"Storage Architecture"},{"location":"Getting-Started/storage-architecture/#design","text":"Rook enables Ceph storage to run on Kubernetes using Kubernetes primitives. With Ceph running in the Kubernetes cluster, Kubernetes applications can mount block devices and filesystems managed by Rook, or can use the S3/Swift API for object storage. The Rook operator automates configuration of storage components and monitors the cluster to ensure the storage remains available and healthy. The Rook operator is a simple container that has all that is needed to bootstrap and monitor the storage cluster. The operator will start and monitor Ceph monitor pods , the Ceph OSD daemons to provide RADOS storage, as well as start and manage other Ceph daemons. The operator manages CRDs for pools, object stores (S3/Swift), and filesystems by initializing the pods and other resources necessary to run the services. The operator will monitor the storage daemons to ensure the cluster is healthy. Ceph mons will be started or failed over when necessary, and other adjustments are made as the cluster grows or shrinks. The operator will also watch for desired state changes specified in the Ceph custom resources (CRs) and apply the changes. Rook automatically configures the Ceph-CSI driver to mount the storage to your pods. The rook/ceph image includes all necessary tools to manage the cluster. Rook is not in the Ceph data path. 
Many of the Ceph concepts like placement groups and crush maps are hidden so you don't have to worry about them. Instead, Rook creates a simplified user experience for admins that is in terms of physical resources, pools, volumes, filesystems, and buckets. Advanced configuration can be applied when needed with the Ceph tools. Rook is implemented in golang. Ceph is implemented in C++ where the data path is highly optimized. We believe this combination offers the best of both worlds.","title":"Design"},{"location":"Getting-Started/storage-architecture/#architecture","text":"Example applications are shown above for the three supported storage types: Block Storage is represented with a blue app, which has a ReadWriteOnce (RWO) volume mounted. The application can read and write to the RWO volume, while Ceph manages the IO. Shared Filesystem is represented by two purple apps that are sharing a ReadWriteMany (RWX) volume. Both applications can actively read or write simultaneously to the volume. Ceph will ensure the data is safely protected for multiple writers with the MDS daemon. Object storage is represented by an orange app that can read and write to a bucket with a standard S3 client. Below the dotted line in the above diagram, the components fall into three categories: Rook operator (blue layer): The operator automates configuration of Ceph CSI plugins and provisioners (orange layer): The Ceph-CSI driver provides the provisioning and mounting of volumes Ceph daemons (red layer): The Ceph daemons run the core storage architecture. See the Glossary to learn more about each daemon. Production clusters must have three or more nodes for a resilient storage platform.","title":"Architecture"},{"location":"Getting-Started/storage-architecture/#block-storage","text":"In the diagram above, the flow to create an application with an RWO volume is: The (blue) app creates a PVC to request storage The PVC defines the Ceph RBD storage class (sc) for provisioning the storage K8s calls the Ceph-CSI RBD provisioner to create the Ceph RBD image. The kubelet calls the CSI RBD volume plugin to mount the volume in the app The volume is now available for reads and writes. A ReadWriteOnce volume can be mounted on one node at a time.","title":"Block Storage"},{"location":"Getting-Started/storage-architecture/#shared-filesystem","text":"In the diagram above, the flow to create a applications with a RWX volume is: The (purple) app creates a PVC to request storage The PVC defines the CephFS storage class (sc) for provisioning the storage K8s calls the Ceph-CSI CephFS provisioner to create the CephFS subvolume The kubelet calls the CSI CephFS volume plugin to mount the volume in the app The volume is now available for reads and writes. 
A ReadWriteMany volume can be mounted on multiple nodes for your application to use.","title":"Shared Filesystem"},{"location":"Getting-Started/storage-architecture/#object-storage-s3","text":"In the diagram above, the flow to create an application with access to an S3 bucket is: The (orange) app creates an ObjectBucketClaim (OBC) to request a bucket The Rook operator creates a Ceph RGW bucket (via the lib-bucket-provisioner) The Rook operator creates a secret with the credentials for accessing the bucket and a configmap with bucket information The app retrieves the credentials from the secret The app can now read and write to the bucket with an S3 client A S3 compatible client can use the S3 bucket right away using the credentials ( Secret ) and bucket info ( ConfigMap ).","title":"Object Storage S3"},{"location":"Getting-Started/Prerequisites/authenticated-registry/","text":"If you want to use an image from authenticated docker registry (e.g. for image cache/mirror), you'll need to add an imagePullSecret to all relevant service accounts. This way all pods created by the operator (for service account: rook-ceph-system ) or all new pods in the namespace (for service account: default ) will have the imagePullSecret added to their spec. The whole process is described in the official kubernetes documentation . Example setup for a ceph cluster \u00b6 To get you started, here's a quick rundown for the ceph example from the quickstart guide . First, we'll create the secret for our registry as described here (the secret will be created in the rook-ceph namespace, make sure to change it if your Rook Ceph Operator/Cluster is in another namespace): 1 kubectl -n rook-ceph create secret docker-registry my-registry-secret --docker-server=DOCKER_REGISTRY_SERVER --docker-username=DOCKER_USER --docker-password=DOCKER_PASSWORD --docker-email=DOCKER_EMAIL Next we'll add the following snippet to all relevant service accounts as described here : 1 2 imagePullSecrets : - name : my-registry-secret The service accounts are: rook-ceph-system (namespace: rook-ceph ): Will affect all pods created by the rook operator in the rook-ceph namespace. default (namespace: rook-ceph ): Will affect most pods in the rook-ceph namespace. rook-ceph-mgr (namespace: rook-ceph ): Will affect the MGR pods in the rook-ceph namespace. rook-ceph-osd (namespace: rook-ceph ): Will affect the OSD pods in the rook-ceph namespace. rook-ceph-rgw (namespace: rook-ceph ): Will affect the RGW pods in the rook-ceph namespace. You can do it either via e.g. kubectl -n  edit serviceaccount default or by modifying the operator.yaml and cluster.yaml before deploying them. Since it's the same procedure for all service accounts, here is just one example: 1 kubectl -n rook-ceph edit serviceaccount default 1 2 3 4 5 6 7 8 9 10 apiVersion : v1 kind : ServiceAccount metadata : name : default namespace : rook-ceph secrets : - name : default-token-12345 # Add the highlighted lines: imagePullSecrets : - name : my-registry-secret After doing this for all service accounts all pods should be able to pull the image from your registry.","title":"Authenticated Container Registries"},{"location":"Getting-Started/Prerequisites/authenticated-registry/#example-setup-for-a-ceph-cluster","text":"To get you started, here's a quick rundown for the ceph example from the quickstart guide . 
First, we'll create the secret for our registry as described here (the secret will be created in the rook-ceph namespace, make sure to change it if your Rook Ceph Operator/Cluster is in another namespace): 1 kubectl -n rook-ceph create secret docker-registry my-registry-secret --docker-server=DOCKER_REGISTRY_SERVER --docker-username=DOCKER_USER --docker-password=DOCKER_PASSWORD --docker-email=DOCKER_EMAIL Next we'll add the following snippet to all relevant service accounts as described here : 1 2 imagePullSecrets : - name : my-registry-secret The service accounts are: rook-ceph-system (namespace: rook-ceph ): Will affect all pods created by the rook operator in the rook-ceph namespace. default (namespace: rook-ceph ): Will affect most pods in the rook-ceph namespace. rook-ceph-mgr (namespace: rook-ceph ): Will affect the MGR pods in the rook-ceph namespace. rook-ceph-osd (namespace: rook-ceph ): Will affect the OSD pods in the rook-ceph namespace. rook-ceph-rgw (namespace: rook-ceph ): Will affect the RGW pods in the rook-ceph namespace. You can do it either via e.g. kubectl -n  edit serviceaccount default or by modifying the operator.yaml and cluster.yaml before deploying them. Since it's the same procedure for all service accounts, here is just one example: 1 kubectl -n rook-ceph edit serviceaccount default 1 2 3 4 5 6 7 8 9 10 apiVersion : v1 kind : ServiceAccount metadata : name : default namespace : rook-ceph secrets : - name : default-token-12345 # Add the highlighted lines: imagePullSecrets : - name : my-registry-secret After doing this for all service accounts all pods should be able to pull the image from your registry.","title":"Example setup for a ceph cluster"},{"location":"Getting-Started/Prerequisites/prerequisites/","text":"Rook can be installed on any existing Kubernetes cluster as long as it meets the minimum version and Rook is granted the required privileges (see below for more information). Minimum Version \u00b6 Kubernetes v1.22 or higher is supported. CPU Architecture \u00b6 Architectures supported are amd64 / x86_64 and arm64 . Ceph Prerequisites \u00b6 To configure the Ceph storage cluster, at least one of these local storage types is required: Raw devices (no partitions or formatted filesystems) Raw partitions (no formatted filesystem) LVM Logical Volumes (no formatted filesystem) Persistent Volumes available from a storage class in block mode Confirm whether the partitions or devices are formatted with filesystems with the following command: 1 2 3 4 5 6 7 $ lsblk -f NAME FSTYPE LABEL UUID MOUNTPOINT vda \u2514\u2500vda1 LVM2_member >eSO50t-GkUV-YKTH-WsGq-hNJY-eKNf-3i07IB \u251c\u2500ubuntu--vg-root ext4 c2366f76-6e21-4f10-a8f3-6776212e2fe4 / \u2514\u2500ubuntu--vg-swap_1 swap 9492a3dc-ad75-47cd-9596-678e8cf17ff9 [SWAP] vdb If the FSTYPE field is not empty, there is a filesystem on top of the corresponding device. In this example, vdb is available to Rook, while vda and its partitions have a filesystem and are not available. Admission Controller \u00b6 Enabling the Rook admission controller is recommended to provide an additional level of validation that Rook is configured correctly with the custom resource (CR) settings. An admission controller intercepts requests to the Kubernetes API server prior to persistence of the object, but after the request is authenticated and authorized. 
To deploy the Rook admission controllers, install cert-manager before Rook is installed: 1 kubectl apply -f https://github.com/jetstack/cert-manager/releases/download/v1.11.1/cert-manager.yaml LVM package \u00b6 Ceph OSDs have a dependency on LVM in the following scenarios: If encryption is enabled ( encryptedDevice: \"true\" in the cluster CR) A metadata device is specified LVM is not required for OSDs in these scenarios: OSDs are created on raw devices or partitions OSDs are created on PVCs using the storageClassDeviceSets If LVM is required, LVM needs to be available on the hosts where OSDs will be running. Some Linux distributions do not ship with the lvm2 package. This package is required on all storage nodes in the k8s cluster to run Ceph OSDs. Without this package, Rook will still be able to create the Ceph OSDs, but when a node is rebooted the OSD pods running on that node will fail to start. Please install LVM using your Linux distribution's package manager. For example: CentOS : 1 sudo yum install -y lvm2 Ubuntu : 1 sudo apt-get install -y lvm2 RancherOS : LVM is supported since version 1.5.0. Logical volumes will not be activated during the boot process, so you need to add a runcmd command for that. 1 2 runcmd : - [ \"vgchange\" , \"-ay\" ] Kernel \u00b6 RBD \u00b6 Ceph requires a Linux kernel built with the RBD module. Many Linux distributions have this module, but not all. For example, the GKE Container-Optimized OS (COS) does not have RBD. Test your Kubernetes nodes by running modprobe rbd . If the rbd module is 'not found', rebuild the kernel to include the rbd module, install a newer kernel, or choose a different Linux distribution. Rook's default RBD configuration specifies only the layering feature, for broad compatibility with older kernels. If your Kubernetes nodes run a 5.4 or later kernel, additional feature flags can be enabled in the storage class. The fast-diff and object-map features are especially useful. 1 imageFeatures : layering,fast-diff,object-map,deep-flatten,exclusive-lock CephFS \u00b6 If creating RWX volumes from a Ceph shared file system (CephFS), the recommended minimum kernel version is 4.17 . If the kernel version is less than 4.17, the requested PVC sizes will not be enforced. Storage quotas will only be enforced on newer kernels. Distro Notes \u00b6 Specific configurations for some distributions. NixOS \u00b6 For NixOS, the kernel modules will be found in the non-standard path /run/current-system/kernel-modules/lib/modules/ , and they'll be symlinked inside /nix , which is also a non-standard path. Rook containers require read access to those locations to be able to load the required modules. They have to be bind-mounted as volumes in the CephFS and RBD plugin pods. 
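As a rough illustration of that bind mount, the Helm settings named in the next paragraph might be filled in like this in the operator chart's values.yaml. This is a sketch based on the NixOS paths above, not the exact upstream example; the RBD plugin would use the matching csiRBDPluginVolume / csiRBDPluginVolumeMount keys.

```yaml
csi:
  csiCephFSPluginVolume:
    - name: lib-modules
      hostPath:
        path: /run/current-system/kernel-modules/lib/modules/
    - name: host-nix
      hostPath:
        path: /nix
  csiCephFSPluginVolumeMount:
    - name: host-nix
      mountPath: /nix
      readOnly: true
```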
If installing Rook with Helm, uncomment these example settings in values.yaml : csi.csiCephFSPluginVolume csi.csiCephFSPluginVolumeMount csi.csiRBDPluginVolume csi.csiRBDPluginVolumeMount If deploying without Helm, add those same values to the settings in the rook-ceph-operator-config ConfigMap found in operator.yaml: CSI_CEPHFS_PLUGIN_VOLUME CSI_CEPHFS_PLUGIN_VOLUME_MOUNT CSI_RBD_PLUGIN_VOLUME CSI_RBD_PLUGIN_VOLUME_MOUNT","title":"Prerequisites"},{"location":"Getting-Started/Prerequisites/prerequisites/#minimum-version","text":"Kubernetes v1.22 or higher is supported.","title":"Minimum Version"},{"location":"Getting-Started/Prerequisites/prerequisites/#cpu-architecture","text":"Architectures supported are amd64 / x86_64 and arm64 .","title":"CPU Architecture"},{"location":"Getting-Started/Prerequisites/prerequisites/#ceph-prerequisites","text":"To configure the Ceph storage cluster, at least one of these local storage types is required: Raw devices (no partitions or formatted filesystems) Raw partitions (no formatted filesystem) LVM Logical Volumes (no formatted filesystem) Persistent Volumes available from a storage class in block mode Confirm whether the partitions or devices are formatted with filesystems with the following command: 1 2 3 4 5 6 7 $ lsblk -f NAME FSTYPE LABEL UUID MOUNTPOINT vda \u2514\u2500vda1 LVM2_member >eSO50t-GkUV-YKTH-WsGq-hNJY-eKNf-3i07IB \u251c\u2500ubuntu--vg-root ext4 c2366f76-6e21-4f10-a8f3-6776212e2fe4 / \u2514\u2500ubuntu--vg-swap_1 swap 9492a3dc-ad75-47cd-9596-678e8cf17ff9 [SWAP] vdb If the FSTYPE field is not empty, there is a filesystem on top of the corresponding device. In this example, vdb is available to Rook, while vda and its partitions have a filesystem and are not available.","title":"Ceph Prerequisites"},{"location":"Getting-Started/Prerequisites/prerequisites/#admission-controller","text":"Enabling the Rook admission controller is recommended to provide an additional level of validation that Rook is configured correctly with the custom resource (CR) settings. An admission controller intercepts requests to the Kubernetes API server prior to persistence of the object, but after the request is authenticated and authorized. To deploy the Rook admission controllers, install the cert manager before Rook is installed: 1 kubectl apply -f https://github.com/jetstack/cert-manager/releases/download/v1.11.1/cert-manager.yaml","title":"Admission Controller"},{"location":"Getting-Started/Prerequisites/prerequisites/#lvm-package","text":"Ceph OSDs have a dependency on LVM in the following scenarios: If encryption is enabled ( encryptedDevice: \"true\" in the cluster CR) A metadata device is specified LVM is not required for OSDs in these scenarios: OSDs are created on raw devices or partitions OSDs are created on PVCs using the storageClassDeviceSets If LVM is required, LVM needs to be available on the hosts where OSDs will be running. Some Linux distributions do not ship with the lvm2 package. This package is required on all storage nodes in the k8s cluster to run Ceph OSDs. Without this package even though Rook will be able to successfully create the Ceph OSDs, when a node is rebooted the OSD pods running on the restarted node will fail to start . Please install LVM using your Linux distribution's package manager. For example: CentOS : 1 sudo yum install -y lvm2 Ubuntu : 1 sudo apt-get install -y lvm2 RancherOS : Since version 1.5.0 LVM is supported Logical volumes will not be activated during the boot process. You need to add an runcmd command for that. 
1 2 runcmd : - [ \"vgchange\" , \"-ay\" ]","title":"LVM package"},{"location":"Getting-Started/Prerequisites/prerequisites/#kernel","text":"","title":"Kernel"},{"location":"Getting-Started/Prerequisites/prerequisites/#rbd","text":"Ceph requires a Linux kernel built with the RBD module. Many Linux distributions have this module, but not all. For example, the GKE Container-Optimised OS (COS) does not have RBD. Test your Kubernetes nodes by running modprobe rbd . If the rbd module is 'not found', rebuild the kernel to include the rbd module, install a newer kernel, or choose a different Linux distribution. Rook's default RBD configuration specifies only the layering feature, for broad compatibility with older kernels. If your Kubernetes nodes run a 5.4 or later kernel, additional feature flags can be enabled in the storage class. The fast-diff and object-map features are especially useful. 1 imageFeatures : layering,fast-diff,object-map,deep-flatten,exclusive-lock","title":"RBD"},{"location":"Getting-Started/Prerequisites/prerequisites/#cephfs","text":"If creating RWX volumes from a Ceph shared file system (CephFS), the recommended minimum kernel version is 4.17 . If the kernel version is less than 4.17, the requested PVC sizes will not be enforced. Storage quotas will only be enforced on newer kernels.","title":"CephFS"},{"location":"Getting-Started/Prerequisites/prerequisites/#distro-notes","text":"Specific configurations for some distributions.","title":"Distro Notes"},{"location":"Getting-Started/Prerequisites/prerequisites/#nixos","text":"For NixOS, the kernel modules will be found in the non-standard path /run/current-system/kernel-modules/lib/modules/ , and they'll be symlinked inside the also non-standard path /nix . Rook containers require read access to those locations to be able to load the required modules. They have to be bind-mounted as volumes in the CephFS and RBD plugin pods. If installing Rook with Helm, uncomment these example settings in values.yaml : csi.csiCephFSPluginVolume csi.csiCephFSPluginVolumeMount csi.csiRBDPluginVolume csi.csiRBDPluginVolumeMount If deploying without Helm, add those same values to the settings in the rook-ceph-operator-config ConfigMap found in operator.yaml: CSI_CEPHFS_PLUGIN_VOLUME CSI_CEPHFS_PLUGIN_VOLUME_MOUNT CSI_RBD_PLUGIN_VOLUME CSI_RBD_PLUGIN_VOLUME_MOUNT","title":"NixOS"},{"location":"Helm-Charts/ceph-cluster-chart/","text":"Creates Rook resources to configure a Ceph cluster using the Helm package manager. This chart is a simple packaging of templates that will optionally create Rook resources such as: CephCluster, CephFilesystem, and CephObjectStore CRs Storage classes to expose Ceph RBD volumes, CephFS volumes, and RGW buckets Ingress for external access to the dashboard Toolbox Prerequisites \u00b6 Kubernetes 1.22+ Helm 3.x Install the Rook Operator chart Installing \u00b6 The helm install command deploys rook on the Kubernetes cluster in the default configuration. The configuration section lists the parameters that can be configured during installation. It is recommended that the rook operator be installed into the rook-ceph namespace. The clusters can be installed into the same namespace as the operator or a separate namespace. Rook currently publishes builds of this chart to the release and master channels. Before installing, review the values.yaml to confirm if the default settings need to be updated. If the operator was installed in a namespace other than rook-ceph , the namespace must be set in the operatorNamespace variable. 
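For instance, if the operator chart had been installed into a hypothetical rook-operator namespace rather than rook-ceph, the cluster chart's values.yaml would need at minimum (a sketch):

```yaml
# Hypothetical namespace; match it to wherever the rook-ceph operator chart was installed.
operatorNamespace: rook-operator
```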
Set the desired settings in the cephClusterSpec . The defaults are only an example and not likely to apply to your cluster. The monitoring section should be removed from the cephClusterSpec , as it is specified separately in the helm settings. The default values for cephBlockPools , cephFileSystems , and CephObjectStores will create one of each, and their corresponding storage classes. All Ceph components now have default values for the pod resources. The resources may need to be adjusted in production clusters depending on the load. The resources can also be disabled if Ceph should not be limited (e.g. test clusters). Release \u00b6 The release channel is the most recent release of Rook that is considered stable for the community. The example install assumes you have first installed the Rook Operator Helm Chart and created your customized values.yaml. 1 2 3 helm repo add rook-release https://charts.rook.io/release helm install --create-namespace --namespace rook-ceph rook-ceph-cluster \\ --set operatorNamespace=rook-ceph rook-release/rook-ceph-cluster -f values.yaml Note --namespace specifies the cephcluster namespace, which may be different from the rook operator namespace. Configuration \u00b6 The following table lists the configurable parameters of the rook-operator chart and their default values. Parameter Description Default cephBlockPools A list of CephBlockPool configurations to deploy See below cephBlockPoolsVolumeSnapshotClass Settings for the block pool snapshot class See RBD Snapshots cephClusterSpec Cluster configuration. See below cephFileSystemVolumeSnapshotClass Settings for the filesystem snapshot class See CephFS Snapshots cephFileSystems A list of CephFileSystem configurations to deploy See below cephObjectStores A list of CephObjectStore configurations to deploy See below clusterName The metadata.name of the CephCluster CR The same as the namespace configOverride Cluster ceph.conf override nil ingress.dashboard Enable an ingress for the ceph-dashboard {} kubeVersion Optional override of the target kubernetes version nil monitoring.createPrometheusRules Whether to create the Prometheus rules for Ceph alerts false monitoring.enabled Enable Prometheus integration, will also create necessary RBAC rules to allow Operator to create ServiceMonitors. Monitoring requires Prometheus to be pre-installed false monitoring.prometheusRule.annotations Annotations applied to PrometheusRule {} monitoring.prometheusRule.labels Labels applied to PrometheusRule {} monitoring.rulesNamespaceOverride The namespace in which to create the prometheus rules, if different from the rook cluster namespace. If you have multiple rook-ceph clusters in the same k8s cluster, choose the same namespace (ideally, namespace with prometheus deployed) to set rulesNamespaceOverride for all the clusters. Otherwise, you will get duplicate alerts with multiple alert definitions. nil operatorNamespace Namespace of the main rook operator \"rook-ceph\" pspEnable Create & use PSP resources. Set this to the same value as the rook-ceph chart. false toolbox.affinity Toolbox affinity {} toolbox.containerSecurityContext Toolbox container security context {\"capabilities\":{\"drop\":[\"ALL\"]},\"runAsGroup\":2016,\"runAsNonRoot\":true,\"runAsUser\":2016} toolbox.enabled Enable Ceph debugging pod deployment. 
See toolbox false toolbox.image Toolbox image, defaults to the image used by the Ceph cluster nil toolbox.priorityClassName Set the priority class for the toolbox if desired nil toolbox.resources Toolbox resources {\"limits\":{\"cpu\":\"500m\",\"memory\":\"1Gi\"},\"requests\":{\"cpu\":\"100m\",\"memory\":\"128Mi\"}} toolbox.tolerations Toolbox tolerations [] Ceph Cluster Spec \u00b6 The CephCluster CRD takes its spec from cephClusterSpec.* . This is not an exhaustive list of parameters. For the full list, see the Cluster CRD topic. The cluster spec example is for a converged cluster where all the Ceph daemons are running locally, as in the host-based example (cluster.yaml). For a different configuration such as a PVC-based cluster (cluster-on-pvc.yaml), external cluster (cluster-external.yaml), or stretch cluster (cluster-stretched.yaml), replace this entire cephClusterSpec with the specs from those examples. Ceph Block Pools \u00b6 The cephBlockPools array in the values file will define a list of CephBlockPool as described in the table below. Parameter Description Default name The name of the CephBlockPool ceph-blockpool spec The CephBlockPool spec, see the CephBlockPool documentation. {} storageClass.enabled Whether a storage class is deployed alongside the CephBlockPool true storageClass.isDefault Whether the storage class will be the default storage class for PVCs. See PersistentVolumeClaim documentation for details. true storageClass.name The name of the storage class ceph-block storageClass.parameters See Block Storage documentation or the helm values.yaml for suitable values see values.yaml storageClass.reclaimPolicy The default Reclaim Policy to apply to PVCs created with this storage class. Delete storageClass.allowVolumeExpansion Whether volume expansion is allowed by default. true storageClass.mountOptions Specifies the mount options for storageClass [] storageClass.allowedTopologies Specifies the allowedTopologies for storageClass [] Ceph File Systems \u00b6 The cephFileSystems array in the values file will define a list of CephFileSystem as described in the table below. Parameter Description Default name The name of the CephFileSystem ceph-filesystem spec The CephFileSystem spec, see the CephFilesystem CRD documentation. see values.yaml storageClass.enabled Whether a storage class is deployed alongside the CephFileSystem true storageClass.name The name of the storage class ceph-filesystem storageClass.pool The name of Data Pool , without the filesystem name prefix data0 storageClass.parameters See Shared Filesystem documentation or the helm values.yaml for suitable values see values.yaml storageClass.reclaimPolicy The default Reclaim Policy to apply to PVCs created with this storage class. Delete storageClass.mountOptions Specifies the mount options for storageClass [] Ceph Object Stores \u00b6 The cephObjectStores array in the values file will define a list of CephObjectStore as described in the table below. Parameter Description Default name The name of the CephObjectStore ceph-objectstore spec The CephObjectStore spec, see the CephObjectStore CRD documentation. see values.yaml storageClass.enabled Whether a storage class is deployed alongside the CephObjectStore true storageClass.name The name of the storage class ceph-bucket storageClass.parameters See Object Store storage class documentation or the helm values.yaml for suitable values see values.yaml storageClass.reclaimPolicy The default Reclaim Policy to apply to PVCs created with this storage class. 
Delete ingress.enabled Enable an ingress for the object store false ingress.annotations Ingress annotations {} ingress.host.name Ingress hostname \"\" ingress.host.path Ingress path prefix / ingress.tls Ingress tls / ingress.ingressClassName Ingress tls \"\" Existing Clusters \u00b6 If you have an existing CephCluster CR that was created without the helm chart and you want the helm chart to start managing the cluster: Extract the spec section of your existing CephCluster CR and copy to the cephClusterSpec section in values.yaml . Add the following annotations and label to your existing CephCluster CR: 1 2 3 4 5 annotations : meta.helm.sh/release-name : rook-ceph-cluster meta.helm.sh/release-namespace : rook-ceph labels : app.kubernetes.io/managed-by : Helm Run the helm install command in the Installing section to create the chart. In the future when updates to the cluster are needed, ensure the values.yaml always contains the desired CephCluster spec. Development Build \u00b6 To deploy from a local build from your development environment: 1 2 cd deploy/charts/rook-ceph-cluster helm install --create-namespace --namespace rook-ceph rook-ceph-cluster -f values.yaml . Uninstalling the Chart \u00b6 To see the currently installed Rook chart: 1 helm ls --namespace rook-ceph To uninstall/delete the rook-ceph-cluster chart: 1 helm delete --namespace rook-ceph rook-ceph-cluster The command removes all the Kubernetes components associated with the chart and deletes the release. Removing the cluster chart does not remove the Rook operator. In addition, all data on hosts in the Rook data directory ( /var/lib/rook by default) and on OSD raw devices is kept. To reuse disks, you will have to wipe them before recreating the cluster. See the teardown documentation for more information.","title":"Ceph Cluster Helm Chart"},{"location":"Helm-Charts/ceph-cluster-chart/#prerequisites","text":"Kubernetes 1.22+ Helm 3.x Install the Rook Operator chart","title":"Prerequisites"},{"location":"Helm-Charts/ceph-cluster-chart/#installing","text":"The helm install command deploys rook on the Kubernetes cluster in the default configuration. The configuration section lists the parameters that can be configured during installation. It is recommended that the rook operator be installed into the rook-ceph namespace. The clusters can be installed into the same namespace as the operator or a separate namespace. Rook currently publishes builds of this chart to the release and master channels. Before installing, review the values.yaml to confirm if the default settings need to be updated. If the operator was installed in a namespace other than rook-ceph , the namespace must be set in the operatorNamespace variable. Set the desired settings in the cephClusterSpec . The defaults are only an example and not likely to apply to your cluster. The monitoring section should be removed from the cephClusterSpec , as it is specified separately in the helm settings. The default values for cephBlockPools , cephFileSystems , and CephObjectStores will create one of each, and their corresponding storage classes. All Ceph components now have default values for the pod resources. The resources may need to be adjusted in production clusters depending on the load. The resources can also be disabled if Ceph should not be limited (e.g. test clusters).","title":"Installing"},{"location":"Helm-Charts/ceph-cluster-chart/#release","text":"The release channel is the most recent release of Rook that is considered stable for the community. 
The example install assumes you have first installed the Rook Operator Helm Chart and created your customized values.yaml. 1 2 3 helm repo add rook-release https://charts.rook.io/release helm install --create-namespace --namespace rook-ceph rook-ceph-cluster \\ --set operatorNamespace=rook-ceph rook-release/rook-ceph-cluster -f values.yaml Note --namespace specifies the cephcluster namespace, which may be different from the rook operator namespace.","title":"Release"},{"location":"Helm-Charts/ceph-cluster-chart/#configuration","text":"The following table lists the configurable parameters of the rook-operator chart and their default values. Parameter Description Default cephBlockPools A list of CephBlockPool configurations to deploy See below cephBlockPoolsVolumeSnapshotClass Settings for the block pool snapshot class See RBD Snapshots cephClusterSpec Cluster configuration. See below cephFileSystemVolumeSnapshotClass Settings for the filesystem snapshot class See CephFS Snapshots cephFileSystems A list of CephFileSystem configurations to deploy See below cephObjectStores A list of CephObjectStore configurations to deploy See below clusterName The metadata.name of the CephCluster CR The same as the namespace configOverride Cluster ceph.conf override nil ingress.dashboard Enable an ingress for the ceph-dashboard {} kubeVersion Optional override of the target kubernetes version nil monitoring.createPrometheusRules Whether to create the Prometheus rules for Ceph alerts false monitoring.enabled Enable Prometheus integration, will also create necessary RBAC rules to allow Operator to create ServiceMonitors. Monitoring requires Prometheus to be pre-installed false monitoring.prometheusRule.annotations Annotations applied to PrometheusRule {} monitoring.prometheusRule.labels Labels applied to PrometheusRule {} monitoring.rulesNamespaceOverride The namespace in which to create the prometheus rules, if different from the rook cluster namespace. If you have multiple rook-ceph clusters in the same k8s cluster, choose the same namespace (ideally, namespace with prometheus deployed) to set rulesNamespaceOverride for all the clusters. Otherwise, you will get duplicate alerts with multiple alert definitions. nil operatorNamespace Namespace of the main rook operator \"rook-ceph\" pspEnable Create & use PSP resources. Set this to the same value as the rook-ceph chart. false toolbox.affinity Toolbox affinity {} toolbox.containerSecurityContext Toolbox container security context {\"capabilities\":{\"drop\":[\"ALL\"]},\"runAsGroup\":2016,\"runAsNonRoot\":true,\"runAsUser\":2016} toolbox.enabled Enable Ceph debugging pod deployment. See toolbox false toolbox.image Toolbox image, defaults to the image used by the Ceph cluster nil toolbox.priorityClassName Set the priority class for the toolbox if desired nil toolbox.resources Toolbox resources {\"limits\":{\"cpu\":\"500m\",\"memory\":\"1Gi\"},\"requests\":{\"cpu\":\"100m\",\"memory\":\"128Mi\"}} toolbox.tolerations Toolbox tolerations []","title":"Configuration"},{"location":"Helm-Charts/ceph-cluster-chart/#ceph-cluster-spec","text":"The CephCluster CRD takes its spec from cephClusterSpec.* . This is not an exhaustive list of parameters. For the full list, see the Cluster CRD topic. The cluster spec example is for a converged cluster where all the Ceph daemons are running locally, as in the host-based example (cluster.yaml). 
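To make the shape of that override concrete, a heavily trimmed cephClusterSpec in values.yaml might look like the sketch below; the field values are illustrative rather than the chart defaults, and the full set of fields is documented in the Cluster CRD topic.

```yaml
cephClusterSpec:
  cephVersion:
    image: quay.io/ceph/ceph:v17.2.6   # illustrative Ceph image tag
  dataDirHostPath: /var/lib/rook
  mon:
    count: 3
    allowMultiplePerNode: false
  dashboard:
    enabled: true
  storage:
    useAllNodes: true
    useAllDevices: true
```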
For a different configuration such as a PVC-based cluster (cluster-on-pvc.yaml), external cluster (cluster-external.yaml), or stretch cluster (cluster-stretched.yaml), replace this entire cephClusterSpec with the specs from those examples.","title":"Ceph Cluster Spec"},{"location":"Helm-Charts/ceph-cluster-chart/#ceph-block-pools","text":"The cephBlockPools array in the values file will define a list of CephBlockPool as described in the table below. Parameter Description Default name The name of the CephBlockPool ceph-blockpool spec The CephBlockPool spec, see the CephBlockPool documentation. {} storageClass.enabled Whether a storage class is deployed alongside the CephBlockPool true storageClass.isDefault Whether the storage class will be the default storage class for PVCs. See PersistentVolumeClaim documentation for details. true storageClass.name The name of the storage class ceph-block storageClass.parameters See Block Storage documentation or the helm values.yaml for suitable values see values.yaml storageClass.reclaimPolicy The default Reclaim Policy to apply to PVCs created with this storage class. Delete storageClass.allowVolumeExpansion Whether volume expansion is allowed by default. true storageClass.mountOptions Specifies the mount options for storageClass [] storageClass.allowedTopologies Specifies the allowedTopologies for storageClass []","title":"Ceph Block Pools"},{"location":"Helm-Charts/ceph-cluster-chart/#ceph-file-systems","text":"The cephFileSystems array in the values file will define a list of CephFileSystem as described in the table below. Parameter Description Default name The name of the CephFileSystem ceph-filesystem spec The CephFileSystem spec, see the CephFilesystem CRD documentation. see values.yaml storageClass.enabled Whether a storage class is deployed alongside the CephFileSystem true storageClass.name The name of the storage class ceph-filesystem storageClass.pool The name of Data Pool , without the filesystem name prefix data0 storageClass.parameters See Shared Filesystem documentation or the helm values.yaml for suitable values see values.yaml storageClass.reclaimPolicy The default Reclaim Policy to apply to PVCs created with this storage class. Delete storageClass.mountOptions Specifies the mount options for storageClass []","title":"Ceph File Systems"},{"location":"Helm-Charts/ceph-cluster-chart/#ceph-object-stores","text":"The cephObjectStores array in the values file will define a list of CephObjectStore as described in the table below. Parameter Description Default name The name of the CephObjectStore ceph-objectstore spec The CephObjectStore spec, see the CephObjectStore CRD documentation. see values.yaml storageClass.enabled Whether a storage class is deployed alongside the CephObjectStore true storageClass.name The name of the storage class ceph-bucket storageClass.parameters See Object Store storage class documentation or the helm values.yaml for suitable values see values.yaml storageClass.reclaimPolicy The default Reclaim Policy to apply to PVCs created with this storage class. 
Delete ingress.enabled Enable an ingress for the object store false ingress.annotations Ingress annotations {} ingress.host.name Ingress hostname \"\" ingress.host.path Ingress path prefix / ingress.tls Ingress tls / ingress.ingressClassName Ingress tls \"\"","title":"Ceph Object Stores"},{"location":"Helm-Charts/ceph-cluster-chart/#existing-clusters","text":"If you have an existing CephCluster CR that was created without the helm chart and you want the helm chart to start managing the cluster: Extract the spec section of your existing CephCluster CR and copy to the cephClusterSpec section in values.yaml . Add the following annotations and label to your existing CephCluster CR: 1 2 3 4 5 annotations : meta.helm.sh/release-name : rook-ceph-cluster meta.helm.sh/release-namespace : rook-ceph labels : app.kubernetes.io/managed-by : Helm Run the helm install command in the Installing section to create the chart. In the future when updates to the cluster are needed, ensure the values.yaml always contains the desired CephCluster spec.","title":"Existing Clusters"},{"location":"Helm-Charts/ceph-cluster-chart/#development-build","text":"To deploy from a local build from your development environment: 1 2 cd deploy/charts/rook-ceph-cluster helm install --create-namespace --namespace rook-ceph rook-ceph-cluster -f values.yaml .","title":"Development Build"},{"location":"Helm-Charts/ceph-cluster-chart/#uninstalling-the-chart","text":"To see the currently installed Rook chart: 1 helm ls --namespace rook-ceph To uninstall/delete the rook-ceph-cluster chart: 1 helm delete --namespace rook-ceph rook-ceph-cluster The command removes all the Kubernetes components associated with the chart and deletes the release. Removing the cluster chart does not remove the Rook operator. In addition, all data on hosts in the Rook data directory ( /var/lib/rook by default) and on OSD raw devices is kept. To reuse disks, you will have to wipe them before recreating the cluster. See the teardown documentation for more information.","title":"Uninstalling the Chart"},{"location":"Helm-Charts/helm-charts/","text":"Rook has published the following Helm charts for the Ceph storage provider: Rook Ceph Operator : Starts the Ceph Operator, which will watch for Ceph CRs (custom resources) Rook Ceph Cluster : Creates Ceph CRs that the operator will use to configure the cluster The Helm charts are intended to simplify deployment and upgrades. Configuring the Rook resources without Helm is also fully supported by creating the manifests directly.","title":"Helm Charts Overview"},{"location":"Helm-Charts/operator-chart/","text":"Installs rook to create, configure, and manage Ceph clusters on Kubernetes. Introduction \u00b6 This chart bootstraps a rook-ceph-operator deployment on a Kubernetes cluster using the Helm package manager. Prerequisites \u00b6 Kubernetes 1.22+ Helm 3.x See the Helm support matrix for more details. Installing \u00b6 The Ceph Operator helm chart will install the basic components necessary to create a storage platform for your Kubernetes cluster. Install the Helm chart Create a Rook cluster . The helm install command deploys rook on the Kubernetes cluster in the default configuration. The configuration section lists the parameters that can be configured during installation. It is recommended that the rook operator be installed into the rook-ceph namespace (you will install your clusters into separate namespaces). Rook currently publishes builds of the Ceph operator to the release and master channels. 
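Before picking a channel below, it can help to see the shape of a values.yaml override for the operator chart. A minimal sketch using only parameters from the configuration table that follows (the values shown are the documented defaults):

```yaml
crds:
  enabled: true          # let the chart manage the Rook CRDs
csi:
  enableRbdDriver: true
  enableCephfsDriver: true
  provisionerReplicas: 2
monitoring:
  enabled: false         # requires Prometheus to be pre-installed if set to true
logLevel: INFO
```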
Release \u00b6 The release channel is the most recent release of Rook that is considered stable for the community. 1 2 helm repo add rook-release https://charts.rook.io/release helm install --create-namespace --namespace rook-ceph rook-ceph rook-release/rook-ceph -f values.yaml For example settings, see the next section or values.yaml Configuration \u00b6 The following table lists the configurable parameters of the rook-operator chart and their default values. Parameter Description Default admissionController Set tolerations and nodeAffinity 1 for admission controller pod. The admission controller would be best to start on the same nodes as other ceph daemons. nil allowLoopDevices If true, loop devices are allowed to be used for osds in test clusters false annotations Pod annotations {} cephCommandsTimeoutSeconds The timeout for ceph commands in seconds \"15\" containerSecurityContext Set the container security context for the operator {\"capabilities\":{\"drop\":[\"ALL\"]},\"runAsGroup\":2016,\"runAsNonRoot\":true,\"runAsUser\":2016} crds.enabled Whether the helm chart should create and update the CRDs. If false, the CRDs must be managed independently with deploy/examples/crds.yaml. WARNING Only set during first deployment. If later disabled the cluster may be DESTROYED. If the CRDs are deleted in this case, see the disaster recovery guide to restore them. true csi.allowUnsupportedVersion Allow starting an unsupported ceph-csi image false csi.attacher.image Kubernetes CSI Attacher image registry.k8s.io/sig-storage/csi-attacher:v4.3.0 csi.cephFSAttachRequired Whether to skip any attach operation altogether for CephFS PVCs. See more details here . If cephFSAttachRequired is set to false it skips the volume attachments and makes the creation of pods using the CephFS PVC fast. WARNING It's highly discouraged to use this for CephFS RWO volumes. Refer to this issue for more details. true csi.cephFSFSGroupPolicy Policy for modifying a volume's ownership or permissions when the CephFS PVC is being mounted. supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html \"File\" csi.cephFSKernelMountOptions Set CephFS Kernel mount options to use https://docs.ceph.com/en/latest/man/8/mount.ceph/#options . Set to \"ms_mode=secure\" when connections.encrypted is enabled in CephCluster CR nil csi.cephFSPluginUpdateStrategy CSI CephFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate RollingUpdate csi.cephFSPluginUpdateStrategyMaxUnavailable A maxUnavailable parameter of CSI cephFS plugin daemonset update strategy. 1 csi.cephcsi.image Ceph CSI image quay.io/cephcsi/cephcsi:v3.9.0 csi.cephfsGrpcMetricsPort CSI CephFS driver GRPC metrics port 9091 csi.cephfsLivenessMetricsPort CSI CephFS driver metrics port 9081 csi.cephfsPodLabels Labels to add to the CSI CephFS Deployments and DaemonSets Pods nil csi.clusterName Cluster name identifier to set as metadata on the CephFS subvolume and RBD images. 
This will be useful in cases like for example, when two container orchestrator clusters (Kubernetes/OCP) are using a single ceph cluster nil csi.csiAddons.enabled Enable CSIAddons false csi.csiAddons.image CSIAddons Sidecar image \"quay.io/csiaddons/k8s-sidecar:v0.7.0\" csi.csiAddonsPort CSI Addons server port 9070 csi.csiCephFSPluginResource CEPH CSI CephFS plugin resource requirement list see values.yaml csi.csiCephFSPluginVolume The volume of the CephCSI CephFS plugin DaemonSet nil csi.csiCephFSPluginVolumeMount The volume mounts of the CephCSI CephFS plugin DaemonSet nil csi.csiCephFSProvisionerResource CEPH CSI CephFS provisioner resource requirement list see values.yaml csi.csiNFSPluginResource CEPH CSI NFS plugin resource requirement list see values.yaml csi.csiNFSProvisionerResource CEPH CSI NFS provisioner resource requirement list see values.yaml csi.csiRBDPluginResource CEPH CSI RBD plugin resource requirement list see values.yaml csi.csiRBDPluginVolume The volume of the CephCSI RBD plugin DaemonSet nil csi.csiRBDPluginVolumeMount The volume mounts of the CephCSI RBD plugin DaemonSet nil csi.csiRBDProvisionerResource CEPH CSI RBD provisioner resource requirement list csi-omap-generator resources will be applied only if enableOMAPGenerator is set to true see values.yaml csi.enableCSIEncryption Enable Ceph CSI PVC encryption support false csi.enableCSIHostNetwork Enable host networking for CSI CephFS and RBD nodeplugins. This may be necessary in some network configurations where the SDN does not provide access to an external cluster or there is significant drop in read/write performance true csi.enableCephfsDriver Enable Ceph CSI CephFS driver true csi.enableCephfsSnapshotter Enable Snapshotter in CephFS provisioner pod true csi.enableGrpcMetrics Enable Ceph CSI GRPC Metrics false csi.enableLiveness Enable Ceph CSI Liveness sidecar deployment false csi.enableMetadata Enable adding volume metadata on the CephFS subvolumes and RBD images. Not all users might be interested in getting volume/snapshot details as metadata on CephFS subvolume and RBD images. Hence enable metadata is false by default false csi.enableNFSSnapshotter Enable Snapshotter in NFS provisioner pod true csi.enableOMAPGenerator OMAP generator generates the omap mapping between the PV name and the RBD image which helps CSI to identify the rbd images for CSI operations. CSI_ENABLE_OMAP_GENERATOR needs to be enabled when we are using rbd mirroring feature. By default OMAP generator is disabled and when enabled, it will be deployed as a sidecar with CSI provisioner pod, to enable set it to true. false csi.enablePluginSelinuxHostMount Enable Host mount for /etc/selinux directory for Ceph CSI nodeplugins false csi.enableRBDSnapshotter Enable Snapshotter in RBD provisioner pod true csi.enableRbdDriver Enable Ceph CSI RBD driver true csi.forceCephFSKernelClient Enable Ceph Kernel clients on kernel < 4.17. If your kernel does not support quotas for CephFS you may want to disable this setting. However, this will cause an issue during upgrades with the FUSE client. See the upgrade guide true csi.grpcTimeoutInSeconds Set GRPC timeout for csi containers (in seconds). It should be >= 120. If this value is not set or is invalid, it defaults to 150 150 csi.imagePullPolicy Image pull policy \"IfNotPresent\" csi.kubeletDirPath Kubelet root directory path (if the Kubelet uses a different path for the --root-dir flag) /var/lib/kubelet csi.logLevel Set logging level for cephCSI containers maintained by the cephCSI. 
Supported values from 0 to 5. 0 for general useful logs, 5 for trace level verbosity. 0 csi.nfs.enabled Enable the nfs csi driver false csi.nfsAttachRequired Whether to skip any attach operation altogether for NFS PVCs. See more details here . If cephFSAttachRequired is set to false it skips the volume attachments and makes the creation of pods using the NFS PVC fast. WARNING It's highly discouraged to use this for NFS RWO volumes. Refer to this issue for more details. true csi.nfsFSGroupPolicy Policy for modifying a volume's ownership or permissions when the NFS PVC is being mounted. supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html \"File\" csi.nfsPluginUpdateStrategy CSI NFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate RollingUpdate csi.nfsPodLabels Labels to add to the CSI NFS Deployments and DaemonSets Pods nil csi.pluginNodeAffinity The node labels for affinity of the CephCSI RBD plugin DaemonSet 1 nil csi.pluginPriorityClassName PriorityClassName to be set on csi driver plugin pods \"system-node-critical\" csi.pluginTolerations Array of tolerations in YAML format which will be added to CephCSI plugin DaemonSet nil csi.provisioner.image Kubernetes CSI provisioner image registry.k8s.io/sig-storage/csi-provisioner:v3.5.0 csi.provisionerNodeAffinity The node labels for affinity of the CSI provisioner deployment 1 nil csi.provisionerPriorityClassName PriorityClassName to be set on csi driver provisioner pods \"system-cluster-critical\" csi.provisionerReplicas Set replicas for csi provisioner deployment 2 csi.provisionerTolerations Array of tolerations in YAML format which will be added to CSI provisioner deployment nil csi.rbdAttachRequired Whether to skip any attach operation altogether for RBD PVCs. See more details here . If set to false it skips the volume attachments and makes the creation of pods using the RBD PVC fast. WARNING It's highly discouraged to use this for RWO volumes as it can cause data corruption. csi-addons operations like Reclaimspace and PVC Keyrotation will also not be supported if set to false since we'll have no VolumeAttachments to determine which node the PVC is mounted on. Refer to this issue for more details. true csi.rbdFSGroupPolicy Policy for modifying a volume's ownership or permissions when the RBD PVC is being mounted. supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html \"File\" csi.rbdGrpcMetricsPort Ceph CSI RBD driver GRPC metrics port 9090 csi.rbdLivenessMetricsPort Ceph CSI RBD driver metrics port 8080 csi.rbdPluginUpdateStrategy CSI RBD plugin daemonset update strategy, supported values are OnDelete and RollingUpdate RollingUpdate csi.rbdPluginUpdateStrategyMaxUnavailable A maxUnavailable parameter of CSI RBD plugin daemonset update strategy. 1 csi.rbdPodLabels Labels to add to the CSI RBD Deployments and DaemonSets Pods nil csi.readAffinity.crushLocationLabels Define which node labels to use as CRUSH location. This should correspond to the values set in the CRUSH map. labels listed here csi.readAffinity.enabled Enable read affinity for RBD volumes. Recommended to set to true if running kernel 5.8 or newer. 
false csi.registrar.image Kubernetes CSI registrar image registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.8.0 csi.resizer.image Kubernetes CSI resizer image registry.k8s.io/sig-storage/csi-resizer:v1.8.0 csi.serviceMonitor.enabled Enable ServiceMonitor for Ceph CSI drivers false csi.serviceMonitor.interval Service monitor scrape interval \"5s\" csi.serviceMonitor.labels ServiceMonitor additional labels {} csi.sidecarLogLevel Set logging level for Kubernetes-csi sidecar containers. Supported values from 0 to 5. 0 for general useful logs (the default), 5 for trace level verbosity. 0 csi.snapshotter.image Kubernetes CSI snapshotter image registry.k8s.io/sig-storage/csi-snapshotter:v6.2.2 csi.topology.domainLabels domainLabels define which node labels to use as domains for CSI nodeplugins to advertise their domains nil csi.topology.enabled Enable topology based provisioning false currentNamespaceOnly Whether the operator should watch cluster CRD in its own namespace or not false disableAdmissionController Whether to disable the admission controller true disableDeviceHotplug Disable automatic orchestration when new devices are discovered. false discover.nodeAffinity The node labels for affinity of discover-agent 1 nil discover.podLabels Labels to add to the discover pods nil discover.resources Add resources to discover daemon pods nil discover.toleration Toleration for the discover pods. Options: NoSchedule , PreferNoSchedule or NoExecute nil discover.tolerationKey The specific key of the taint to tolerate nil discover.tolerations Array of tolerations in YAML format which will be added to discover deployment nil discoverDaemonUdev Blacklist certain disks according to the regex provided. nil enableDiscoveryDaemon Enable discovery daemon false enableOBCWatchOperatorNamespace Whether the OBC provisioner should watch on the operator namespace or not, if not the namespace of the cluster will be used true hostpathRequiresPrivileged Runs Ceph Pods as privileged to be able to write to hostPaths in OpenShift with SELinux restrictions. false image.pullPolicy Image pull policy \"IfNotPresent\" image.repository Image \"rook/ceph\" image.tag Image tag master imagePullSecrets imagePullSecrets option allow to pull docker images from private docker registry. Option will be passed to all service accounts. nil logLevel Global log level for the operator. Options: ERROR , WARNING , INFO , DEBUG \"INFO\" monitoring.enabled Enable monitoring. Requires Prometheus to be pre-installed. Enabling will also create RBAC rules to allow Operator to create ServiceMonitors false nodeSelector Kubernetes nodeSelector to add to the Deployment. {} priorityClassName Set the priority class for the rook operator deployment if desired nil pspEnable If true, create & use PSP resources false rbacEnable If true, create & use RBAC resources true resources Pod resource requests & limits {\"limits\":{\"cpu\":\"500m\",\"memory\":\"512Mi\"},\"requests\":{\"cpu\":\"100m\",\"memory\":\"128Mi\"}} scaleDownOperator If true, scale down the rook operator. This is useful for administrative actions where the rook operator must be scaled down, while using gitops style tooling to deploy your helm charts. false tolerations List of Kubernetes tolerations to add to the Deployment. 
[] unreachableNodeTolerationSeconds Delay to use for the node.kubernetes.io/unreachable pod failure toleration to override the Kubernetes default of 5 minutes 5 useOperatorHostNetwork If true, run rook operator on the host network nil Development Build \u00b6 To deploy from a local build from your development environment: Build the Rook docker image: make Copy the image to your K8s cluster, such as with the docker save then the docker load commands Install the helm chart: 1 2 cd deploy/charts/rook-ceph helm install --create-namespace --namespace rook-ceph rook-ceph . Uninstalling the Chart \u00b6 To see the currently installed Rook chart: 1 helm ls --namespace rook-ceph To uninstall/delete the rook-ceph deployment: 1 helm delete --namespace rook-ceph rook-ceph The command removes all the Kubernetes components associated with the chart and deletes the release. After uninstalling you may want to clean up the CRDs as described on the teardown documentation . nodeAffinity and *NodeAffinity options should have the format \"role=storage,rook; storage=ceph\" or storage=;role=rook-example or storage=; ( checks only for presence of key ) \u21a9 \u21a9 \u21a9 \u21a9","title":"Ceph Operator Helm Chart"},{"location":"Helm-Charts/operator-chart/#introduction","text":"This chart bootstraps a rook-ceph-operator deployment on a Kubernetes cluster using the Helm package manager.","title":"Introduction"},{"location":"Helm-Charts/operator-chart/#prerequisites","text":"Kubernetes 1.22+ Helm 3.x See the Helm support matrix for more details.","title":"Prerequisites"},{"location":"Helm-Charts/operator-chart/#installing","text":"The Ceph Operator helm chart will install the basic components necessary to create a storage platform for your Kubernetes cluster. Install the Helm chart Create a Rook cluster . The helm install command deploys rook on the Kubernetes cluster in the default configuration. The configuration section lists the parameters that can be configured during installation. It is recommended that the rook operator be installed into the rook-ceph namespace (you will install your clusters into separate namespaces). Rook currently publishes builds of the Ceph operator to the release and master channels.","title":"Installing"},{"location":"Helm-Charts/operator-chart/#release","text":"The release channel is the most recent release of Rook that is considered stable for the community. 1 2 helm repo add rook-release https://charts.rook.io/release helm install --create-namespace --namespace rook-ceph rook-ceph rook-release/rook-ceph -f values.yaml For example settings, see the next section or values.yaml","title":"Release"},{"location":"Helm-Charts/operator-chart/#configuration","text":"The following table lists the configurable parameters of the rook-operator chart and their default values. Parameter Description Default admissionController Set tolerations and nodeAffinity 1 for admission controller pod. The admission controller would be best to start on the same nodes as other ceph daemons. nil allowLoopDevices If true, loop devices are allowed to be used for osds in test clusters false annotations Pod annotations {} cephCommandsTimeoutSeconds The timeout for ceph commands in seconds \"15\" containerSecurityContext Set the container security context for the operator {\"capabilities\":{\"drop\":[\"ALL\"]},\"runAsGroup\":2016,\"runAsNonRoot\":true,\"runAsUser\":2016} crds.enabled Whether the helm chart should create and update the CRDs. 
If false, the CRDs must be managed independently with deploy/examples/crds.yaml. WARNING Only set during first deployment. If later disabled the cluster may be DESTROYED. If the CRDs are deleted in this case, see the disaster recovery guide to restore them. true csi.allowUnsupportedVersion Allow starting an unsupported ceph-csi image false csi.attacher.image Kubernetes CSI Attacher image registry.k8s.io/sig-storage/csi-attacher:v4.3.0 csi.cephFSAttachRequired Whether to skip any attach operation altogether for CephFS PVCs. See more details here . If cephFSAttachRequired is set to false it skips the volume attachments and makes the creation of pods using the CephFS PVC fast. WARNING It's highly discouraged to use this for CephFS RWO volumes. Refer to this issue for more details. true csi.cephFSFSGroupPolicy Policy for modifying a volume's ownership or permissions when the CephFS PVC is being mounted. supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html \"File\" csi.cephFSKernelMountOptions Set CephFS Kernel mount options to use https://docs.ceph.com/en/latest/man/8/mount.ceph/#options . Set to \"ms_mode=secure\" when connections.encrypted is enabled in CephCluster CR nil csi.cephFSPluginUpdateStrategy CSI CephFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate RollingUpdate csi.cephFSPluginUpdateStrategyMaxUnavailable A maxUnavailable parameter of CSI cephFS plugin daemonset update strategy. 1 csi.cephcsi.image Ceph CSI image quay.io/cephcsi/cephcsi:v3.9.0 csi.cephfsGrpcMetricsPort CSI CephFS driver GRPC metrics port 9091 csi.cephfsLivenessMetricsPort CSI CephFS driver metrics port 9081 csi.cephfsPodLabels Labels to add to the CSI CephFS Deployments and DaemonSets Pods nil csi.clusterName Cluster name identifier to set as metadata on the CephFS subvolume and RBD images. This will be useful in cases like for example, when two container orchestrator clusters (Kubernetes/OCP) are using a single ceph cluster nil csi.csiAddons.enabled Enable CSIAddons false csi.csiAddons.image CSIAddons Sidecar image \"quay.io/csiaddons/k8s-sidecar:v0.7.0\" csi.csiAddonsPort CSI Addons server port 9070 csi.csiCephFSPluginResource CEPH CSI CephFS plugin resource requirement list see values.yaml csi.csiCephFSPluginVolume The volume of the CephCSI CephFS plugin DaemonSet nil csi.csiCephFSPluginVolumeMount The volume mounts of the CephCSI CephFS plugin DaemonSet nil csi.csiCephFSProvisionerResource CEPH CSI CephFS provisioner resource requirement list see values.yaml csi.csiNFSPluginResource CEPH CSI NFS plugin resource requirement list see values.yaml csi.csiNFSProvisionerResource CEPH CSI NFS provisioner resource requirement list see values.yaml csi.csiRBDPluginResource CEPH CSI RBD plugin resource requirement list see values.yaml csi.csiRBDPluginVolume The volume of the CephCSI RBD plugin DaemonSet nil csi.csiRBDPluginVolumeMount The volume mounts of the CephCSI RBD plugin DaemonSet nil csi.csiRBDProvisionerResource CEPH CSI RBD provisioner resource requirement list csi-omap-generator resources will be applied only if enableOMAPGenerator is set to true see values.yaml csi.enableCSIEncryption Enable Ceph CSI PVC encryption support false csi.enableCSIHostNetwork Enable host networking for CSI CephFS and RBD nodeplugins. 
This may be necessary in some network configurations where the SDN does not provide access to an external cluster or there is significant drop in read/write performance true csi.enableCephfsDriver Enable Ceph CSI CephFS driver true csi.enableCephfsSnapshotter Enable Snapshotter in CephFS provisioner pod true csi.enableGrpcMetrics Enable Ceph CSI GRPC Metrics false csi.enableLiveness Enable Ceph CSI Liveness sidecar deployment false csi.enableMetadata Enable adding volume metadata on the CephFS subvolumes and RBD images. Not all users might be interested in getting volume/snapshot details as metadata on CephFS subvolume and RBD images. Hence enable metadata is false by default false csi.enableNFSSnapshotter Enable Snapshotter in NFS provisioner pod true csi.enableOMAPGenerator OMAP generator generates the omap mapping between the PV name and the RBD image which helps CSI to identify the rbd images for CSI operations. CSI_ENABLE_OMAP_GENERATOR needs to be enabled when we are using rbd mirroring feature. By default OMAP generator is disabled and when enabled, it will be deployed as a sidecar with CSI provisioner pod, to enable set it to true. false csi.enablePluginSelinuxHostMount Enable Host mount for /etc/selinux directory for Ceph CSI nodeplugins false csi.enableRBDSnapshotter Enable Snapshotter in RBD provisioner pod true csi.enableRbdDriver Enable Ceph CSI RBD driver true csi.forceCephFSKernelClient Enable Ceph Kernel clients on kernel < 4.17. If your kernel does not support quotas for CephFS you may want to disable this setting. However, this will cause an issue during upgrades with the FUSE client. See the upgrade guide true csi.grpcTimeoutInSeconds Set GRPC timeout for csi containers (in seconds). It should be >= 120. If this value is not set or is invalid, it defaults to 150 150 csi.imagePullPolicy Image pull policy \"IfNotPresent\" csi.kubeletDirPath Kubelet root directory path (if the Kubelet uses a different path for the --root-dir flag) /var/lib/kubelet csi.logLevel Set logging level for cephCSI containers maintained by the cephCSI. Supported values from 0 to 5. 0 for general useful logs, 5 for trace level verbosity. 0 csi.nfs.enabled Enable the nfs csi driver false csi.nfsAttachRequired Whether to skip any attach operation altogether for NFS PVCs. See more details here . If cephFSAttachRequired is set to false it skips the volume attachments and makes the creation of pods using the NFS PVC fast. WARNING It's highly discouraged to use this for NFS RWO volumes. Refer to this issue for more details. true csi.nfsFSGroupPolicy Policy for modifying a volume's ownership or permissions when the NFS PVC is being mounted. 
supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html \"File\" csi.nfsPluginUpdateStrategy CSI NFS plugin daemonset update strategy, supported values are OnDelete and RollingUpdate RollingUpdate csi.nfsPodLabels Labels to add to the CSI NFS Deployments and DaemonSets Pods nil csi.pluginNodeAffinity The node labels for affinity of the CephCSI RBD plugin DaemonSet 1 nil csi.pluginPriorityClassName PriorityClassName to be set on csi driver plugin pods \"system-node-critical\" csi.pluginTolerations Array of tolerations in YAML format which will be added to CephCSI plugin DaemonSet nil csi.provisioner.image Kubernetes CSI provisioner image registry.k8s.io/sig-storage/csi-provisioner:v3.5.0 csi.provisionerNodeAffinity The node labels for affinity of the CSI provisioner deployment 1 nil csi.provisionerPriorityClassName PriorityClassName to be set on csi driver provisioner pods \"system-cluster-critical\" csi.provisionerReplicas Set replicas for csi provisioner deployment 2 csi.provisionerTolerations Array of tolerations in YAML format which will be added to CSI provisioner deployment nil csi.rbdAttachRequired Whether to skip any attach operation altogether for RBD PVCs. See more details here . If set to false it skips the volume attachments and makes the creation of pods using the RBD PVC fast. WARNING It's highly discouraged to use this for RWO volumes as it can cause data corruption. csi-addons operations like Reclaimspace and PVC Keyrotation will also not be supported if set to false since we'll have no VolumeAttachments to determine which node the PVC is mounted on. Refer to this issue for more details. true csi.rbdFSGroupPolicy Policy for modifying a volume's ownership or permissions when the RBD PVC is being mounted. supported values are documented at https://kubernetes-csi.github.io/docs/support-fsgroup.html \"File\" csi.rbdGrpcMetricsPort Ceph CSI RBD driver GRPC metrics port 9090 csi.rbdLivenessMetricsPort Ceph CSI RBD driver metrics port 8080 csi.rbdPluginUpdateStrategy CSI RBD plugin daemonset update strategy, supported values are OnDelete and RollingUpdate RollingUpdate csi.rbdPluginUpdateStrategyMaxUnavailable A maxUnavailable parameter of CSI RBD plugin daemonset update strategy. 1 csi.rbdPodLabels Labels to add to the CSI RBD Deployments and DaemonSets Pods nil csi.readAffinity.crushLocationLabels Define which node labels to use as CRUSH location. This should correspond to the values set in the CRUSH map. labels listed here csi.readAffinity.enabled Enable read affinity for RBD volumes. Recommended to set to true if running kernel 5.8 or newer. false csi.registrar.image Kubernetes CSI registrar image registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.8.0 csi.resizer.image Kubernetes CSI resizer image registry.k8s.io/sig-storage/csi-resizer:v1.8.0 csi.serviceMonitor.enabled Enable ServiceMonitor for Ceph CSI drivers false csi.serviceMonitor.interval Service monitor scrape interval \"5s\" csi.serviceMonitor.labels ServiceMonitor additional labels {} csi.sidecarLogLevel Set logging level for Kubernetes-csi sidecar containers. Supported values from 0 to 5. 0 for general useful logs (the default), 5 for trace level verbosity. 
0 csi.snapshotter.image Kubernetes CSI snapshotter image registry.k8s.io/sig-storage/csi-snapshotter:v6.2.2 csi.topology.domainLabels domainLabels define which node labels to use as domains for CSI nodeplugins to advertise their domains nil csi.topology.enabled Enable topology based provisioning false currentNamespaceOnly Whether the operator should watch cluster CRD in its own namespace or not false disableAdmissionController Whether to disable the admission controller true disableDeviceHotplug Disable automatic orchestration when new devices are discovered. false discover.nodeAffinity The node labels for affinity of discover-agent 1 nil discover.podLabels Labels to add to the discover pods nil discover.resources Add resources to discover daemon pods nil discover.toleration Toleration for the discover pods. Options: NoSchedule , PreferNoSchedule or NoExecute nil discover.tolerationKey The specific key of the taint to tolerate nil discover.tolerations Array of tolerations in YAML format which will be added to discover deployment nil discoverDaemonUdev Blacklist certain disks according to the regex provided. nil enableDiscoveryDaemon Enable discovery daemon false enableOBCWatchOperatorNamespace Whether the OBC provisioner should watch on the operator namespace or not, if not the namespace of the cluster will be used true hostpathRequiresPrivileged Runs Ceph Pods as privileged to be able to write to hostPaths in OpenShift with SELinux restrictions. false image.pullPolicy Image pull policy \"IfNotPresent\" image.repository Image \"rook/ceph\" image.tag Image tag master imagePullSecrets imagePullSecrets option allow to pull docker images from private docker registry. Option will be passed to all service accounts. nil logLevel Global log level for the operator. Options: ERROR , WARNING , INFO , DEBUG \"INFO\" monitoring.enabled Enable monitoring. Requires Prometheus to be pre-installed. Enabling will also create RBAC rules to allow Operator to create ServiceMonitors false nodeSelector Kubernetes nodeSelector to add to the Deployment. {} priorityClassName Set the priority class for the rook operator deployment if desired nil pspEnable If true, create & use PSP resources false rbacEnable If true, create & use RBAC resources true resources Pod resource requests & limits {\"limits\":{\"cpu\":\"500m\",\"memory\":\"512Mi\"},\"requests\":{\"cpu\":\"100m\",\"memory\":\"128Mi\"}} scaleDownOperator If true, scale down the rook operator. This is useful for administrative actions where the rook operator must be scaled down, while using gitops style tooling to deploy your helm charts. false tolerations List of Kubernetes tolerations to add to the Deployment. 
[] unreachableNodeTolerationSeconds Delay to use for the node.kubernetes.io/unreachable pod failure toleration to override the Kubernetes default of 5 minutes 5 useOperatorHostNetwork If true, run rook operator on the host network nil","title":"Configuration"},{"location":"Helm-Charts/operator-chart/#development-build","text":"To deploy from a local build from your development environment: Build the Rook docker image: make Copy the image to your K8s cluster, such as with the docker save then the docker load commands Install the helm chart: 1 2 cd deploy/charts/rook-ceph helm install --create-namespace --namespace rook-ceph rook-ceph .","title":"Development Build"},{"location":"Helm-Charts/operator-chart/#uninstalling-the-chart","text":"To see the currently installed Rook chart: 1 helm ls --namespace rook-ceph To uninstall/delete the rook-ceph deployment: 1 helm delete --namespace rook-ceph rook-ceph The command removes all the Kubernetes components associated with the chart and deletes the release. After uninstalling you may want to clean up the CRDs as described on the teardown documentation . nodeAffinity and *NodeAffinity options should have the format \"role=storage,rook; storage=ceph\" or storage=;role=rook-example or storage=; ( checks only for presence of key ) \u21a9 \u21a9 \u21a9 \u21a9","title":"Uninstalling the Chart"},{"location":"Storage-Configuration/ceph-teardown/","text":"Cleaning up a Cluster \u00b6 If you want to tear down the cluster and bring up a new one, be aware of the following resources that will need to be cleaned up: The resources created under Rook's namespace (assume rook-ceph here): The Rook operator and cluster created by operator.yaml and cluster.yaml (the cluster CRD) /var/lib/rook/rook-ceph : Path on each host in the cluster where configuration is cached by the ceph mons and osds Note that if you changed the default namespaces or paths such as dataDirHostPath in the sample yaml files, you will need to adjust these namespaces and paths throughout these instructions. If you see issues tearing down the cluster, see the Troubleshooting section below. If you are tearing down a cluster frequently for development purposes, it is instead recommended to use an environment such as Minikube that can easily be reset without worrying about any of these steps. Delete the Block and File artifacts \u00b6 First you will need to clean up the resources created on top of the Rook cluster. These commands will clean up the resources from the block and file walkthroughs (unmount volumes, delete volume claims, etc). If you did not complete those parts of the walkthrough, you can skip these instructions: 1 2 3 4 5 6 kubectl delete -f ../wordpress.yaml kubectl delete -f ../mysql.yaml kubectl delete -n rook-ceph cephblockpool replicapool kubectl delete storageclass rook-ceph-block kubectl delete -f csi/cephfs/kube-registry.yaml kubectl delete storageclass csi-cephfs After those block and file resources have been cleaned up, you can then delete your Rook cluster. This is important to delete before removing the Rook operator and agent or else resources may not be cleaned up properly . Delete the CephCluster CRD \u00b6 Edit the CephCluster and add the cleanupPolicy WARNING: DATA WILL BE PERMANENTLY DELETED AFTER DELETING THE CephCluster CR WITH cleanupPolicy . 
1 kubectl -n rook-ceph patch cephcluster rook-ceph --type merge -p '{\"spec\":{\"cleanupPolicy\":{\"confirmation\":\"yes-really-destroy-data\"}}}' Once the cleanup policy is enabled, any new configuration changes in the CephCluster will be blocked. Nothing will happen until the deletion of the CR is requested, so this cleanupPolicy change can still be reverted if needed. Checkout more details about the cleanupPolicy here Delete the CephCluster CR. 1 kubectl -n rook-ceph delete cephcluster rook-ceph Verify that the cluster CR has been deleted before continuing to the next step. 1 kubectl -n rook-ceph get cephcluster If the cleanupPolicy was applied, then wait for the rook-ceph-cleanup jobs to be completed on all the nodes. These jobs will perform the following operations: Delete the namespace directory under dataDirHostPath , for example /var/lib/rook/rook-ceph , on all the nodes Wipe the data on the drives on all the nodes where OSDs were running in this cluster Note: The cleanup jobs might not start if the resources created on top of Rook Cluster are not deleted completely. See Delete the Operator and related Resources \u00b6 This will begin the process of the Rook Ceph operator and all other resources being cleaned up. This includes related resources such as the agent and discover daemonsets with the following commands: 1 2 3 kubectl delete -f operator.yaml kubectl delete -f common.yaml kubectl delete -f crds.yaml If the cleanupPolicy was applied and the cleanup jobs have completed on all the nodes, then the cluster tear down has been successful. If you skipped adding the cleanupPolicy then follow the manual steps mentioned below to tear down the cluster. Delete the data on hosts \u00b6 Attention The final cleanup step requires deleting files on each host in the cluster. All files under the dataDirHostPath property specified in the cluster CRD will need to be deleted. Otherwise, inconsistent state will remain when a new cluster is started. Connect to each machine and delete the namespace directory under dataDirHostPath , for example /var/lib/rook/rook-ceph . In the future this step will not be necessary when we build on the K8s local storage feature. If you modified the demo settings, additional cleanup is up to you for devices, host paths, etc. Zapping Devices \u00b6 Disks on nodes used by Rook for osds can be reset to a usable state with methods suggested below. Note that these scripts are not one-size-fits-all. Please use them with discretion to ensure you are not removing data unrelated to Rook and/or Ceph. Disks can be zapped fairly easily. A single disk can usually be cleared with some or all of the steps below. 1 2 3 4 5 6 7 8 9 10 11 12 13 DISK=\"/dev/sdX\" # Zap the disk to a fresh, usable state ( zap-all is important, b/c MBR has to be clean ) sgdisk --zap-all $DISK # Wipe a large portion of the beginning of the disk to remove more LVM metadata that may be present dd if=/dev/zero of=\"$DISK\" bs=1M count=100 oflag=direct,dsync # SSDs may be better cleaned with blkdiscard instead of dd blkdiscard $DISK # Inform the OS of partition table changes partprobe $DISK Ceph can leave LVM and device mapper data that can lock the disks, preventing the disks from being used again. These steps can help to free up old Ceph disks for re-use. Note that this only needs to be run once on each node. If you have only one Rook cluster and all Ceph disks are being wiped, run the following command. 
1 2 3 4 5 6 # This command hangs on some systems: with caution, 'dmsetup remove_all --force' can be used ls /dev/mapper/ceph-* | xargs -I% -- dmsetup remove % # ceph-volume setup can leave ceph- directories in /dev and /dev/mapper ( unnecessary clutter ) rm -rf /dev/ceph-* rm -rf /dev/mapper/ceph--* If disks are still reported locked, rebooting the node often helps clear LVM-related holds on disks. If there are multiple Ceph clusters and some disks are not wiped yet, it is necessary to manually determine which disks map to which device mapper devices. Troubleshooting \u00b6 If the cleanup instructions are not executed in the order above, or you otherwise have difficulty cleaning up the cluster, here are a few things to try. The most common issue cleaning up the cluster is that the rook-ceph namespace or the cluster CRD remain indefinitely in the terminating state. A namespace cannot be removed until all of its resources are removed, so look at which resources are pending termination. Look at the pods: 1 kubectl -n rook-ceph get pod If a pod is still terminating, you will need to wait or else attempt to forcefully terminate it ( kubectl delete pod  ). Now look at the cluster CRD: 1 kubectl -n rook-ceph get cephcluster If the cluster CRD still exists even though you have executed the delete command earlier, see the next section on removing the finalizer. Removing the Cluster CRD Finalizer \u00b6 When a Cluster CRD is created, a finalizer is added automatically by the Rook operator. The finalizer will allow the operator to ensure that before the cluster CRD is deleted, all block and file mounts will be cleaned up. Without proper cleanup, pods consuming the storage will be hung indefinitely until a system reboot. The operator is responsible for removing the finalizer after the mounts have been cleaned up. If for some reason the operator is not able to remove the finalizer (i.e., the operator is not running anymore), you can delete the finalizer manually with the following command: 1 2 3 4 for CRD in $(kubectl get crd -n rook-ceph | awk '/ceph.rook.io/ {print $1}'); do kubectl get -n rook-ceph \"$CRD\" -o name | \\ xargs -I {} kubectl patch -n rook-ceph {} --type merge -p '{\"metadata\":{\"finalizers\": []}}' done This command will patch the following CRDs on v1.3: 1 2 3 4 5 6 cephblockpools.ceph.rook.io cephclients.ceph.rook.io cephfilesystems.ceph.rook.io cephnfses.ceph.rook.io cephobjectstores.ceph.rook.io cephobjectstoreusers.ceph.rook.io Within a few seconds you should see that the cluster CRD has been deleted and will no longer block other cleanup such as deleting the rook-ceph namespace. If the namespace is still stuck in Terminating state, you can check which resources are holding up the deletion and remove the finalizers and delete those 1 2 kubectl api-resources --verbs=list --namespaced -o name \\ | xargs -n 1 kubectl get --show-kind --ignore-not-found -n rook-ceph Remove critical resource finalizers \u00b6 Rook adds a finalizer ceph.rook.io/disaster-protection to resources critical to the Ceph cluster so that the resources will not be accidentally deleted. The operator is responsible for removing the finalizers when a CephCluster is deleted. 
If for some reason the operator is not able to remove the finalizers (i.e., the operator is not running anymore), you can remove the finalizers manually with the following commands: 1 2 kubectl -n rook-ceph patch configmap rook-ceph-mon-endpoints --type merge -p '{\"metadata\":{\"finalizers\": []}}' kubectl -n rook-ceph patch secrets rook-ceph-mon --type merge -p '{\"metadata\":{\"finalizers\": []}}'","title":"Cleanup"},{"location":"Storage-Configuration/ceph-teardown/#cleaning-up-a-cluster","text":"If you want to tear down the cluster and bring up a new one, be aware of the following resources that will need to be cleaned up: The resources created under Rook's namespace (assume rook-ceph here): The Rook operator and cluster created by operator.yaml and cluster.yaml (the cluster CRD) /var/lib/rook/rook-ceph : Path on each host in the cluster where configuration is cached by the ceph mons and osds Note that if you changed the default namespaces or paths such as dataDirHostPath in the sample yaml files, you will need to adjust these namespaces and paths throughout these instructions. If you see issues tearing down the cluster, see the Troubleshooting section below. If you are tearing down a cluster frequently for development purposes, it is instead recommended to use an environment such as Minikube that can easily be reset without worrying about any of these steps.","title":"Cleaning up a Cluster"},{"location":"Storage-Configuration/ceph-teardown/#delete-the-block-and-file-artifacts","text":"First you will need to clean up the resources created on top of the Rook cluster. These commands will clean up the resources from the block and file walkthroughs (unmount volumes, delete volume claims, etc). If you did not complete those parts of the walkthrough, you can skip these instructions: 1 2 3 4 5 6 kubectl delete -f ../wordpress.yaml kubectl delete -f ../mysql.yaml kubectl delete -n rook-ceph cephblockpool replicapool kubectl delete storageclass rook-ceph-block kubectl delete -f csi/cephfs/kube-registry.yaml kubectl delete storageclass csi-cephfs After those block and file resources have been cleaned up, you can then delete your Rook cluster. This is important to delete before removing the Rook operator and agent or else resources may not be cleaned up properly .","title":"Delete the Block and File artifacts"},{"location":"Storage-Configuration/ceph-teardown/#delete-the-cephcluster-crd","text":"Edit the CephCluster and add the cleanupPolicy WARNING: DATA WILL BE PERMANENTLY DELETED AFTER DELETING THE CephCluster CR WITH cleanupPolicy . 1 kubectl -n rook-ceph patch cephcluster rook-ceph --type merge -p '{\"spec\":{\"cleanupPolicy\":{\"confirmation\":\"yes-really-destroy-data\"}}}' Once the cleanup policy is enabled, any new configuration changes in the CephCluster will be blocked. Nothing will happen until the deletion of the CR is requested, so this cleanupPolicy change can still be reverted if needed. Checkout more details about the cleanupPolicy here Delete the CephCluster CR. 1 kubectl -n rook-ceph delete cephcluster rook-ceph Verify that the cluster CR has been deleted before continuing to the next step. 1 kubectl -n rook-ceph get cephcluster If the cleanupPolicy was applied, then wait for the rook-ceph-cleanup jobs to be completed on all the nodes. 
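As a rough sketch of how you might watch for those cleanup jobs to finish (the job names vary per node, so the placeholder below is illustrative, not an actual name):

# List the jobs created in the cluster namespace; the cleanup jobs appear here once they start
kubectl -n rook-ceph get jobs
# Wait for a specific cleanup job to complete, substituting a name from the listing above
kubectl -n rook-ceph wait --for=condition=complete job/<cleanup-job-name> --timeout=600s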
These jobs will perform the following operations: Delete the namespace directory under dataDirHostPath , for example /var/lib/rook/rook-ceph , on all the nodes Wipe the data on the drives on all the nodes where OSDs were running in this cluster Note: The cleanup jobs might not start if the resources created on top of Rook Cluster are not deleted completely. See","title":"Delete the CephCluster CRD"},{"location":"Storage-Configuration/ceph-teardown/#delete-the-operator-and-related-resources","text":"This will begin the process of the Rook Ceph operator and all other resources being cleaned up. This includes related resources such as the agent and discover daemonsets with the following commands: 1 2 3 kubectl delete -f operator.yaml kubectl delete -f common.yaml kubectl delete -f crds.yaml If the cleanupPolicy was applied and the cleanup jobs have completed on all the nodes, then the cluster tear down has been successful. If you skipped adding the cleanupPolicy then follow the manual steps mentioned below to tear down the cluster.","title":"Delete the Operator and related Resources"},{"location":"Storage-Configuration/ceph-teardown/#delete-the-data-on-hosts","text":"Attention The final cleanup step requires deleting files on each host in the cluster. All files under the dataDirHostPath property specified in the cluster CRD will need to be deleted. Otherwise, inconsistent state will remain when a new cluster is started. Connect to each machine and delete the namespace directory under dataDirHostPath , for example /var/lib/rook/rook-ceph . In the future this step will not be necessary when we build on the K8s local storage feature. If you modified the demo settings, additional cleanup is up to you for devices, host paths, etc.","title":"Delete the data on hosts"},{"location":"Storage-Configuration/ceph-teardown/#zapping-devices","text":"Disks on nodes used by Rook for osds can be reset to a usable state with methods suggested below. Note that these scripts are not one-size-fits-all. Please use them with discretion to ensure you are not removing data unrelated to Rook and/or Ceph. Disks can be zapped fairly easily. A single disk can usually be cleared with some or all of the steps below. 1 2 3 4 5 6 7 8 9 10 11 12 13 DISK=\"/dev/sdX\" # Zap the disk to a fresh, usable state ( zap-all is important, b/c MBR has to be clean ) sgdisk --zap-all $DISK # Wipe a large portion of the beginning of the disk to remove more LVM metadata that may be present dd if=/dev/zero of=\"$DISK\" bs=1M count=100 oflag=direct,dsync # SSDs may be better cleaned with blkdiscard instead of dd blkdiscard $DISK # Inform the OS of partition table changes partprobe $DISK Ceph can leave LVM and device mapper data that can lock the disks, preventing the disks from being used again. These steps can help to free up old Ceph disks for re-use. Note that this only needs to be run once on each node. If you have only one Rook cluster and all Ceph disks are being wiped, run the following command. 1 2 3 4 5 6 # This command hangs on some systems: with caution, 'dmsetup remove_all --force' can be used ls /dev/mapper/ceph-* | xargs -I% -- dmsetup remove % # ceph-volume setup can leave ceph- directories in /dev and /dev/mapper ( unnecessary clutter ) rm -rf /dev/ceph-* rm -rf /dev/mapper/ceph--* If disks are still reported locked, rebooting the node often helps clear LVM-related holds on disks. 
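To double-check that a node is actually free of leftover Ceph state before reusing its disks, a minimal sketch (read-only checks; /dev/sdX is a placeholder as above):

# Remaining ceph-* device mapper entries indicate LVM state that still holds the disk
dmsetup ls | grep ceph || echo \"no ceph device mapper entries\"
# Inspect the disk for leftover filesystem or LVM signatures
lsblk -f /dev/sdX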
If there are multiple Ceph clusters and some disks are not wiped yet, it is necessary to manually determine which disks map to which device mapper devices.","title":"Zapping Devices"},{"location":"Storage-Configuration/ceph-teardown/#troubleshooting","text":"If the cleanup instructions are not executed in the order above, or you otherwise have difficulty cleaning up the cluster, here are a few things to try. The most common issue cleaning up the cluster is that the rook-ceph namespace or the cluster CRD remain indefinitely in the terminating state. A namespace cannot be removed until all of its resources are removed, so look at which resources are pending termination. Look at the pods: 1 kubectl -n rook-ceph get pod If a pod is still terminating, you will need to wait or else attempt to forcefully terminate it ( kubectl delete pod  ). Now look at the cluster CRD: 1 kubectl -n rook-ceph get cephcluster If the cluster CRD still exists even though you have executed the delete command earlier, see the next section on removing the finalizer.","title":"Troubleshooting"},{"location":"Storage-Configuration/ceph-teardown/#removing-the-cluster-crd-finalizer","text":"When a Cluster CRD is created, a finalizer is added automatically by the Rook operator. The finalizer will allow the operator to ensure that before the cluster CRD is deleted, all block and file mounts will be cleaned up. Without proper cleanup, pods consuming the storage will be hung indefinitely until a system reboot. The operator is responsible for removing the finalizer after the mounts have been cleaned up. If for some reason the operator is not able to remove the finalizer (i.e., the operator is not running anymore), you can delete the finalizer manually with the following command: 1 2 3 4 for CRD in $(kubectl get crd -n rook-ceph | awk '/ceph.rook.io/ {print $1}'); do kubectl get -n rook-ceph \"$CRD\" -o name | \\ xargs -I {} kubectl patch -n rook-ceph {} --type merge -p '{\"metadata\":{\"finalizers\": []}}' done This command will patch the following CRDs on v1.3: 1 2 3 4 5 6 cephblockpools.ceph.rook.io cephclients.ceph.rook.io cephfilesystems.ceph.rook.io cephnfses.ceph.rook.io cephobjectstores.ceph.rook.io cephobjectstoreusers.ceph.rook.io Within a few seconds you should see that the cluster CRD has been deleted and will no longer block other cleanup such as deleting the rook-ceph namespace. If the namespace is still stuck in Terminating state, you can check which resources are holding up the deletion and remove the finalizers and delete those 1 2 kubectl api-resources --verbs=list --namespaced -o name \\ | xargs -n 1 kubectl get --show-kind --ignore-not-found -n rook-ceph","title":"Removing the Cluster CRD Finalizer"},{"location":"Storage-Configuration/ceph-teardown/#remove-critical-resource-finalizers","text":"Rook adds a finalizer ceph.rook.io/disaster-protection to resources critical to the Ceph cluster so that the resources will not be accidentally deleted. The operator is responsible for removing the finalizers when a CephCluster is deleted. 
If for some reason the operator is not able to remove the finalizers (i.e., the operator is not running anymore), you can remove the finalizers manually with the following commands: 1 2 kubectl -n rook-ceph patch configmap rook-ceph-mon-endpoints --type merge -p '{\"metadata\":{\"finalizers\": []}}' kubectl -n rook-ceph patch secrets rook-ceph-mon --type merge -p '{\"metadata\":{\"finalizers\": []}}'","title":"Remove critical resource finalizers"},{"location":"Storage-Configuration/Advanced/ceph-configuration/","text":"These examples show how to perform advanced configuration tasks on your Rook storage cluster. Prerequisites \u00b6 Most of the examples make use of the ceph client command. A quick way to use the Ceph client suite is from a Rook Toolbox container . The Kubernetes based examples assume Rook OSD pods are in the rook-ceph namespace. If you run them in a different namespace, modify kubectl -n rook-ceph [...] to fit your situation. Using alternate namespaces \u00b6 If you wish to deploy the Rook Operator and/or Ceph clusters to namespaces other than the default rook-ceph , the manifests are commented to allow for easy sed replacements. Change ROOK_CLUSTER_NAMESPACE to tailor the manifests for additional Ceph clusters. You can choose to also change ROOK_OPERATOR_NAMESPACE to create a new Rook Operator for each Ceph cluster (don't forget to set ROOK_CURRENT_NAMESPACE_ONLY ), or you can leave it at the same value for every Ceph cluster if you only wish to have one Operator manage all Ceph clusters. This will help you manage namespaces more easily, but you should still make sure the resources are configured to your liking. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 cd deploy/examples export ROOK_OPERATOR_NAMESPACE=\"rook-ceph\" export ROOK_CLUSTER_NAMESPACE=\"rook-ceph\" sed -i.bak \\ -e \"s/\\(.*\\):.*# namespace:operator/\\1: $ROOK_OPERATOR_NAMESPACE # namespace:operator/g\" \\ -e \"s/\\(.*\\):.*# namespace:cluster/\\1: $ROOK_CLUSTER_NAMESPACE # namespace:cluster/g\" \\ -e \"s/\\(.*serviceaccount\\):.*:\\(.*\\) # serviceaccount:namespace:operator/\\1:$ROOK_OPERATOR_NAMESPACE:\\2 # serviceaccount:namespace:operator/g\" \\ -e \"s/\\(.*serviceaccount\\):.*:\\(.*\\) # serviceaccount:namespace:cluster/\\1:$ROOK_CLUSTER_NAMESPACE:\\2 # serviceaccount:namespace:cluster/g\" \\ -e \"s/\\(.*\\): [-_A-Za-z0-9]*\\.\\(.*\\) # driver:namespace:operator/\\1: $ROOK_OPERATOR_NAMESPACE.\\2 # driver:namespace:operator/g\" \\ -e \"s/\\(.*\\): [-_A-Za-z0-9]*\\.\\(.*\\) # driver:namespace:cluster/\\1: $ROOK_CLUSTER_NAMESPACE.\\2 # driver:namespace:cluster/g\" \\ common.yaml operator.yaml cluster.yaml # add other files or change these as desired for your config # You need to use ` apply ` for all Ceph clusters after the first if you have only one Operator kubectl apply -f common.yaml -f operator.yaml -f cluster.yaml # add other files as desired for yourconfig Deploying a second cluster \u00b6 If you wish to create a new CephCluster in a different namespace than rook-ceph while using a single operator to manage both clusters execute the following: 1 2 3 cd deploy/examples NAMESPACE=rook-ceph-secondary envsubst < common-second-cluster.yaml | kubectl create -f - This will create all the necessary RBACs as well as the new namespace. The script assumes that common.yaml was already created. When you create the second CephCluster CR, use the same NAMESPACE and the operator will configure the second cluster. 
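For illustration only, a minimal sketch of what the second CephCluster CR might look like (the namespace matches the NAMESPACE value used above; the name, Ceph image and dataDirHostPath are placeholders to adjust for your environment):

cat <<EOF | kubectl create -f -
apiVersion: ceph.rook.io/v1
kind: CephCluster
metadata:
  name: rook-ceph-secondary
  namespace: rook-ceph-secondary # must match the NAMESPACE used with envsubst above
spec:
  cephVersion:
    image: quay.io/ceph/ceph:v17 # placeholder; use the same Ceph release as your first cluster
  dataDirHostPath: /var/lib/rook-secondary # must not collide with the first cluster's path
  mon:
    count: 3
  storage:
    useAllNodes: true
    useAllDevices: true
EOF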
Log Collection \u00b6 All Rook logs can be collected in a Kubernetes environment with the following command: 1 2 3 4 5 6 7 8 9 for p in $(kubectl -n rook-ceph get pods -o jsonpath='{.items[*].metadata.name}') do for c in $(kubectl -n rook-ceph get pod ${p} -o jsonpath='{.spec.containers[*].name}') do echo \"BEGIN logs from pod: ${p} ${c}\" kubectl -n rook-ceph logs -c ${c} ${p} echo \"END logs from pod: ${p} ${c}\" done done This gets the logs for every container in every Rook pod and then compresses them into a .gz archive for easy sharing. Note that instead of gzip , you could pipe to less or to a single text file. OSD Information \u00b6 Keeping track of OSDs and their underlying storage devices can be difficult. The following scripts will clear things up quickly. Kubernetes \u00b6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 # Get OSD Pods # This uses the example/default cluster name \"rook\" OSD_PODS=$(kubectl get pods --all-namespaces -l \\ app=rook-ceph-osd,rook_cluster=rook-ceph -o jsonpath='{.items[*].metadata.name}') # Find node and drive associations from OSD pods for pod in $(echo ${OSD_PODS}) do echo \"Pod: ${pod}\" echo \"Node: $(kubectl -n rook-ceph get pod ${pod} -o jsonpath='{.spec.nodeName}')\" kubectl -n rook-ceph exec ${pod} -- sh -c '\\ for i in /var/lib/ceph/osd/ceph-*; do [ -f ${i}/ready ] || continue echo -ne \"-$(basename ${i}) \" echo $(lsblk -n -o NAME,SIZE ${i}/block 2> /dev/null || \\ findmnt -n -v -o SOURCE,SIZE -T ${i}) $(cat ${i}/type) done | sort -V echo' done The output should look something like this. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 Pod: osd-m2fz2 Node: node1.zbrbdl -osd0 sda3 557.3G bluestore -osd1 sdf3 110.2G bluestore -osd2 sdd3 277.8G bluestore -osd3 sdb3 557.3G bluestore -osd4 sde3 464.2G bluestore -osd5 sdc3 557.3G bluestore Pod: osd-nxxnq Node: node3.zbrbdl -osd6 sda3 110.7G bluestore -osd17 sdd3 1.8T bluestore -osd18 sdb3 231.8G bluestore -osd19 sdc3 231.8G bluestore Pod: osd-tww1h Node: node2.zbrbdl -osd7 sdc3 464.2G bluestore -osd8 sdj3 557.3G bluestore -osd9 sdf3 66.7G bluestore -osd10 sdd3 464.2G bluestore -osd11 sdb3 147.4G bluestore -osd12 sdi3 557.3G bluestore -osd13 sdk3 557.3G bluestore -osd14 sde3 66.7G bluestore -osd15 sda3 110.2G bluestore -osd16 sdh3 135.1G bluestore Separate Storage Groups \u00b6 Attention Manually setting this is deprecated ; the deviceClass property can be used on Pool structures in CephBlockPool , CephFilesystem and CephObjectStore CRD objects. By default Rook/Ceph puts all storage under one replication rule in the CRUSH Map which provides the maximum amount of storage capacity for a cluster. If you would like to use different storage endpoints for different purposes, you'll have to create separate storage groups. In the following example we will separate SSD drives from spindle-based drives, a common practice for those looking to target certain workloads onto faster (database) or slower (file archive) storage. Configuring Pools \u00b6 Placement Group Sizing \u00b6 Note Since Ceph Nautilus (v14.x), you can use the Ceph MGR pg_autoscaler module to auto scale the PGs as needed. It is highly advisable to configure a default pg_num value on a per-pool basis. If you want to enable this feature, please refer to Default PG and PGP counts . 
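As a short sketch of using the autoscaler from the toolbox (the rbd pool name is only an example; recent Ceph releases enable the module by default):

# Enable the pg_autoscaler mgr module (a no-op if it is already enabled)
ceph mgr module enable pg_autoscaler
# Turn autoscaling on for an existing pool
ceph osd pool set rbd pg_autoscale_mode on
# Review what the autoscaler would do for each pool
ceph osd pool autoscale-status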
The general rules for deciding how many PGs your pool(s) should contain are: Fewer than 5 OSDs set pg_num to 128 Between 5 and 10 OSDs set pg_num to 512 Between 10 and 50 OSDs set pg_num to 1024 If you have more than 50 OSDs, you need to understand the tradeoffs and how to calculate the pg_num value by yourself. For calculating pg_num yourself, please make use of the pgcalc tool . Setting PG Count \u00b6 Be sure to read the placement group sizing section before changing the number of PGs. 1 2 # Set the number of PGs in the rbd pool to 512 ceph osd pool set rbd pg_num 512 Custom ceph.conf Settings \u00b6 Warning The advised method for controlling Ceph configuration is to manually use the Ceph CLI or the Ceph dashboard because this offers the most flexibility. It is highly recommended that this only be used when absolutely necessary and that the config be reset to an empty string if/when the configurations are no longer necessary. Configurations in the config file will make the Ceph cluster less configurable from the CLI and dashboard and may make future tuning or debugging difficult. Setting configs via Ceph's CLI requires that at least one mon be available for the configs to be set, and setting configs via dashboard requires at least one mgr to be available. Ceph also has a number of very advanced settings that cannot be modified easily via the CLI or dashboard. In order to set configurations before monitors are available or to set advanced configuration settings, the rook-config-override ConfigMap exists, and the config field can be set with the contents of a ceph.conf file. The contents will be propagated to all mon, mgr, OSD, MDS, and RGW daemons as an /etc/ceph/ceph.conf file. Warning Rook performs no validation on the config, so the validity of the settings is the user's responsibility. If the rook-config-override ConfigMap is created before the cluster is started, the Ceph daemons will automatically pick up the settings. If you add the settings to the ConfigMap after the cluster has been initialized, each daemon will need to be restarted where you want the settings applied: mons: ensure all three mons are online and healthy before restarting each mon pod, one at a time. mgrs: the pods are stateless and can be restarted as needed, but note that this will disrupt the Ceph dashboard during restart. OSDs: restart the pods by deleting them, one at a time, and running ceph -s between each restart to ensure the cluster goes back to \"active/clean\" state. RGW: the pods are stateless and can be restarted as needed. MDS: the pods are stateless and can be restarted as needed. After the pod restart, the new settings should be in effect. Note that if the ConfigMap in the Ceph cluster's namespace is created before the cluster is created, the daemons will pick up the settings at first launch. To automate the restart of the Ceph daemon pods, you will need to trigger an update to the pod specs. The simplest way to trigger the update is to add annotations or labels to the CephCluster CR for the daemons you want to restart. The operator will then proceed with a rolling update, similar to any other update to the cluster. Example \u00b6 In this example we will set the default pool size to two, and tell OSD daemons not to change the weight of OSDs on startup. Warning Modify Ceph settings carefully. You are leaving the sandbox tested by Rook. Changing the settings could result in unhealthy daemons or even data loss if used incorrectly. 
When the Rook Operator creates a cluster, a placeholder ConfigMap is created that will allow you to override Ceph configuration settings. When the daemon pods are started, the settings specified in this ConfigMap will be merged with the default settings generated by Rook. The default override settings are blank. Cutting out the extraneous properties, we would see the following defaults after creating a cluster: 1 kubectl -n rook-ceph get ConfigMap rook-config-override -o yaml 1 2 3 4 5 6 7 kind : ConfigMap apiVersion : v1 metadata : name : rook-config-override namespace : rook-ceph data : config : \"\" To apply your desired configuration, you will need to update this ConfigMap. The next time the daemon pod(s) start, they will use the updated configs. 1 kubectl -n rook-ceph edit configmap rook-config-override Modify the settings and save. Each line you add should be indented from the config property as such: 1 2 3 4 5 6 7 8 9 10 apiVersion : v1 kind : ConfigMap metadata : name : rook-config-override namespace : rook-ceph data : config : | [global] osd crush update on start = false osd pool default size = 2 Custom CSI ceph.conf Settings \u00b6 Warning It is highly recommended to use the default setting that comes with CephCSI and this can only be used when absolutely necessary. The ceph.conf should be reset back to default values if/when the configurations are no longer necessary. If the csi-ceph-conf-override ConfigMap is created before the cluster is started, the CephCSI pods will automatically pick up the settings. If you add the settings to the ConfigMap after the cluster has been initialized, you can restart the Rook operator pod and wait for Rook to recreate CSI pods to take immediate effect. After the CSI pods are restarted, the new settings should be in effect. Example CSI ceph.conf Settings \u00b6 In this Example we will set the rbd_validate_pool to false to skip rbd pool validation. Warning Modify Ceph settings carefully to avoid modifying the default configuration. Changing the settings could result in unexpected results if used incorrectly. 1 kubectl create -f csi-ceph-conf-override.yaml Restart the Rook operator pod and wait for CSI pods to be recreated. OSD CRUSH Settings \u00b6 A useful view of the CRUSH Map is generated with the following command: 1 ceph osd tree In this section we will be tweaking some of the values seen in the output. OSD Weight \u00b6 The CRUSH weight controls the ratio of data that should be distributed to each OSD. This also means a higher or lower amount of disk I/O operations for an OSD with higher/lower weight, respectively. By default OSDs get a weight relative to their storage capacity, which maximizes overall cluster capacity by filling all drives at the same rate, even if drive sizes vary. This should work for most use-cases, but the following situations could warrant weight changes: Your cluster has some relatively slow OSDs or nodes. Lowering their weight can reduce the impact of this bottleneck. You're using bluestore drives provisioned with Rook v0.3.1 or older. In this case you may notice OSD weights did not get set relative to their storage capacity. Changing the weight can fix this and maximize cluster capacity. This example sets the weight of osd.0 which is 600GiB 1 ceph osd crush reweight osd.0 .600 OSD Primary Affinity \u00b6 When pools are set with a size setting greater than one, data is replicated between nodes and OSDs. For every chunk of data a Primary OSD is selected to be used for reading that data to be sent to clients. 
You can control how likely it is for an OSD to become a Primary using the Primary Affinity setting. This is similar to the OSD weight setting, except it only affects reads on the storage device, not capacity or writes. In this example we will ensure that osd.0 is only selected as Primary if all other OSDs holding data replicas are unavailable: 1 ceph osd primary-affinity osd.0 0 OSD Dedicated Network \u00b6 It is possible to configure ceph to leverage a dedicated network for the OSDs to communicate across. A useful overview is the CEPH Networks section of the Ceph documentation. If you declare a cluster network, OSDs will route heartbeat, object replication and recovery traffic over the cluster network. This may improve performance compared to using a single network, especially when slower network technologies are used, with the tradeoff of additional expense and subtle failure modes. Two changes are necessary to the configuration to enable this capability: Use hostNetwork in the cluster configuration \u00b6 Enable the hostNetwork setting in the Ceph Cluster CRD configuration . For example, 1 2 network : provider : host Important Changing this setting is not supported in a running Rook cluster. Host networking should be configured when the cluster is first created. Define the subnets to use for public and private OSD networks \u00b6 Edit the rook-config-override configmap to define the custom network configuration: 1 kubectl -n rook-ceph edit configmap rook-config-override In the editor, add a custom configuration to instruct ceph which subnet is the public network and which subnet is the private network. For example: 1 2 3 4 5 6 7 8 apiVersion : v1 data : config : | [global] public network = 10.0.7.0/24 cluster network = 10.0.10.0/24 public addr = \"\" cluster addr = \"\" After applying the updated rook-config-override configmap, it will be necessary to restart the OSDs by deleting the OSD pods in order to apply the change. Restart the OSD pods by deleting them, one at a time, and running ceph -s between each restart to ensure the cluster goes back to \"active/clean\" state. Phantom OSD Removal \u00b6 If you have OSDs which are not showing any disks, you can remove those \"Phantom OSDs\" by following the instructions below. To check for \"Phantom OSDs\", you can run (example output): 1 2 3 4 5 6 $ ceph osd tree ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF -1 57.38062 root default -13 7.17258 host node1.example.com 2 hdd 3.61859 osd.2 up 1.00000 1.00000 -7 0 host node2.example.com down 0 1.00000 The host node2.example.com in the output has no disks, so it is most likely a \"Phantom OSD\". Now to remove it, use the ID in the first column of the output and replace  with it. In the example output above the ID would be -7 . The commands are: 1 2 3 4 ceph osd out  ceph osd crush remove osd. ceph auth del osd. ceph osd rm  To recheck that the Phantom OSD was removed, re-run the following command and check if the OSD with the ID doesn't show up anymore: 1 ceph osd tree Auto Expansion of OSDs \u00b6 Prerequisites for Auto Expansion of OSDs \u00b6 1) A PVC-based cluster deployed in a dynamic provisioning environment with a storageClassDeviceSet . 2) Create the Rook Toolbox . Note Prometheus Operator and Prometheus instances are Prerequisites that are created by the auto-grow-storage script. 
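As a quick, hedged sanity check of these prerequisites before running the script (app=rook-ceph-tools is the label Rook normally applies to the toolbox; adjust if you customized it):

# Prerequisite 1: the OSDs should be backed by PVCs created from the storageClassDeviceSet
kubectl -n rook-ceph get pvc
# Prerequisite 2: the toolbox pod must be running
kubectl -n rook-ceph get pods -l app=rook-ceph-tools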
To scale OSDs Vertically \u00b6 Run the following script to auto-grow the size of OSDs on a PVC-based Rook cluster whenever the OSDs have reached the storage near-full threshold. 1 tests/scripts/auto-grow-storage.sh size --max maxSize --growth-rate percent growth-rate percentage represents the percent increase you want in the OSD capacity and maxSize represents the maximum disk size. For example, if you need to increase the size of OSDs by 30% and the max disk size is 1Ti: 1 ./auto-grow-storage.sh size --max 1Ti --growth-rate 30 To scale OSDs Horizontally \u00b6 Run the following script to auto-grow the number of OSDs on a PVC-based Rook cluster whenever the OSDs have reached the storage near-full threshold. 1 tests/scripts/auto-grow-storage.sh count --max maxCount --count rate The count value represents the number of OSDs you need to add and maxCount represents the number of disks a storage cluster will support. For example, if you need to increase the number of OSDs by 3 and maxCount is 10: 1 ./auto-grow-storage.sh count --max 10 --count 3\",\"title\":\"Ceph Configuration\"},{\"location\":\"Storage-Configuration/Advanced/ceph-configuration/#prerequisites\",\"text\":\"Most of the examples make use of the ceph client command. A quick way to use the Ceph client suite is from a Rook Toolbox container . The Kubernetes based examples assume Rook OSD pods are in the rook-ceph namespace. If you run them in a different namespace, modify kubectl -n rook-ceph [...] to fit your situation.\",\"title\":\"Prerequisites\"},{\"location\":\"Storage-Configuration/Advanced/ceph-configuration/#using-alternate-namespaces\",\"text\":\"If you wish to deploy the Rook Operator and/or Ceph clusters to namespaces other than the default rook-ceph , the manifests are commented to allow for easy sed replacements. Change ROOK_CLUSTER_NAMESPACE to tailor the manifests for additional Ceph clusters. You can choose to also change ROOK_OPERATOR_NAMESPACE to create a new Rook Operator for each Ceph cluster (don't forget to set ROOK_CURRENT_NAMESPACE_ONLY ), or you can leave it at the same value for every Ceph cluster if you only wish to have one Operator manage all Ceph clusters. This will help you manage namespaces more easily, but you should still make sure the resources are configured to your liking. 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 cd deploy/examples export ROOK_OPERATOR_NAMESPACE=\"rook-ceph\" export ROOK_CLUSTER_NAMESPACE=\"rook-ceph\" sed -i.bak \\ -e \"s/\\(.*\\):.*# namespace:operator/\\1: $ROOK_OPERATOR_NAMESPACE # namespace:operator/g\" \\ -e \"s/\\(.*\\):.*# namespace:cluster/\\1: $ROOK_CLUSTER_NAMESPACE # namespace:cluster/g\" \\ -e \"s/\\(.*serviceaccount\\):.*:\\(.*\\) # serviceaccount:namespace:operator/\\1:$ROOK_OPERATOR_NAMESPACE:\\2 # serviceaccount:namespace:operator/g\" \\ -e \"s/\\(.*serviceaccount\\):.*:\\(.*\\) # serviceaccount:namespace:cluster/\\1:$ROOK_CLUSTER_NAMESPACE:\\2 # serviceaccount:namespace:cluster/g\" \\ -e \"s/\\(.*\\): [-_A-Za-z0-9]*\\.\\(.*\\) # driver:namespace:operator/\\1: $ROOK_OPERATOR_NAMESPACE.\\2 # driver:namespace:operator/g\" \\ -e \"s/\\(.*\\): [-_A-Za-z0-9]*\\.\\(.*\\) # driver:namespace:cluster/\\1: $ROOK_CLUSTER_NAMESPACE.\\2 # driver:namespace:cluster/g\" \\ common.yaml operator.yaml cluster.yaml # add other files or change these as desired for your config # You need to use ` apply ` for all Ceph clusters after the first if you have only one Operator kubectl apply -f common.yaml -f operator.yaml -f cluster.yaml # add other files as desired for yourconfig","title":"Using alternate namespaces"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#deploying-a-second-cluster","text":"If you wish to create a new CephCluster in a different namespace than rook-ceph while using a single operator to manage both clusters execute the following: 1 2 3 cd deploy/examples NAMESPACE=rook-ceph-secondary envsubst < common-second-cluster.yaml | kubectl create -f - This will create all the necessary RBACs as well as the new namespace. The script assumes that common.yaml was already created. When you create the second CephCluster CR, use the same NAMESPACE and the operator will configure the second cluster.","title":"Deploying a second cluster"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#log-collection","text":"All Rook logs can be collected in a Kubernetes environment with the following command: 1 2 3 4 5 6 7 8 9 for p in $(kubectl -n rook-ceph get pods -o jsonpath='{.items[*].metadata.name}') do for c in $(kubectl -n rook-ceph get pod ${p} -o jsonpath='{.spec.containers[*].name}') do echo \"BEGIN logs from pod: ${p} ${c}\" kubectl -n rook-ceph logs -c ${c} ${p} echo \"END logs from pod: ${p} ${c}\" done done This gets the logs for every container in every Rook pod and then compresses them into a .gz archive for easy sharing. Note that instead of gzip , you could instead pipe to less or to a single text file.","title":"Log Collection"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#osd-information","text":"Keeping track of OSDs and their underlying storage devices can be difficult. 
The following scripts will clear things up quickly.","title":"OSD Information"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#kubernetes","text":"1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 # Get OSD Pods # This uses the example/default cluster name \"rook\" OSD_PODS=$(kubectl get pods --all-namespaces -l \\ app=rook-ceph-osd,rook_cluster=rook-ceph -o jsonpath='{.items[*].metadata.name}') # Find node and drive associations from OSD pods for pod in $(echo ${OSD_PODS}) do echo \"Pod: ${pod}\" echo \"Node: $(kubectl -n rook-ceph get pod ${pod} -o jsonpath='{.spec.nodeName}')\" kubectl -n rook-ceph exec ${pod} -- sh -c '\\ for i in /var/lib/ceph/osd/ceph-*; do [ -f ${i}/ready ] || continue echo -ne \"-$(basename ${i}) \" echo $(lsblk -n -o NAME,SIZE ${i}/block 2> /dev/null || \\ findmnt -n -v -o SOURCE,SIZE -T ${i}) $(cat ${i}/type) done | sort -V echo' done The output should look something like this. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 Pod: osd-m2fz2 Node: node1.zbrbdl -osd0 sda3 557.3G bluestore -osd1 sdf3 110.2G bluestore -osd2 sdd3 277.8G bluestore -osd3 sdb3 557.3G bluestore -osd4 sde3 464.2G bluestore -osd5 sdc3 557.3G bluestore Pod: osd-nxxnq Node: node3.zbrbdl -osd6 sda3 110.7G bluestore -osd17 sdd3 1.8T bluestore -osd18 sdb3 231.8G bluestore -osd19 sdc3 231.8G bluestore Pod: osd-tww1h Node: node2.zbrbdl -osd7 sdc3 464.2G bluestore -osd8 sdj3 557.3G bluestore -osd9 sdf3 66.7G bluestore -osd10 sdd3 464.2G bluestore -osd11 sdb3 147.4G bluestore -osd12 sdi3 557.3G bluestore -osd13 sdk3 557.3G bluestore -osd14 sde3 66.7G bluestore -osd15 sda3 110.2G bluestore -osd16 sdh3 135.1G bluestore","title":"Kubernetes"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#separate-storage-groups","text":"Attention It is deprecated to manually need to set this , the deviceClass property can be used on Pool structures in CephBlockPool , CephFilesystem and CephObjectStore CRD objects. By default Rook/Ceph puts all storage under one replication rule in the CRUSH Map which provides the maximum amount of storage capacity for a cluster. If you would like to use different storage endpoints for different purposes, you'll have to create separate storage groups. In the following example we will separate SSD drives from spindle-based drives, a common practice for those looking to target certain workloads onto faster (database) or slower (file archive) storage.","title":"Separate Storage Groups"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#configuring-pools","text":"","title":"Configuring Pools"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#placement-group-sizing","text":"Note Since Ceph Nautilus (v14.x), you can use the Ceph MGR pg_autoscaler module to auto scale the PGs as needed. It is highly advisable to configure default pg_num value on per-pool basis, If you want to enable this feature, please refer to Default PG and PGP counts . The general rules for deciding how many PGs your pool(s) should contain is: Fewer than 5 OSDs set pg_num to 128 Between 5 and 10 OSDs set pg_num to 512 Between 10 and 50 OSDs set pg_num to 1024 If you have more than 50 OSDs, you need to understand the tradeoffs and how to calculate the pg_num value by yourself. 
For calculating pg_num yourself please make use of the pgcalc tool .","title":"Placement Group Sizing"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#setting-pg-count","text":"Be sure to read the placement group sizing section before changing the number of PGs. 1 2 # Set the number of PGs in the rbd pool to 512 ceph osd pool set rbd pg_num 512","title":"Setting PG Count"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#custom-cephconf-settings","text":"Warning The advised method for controlling Ceph configuration is to manually use the Ceph CLI or the Ceph dashboard because this offers the most flexibility. It is highly recommended that this only be used when absolutely necessary and that the config be reset to an empty string if/when the configurations are no longer necessary. Configurations in the config file will make the Ceph cluster less configurable from the CLI and dashboard and may make future tuning or debugging difficult. Setting configs via Ceph's CLI requires that at least one mon be available for the configs to be set, and setting configs via dashboard requires at least one mgr to be available. Ceph also has a number of very advanced settings that cannot be modified easily via the CLI or dashboard. In order to set configurations before monitors are available or to set advanced configuration settings, the rook-config-override ConfigMap exists, and the config field can be set with the contents of a ceph.conf file. The contents will be propagated to all mon, mgr, OSD, MDS, and RGW daemons as an /etc/ceph/ceph.conf file. Warning Rook performs no validation on the config, so the validity of the settings is the user's responsibility. If the rook-config-override ConfigMap is created before the cluster is started, the Ceph daemons will automatically pick up the settings. If you add the settings to the ConfigMap after the cluster has been initialized, each daemon will need to be restarted where you want the settings applied: mons: ensure all three mons are online and healthy before restarting each mon pod, one at a time. mgrs: the pods are stateless and can be restarted as needed, but note that this will disrupt the Ceph dashboard during restart. OSDs: restart your the pods by deleting them, one at a time, and running ceph -s between each restart to ensure the cluster goes back to \"active/clean\" state. RGW: the pods are stateless and can be restarted as needed. MDS: the pods are stateless and can be restarted as needed. After the pod restart, the new settings should be in effect. Note that if the ConfigMap in the Ceph cluster's namespace is created before the cluster is created, the daemons will pick up the settings at first launch. To automate the restart of the Ceph daemon pods, you will need to trigger an update to the pod specs. The simplest way to trigger the update is to add annotations or labels to the CephCluster CR for the daemons you want to restart. The operator will then proceed with a rolling update, similar to any other update to the cluster.","title":"Custom ceph.conf Settings"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#example","text":"In this example we will set the default pool size to two, and tell OSD daemons not to change the weight of OSDs on startup. Warning Modify Ceph settings carefully. You are leaving the sandbox tested by Rook. Changing the settings could result in unhealthy daemons or even data loss if used incorrectly. 
When the Rook Operator creates a cluster, a placeholder ConfigMap is created that will allow you to override Ceph configuration settings. When the daemon pods are started, the settings specified in this ConfigMap will be merged with the default settings generated by Rook. The default override settings are blank. Cutting out the extraneous properties, we would see the following defaults after creating a cluster: 1 kubectl -n rook-ceph get ConfigMap rook-config-override -o yaml 1 2 3 4 5 6 7 kind : ConfigMap apiVersion : v1 metadata : name : rook-config-override namespace : rook-ceph data : config : \"\" To apply your desired configuration, you will need to update this ConfigMap. The next time the daemon pod(s) start, they will use the updated configs. 1 kubectl -n rook-ceph edit configmap rook-config-override Modify the settings and save. Each line you add should be indented from the config property as such: 1 2 3 4 5 6 7 8 9 10 apiVersion : v1 kind : ConfigMap metadata : name : rook-config-override namespace : rook-ceph data : config : | [global] osd crush update on start = false osd pool default size = 2","title":"Example"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#custom-csi-cephconf-settings","text":"Warning It is highly recommended to use the default setting that comes with CephCSI and this can only be used when absolutely necessary. The ceph.conf should be reset back to default values if/when the configurations are no longer necessary. If the csi-ceph-conf-override ConfigMap is created before the cluster is started, the CephCSI pods will automatically pick up the settings. If you add the settings to the ConfigMap after the cluster has been initialized, you can restart the Rook operator pod and wait for Rook to recreate CSI pods to take immediate effect. After the CSI pods are restarted, the new settings should be in effect.","title":"Custom CSI ceph.conf Settings"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#example-csi-cephconf-settings","text":"In this Example we will set the rbd_validate_pool to false to skip rbd pool validation. Warning Modify Ceph settings carefully to avoid modifying the default configuration. Changing the settings could result in unexpected results if used incorrectly. 1 kubectl create -f csi-ceph-conf-override.yaml Restart the Rook operator pod and wait for CSI pods to be recreated.","title":"Example CSI ceph.conf Settings"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#osd-crush-settings","text":"A useful view of the CRUSH Map is generated with the following command: 1 ceph osd tree In this section we will be tweaking some of the values seen in the output.","title":"OSD CRUSH Settings"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#osd-weight","text":"The CRUSH weight controls the ratio of data that should be distributed to each OSD. This also means a higher or lower amount of disk I/O operations for an OSD with higher/lower weight, respectively. By default OSDs get a weight relative to their storage capacity, which maximizes overall cluster capacity by filling all drives at the same rate, even if drive sizes vary. This should work for most use-cases, but the following situations could warrant weight changes: Your cluster has some relatively slow OSDs or nodes. Lowering their weight can reduce the impact of this bottleneck. You're using bluestore drives provisioned with Rook v0.3.1 or older. In this case you may notice OSD weights did not get set relative to their storage capacity. 
Changing the weight can fix this and maximize cluster capacity. This example sets the weight of osd.0 which is 600GiB 1 ceph osd crush reweight osd.0 .600","title":"OSD Weight"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#osd-primary-affinity","text":"When pools are set with a size setting greater than one, data is replicated between nodes and OSDs. For every chunk of data a Primary OSD is selected to be used for reading that data to be sent to clients. You can control how likely it is for an OSD to become a Primary using the Primary Affinity setting. This is similar to the OSD weight setting, except it only affects reads on the storage device, not capacity or writes. In this example we will ensure that osd.0 is only selected as Primary if all other OSDs holding data replicas are unavailable: 1 ceph osd primary-affinity osd.0 0","title":"OSD Primary Affinity"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#osd-dedicated-network","text":"It is possible to configure ceph to leverage a dedicated network for the OSDs to communicate across. A useful overview is the CEPH Networks section of the Ceph documentation. If you declare a cluster network, OSDs will route heartbeat, object replication and recovery traffic over the cluster network. This may improve performance compared to using a single network, especially when slower network technologies are used, with the tradeoff of additional expense and subtle failure modes. Two changes are necessary to the configuration to enable this capability:","title":"OSD Dedicated Network"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#use-hostnetwork-in-the-cluster-configuration","text":"Enable the hostNetwork setting in the Ceph Cluster CRD configuration . For example, 1 2 network : provider : host Important Changing this setting is not supported in a running Rook cluster. Host networking should be configured when the cluster is first created.","title":"Use hostNetwork in the cluster configuration"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#define-the-subnets-to-use-for-public-and-private-osd-networks","text":"Edit the rook-config-override configmap to define the custom network configuration: 1 kubectl -n rook-ceph edit configmap rook-config-override In the editor, add a custom configuration to instruct ceph which subnet is the public network and which subnet is the private network. For example: 1 2 3 4 5 6 7 8 apiVersion : v1 data : config : | [global] public network = 10.0.7.0/24 cluster network = 10.0.10.0/24 public addr = \"\" cluster addr = \"\" After applying the updated rook-config-override configmap, it will be necessary to restart the OSDs by deleting the OSD pods in order to apply the change. Restart the OSD pods by deleting them, one at a time, and running ceph -s between each restart to ensure the cluster goes back to \"active/clean\" state.","title":"Define the subnets to use for public and private OSD networks"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#phantom-osd-removal","text":"If you have OSDs in which are not showing any disks, you can remove those \"Phantom OSDs\" by following the instructions below. 
To check for \"Phantom OSDs\", you can run (example output): 1 2 3 4 5 6 $ ceph osd tree ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF -1 57.38062 root default -13 7.17258 host node1.example.com 2 hdd 3.61859 osd.2 up 1.00000 1.00000 -7 0 host node2.example.com down 0 1.00000 The host node2.example.com in the output has no disks, so it is most likely a \"Phantom OSD\". Now to remove it, use the ID in the first column of the output and replace  with it. In the example output above the ID would be -7 . The commands are: 1 2 3 4 ceph osd out  ceph osd crush remove osd. ceph auth del osd. ceph osd rm  To recheck that the Phantom OSD was removed, re-run the following command and check if the OSD with the ID doesn't show up anymore: 1 ceph osd tree","title":"Phantom OSD Removal"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#auto-expansion-of-osds","text":"","title":"Auto Expansion of OSDs"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#prerequisites-for-auto-expansion-of-osds","text":"1) A PVC-based cluster deployed in dynamic provisioning environment with a storageClassDeviceSet . 2) Create the Rook Toolbox . Note Prometheus Operator and [Prometheus ../Monitoring/ceph-monitoring.mdnitoring.md#prometheus-instances) are Prerequisites that are created by the auto-grow-storage script.","title":"Prerequisites for Auto Expansion of OSDs"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#to-scale-osds-vertically","text":"Run the following script to auto-grow the size of OSDs on a PVC-based Rook cluster whenever the OSDs have reached the storage near-full threshold. 1 tests/scripts/auto-grow-storage.sh size --max maxSize --growth-rate percent growth-rate percentage represents the percent increase you want in the OSD capacity and maxSize represent the maximum disk size. For example, if you need to increase the size of OSD by 30% and max disk size is 1Ti 1 ./auto-grow-storage.sh size --max 1Ti --growth-rate 30","title":"To scale OSDs Vertically"},{"location":"Storage-Configuration/Advanced/ceph-configuration/#to-scale-osds-horizontally","text":"Run the following script to auto-grow the number of OSDs on a PVC-based Rook cluster whenever the OSDs have reached the storage near-full threshold. 1 tests/scripts/auto-grow-storage.sh count --max maxCount --count rate Count of OSD represents the number of OSDs you need to add and maxCount represents the number of disks a storage cluster will support. For example, if you need to increase the number of OSDs by 3 and maxCount is 10 1 ./auto-grow-storage.sh count --max 10 --count 3","title":"To scale OSDs Horizontally"},{"location":"Storage-Configuration/Advanced/ceph-mon-health/","text":"Failure in a distributed system is to be expected. Ceph was designed from the ground up to deal with the failures of a distributed system. At the next layer, Rook was designed from the ground up to automate recovery of Ceph components that traditionally required admin intervention. Monitor health is the most critical piece of the equation that Rook actively monitors. If they are not in a good state, the operator will take action to restore their health and keep your cluster protected from disaster. The Ceph monitors (mons) are the brains of the distributed cluster. They control all of the metadata that is necessary to store and retrieve your data as well as keep it safe. If the monitors are not in a healthy state you will risk losing all the data in your system. 
Monitor Identity \u00b6 Each monitor in a Ceph cluster has a static identity. Every component in the cluster is aware of the identity, and that identity must be immutable. The identity of a mon is its IP address. To have an immutable IP address in Kubernetes, Rook creates a K8s service for each monitor. The clusterIP of the service will act as the stable identity. When a monitor pod starts, it will bind to its podIP and it will expect communication to be via its service IP address. Monitor Quorum \u00b6 Multiple mons work together to provide redundancy by each keeping a copy of the metadata. A variation of the distributed algorithm Paxos is used to establish consensus about the state of the cluster. Paxos requires a super-majority of mons to be running in order to establish quorum and perform operations in the cluster. If the majority of mons are not running, quorum is lost and nothing can be done in the cluster. How many mons? \u00b6 Most commonly a cluster will have three mons. This would mean that one mon could go down and allow the cluster to remain healthy. You would still have 2/3 mons running to give you consensus in the cluster for any operation. For highest availability, an odd number of mons is required. Fifty percent of mons will not be sufficient to maintain quorum. If you had two mons and one of them went down, you would have 1/2 of quorum. Since that is not a super-majority, the cluster would have to wait until the second mon is up again. Rook allows an even number of mons for higher durability. See the disaster recovery guide if quorum is lost and to recover mon quorum from a single mon. The number of mons to create in a cluster depends on your tolerance for losing a node. If you have 1 mon zero nodes can be lost to maintain quorum. With 3 mons one node can be lost, and with 5 mons two nodes can be lost. Because the Rook operator will automatically start a new monitor if one dies, you typically only need three mons. The more mons you have, the more overhead there will be to make a change to the cluster, which could become a performance issue in a large cluster. Mitigating Monitor Failure \u00b6 Whatever the reason that a mon may fail (power failure, software crash, software hang, etc), there are several layers of mitigation in place to help recover the mon. It is always better to bring an existing mon back up than to failover to bring up a new mon. The Rook operator creates a mon with a Deployment to ensure that the mon pod will always be restarted if it fails. If a mon pod stops for any reason, Kubernetes will automatically start the pod up again. In order for a mon to support a pod/node restart, the mon metadata is persisted to disk, either under the dataDirHostPath specified in the CephCluster CR, or in the volume defined by the volumeClaimTemplate in the CephCluster CR. This will allow the mon to start back up with its existing metadata and continue where it left off even if the pod had to be re-created. Without this persistence, the mon cannot restart. Failing over a Monitor \u00b6 If a mon is unhealthy and the K8s pod restart or liveness probe are not sufficient to bring a mon back up, the operator will make the decision to terminate the unhealthy monitor deployment and bring up a new monitor with a new identity. This is an operation that must be done while mon quorum is maintained by other mons in the cluster. The operator checks for mon health every 45 seconds. If a monitor is down, the operator will wait 10 minutes before failing over the unhealthy mon. 
These two intervals can be configured as parameters to the CephCluster CR (see below). If the intervals are too short, it could be unhealthy if the mons are failed over too aggressively. If the intervals are too long, the cluster could be at risk of losing quorum if a new monitor is not brought up before another mon fails. 1 2 3 4 5 6 healthCheck : daemonHealth : mon : disabled : false interval : 45s timeout : 10m If you want to force a mon to failover for testing or other purposes, you can scale down the mon deployment to 0, then wait for the timeout. Note that the operator may scale up the mon again automatically if the operator is restarted or if a full reconcile is triggered, such as when the CephCluster CR is updated. If the mon pod is in pending state and couldn't be assigned to a node (say, due to node drain), then the operator will wait for the timeout again before the mon failover. So the timeout waiting for the mon failover will be doubled in this case. To disable monitor automatic failover, the timeout can be set to 0 , if the monitor goes out of quorum Rook will never fail it over onto another node. This is especially useful for planned maintenance. Example Failover \u00b6 Rook will create mons with pod names such as mon-a, mon-b, and mon-c. Let's say mon-b had an issue and the pod failed. 1 2 3 4 5 $ kubectl -n rook-ceph get pod -l app = rook-ceph-mon NAME READY STATUS RESTARTS AGE rook-ceph-mon-a-74dc96545-ch5ns 1/1 Running 0 9m rook-ceph-mon-b-6b9d895c4c-bcl2h 1/1 Error 2 9m rook-ceph-mon-c-7d6df6d65c-5cjwl 1/1 Running 0 8m After a failover, you will see the unhealthy mon removed and a new mon added such as mon-d. A fully healthy mon quorum is now running again. 1 2 3 4 5 $ kubectl -n rook-ceph get pod -l app = rook-ceph-mon NAME READY STATUS RESTARTS AGE rook-ceph-mon-a-74dc96545-ch5ns 1/1 Running 0 19m rook-ceph-mon-c-7d6df6d65c-5cjwl 1/1 Running 0 18m rook-ceph-mon-d-9e7ea7e76d-4bhxm 1/1 Running 0 20s From the toolbox we can verify the status of the health mon quorum: 1 2 3 4 5 6 7 8 9 10 $ ceph -s cluster: id: 35179270-8a39-4e08-a352-a10c52bb04ff health: HEALTH_OK services: mon: 3 daemons, quorum a,b,d (age 2m) mgr: a(active, since 12m) osd: 3 osds: 3 up (since 10m), 3 in (since 10m) [...]","title":"Monitor Health"},{"location":"Storage-Configuration/Advanced/ceph-mon-health/#monitor-identity","text":"Each monitor in a Ceph cluster has a static identity. Every component in the cluster is aware of the identity, and that identity must be immutable. The identity of a mon is its IP address. To have an immutable IP address in Kubernetes, Rook creates a K8s service for each monitor. The clusterIP of the service will act as the stable identity. When a monitor pod starts, it will bind to its podIP and it will expect communication to be via its service IP address.","title":"Monitor Identity"},{"location":"Storage-Configuration/Advanced/ceph-mon-health/#monitor-quorum","text":"Multiple mons work together to provide redundancy by each keeping a copy of the metadata. A variation of the distributed algorithm Paxos is used to establish consensus about the state of the cluster. Paxos requires a super-majority of mons to be running in order to establish quorum and perform operations in the cluster. If the majority of mons are not running, quorum is lost and nothing can be done in the cluster.","title":"Monitor Quorum"},{"location":"Storage-Configuration/Advanced/ceph-mon-health/#how-many-mons","text":"Most commonly a cluster will have three mons. 
This would mean that one mon could go down and allow the cluster to remain healthy. You would still have 2/3 mons running to give you consensus in the cluster for any operation. For highest availability, an odd number of mons is required. Fifty percent of mons will not be sufficient to maintain quorum. If you had two mons and one of them went down, you would have 1/2 of quorum. Since that is not a super-majority, the cluster would have to wait until the second mon is up again. Rook allows an even number of mons for higher durability. See the disaster recovery guide if quorum is lost and to recover mon quorum from a single mon. The number of mons to create in a cluster depends on your tolerance for losing a node. If you have 1 mon zero nodes can be lost to maintain quorum. With 3 mons one node can be lost, and with 5 mons two nodes can be lost. Because the Rook operator will automatically start a new monitor if one dies, you typically only need three mons. The more mons you have, the more overhead there will be to make a change to the cluster, which could become a performance issue in a large cluster.","title":"How many mons?"},{"location":"Storage-Configuration/Advanced/ceph-mon-health/#mitigating-monitor-failure","text":"Whatever the reason that a mon may fail (power failure, software crash, software hang, etc), there are several layers of mitigation in place to help recover the mon. It is always better to bring an existing mon back up than to failover to bring up a new mon. The Rook operator creates a mon with a Deployment to ensure that the mon pod will always be restarted if it fails. If a mon pod stops for any reason, Kubernetes will automatically start the pod up again. In order for a mon to support a pod/node restart, the mon metadata is persisted to disk, either under the dataDirHostPath specified in the CephCluster CR, or in the volume defined by the volumeClaimTemplate in the CephCluster CR. This will allow the mon to start back up with its existing metadata and continue where it left off even if the pod had to be re-created. Without this persistence, the mon cannot restart.","title":"Mitigating Monitor Failure"},{"location":"Storage-Configuration/Advanced/ceph-mon-health/#failing-over-a-monitor","text":"If a mon is unhealthy and the K8s pod restart or liveness probe are not sufficient to bring a mon back up, the operator will make the decision to terminate the unhealthy monitor deployment and bring up a new monitor with a new identity. This is an operation that must be done while mon quorum is maintained by other mons in the cluster. The operator checks for mon health every 45 seconds. If a monitor is down, the operator will wait 10 minutes before failing over the unhealthy mon. These two intervals can be configured as parameters to the CephCluster CR (see below). If the intervals are too short, it could be unhealthy if the mons are failed over too aggressively. If the intervals are too long, the cluster could be at risk of losing quorum if a new monitor is not brought up before another mon fails. 1 2 3 4 5 6 healthCheck : daemonHealth : mon : disabled : false interval : 45s timeout : 10m If you want to force a mon to failover for testing or other purposes, you can scale down the mon deployment to 0, then wait for the timeout. Note that the operator may scale up the mon again automatically if the operator is restarted or if a full reconcile is triggered, such as when the CephCluster CR is updated. 
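For example, forcing a failover of mon-b might look like the following sketch; the deployment name rook-ceph-mon-b and the rook-ceph namespace are assumptions based on the default examples:
# Scale the mon deployment to zero and let the operator's failover timeout elapse
kubectl -n rook-ceph scale deployment rook-ceph-mon-b --replicas=0
# After the timeout (10 minutes by default), watch for a replacement mon such as mon-d
kubectl -n rook-ceph get pods -l app=rook-ceph-mon -w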
If the mon pod is in a pending state and couldn't be assigned to a node (say, due to node drain), then the operator will wait for the timeout again before failing over the mon, so the effective failover timeout is doubled in this case. To disable automatic monitor failover, set the timeout to 0 ; if a monitor then goes out of quorum, Rook will never fail it over onto another node. This is especially useful for planned maintenance.","title":"Failing over a Monitor"},{"location":"Storage-Configuration/Advanced/ceph-mon-health/#example-failover","text":"Rook will create mons with pod names such as mon-a, mon-b, and mon-c. Let's say mon-b had an issue and the pod failed. 1 2 3 4 5 $ kubectl -n rook-ceph get pod -l app = rook-ceph-mon NAME READY STATUS RESTARTS AGE rook-ceph-mon-a-74dc96545-ch5ns 1/1 Running 0 9m rook-ceph-mon-b-6b9d895c4c-bcl2h 1/1 Error 2 9m rook-ceph-mon-c-7d6df6d65c-5cjwl 1/1 Running 0 8m After a failover, you will see the unhealthy mon removed and a new mon added such as mon-d. A fully healthy mon quorum is now running again. 1 2 3 4 5 $ kubectl -n rook-ceph get pod -l app = rook-ceph-mon NAME READY STATUS RESTARTS AGE rook-ceph-mon-a-74dc96545-ch5ns 1/1 Running 0 19m rook-ceph-mon-c-7d6df6d65c-5cjwl 1/1 Running 0 18m rook-ceph-mon-d-9e7ea7e76d-4bhxm 1/1 Running 0 20s From the toolbox we can verify the status of the healthy mon quorum: 1 2 3 4 5 6 7 8 9 10 $ ceph -s cluster: id: 35179270-8a39-4e08-a352-a10c52bb04ff health: HEALTH_OK services: mon: 3 daemons, quorum a,b,d (age 2m) mgr: a(active, since 12m) osd: 3 osds: 3 up (since 10m), 3 in (since 10m) [...]","title":"Example Failover"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/","text":"Ceph Object Storage Daemons (OSDs) are the heart and soul of the Ceph storage platform. Each OSD manages a local device and together they provide the distributed storage. Rook automates the creation and management of OSDs, hiding the complexity as much as possible based on the desired state in the CephCluster CR. This guide will walk through some of the scenarios to configure OSDs where more configuration may be required. OSD Health \u00b6 The rook-ceph-tools pod provides a simple environment to run Ceph tools. The ceph commands mentioned in this document should be run from the toolbox. Once the toolbox pod is created, connect to it to execute the ceph commands to analyze the health of the cluster, in particular the OSDs and placement groups (PGs). Some common commands to analyze OSDs include: 1 2 3 4 5 ceph status ceph osd tree ceph osd status ceph osd df ceph osd utilization 1 kubectl -n rook-ceph exec -it $(kubectl -n rook-ceph get pod -l \"app=rook-ceph-tools\" -o jsonpath='{.items[0].metadata.name}') bash Add an OSD \u00b6 The QuickStart Guide provides the basic steps to create a cluster and start some OSDs. For more details on the OSD settings also see the Cluster CRD documentation. If you are not seeing OSDs created, see the Ceph Troubleshooting Guide . To add more OSDs, Rook automatically watches for new nodes and devices being added to your cluster. If they match the filters or other settings in the storage section of the cluster CR, the operator will create new OSDs. Add an OSD on a PVC \u00b6 In more dynamic environments where storage can be dynamically provisioned with a raw block storage provider, the OSDs can be backed by PVCs. See the storageClassDeviceSets documentation in the Cluster CRD topic.
To add more OSDs, you can either increase the count of the OSDs in an existing device set or you can add more device sets to the cluster CR. The operator will then automatically create new OSDs according to the updated cluster CR. Remove an OSD \u00b6 To remove an OSD due to a failed disk or other re-configuration, consider the following to ensure the health of the data through the removal process: Confirm you will have enough space on your cluster after removing your OSDs to properly handle the deletion Confirm the remaining OSDs and their placement groups (PGs) are healthy in order to handle the rebalancing of the data Do not remove too many OSDs at once Wait for rebalancing between removing multiple OSDs If all the PGs are active+clean and there are no warnings about being low on space, this means the data is fully replicated and it is safe to proceed. If an OSD is failing, the PGs will not be perfectly clean and you will need to proceed anyway. Host-based cluster \u00b6 Update your CephCluster CR. Depending on your CR settings, you may need to remove the device from the list or update the device filter. If you are using useAllDevices: true , no change to the CR is necessary. Important On host-based clusters, you may need to stop the Rook Operator while performing OSD removal steps in order to prevent Rook from detecting the old OSD and trying to re-create it before the disk is wiped or removed. To stop the Rook Operator, run: 1 kubectl -n rook-ceph scale deployment rook-ceph-operator --replicas=0 You must perform steps below to (1) purge the OSD and either (2.a) delete the underlying data or (2.b)replace the disk before starting the Rook Operator again. Once you have done that, you can start the Rook operator again with: 1 kubectl -n rook-ceph scale deployment rook-ceph-operator --replicas=1 PVC-based cluster \u00b6 To reduce the storage in your cluster or remove a failed OSD on a PVC: Shrink the number of OSDs in the storageClassDeviceSets in the CephCluster CR. If you have multiple device sets, you may need to change the index of 0 in this example path. kubectl -n rook-ceph patch CephCluster rook-ceph --type=json -p '[{\"op\": \"replace\", \"path\": \"/spec/storage/storageClassDeviceSets/0/count\", \"value\":}]' Reduce the count of the OSDs to the desired number. Rook will not take any action to automatically remove the extra OSD(s). Identify the PVC that belongs to the OSD that is failed or otherwise being removed. kubectl -n rook-ceph get pvc -l ceph.rook.io/DeviceSet= Identify the OSD you desire to remove. The OSD assigned to the PVC can be found in the labels on the PVC kubectl -n rook-ceph get pod -l ceph.rook.io/pvc= -o yaml | grep ceph-osd-id For example, this might return: ceph-osd-id: \"0\" Remember the OSD ID for purging the OSD below If you later increase the count in the device set, note that the operator will create PVCs with the highest index that is not currently in use by existing OSD PVCs. Confirm the OSD is down \u00b6 If you want to remove an unhealthy OSD, the osd pod may be in an error state such as CrashLoopBackoff or the ceph commands in the toolbox may show which OSD is down . If you want to remove a healthy OSD, you should run the following commands: 1 2 3 $ kubectl -n rook-ceph scale deployment rook-ceph-osd- --replicas = 0 # Inside the toolbox $ ceph osd down osd. 
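For a hypothetical OSD with ID 3, those steps might look like the following sketch; the ID and the deployment name are placeholders for illustration only, not values from a real cluster:
# Stop the OSD pod by scaling its deployment to zero (hypothetical OSD ID 3)
kubectl -n rook-ceph scale deployment rook-ceph-osd-3 --replicas=0
# From the toolbox, mark the OSD down and confirm its state in the CRUSH tree
ceph osd down osd.3
ceph osd tree | grep osd.3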
Purge the OSD with Krew \u00b6 Note The rook-ceph Krew plugin must be installed 1 2 3 4 5 6 7 8 9 kubectl rook-ceph rook purge-osd 0 --force # 2022-09-14 08:58:28.888431 I | rookcmd: starting Rook v1.10.0-alpha.0.164.gcb73f728c with arguments 'rook ceph osd remove --osd-ids=0 --force-osd-removal=true' # 2022-09-14 08:58:28.889217 I | rookcmd: flag values: --force-osd-removal=true, --help=false, --log-level=INFO, --operator-image=, --osd-ids=0, --preserve-pvc=false, --service-account= # 2022-09-14 08:58:28.889582 I | op-mon: parsing mon endpoints: b=10.106.118.240:6789 # 2022-09-14 08:58:28.898898 I | cephclient: writing config file /var/lib/rook/rook-ceph/rook-ceph.config # 2022-09-14 08:58:28.899567 I | cephclient: generated admin config in /var/lib/rook/rook-ceph # 2022-09-14 08:58:29.421345 I | cephosd: validating status of osd.0 --- Purge the OSD with a Job \u00b6 OSD removal can be automated with the example found in the rook-ceph-purge-osd job . In the osd-purge.yaml, change the  to the ID(s) of the OSDs you want to remove. Run the job: kubectl create -f osd-purge.yaml When the job is completed, review the logs to ensure success: kubectl -n rook-ceph logs -l app=rook-ceph-purge-osd When finished, you can delete the job: kubectl delete -f osd-purge.yaml If you want to remove OSDs by hand, continue with the following sections. However, we recommend you use the above-mentioned steps to avoid operation errors. Purge the OSD manually \u00b6 If the OSD purge job fails or you need fine-grained control of the removal, here are the individual commands that can be run from the toolbox. Detach the OSD PVC from Rook kubectl -n rook-ceph label pvc  ceph.rook.io/DeviceSetPVCId- Mark the OSD as out if not already marked as such by Ceph. This signals Ceph to start moving (backfilling) the data that was on that OSD to another OSD. ceph osd out osd. (for example if the OSD ID is 23 this would be ceph osd out osd.23 ) Wait for the data to finish backfilling to other OSDs. ceph status will indicate the backfilling is done when all of the PGs are active+clean . If desired, it's safe to remove the disk after that. Remove the OSD from the Ceph cluster ceph osd purge  --yes-i-really-mean-it Verify the OSD is removed from the node in the CRUSH map ceph osd tree The operator can automatically remove OSD deployments that are considered \"safe-to-destroy\" by Ceph. After the steps above, the OSD will be considered safe to remove since the data has all been moved to other OSDs. But this will only be done automatically by the operator if you have this setting in the cluster CR: 1 removeOSDsIfOutAndSafeToRemove : true Otherwise, you will need to delete the deployment directly: 1 kubectl delete deployment -n rook-ceph rook-ceph-osd- In PVC-based cluster, remove the orphaned PVC, if necessary. Delete the underlying data \u00b6 If you want to clean the device where the OSD was running, see in the instructions to wipe a disk on the Cleaning up a Cluster topic. Replace an OSD \u00b6 To replace a disk that has failed: Run the steps in the previous section to Remove an OSD . Replace the physical device and verify the new device is attached. Check if your cluster CR will find the new device. If you are using useAllDevices: true you can skip this step. If your cluster CR lists individual devices or uses a device filter you may need to update the CR. The operator ideally will automatically create the new OSD within a few minutes of adding the new device or updating the CR. 
If you don't see a new OSD automatically created, restart the operator (by deleting the operator pod) to trigger the OSD creation. Verify if the OSD is created on the node by running ceph osd tree from the toolbox. Note The OSD might have a different ID than the previous OSD that was replaced.","title":"Ceph OSD Management"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#osd-health","text":"The rook-ceph-tools pod provides a simple environment to run Ceph tools. The ceph commands mentioned in this document should be run from the toolbox. Once the is created, connect to the pod to execute the ceph commands to analyze the health of the cluster, in particular the OSDs and placement groups (PGs). Some common commands to analyze OSDs include: 1 2 3 4 5 ceph status ceph osd tree ceph osd status ceph osd df ceph osd utilization 1 kubectl -n rook-ceph exec -it $(kubectl -n rook-ceph get pod -l \"app=rook-ceph-tools\" -o jsonpath='{.items[0].metadata.name}') bash","title":"OSD Health"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#add-an-osd","text":"The QuickStart Guide will provide the basic steps to create a cluster and start some OSDs. For more details on the OSD settings also see the Cluster CRD documentation. If you are not seeing OSDs created, see the Ceph Troubleshooting Guide . To add more OSDs, Rook will automatically watch for new nodes and devices being added to your cluster. If they match the filters or other settings in the storage section of the cluster CR, the operator will create new OSDs.","title":"Add an OSD"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#add-an-osd-on-a-pvc","text":"In more dynamic environments where storage can be dynamically provisioned with a raw block storage provider, the OSDs can be backed by PVCs. See the storageClassDeviceSets documentation in the Cluster CRD topic. To add more OSDs, you can either increase the count of the OSDs in an existing device set or you can add more device sets to the cluster CR. The operator will then automatically create new OSDs according to the updated cluster CR.","title":"Add an OSD on a PVC"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#remove-an-osd","text":"To remove an OSD due to a failed disk or other re-configuration, consider the following to ensure the health of the data through the removal process: Confirm you will have enough space on your cluster after removing your OSDs to properly handle the deletion Confirm the remaining OSDs and their placement groups (PGs) are healthy in order to handle the rebalancing of the data Do not remove too many OSDs at once Wait for rebalancing between removing multiple OSDs If all the PGs are active+clean and there are no warnings about being low on space, this means the data is fully replicated and it is safe to proceed. If an OSD is failing, the PGs will not be perfectly clean and you will need to proceed anyway.","title":"Remove an OSD"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#host-based-cluster","text":"Update your CephCluster CR. Depending on your CR settings, you may need to remove the device from the list or update the device filter. If you are using useAllDevices: true , no change to the CR is necessary. Important On host-based clusters, you may need to stop the Rook Operator while performing OSD removal steps in order to prevent Rook from detecting the old OSD and trying to re-create it before the disk is wiped or removed. 
To stop the Rook Operator, run: 1 kubectl -n rook-ceph scale deployment rook-ceph-operator --replicas=0 You must perform steps below to (1) purge the OSD and either (2.a) delete the underlying data or (2.b)replace the disk before starting the Rook Operator again. Once you have done that, you can start the Rook operator again with: 1 kubectl -n rook-ceph scale deployment rook-ceph-operator --replicas=1","title":"Host-based cluster"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#pvc-based-cluster","text":"To reduce the storage in your cluster or remove a failed OSD on a PVC: Shrink the number of OSDs in the storageClassDeviceSets in the CephCluster CR. If you have multiple device sets, you may need to change the index of 0 in this example path. kubectl -n rook-ceph patch CephCluster rook-ceph --type=json -p '[{\"op\": \"replace\", \"path\": \"/spec/storage/storageClassDeviceSets/0/count\", \"value\":}]' Reduce the count of the OSDs to the desired number. Rook will not take any action to automatically remove the extra OSD(s). Identify the PVC that belongs to the OSD that is failed or otherwise being removed. kubectl -n rook-ceph get pvc -l ceph.rook.io/DeviceSet= Identify the OSD you desire to remove. The OSD assigned to the PVC can be found in the labels on the PVC kubectl -n rook-ceph get pod -l ceph.rook.io/pvc= -o yaml | grep ceph-osd-id For example, this might return: ceph-osd-id: \"0\" Remember the OSD ID for purging the OSD below If you later increase the count in the device set, note that the operator will create PVCs with the highest index that is not currently in use by existing OSD PVCs.","title":"PVC-based cluster"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#confirm-the-osd-is-down","text":"If you want to remove an unhealthy OSD, the osd pod may be in an error state such as CrashLoopBackoff or the ceph commands in the toolbox may show which OSD is down . If you want to remove a healthy OSD, you should run the following commands: 1 2 3 $ kubectl -n rook-ceph scale deployment rook-ceph-osd- --replicas = 0 # Inside the toolbox $ ceph osd down osd.","title":"Confirm the OSD is down"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#purge-the-osd-with-krew","text":"Note The rook-ceph Krew plugin must be installed 1 2 3 4 5 6 7 8 9 kubectl rook-ceph rook purge-osd 0 --force # 2022-09-14 08:58:28.888431 I | rookcmd: starting Rook v1.10.0-alpha.0.164.gcb73f728c with arguments 'rook ceph osd remove --osd-ids=0 --force-osd-removal=true' # 2022-09-14 08:58:28.889217 I | rookcmd: flag values: --force-osd-removal=true, --help=false, --log-level=INFO, --operator-image=, --osd-ids=0, --preserve-pvc=false, --service-account= # 2022-09-14 08:58:28.889582 I | op-mon: parsing mon endpoints: b=10.106.118.240:6789 # 2022-09-14 08:58:28.898898 I | cephclient: writing config file /var/lib/rook/rook-ceph/rook-ceph.config # 2022-09-14 08:58:28.899567 I | cephclient: generated admin config in /var/lib/rook/rook-ceph # 2022-09-14 08:58:29.421345 I | cephosd: validating status of osd.0 ---","title":"Purge the OSD with Krew"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#purge-the-osd-with-a-job","text":"OSD removal can be automated with the example found in the rook-ceph-purge-osd job . In the osd-purge.yaml, change the  to the ID(s) of the OSDs you want to remove. 
Run the job: kubectl create -f osd-purge.yaml When the job is completed, review the logs to ensure success: kubectl -n rook-ceph logs -l app=rook-ceph-purge-osd When finished, you can delete the job: kubectl delete -f osd-purge.yaml If you want to remove OSDs by hand, continue with the following sections. However, we recommend you use the above-mentioned steps to avoid operation errors.","title":"Purge the OSD with a Job"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#purge-the-osd-manually","text":"If the OSD purge job fails or you need fine-grained control of the removal, here are the individual commands that can be run from the toolbox. Detach the OSD PVC from Rook kubectl -n rook-ceph label pvc  ceph.rook.io/DeviceSetPVCId- Mark the OSD as out if not already marked as such by Ceph. This signals Ceph to start moving (backfilling) the data that was on that OSD to another OSD. ceph osd out osd. (for example if the OSD ID is 23 this would be ceph osd out osd.23 ) Wait for the data to finish backfilling to other OSDs. ceph status will indicate the backfilling is done when all of the PGs are active+clean . If desired, it's safe to remove the disk after that. Remove the OSD from the Ceph cluster ceph osd purge  --yes-i-really-mean-it Verify the OSD is removed from the node in the CRUSH map ceph osd tree The operator can automatically remove OSD deployments that are considered \"safe-to-destroy\" by Ceph. After the steps above, the OSD will be considered safe to remove since the data has all been moved to other OSDs. But this will only be done automatically by the operator if you have this setting in the cluster CR: 1 removeOSDsIfOutAndSafeToRemove : true Otherwise, you will need to delete the deployment directly: 1 kubectl delete deployment -n rook-ceph rook-ceph-osd- In PVC-based cluster, remove the orphaned PVC, if necessary.","title":"Purge the OSD manually"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#delete-the-underlying-data","text":"If you want to clean the device where the OSD was running, see in the instructions to wipe a disk on the Cleaning up a Cluster topic.","title":"Delete the underlying data"},{"location":"Storage-Configuration/Advanced/ceph-osd-mgmt/#replace-an-osd","text":"To replace a disk that has failed: Run the steps in the previous section to Remove an OSD . Replace the physical device and verify the new device is attached. Check if your cluster CR will find the new device. If you are using useAllDevices: true you can skip this step. If your cluster CR lists individual devices or uses a device filter you may need to update the CR. The operator ideally will automatically create the new OSD within a few minutes of adding the new device or updating the CR. If you don't see a new OSD automatically created, restart the operator (by deleting the operator pod) to trigger the OSD creation. Verify if the OSD is created on the node by running ceph osd tree from the toolbox. Note The OSD might have a different ID than the previous OSD that was replaced.","title":"Replace an OSD"},{"location":"Storage-Configuration/Advanced/configuration/","text":"For most any Ceph cluster, the user will want to--and may need to--change some Ceph configurations. These changes often may be warranted in order to alter performance to meet SLAs or to update default data resiliency settings. Warning Modify Ceph settings carefully, and review the Ceph configuration documentation before making any changes. 
Changing the settings could result in unhealthy daemons or even data loss if used incorrectly. Required configurations \u00b6 Rook and Ceph both strive to make configuration as easy as possible, but there are some configuration options which users are well advised to consider for any production cluster. Default PG and PGP counts \u00b6 The number of PGs and PGPs can be configured on a per-pool basis, but it is advised to set default values that are appropriate for your Ceph cluster. Appropriate values depend on the number of OSDs the user expects to have backing each pool. These can be configured by declaring the pg_num and pgp_num parameters under the CephBlockPool resource. For determining the right value for pg_num please refer to placement group sizing . In this example configuration, 128 PGs are applied to the pool: 1 2 3 4 5 6 7 8 9 10 11 12 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : ceph-block-pool-test namespace : rook-ceph spec : deviceClass : hdd replicated : size : 3 parameters : pg_num : '128' # create the pool with a pre-configured placement group number pgp_num : '128' # this should at least match `pg_num` so that all PGs are used Ceph OSD and Pool config docs provide detailed information about how to tune these parameters. Nautilus introduced the PG auto-scaler mgr module capable of automatically managing PG and PGP values for pools. Please see Ceph New in Nautilus: PG merging and autotuning for more information about this module. The pg_autoscaler module is enabled by default. To disable this module, in the CephCluster CR : 1 2 3 4 5 spec : mgr : modules : - name : pg_autoscaler enabled : false With the module left enabled (the default), the autoscaler will be enabled for all new pools. If you do not want the autoscaler enabled for all new pools, disable the module as shown above and use the Rook toolbox to enable autoscaling on individual pools. Specifying configuration options \u00b6 Toolbox + Ceph CLI \u00b6 The most recommended way of configuring Ceph is to set Ceph's configuration directly. The first method for doing so is to use Ceph's CLI from the Rook toolbox pod. Using the toolbox pod is detailed here . From the toolbox, the user can change Ceph configurations, enable manager modules, create users and pools, and much more. Ceph Dashboard \u00b6 The Ceph Dashboard, examined in more detail here , is another way of setting some of Ceph's configuration directly. Configuration by the Ceph dashboard is recommended with the same priority as configuration via the Ceph CLI (above). Advanced configuration via ceph.conf override ConfigMap \u00b6 Setting configs via Ceph's CLI requires that at least one mon be available for the configs to be set, and setting configs via dashboard requires at least one mgr to be available. Ceph may also have a small number of very advanced settings that aren't able to be modified easily via CLI or dashboard. The least recommended method for configuring Ceph is intended as a last-resort fallback in situations like these.
This is covered in detail here .","title":"Configuration"},{"location":"Storage-Configuration/Advanced/configuration/#required-configurations","text":"Rook and Ceph both strive to make configuration as easy as possible, but there are some configuration options which users are well advised to consider for any production cluster.","title":"Required configurations"},{"location":"Storage-Configuration/Advanced/configuration/#default-pg-and-pgp-counts","text":"The number of PGs and PGPs can be configured on a per-pool basis, but it is advised to set default values that are appropriate for your Ceph cluster. Appropriate values depend on the number of OSDs the user expects to have backing each pool. These can be configured by declaring pg_num and pgp_num parameters under CephBlockPool resource. For determining the right value for pg_num please refer placement group sizing In this example configuration, 128 PGs are applied to the pool: 1 2 3 4 5 6 7 8 9 10 11 12 13 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : ceph-block-pool-test namespace : rook-ceph spec : deviceClass : hdd replicated : size : 3 spec : parameters : pg_num : '128' # create the pool with a pre-configured placement group number pgp_num : '128' # this should at least match `pg_num` so that all PGs are used Ceph OSD and Pool config docs provide detailed information about how to tune these parameters. Nautilus introduced the PG auto-scaler mgr module capable of automatically managing PG and PGP values for pools. Please see Ceph New in Nautilus: PG merging and autotuning for more information about this module. The pg_autoscaler module is enabled by default. To disable this module, in the CephCluster CR : 1 2 3 4 5 spec : mgr : modules : - name : pg_autoscaler enabled : false With that setting, the autoscaler will be enabled for all new pools. If you do not desire to have the autoscaler enabled for all new pools, you will need to use the Rook toolbox to enable the module and enable the autoscaling on individual pools.","title":"Default PG and PGP counts"},{"location":"Storage-Configuration/Advanced/configuration/#specifying-configuration-options","text":"","title":"Specifying configuration options"},{"location":"Storage-Configuration/Advanced/configuration/#toolbox-ceph-cli","text":"The most recommended way of configuring Ceph is to set Ceph's configuration directly. The first method for doing so is to use Ceph's CLI from the Rook toolbox pod. Using the toolbox pod is detailed here . From the toolbox, the user can change Ceph configurations, enable manager modules, create users and pools, and much more.","title":"Toolbox + Ceph CLI"},{"location":"Storage-Configuration/Advanced/configuration/#ceph-dashboard","text":"The Ceph Dashboard, examined in more detail here , is another way of setting some of Ceph's configuration directly. Configuration by the Ceph dashboard is recommended with the same priority as configuration via the Ceph CLI (above).","title":"Ceph Dashboard"},{"location":"Storage-Configuration/Advanced/configuration/#advanced-configuration-via-cephconf-override-configmap","text":"Setting configs via Ceph's CLI requires that at least one mon be available for the configs to be set, and setting configs via dashboard requires at least one mgr to be available. Ceph may also have a small number of very advanced settings that aren't able to be modified easily via CLI or dashboard. The least recommended method for configuring Ceph is intended as a last-resort fallback in situations like these. 
This is covered in detail here .","title":"Advanced configuration via ceph.conf override ConfigMap"},{"location":"Storage-Configuration/Advanced/key-management-system/","text":"Rook has the ability to encrypt OSDs of clusters running on PVC via the flag ( encrypted: true ) in your storageClassDeviceSets template . Rook also has the ability to rotate encryption keys of OSDs using a cron job per OSD. By default, the Key Encryption Keys (also known as Data Encryption Keys) are stored in a Kubernetes Secret. However, if a Key Management System exists Rook is capable of using it. The security section contains settings related to encryption of the cluster. security : kms : Key Management System settings connectionDetails : the list of parameters representing kms connection details tokenSecretName : the name of the Kubernetes Secret containing the kms authentication token keyRotation : Key Rotation settings enabled : whether key rotation is enabled or not, default is false schedule : the schedule, written in cron format , with which key rotation CronJob is created, default value is \"@weekly\" . Note Currently key rotation is only supported for the default type, where the Key Encryption Keys are stored in a Kubernetes Secret. Supported KMS providers: Vault Authentication methods Token-based authentication Kubernetes-based authentication General Vault configuration TLS configuration IBM Key Protect Configuration Key Management Interoperability Protocol Configuration Vault \u00b6 Rook supports storing OSD encryption keys in HashiCorp Vault KMS . Authentication methods \u00b6 Rook support two authentication methods: token-based : a token is provided by the user and is stored in a Kubernetes Secret. It's used to authenticate the KMS by the Rook operator. This has several pitfalls such as: when the token expires it must be renewed, so the secret holding it must be updated no token automatic rotation Kubernetes Service Account uses Vault Kubernetes native authentication mechanism and alleviate some of the limitations from the token authentication such as token automatic renewal. This method is generally recommended over the token-based authentication. Token-based authentication \u00b6 When using the token-based authentication, a Kubernetes Secret must be created to hold the token. This is governed by the tokenSecretName parameter. Note: Rook supports all the Vault environment variables . The Kubernetes Secret rook-vault-token should contain: 1 2 3 4 5 6 7 apiVersion : v1 kind : Secret metadata : name : rook-vault-token namespace : rook-ceph data : token :  # base64 of a token to connect to Vault, for example: cy5GWXpsbzAyY2duVGVoRjhkWG5Bb3EyWjkK You can create a token in Vault by running the following command: 1 vault token create -policy=rook Refer to the official vault document for more details on how to create a token . For which policy to apply see the next section. 
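Once a token has been created, it can be stored in the Secret referenced by tokenSecretName without hand-encoding base64; a minimal sketch, where the token value is only a placeholder taken from the example output shown later:
# Create the Kubernetes Secret holding the Vault token (token value is a placeholder)
kubectl -n rook-ceph create secret generic rook-vault-token \
  --from-literal=token=s.FYzlo02cgnTehF8dXnAoq2Z9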
In order for Rook to connect to Vault, you must configure the following in your CephCluster template: 1 2 3 4 5 6 7 8 9 10 11 security : kms : # name of the k8s config map containing all the kms connection details connectionDetails : KMS_PROVIDER : vault VAULT_ADDR : https://vault.default.svc.cluster.local:8200 VAULT_BACKEND_PATH : rook VAULT_SECRET_ENGINE : kv VAULT_AUTH_METHOD : token # name of the k8s secret containing the kms authentication token tokenSecretName : rook-vault-token Kubernetes-based authentication \u00b6 In order to use the Kubernetes Service Account authentication method, the following must be run to properly configure Vault: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 ROOK_NAMESPACE=rook-ceph ROOK_VAULT_SA=rook-vault-auth ROOK_SYSTEM_SA=rook-ceph-system ROOK_OSD_SA=rook-ceph-osd VAULT_POLICY_NAME=rook # create service account for vault to validate API token kubectl -n \"$ROOK_NAMESPACE\" create serviceaccount \"$ROOK_VAULT_SA\" # create the RBAC for this SA kubectl -n \"$ROOK_NAMESPACE\" create clusterrolebinding vault-tokenreview-binding --clusterrole=system:auth-delegator --serviceaccount=\"$ROOK_NAMESPACE\":\"$ROOK_VAULT_SA\" # get the service account common.yaml created earlier VAULT_SA_SECRET_NAME=$(kubectl -n \"$ROOK_NAMESPACE\" get sa \"$ROOK_VAULT_SA\" -o jsonpath=\"{.secrets[*]['name']}\") # Set SA_JWT_TOKEN value to the service account JWT used to access the TokenReview API SA_JWT_TOKEN=$(kubectl -n \"$ROOK_NAMESPACE\" get secret \"$VAULT_SA_SECRET_NAME\" -o jsonpath=\"{.data.token}\" | base64 --decode) # Set SA_CA_CRT to the PEM encoded CA cert used to talk to Kubernetes API SA_CA_CRT=$(kubectl -n \"$ROOK_NAMESPACE\" get secret \"$VAULT_SA_SECRET_NAME\" -o jsonpath=\"{.data['ca\\.crt']}\" | base64 --decode) # get kubernetes endpoint K8S_HOST=$(kubectl config view --minify --flatten -o jsonpath=\"{.clusters[0].cluster.server}\") # enable kubernetes auth vault auth enable kubernetes # To fetch the service account issuer kubectl proxy & proxy_pid=$! # configure the kubernetes auth vault write auth/kubernetes/config \\ token_reviewer_jwt=\"$SA_JWT_TOKEN\" \\ kubernetes_host=\"$K8S_HOST\" \\ kubernetes_ca_cert=\"$SA_CA_CRT\" \\ issuer=\"$(curl --silent http://127.0.0.1:8001/.well-known/openid-configuration | jq -r .issuer)\" kill $proxy_pid # configure a role for rook vault write auth/kubernetes/role/\"$ROOK_NAMESPACE\" \\ bound_service_account_names=\"$ROOK_SYSTEM_SA\",\"$ROOK_OSD_SA\" \\ bound_service_account_namespaces=\"$ROOK_NAMESPACE\" \\ policies=\"$VAULT_POLICY_NAME\" \\ ttl=1440h Once done, your CephCluster CR should look like: 1 2 3 4 5 6 7 8 9 security : kms : connectionDetails : KMS_PROVIDER : vault VAULT_ADDR : https://vault.default.svc.cluster.local:8200 VAULT_BACKEND_PATH : rook VAULT_SECRET_ENGINE : kv VAULT_AUTH_METHOD : kubernetes VAULT_AUTH_KUBERNETES_ROLE : rook-ceph Note The VAULT_ADDR value above assumes that Vault is accessible within the cluster itself on the default port (8200). If running elsewhere, please update the URL accordingly. 
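As a rough way to confirm the Vault side is wired up, the role and the secret backend can be inspected with the vault CLI; this assumes the rook-ceph role name and the rook kv (version 1) path used in the examples above:
# Show the role that binds the Rook service accounts to the policy
vault read auth/kubernetes/role/rook-ceph
# List stored keys under the backend path; entries appear here once encrypted OSDs are created
vault list rook/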
General Vault configuration \u00b6 As part of the token, here is an example of a policy that can be used: 1 2 3 4 5 6 path \"rook/*\" { capabilities = [ \"create\", \"read\", \"update\", \"delete\", \"list\" ] } path \"sys/mounts\" { capabilities = [ \"read\" ] } You can write the policy like so and then create a token: 1 2 3 4 5 6 7 8 9 10 11 $ vault policy write rook /tmp/rook.hcl $ vault token create -policy = rook Key Value --- ----- token s.FYzlo02cgnTehF8dXnAoq2Z9 token_accessor oMo7sAXQKbYtxU4HtO8k3pko token_duration 768h token_renewable true token_policies [\"default\" \"rook\"] identity_policies [] policies [\"default\" \"rook\"] In the above example, Vault's secret backend path name is rook . It must be enabled with the following: 1 vault secrets enable -path=rook kv If a different path is used, the VAULT_BACKEND_PATH key in connectionDetails must be changed. TLS configuration \u00b6 This is an advanced but recommended configuration for production deployments, in this case the vault-connection-details will look like: 1 2 3 4 5 6 7 8 9 10 11 security : kms : # name of the k8s config map containing all the kms connection details connectionDetails : KMS_PROVIDER : vault VAULT_ADDR : https://vault.default.svc.cluster.local:8200 VAULT_CACERT :  VAULT_CLIENT_CERT :  VAULT_CLIENT_KEY :  # name of the k8s secret containing the kms authentication token tokenSecretName : rook-vault-token Each secret keys are expected to be: VAULT_CACERT: cert VAULT_CLIENT_CERT: cert VAULT_CLIENT_KEY: key For instance VAULT_CACERT Secret named vault-tls-ca-certificate will look like: 1 2 3 4 5 6 7 apiVersion : v1 kind : Secret metadata : name : vault-tls-ca-certificate namespace : rook-ceph data : cert :  Note: if you are using self-signed certificates (not known/approved by a proper CA) you must pass VAULT_SKIP_VERIFY: true . Communications will remain encrypted but the validity of the certificate will not be verified. IBM Key Protect \u00b6 Rook supports storing OSD encryption keys in IBM Key Protect . The current implementation stores OSD encryption keys as Standard Keys using the Bring Your Own Key (BYOK) method. This means that the Key Protect instance policy must have Standard Imported Key enabled. Configuration \u00b6 First, you need to provision the Key Protect service on the IBM Cloud. Once completed, retrieve the instance ID . Make a record of it; we need it in the CRD. On the IBM Cloud, the user must create a Service ID, then assign an Access Policy to this service. Ultimately, a Service API Key needs to be generated. All the steps are summarized in the official documentation . The Service ID must be granted access to the Key Protect Service. Once the Service API Key is generated, store it in a Kubernetes Secret. 1 2 3 4 5 6 7 apiVersion : v1 kind : Secret metadata : name : ibm-kp-svc-api-key namespace : rook-ceph data : IBM_KP_SERVICE_API_KEY :  In order for Rook to connect to IBM Key Protect, you must configure the following in your CephCluster template: 1 2 3 4 5 6 7 8 security : kms : # name of the k8s config map containing all the kms connection details connectionDetails : KMS_PROVIDER : ibmkeyprotect IBM_KP_SERVICE_INSTANCE_ID :  # name of the k8s secret containing the service API Key tokenSecretName : ibm-kp-svc-api-key More options are supported such as: IBM_BASE_URL : the base URL of the Key Protect instance, depending on your region . Defaults to https://us-south.kms.cloud.ibm.com . IBM_TOKEN_URL : the URL of the Key Protect instance to retrieve the token. 
Defaults to https://iam.cloud.ibm.com/oidc/token . Only needed for private instances. Key Management Interoperability Protocol \u00b6 Rook supports storing OSD encryption keys in Key Management Interoperability Protocol (KMIP) supported KMS. The current implementation stores OSD encryption keys using the Register operation. Key is fetched and deleted using Get and Destroy operations respectively. Configuration \u00b6 The Secret with credentials for the KMIP KMS is expected to contain the following. 1 2 3 4 5 6 7 8 9 apiVersion : v1 kind : Secret metadata : name : kmip-credentials namespace : rook-ceph stringData : CA_CERT :  CLIENT_CERT :  CLIENT_KEY :  In order for Rook to connect to KMIP, you must configure the following in your CephCluster template: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 security : kms : # name of the k8s config map containing all the kms connection details connectionDetails : KMS_PROVIDER : kmip KMIP_ENDPOINT :  # (optional) The endpoint server name. Useful when the KMIP endpoint does not have a DNS entry. TLS_SERVER_NAME :  # (optional) Network read timeout, in seconds. The default value is 10. READ_TIMEOUT :  # (optional) Network write timeout, in seconds. The default value is 10. WRITE_TIMEOUT :  # name of the k8s secret containing the credentials. tokenSecretName : kmip-credentials","title":"Key Management System"},{"location":"Storage-Configuration/Advanced/key-management-system/#vault","text":"Rook supports storing OSD encryption keys in HashiCorp Vault KMS .","title":"Vault"},{"location":"Storage-Configuration/Advanced/key-management-system/#authentication-methods","text":"Rook support two authentication methods: token-based : a token is provided by the user and is stored in a Kubernetes Secret. It's used to authenticate the KMS by the Rook operator. This has several pitfalls such as: when the token expires it must be renewed, so the secret holding it must be updated no token automatic rotation Kubernetes Service Account uses Vault Kubernetes native authentication mechanism and alleviate some of the limitations from the token authentication such as token automatic renewal. This method is generally recommended over the token-based authentication.","title":"Authentication methods"},{"location":"Storage-Configuration/Advanced/key-management-system/#token-based-authentication","text":"When using the token-based authentication, a Kubernetes Secret must be created to hold the token. This is governed by the tokenSecretName parameter. Note: Rook supports all the Vault environment variables . The Kubernetes Secret rook-vault-token should contain: 1 2 3 4 5 6 7 apiVersion : v1 kind : Secret metadata : name : rook-vault-token namespace : rook-ceph data : token :  # base64 of a token to connect to Vault, for example: cy5GWXpsbzAyY2duVGVoRjhkWG5Bb3EyWjkK You can create a token in Vault by running the following command: 1 vault token create -policy=rook Refer to the official vault document for more details on how to create a token . For which policy to apply see the next section. 
In order for Rook to connect to Vault, you must configure the following in your CephCluster template: 1 2 3 4 5 6 7 8 9 10 11 security : kms : # name of the k8s config map containing all the kms connection details connectionDetails : KMS_PROVIDER : vault VAULT_ADDR : https://vault.default.svc.cluster.local:8200 VAULT_BACKEND_PATH : rook VAULT_SECRET_ENGINE : kv VAULT_AUTH_METHOD : token # name of the k8s secret containing the kms authentication token tokenSecretName : rook-vault-token","title":"Token-based authentication"},{"location":"Storage-Configuration/Advanced/key-management-system/#kubernetes-based-authentication","text":"In order to use the Kubernetes Service Account authentication method, the following must be run to properly configure Vault: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 ROOK_NAMESPACE=rook-ceph ROOK_VAULT_SA=rook-vault-auth ROOK_SYSTEM_SA=rook-ceph-system ROOK_OSD_SA=rook-ceph-osd VAULT_POLICY_NAME=rook # create service account for vault to validate API token kubectl -n \"$ROOK_NAMESPACE\" create serviceaccount \"$ROOK_VAULT_SA\" # create the RBAC for this SA kubectl -n \"$ROOK_NAMESPACE\" create clusterrolebinding vault-tokenreview-binding --clusterrole=system:auth-delegator --serviceaccount=\"$ROOK_NAMESPACE\":\"$ROOK_VAULT_SA\" # get the service account common.yaml created earlier VAULT_SA_SECRET_NAME=$(kubectl -n \"$ROOK_NAMESPACE\" get sa \"$ROOK_VAULT_SA\" -o jsonpath=\"{.secrets[*]['name']}\") # Set SA_JWT_TOKEN value to the service account JWT used to access the TokenReview API SA_JWT_TOKEN=$(kubectl -n \"$ROOK_NAMESPACE\" get secret \"$VAULT_SA_SECRET_NAME\" -o jsonpath=\"{.data.token}\" | base64 --decode) # Set SA_CA_CRT to the PEM encoded CA cert used to talk to Kubernetes API SA_CA_CRT=$(kubectl -n \"$ROOK_NAMESPACE\" get secret \"$VAULT_SA_SECRET_NAME\" -o jsonpath=\"{.data['ca\\.crt']}\" | base64 --decode) # get kubernetes endpoint K8S_HOST=$(kubectl config view --minify --flatten -o jsonpath=\"{.clusters[0].cluster.server}\") # enable kubernetes auth vault auth enable kubernetes # To fetch the service account issuer kubectl proxy & proxy_pid=$! # configure the kubernetes auth vault write auth/kubernetes/config \\ token_reviewer_jwt=\"$SA_JWT_TOKEN\" \\ kubernetes_host=\"$K8S_HOST\" \\ kubernetes_ca_cert=\"$SA_CA_CRT\" \\ issuer=\"$(curl --silent http://127.0.0.1:8001/.well-known/openid-configuration | jq -r .issuer)\" kill $proxy_pid # configure a role for rook vault write auth/kubernetes/role/\"$ROOK_NAMESPACE\" \\ bound_service_account_names=\"$ROOK_SYSTEM_SA\",\"$ROOK_OSD_SA\" \\ bound_service_account_namespaces=\"$ROOK_NAMESPACE\" \\ policies=\"$VAULT_POLICY_NAME\" \\ ttl=1440h Once done, your CephCluster CR should look like: 1 2 3 4 5 6 7 8 9 security : kms : connectionDetails : KMS_PROVIDER : vault VAULT_ADDR : https://vault.default.svc.cluster.local:8200 VAULT_BACKEND_PATH : rook VAULT_SECRET_ENGINE : kv VAULT_AUTH_METHOD : kubernetes VAULT_AUTH_KUBERNETES_ROLE : rook-ceph Note The VAULT_ADDR value above assumes that Vault is accessible within the cluster itself on the default port (8200). 
If running elsewhere, please update the URL accordingly.","title":"Kubernetes-based authentication"},{"location":"Storage-Configuration/Advanced/key-management-system/#general-vault-configuration","text":"As part of the token, here is an example of a policy that can be used: 1 2 3 4 5 6 path \"rook/*\" { capabilities = [ \"create\", \"read\", \"update\", \"delete\", \"list\" ] } path \"sys/mounts\" { capabilities = [ \"read\" ] } You can write the policy like so and then create a token: 1 2 3 4 5 6 7 8 9 10 11 $ vault policy write rook /tmp/rook.hcl $ vault token create -policy = rook Key Value --- ----- token s.FYzlo02cgnTehF8dXnAoq2Z9 token_accessor oMo7sAXQKbYtxU4HtO8k3pko token_duration 768h token_renewable true token_policies [\"default\" \"rook\"] identity_policies [] policies [\"default\" \"rook\"] In the above example, Vault's secret backend path name is rook . It must be enabled with the following: 1 vault secrets enable -path=rook kv If a different path is used, the VAULT_BACKEND_PATH key in connectionDetails must be changed.","title":"General Vault configuration"},{"location":"Storage-Configuration/Advanced/key-management-system/#tls-configuration","text":"This is an advanced but recommended configuration for production deployments, in this case the vault-connection-details will look like: 1 2 3 4 5 6 7 8 9 10 11 security : kms : # name of the k8s config map containing all the kms connection details connectionDetails : KMS_PROVIDER : vault VAULT_ADDR : https://vault.default.svc.cluster.local:8200 VAULT_CACERT :  VAULT_CLIENT_CERT :  VAULT_CLIENT_KEY :  # name of the k8s secret containing the kms authentication token tokenSecretName : rook-vault-token Each secret keys are expected to be: VAULT_CACERT: cert VAULT_CLIENT_CERT: cert VAULT_CLIENT_KEY: key For instance VAULT_CACERT Secret named vault-tls-ca-certificate will look like: 1 2 3 4 5 6 7 apiVersion : v1 kind : Secret metadata : name : vault-tls-ca-certificate namespace : rook-ceph data : cert :  Note: if you are using self-signed certificates (not known/approved by a proper CA) you must pass VAULT_SKIP_VERIFY: true . Communications will remain encrypted but the validity of the certificate will not be verified.","title":"TLS configuration"},{"location":"Storage-Configuration/Advanced/key-management-system/#ibm-key-protect","text":"Rook supports storing OSD encryption keys in IBM Key Protect . The current implementation stores OSD encryption keys as Standard Keys using the Bring Your Own Key (BYOK) method. This means that the Key Protect instance policy must have Standard Imported Key enabled.","title":"IBM Key Protect"},{"location":"Storage-Configuration/Advanced/key-management-system/#configuration","text":"First, you need to provision the Key Protect service on the IBM Cloud. Once completed, retrieve the instance ID . Make a record of it; we need it in the CRD. On the IBM Cloud, the user must create a Service ID, then assign an Access Policy to this service. Ultimately, a Service API Key needs to be generated. All the steps are summarized in the official documentation . The Service ID must be granted access to the Key Protect Service. Once the Service API Key is generated, store it in a Kubernetes Secret. 
1 2 3 4 5 6 7 apiVersion : v1 kind : Secret metadata : name : ibm-kp-svc-api-key namespace : rook-ceph data : IBM_KP_SERVICE_API_KEY :  In order for Rook to connect to IBM Key Protect, you must configure the following in your CephCluster template: 1 2 3 4 5 6 7 8 security : kms : # name of the k8s config map containing all the kms connection details connectionDetails : KMS_PROVIDER : ibmkeyprotect IBM_KP_SERVICE_INSTANCE_ID :  # name of the k8s secret containing the service API Key tokenSecretName : ibm-kp-svc-api-key More options are supported such as: IBM_BASE_URL : the base URL of the Key Protect instance, depending on your region . Defaults to https://us-south.kms.cloud.ibm.com . IBM_TOKEN_URL : the URL of the Key Protect instance to retrieve the token. Defaults to https://iam.cloud.ibm.com/oidc/token . Only needed for private instances.","title":"Configuration"},{"location":"Storage-Configuration/Advanced/key-management-system/#key-management-interoperability-protocol","text":"Rook supports storing OSD encryption keys in Key Management Interoperability Protocol (KMIP) supported KMS. The current implementation stores OSD encryption keys using the Register operation. Key is fetched and deleted using Get and Destroy operations respectively.","title":"Key Management Interoperability Protocol"},{"location":"Storage-Configuration/Advanced/key-management-system/#configuration_1","text":"The Secret with credentials for the KMIP KMS is expected to contain the following. 1 2 3 4 5 6 7 8 9 apiVersion : v1 kind : Secret metadata : name : kmip-credentials namespace : rook-ceph stringData : CA_CERT :  CLIENT_CERT :  CLIENT_KEY :  In order for Rook to connect to KMIP, you must configure the following in your CephCluster template: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 security : kms : # name of the k8s config map containing all the kms connection details connectionDetails : KMS_PROVIDER : kmip KMIP_ENDPOINT :  # (optional) The endpoint server name. Useful when the KMIP endpoint does not have a DNS entry. TLS_SERVER_NAME :  # (optional) Network read timeout, in seconds. The default value is 10. READ_TIMEOUT :  # (optional) Network write timeout, in seconds. The default value is 10. WRITE_TIMEOUT :  # name of the k8s secret containing the credentials. tokenSecretName : kmip-credentials","title":"Configuration"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/","text":"Block storage allows a single pod to mount storage. This guide shows how to create a simple, multi-tier web application on Kubernetes using persistent volumes enabled by Rook. Prerequisites \u00b6 This guide assumes a Rook cluster as explained in the Quickstart . Provision Storage \u00b6 Before Rook can provision storage, a StorageClass and CephBlockPool CR need to be created. This will allow Kubernetes to interoperate with Rook when provisioning persistent volumes. Note This sample requires at least 1 OSD per node , with each OSD located on 3 different nodes . Each OSD must be located on a different node, because the failureDomain is set to host and the replicated.size is set to 3 . 
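Before creating the pool, it can be worth confirming that OSDs really are spread across three hosts; a quick sketch using commands from this documentation set (the rook-ceph namespace and toolbox are assumed):
# From the toolbox, verify OSDs appear under at least three different host buckets
ceph osd tree
# Or check which nodes the OSD pods landed on
kubectl -n rook-ceph get pods -l app=rook-ceph-osd -o wide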
Save this StorageClass definition as storageclass.yaml : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : failureDomain : host replicated : size : 3 --- apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-ceph-block # Change \"rook-ceph\" provisioner prefix to match the operator namespace if needed provisioner : rook-ceph.rbd.csi.ceph.com parameters : # clusterID is the namespace where the rook cluster is running clusterID : rook-ceph # Ceph pool into which the RBD image shall be created pool : replicapool # (optional) mapOptions is a comma-separated list of map options. # For krbd options refer # https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options # For nbd options refer # https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options # mapOptions: lock_on_read,queue_depth=1024 # (optional) unmapOptions is a comma-separated list of unmap options. # For krbd options refer # https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options # For nbd options refer # https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options # unmapOptions: force # RBD image format. Defaults to \"2\". imageFormat : \"2\" # RBD image features # Available for imageFormat: \"2\". Older releases of CSI RBD # support only the `layering` feature. The Linux kernel (KRBD) supports the # full complement of features as of 5.4 # `layering` alone corresponds to Ceph's bitfield value of \"2\" ; # `layering` + `fast-diff` + `object-map` + `deep-flatten` + `exclusive-lock` together # correspond to Ceph's OR'd bitfield value of \"63\". Here we use # a symbolic, comma-separated format: # For 5.4 or later kernels: #imageFeatures: layering,fast-diff,object-map,deep-flatten,exclusive-lock # For 5.3 or earlier kernels: imageFeatures : layering # The secrets contain Ceph admin credentials. csi.storage.k8s.io/provisioner-secret-name : rook-csi-rbd-provisioner csi.storage.k8s.io/provisioner-secret-namespace : rook-ceph csi.storage.k8s.io/controller-expand-secret-name : rook-csi-rbd-provisioner csi.storage.k8s.io/controller-expand-secret-namespace : rook-ceph csi.storage.k8s.io/node-stage-secret-name : rook-csi-rbd-node csi.storage.k8s.io/node-stage-secret-namespace : rook-ceph # Specify the filesystem type of the volume. If not specified, csi-provisioner # will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock # in hyperconverged settings where the volume is mounted on the same node as the osds. csi.storage.k8s.io/fstype : ext4 # Delete the rbd volume when a PVC is deleted reclaimPolicy : Delete # Optional, if you want to add dynamic resize for PVC. # For now only ext3, ext4, xfs resize support provided, like in Kubernetes itself. allowVolumeExpansion : true If you've deployed the Rook operator in a namespace other than \"rook-ceph\", change the prefix in the provisioner to match the namespace you used. For example, if the Rook operator is running in the namespace \"my-namespace\" the provisioner value should be \"my-namespace.rbd.csi.ceph.com\". Create the storage class. 
1 kubectl create -f deploy/examples/csi/rbd/storageclass.yaml Note As specified by Kubernetes , when using the Retain reclaim policy, any Ceph RBD image that is backed by a PersistentVolume will continue to exist even after the PersistentVolume has been deleted. These Ceph RBD images will need to be cleaned up manually using rbd rm . Consume the storage: Wordpress sample \u00b6 We create a sample app to consume the block storage provisioned by Rook with the classic wordpress and mysql apps. Both of these apps will make use of block volumes provisioned by Rook. Start mysql and wordpress from the deploy/examples folder: 1 2 kubectl create -f mysql.yaml kubectl create -f wordpress.yaml Both of these apps create a block volume and mount it to their respective pod. You can see the Kubernetes volume claims by running the following: 1 kubectl get pvc Example Output: kubectl get pvc 1 2 3 NAME STATUS VOLUME CAPACITY ACCESSMODES AGE mysql-pv-claim Bound pvc-95402dbc-efc0-11e6-bc9a-0cc47a3459ee 20Gi RWO 1m wp-pv-claim Bound pvc-39e43169-efc1-11e6-bc9a-0cc47a3459ee 20Gi RWO 1m Once the wordpress and mysql pods are in the Running state, get the cluster IP of the wordpress app and enter it in your browser: 1 kubectl get svc wordpress Example Output: kubectl get svc wordpress 1 2 NAME CLUSTER-IP EXTERNAL-IP PORT(S) AGE wordpress 10.3.0.155  80:30841/TCP 2m You should see the wordpress app running. If you are using Minikube, the Wordpress URL can be retrieved with this one-line command: 1 echo http://$(minikube ip):$(kubectl get service wordpress -o jsonpath='{.spec.ports[0].nodePort}') Note When running in a vagrant environment, there will be no external IP address to reach wordpress with. You will only be able to reach wordpress via the CLUSTER-IP from inside the Kubernetes cluster. Consume the storage: Toolbox \u00b6 With the pool that was created above, we can also create a block image and mount it directly in a pod. See the Direct Block Tools topic for more details. Teardown \u00b6 To clean up all the artifacts created by the block demo: 1 2 3 4 kubectl delete -f wordpress.yaml kubectl delete -f mysql.yaml kubectl delete -n rook-ceph cephblockpools.ceph.rook.io replicapool kubectl delete storageclass rook-ceph-block Advanced Example: Erasure Coded Block Storage \u00b6 If you want to use erasure coded pool with RBD, your OSDs must use bluestore as their storeType . Additionally the nodes that are going to mount the erasure coded RBD block storage must have Linux kernel >= 4.11 . Attention This example requires at least 3 bluestore OSDs , with each OSD located on a different node . The OSDs must be located on different nodes, because the failureDomain is set to host and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). To be able to use an erasure coded pool you need to create two pools (as seen below in the definitions): one erasure coded and one replicated. Attention This example requires at least 3 bluestore OSDs , with each OSD located on a different node . The OSDs must be located on different nodes, because the failureDomain is set to host and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). Erasure Coded CSI Driver \u00b6 The erasure coded pool must be set as the dataPool parameter in storageclass-ec.yaml It is used for the data of the RBD images. 
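A minimal sketch of such a configuration is shown below. The pool names, chunk counts, and StorageClass name are illustrative and should match what you actually define in storageclass-ec.yaml: the replicated pool holds the image metadata, while the erasure coded pool referenced by dataPool holds the image data.

```yaml
apiVersion: ceph.rook.io/v1
kind: CephBlockPool
metadata:
  name: replicated-metadata-pool
  namespace: rook-ceph
spec:
  failureDomain: host
  replicated:
    size: 3
---
apiVersion: ceph.rook.io/v1
kind: CephBlockPool
metadata:
  name: ec-data-pool
  namespace: rook-ceph
spec:
  failureDomain: host
  erasureCoded:
    dataChunks: 2
    codingChunks: 1
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: rook-ceph-block-ec
provisioner: rook-ceph.rbd.csi.ceph.com
parameters:
  clusterID: rook-ceph
  # replicated pool for RBD image metadata
  pool: replicated-metadata-pool
  # erasure coded pool for the data of the RBD images
  dataPool: ec-data-pool
  imageFormat: "2"
  imageFeatures: layering
  csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
  csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph
  csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
  csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph
  csi.storage.k8s.io/fstype: ext4
reclaimPolicy: Delete
allowVolumeExpansion: true
```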
Node Loss \u00b6 If a node goes down where a pod is running where a RBD RWO volume is mounted, the volume cannot automatically be mounted on another node. The node must be guaranteed to be offline before the volume can be mounted on another node. Note These instructions are for clusters with Kubernetes version 1.26 or greater. For K8s 1.25 or older, see the manual steps in the CSI troubleshooting guide to recover from the node loss. Configure CSI-Addons \u00b6 Deploy the csi-addons manifests: 1 2 3 kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/crds.yaml kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/rbac.yaml kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/setup-controller.yaml Enable the csi-addons sidecar in the Rook operator configuration. 1 kubectl patch cm rook-ceph-operator-config -n -p $'data:\\n \"CSI_ENABLE_CSIADDONS\": \"true\"' Handling Node Loss \u00b6 When a node is confirmed to be down, add the following taints to the node: 1 2 kubectl taint nodes  node.kubernetes.io/out-of-service=nodeshutdown:NoExecute kubectl taint nodes  node.kubernetes.io/out-of-service=nodeshutdown:NoSchedule After the taint is added to the node, Rook will automatically blocklist the node to prevent connections to Ceph from the RBD volume on that node. To verify a node is blocklisted: 1 2 3 kubectl get networkfences.csiaddons.openshift.io NAME DRIVER CIDRS FENCESTATE AGE RESULT minikube-m02 rook-ceph.rbd.csi.ceph.com [\"192.168.39.187:0/32\"] Fenced 20s Succeeded The node is blocklisted if the state is Fenced and the result is Succeeded as seen above. Node Recovery \u00b6 If the node comes back online, the network fence can be removed from the node by removing the node taints: 1 2 kubectl taint nodes  node.kubernetes.io/out-of-service=nodeshutdown:NoExecute- kubectl taint nodes  node.kubernetes.io/out-of-service=nodeshutdown:NoSchedule-","title":"Block Storage Overview"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#prerequisites","text":"This guide assumes a Rook cluster as explained in the Quickstart .","title":"Prerequisites"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#provision-storage","text":"Before Rook can provision storage, a StorageClass and CephBlockPool CR need to be created. This will allow Kubernetes to interoperate with Rook when provisioning persistent volumes. Note This sample requires at least 1 OSD per node , with each OSD located on 3 different nodes . Each OSD must be located on a different node, because the failureDomain is set to host and the replicated.size is set to 3 . 
Save this StorageClass definition as storageclass.yaml : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : replicapool namespace : rook-ceph spec : failureDomain : host replicated : size : 3 --- apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-ceph-block # Change \"rook-ceph\" provisioner prefix to match the operator namespace if needed provisioner : rook-ceph.rbd.csi.ceph.com parameters : # clusterID is the namespace where the rook cluster is running clusterID : rook-ceph # Ceph pool into which the RBD image shall be created pool : replicapool # (optional) mapOptions is a comma-separated list of map options. # For krbd options refer # https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options # For nbd options refer # https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options # mapOptions: lock_on_read,queue_depth=1024 # (optional) unmapOptions is a comma-separated list of unmap options. # For krbd options refer # https://docs.ceph.com/docs/master/man/8/rbd/#kernel-rbd-krbd-options # For nbd options refer # https://docs.ceph.com/docs/master/man/8/rbd-nbd/#options # unmapOptions: force # RBD image format. Defaults to \"2\". imageFormat : \"2\" # RBD image features # Available for imageFormat: \"2\". Older releases of CSI RBD # support only the `layering` feature. The Linux kernel (KRBD) supports the # full complement of features as of 5.4 # `layering` alone corresponds to Ceph's bitfield value of \"2\" ; # `layering` + `fast-diff` + `object-map` + `deep-flatten` + `exclusive-lock` together # correspond to Ceph's OR'd bitfield value of \"63\". Here we use # a symbolic, comma-separated format: # For 5.4 or later kernels: #imageFeatures: layering,fast-diff,object-map,deep-flatten,exclusive-lock # For 5.3 or earlier kernels: imageFeatures : layering # The secrets contain Ceph admin credentials. csi.storage.k8s.io/provisioner-secret-name : rook-csi-rbd-provisioner csi.storage.k8s.io/provisioner-secret-namespace : rook-ceph csi.storage.k8s.io/controller-expand-secret-name : rook-csi-rbd-provisioner csi.storage.k8s.io/controller-expand-secret-namespace : rook-ceph csi.storage.k8s.io/node-stage-secret-name : rook-csi-rbd-node csi.storage.k8s.io/node-stage-secret-namespace : rook-ceph # Specify the filesystem type of the volume. If not specified, csi-provisioner # will set default as `ext4`. Note that `xfs` is not recommended due to potential deadlock # in hyperconverged settings where the volume is mounted on the same node as the osds. csi.storage.k8s.io/fstype : ext4 # Delete the rbd volume when a PVC is deleted reclaimPolicy : Delete # Optional, if you want to add dynamic resize for PVC. # For now only ext3, ext4, xfs resize support provided, like in Kubernetes itself. allowVolumeExpansion : true If you've deployed the Rook operator in a namespace other than \"rook-ceph\", change the prefix in the provisioner to match the namespace you used. For example, if the Rook operator is running in the namespace \"my-namespace\" the provisioner value should be \"my-namespace.rbd.csi.ceph.com\". Create the storage class. 
1 kubectl create -f deploy/examples/csi/rbd/storageclass.yaml Note As specified by Kubernetes , when using the Retain reclaim policy, any Ceph RBD image that is backed by a PersistentVolume will continue to exist even after the PersistentVolume has been deleted. These Ceph RBD images will need to be cleaned up manually using rbd rm .","title":"Provision Storage"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#consume-the-storage-wordpress-sample","text":"We create a sample app to consume the block storage provisioned by Rook with the classic wordpress and mysql apps. Both of these apps will make use of block volumes provisioned by Rook. Start mysql and wordpress from the deploy/examples folder: 1 2 kubectl create -f mysql.yaml kubectl create -f wordpress.yaml Both of these apps create a block volume and mount it to their respective pod. You can see the Kubernetes volume claims by running the following: 1 kubectl get pvc Example Output: kubectl get pvc 1 2 3 NAME STATUS VOLUME CAPACITY ACCESSMODES AGE mysql-pv-claim Bound pvc-95402dbc-efc0-11e6-bc9a-0cc47a3459ee 20Gi RWO 1m wp-pv-claim Bound pvc-39e43169-efc1-11e6-bc9a-0cc47a3459ee 20Gi RWO 1m Once the wordpress and mysql pods are in the Running state, get the cluster IP of the wordpress app and enter it in your browser: 1 kubectl get svc wordpress Example Output: kubectl get svc wordpress 1 2 NAME CLUSTER-IP EXTERNAL-IP PORT(S) AGE wordpress 10.3.0.155  80:30841/TCP 2m You should see the wordpress app running. If you are using Minikube, the Wordpress URL can be retrieved with this one-line command: 1 echo http://$(minikube ip):$(kubectl get service wordpress -o jsonpath='{.spec.ports[0].nodePort}') Note When running in a vagrant environment, there will be no external IP address to reach wordpress with. You will only be able to reach wordpress via the CLUSTER-IP from inside the Kubernetes cluster.","title":"Consume the storage: Wordpress sample"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#consume-the-storage-toolbox","text":"With the pool that was created above, we can also create a block image and mount it directly in a pod. See the Direct Block Tools topic for more details.","title":"Consume the storage: Toolbox"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#teardown","text":"To clean up all the artifacts created by the block demo: 1 2 3 4 kubectl delete -f wordpress.yaml kubectl delete -f mysql.yaml kubectl delete -n rook-ceph cephblockpools.ceph.rook.io replicapool kubectl delete storageclass rook-ceph-block","title":"Teardown"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#advanced-example-erasure-coded-block-storage","text":"If you want to use erasure coded pool with RBD, your OSDs must use bluestore as their storeType . Additionally the nodes that are going to mount the erasure coded RBD block storage must have Linux kernel >= 4.11 . Attention This example requires at least 3 bluestore OSDs , with each OSD located on a different node . The OSDs must be located on different nodes, because the failureDomain is set to host and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). To be able to use an erasure coded pool you need to create two pools (as seen below in the definitions): one erasure coded and one replicated. Attention This example requires at least 3 bluestore OSDs , with each OSD located on a different node . 
The OSDs must be located on different nodes, because the failureDomain is set to host and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ).","title":"Advanced Example: Erasure Coded Block Storage"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#erasure-coded-csi-driver","text":"The erasure coded pool must be set as the dataPool parameter in storageclass-ec.yaml It is used for the data of the RBD images.","title":"Erasure Coded CSI Driver"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#node-loss","text":"If a node goes down where a pod is running where a RBD RWO volume is mounted, the volume cannot automatically be mounted on another node. The node must be guaranteed to be offline before the volume can be mounted on another node. Note These instructions are for clusters with Kubernetes version 1.26 or greater. For K8s 1.25 or older, see the manual steps in the CSI troubleshooting guide to recover from the node loss.","title":"Node Loss"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#configure-csi-addons","text":"Deploy the csi-addons manifests: 1 2 3 kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/crds.yaml kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/rbac.yaml kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/setup-controller.yaml Enable the csi-addons sidecar in the Rook operator configuration. 1 kubectl patch cm rook-ceph-operator-config -n -p $'data:\\n \"CSI_ENABLE_CSIADDONS\": \"true\"'","title":"Configure CSI-Addons"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#handling-node-loss","text":"When a node is confirmed to be down, add the following taints to the node: 1 2 kubectl taint nodes  node.kubernetes.io/out-of-service=nodeshutdown:NoExecute kubectl taint nodes  node.kubernetes.io/out-of-service=nodeshutdown:NoSchedule After the taint is added to the node, Rook will automatically blocklist the node to prevent connections to Ceph from the RBD volume on that node. To verify a node is blocklisted: 1 2 3 kubectl get networkfences.csiaddons.openshift.io NAME DRIVER CIDRS FENCESTATE AGE RESULT minikube-m02 rook-ceph.rbd.csi.ceph.com [\"192.168.39.187:0/32\"] Fenced 20s Succeeded The node is blocklisted if the state is Fenced and the result is Succeeded as seen above.","title":"Handling Node Loss"},{"location":"Storage-Configuration/Block-Storage-RBD/block-storage/#node-recovery","text":"If the node comes back online, the network fence can be removed from the node by removing the node taints: 1 2 kubectl taint nodes  node.kubernetes.io/out-of-service=nodeshutdown:NoExecute- kubectl taint nodes  node.kubernetes.io/out-of-service=nodeshutdown:NoSchedule-","title":"Node Recovery"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-async-disaster-recovery-failover-failback/","text":"Planned Migration and Disaster Recovery \u00b6 Rook comes with the volume replication support, which allows users to perform disaster recovery and planned migration of clusters. The following document will help to track the procedure for failover and failback in case of a Disaster recovery or Planned migration use cases. Note The document assumes that RBD Mirroring is set up between the peer clusters. 
For information on rbd mirroring and how to set it up using rook, please refer to the rbd-mirroring guide . Planned Migration \u00b6 Info Use cases: Datacenter maintenance, technology refresh, disaster avoidance, etc. Relocation \u00b6 The Relocation operation is the process of switching production to a backup facility(normally your recovery site) or vice versa. For relocation, access to the image on the primary site should be stopped. The image should now be made primary on the secondary cluster so that the access can be resumed there. Note Periodic or one-time backup of the application should be available for restore on the secondary site (cluster-2). Follow the below steps for planned migration of workload from the primary cluster to the secondary cluster: Scale down all the application pods which are using the mirrored PVC on the Primary Cluster. Take a backup of PVC and PV object from the primary cluster. This can be done using some backup tools like velero . Update VolumeReplication CR to set replicationState to secondary at the Primary Site. When the operator sees this change, it will pass the information down to the driver via GRPC request to mark the dataSource as secondary . If you are manually recreating the PVC and PV on the secondary cluster, remove the claimRef section in the PV objects. (See this for details) Recreate the storageclass, PVC, and PV objects on the secondary site. As you are creating the static binding between PVC and PV, a new PV won\u2019t be created here, the PVC will get bind to the existing PV. Create the VolumeReplicationClass on the secondary site. Create VolumeReplications for all the PVC\u2019s for which mirroring is enabled replicationState should be primary for all the PVC\u2019s on the secondary site. Check VolumeReplication CR status to verify if the image is marked primary on the secondary site. Once the Image is marked as primary , the PVC is now ready to be used. Now, we can scale up the applications to use the PVC. Warning In Async Disaster recovery use case, we don't get the complete data. We will only get the crash-consistent data based on the snapshot interval time. Disaster Recovery \u00b6 Info Use cases: Natural disasters, Power failures, System failures, and crashes, etc. Note To effectively resume operations after a failover/relocation, backup of the kubernetes artifacts like deployment, PVC, PV, etc need to be created beforehand by the admin; so that the application can be restored on the peer cluster. For more information, see backup and restore . Failover (abrupt shutdown) \u00b6 In case of Disaster recovery, create VolumeReplication CR at the Secondary Site. Since the connection to the Primary Site is lost, the operator automatically sends a GRPC request down to the driver to forcefully mark the dataSource as primary on the Secondary Site. If you are manually creating the PVC and PV on the secondary cluster, remove the claimRef section in the PV objects. (See this for details) Create the storageclass, PVC, and PV objects on the secondary site. As you are creating the static binding between PVC and PV, a new PV won\u2019t be created here, the PVC will get bind to the existing PV. Create the VolumeReplicationClass and VolumeReplication CR on the secondary site. Check VolumeReplication CR status to verify if the image is marked primary on the secondary site. Once the Image is marked as primary , the PVC is now ready to be used. Now, we can scale up the applications to use the PVC. 
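As a quick check, the promotion can be confirmed directly from the CR status (a sketch, assuming the VolumeReplication CR is named pvc-volumereplication and lives in the default namespace):

```console
[cluster-2]$ kubectl get volumereplication pvc-volumereplication -n default -o jsonpath='{.status.state}'
Primary
```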
Failback (post-disaster recovery) \u00b6 Once the failed cluster is recovered on the primary site and you want to failback from secondary site, follow the below steps: Scale down the running applications (if any) on the primary site. Ensure that all persistent volumes in use by the workload are no longer in use on the primary cluster. Update VolumeReplication CR replicationState from primary to secondary on the primary site. Scale down the applications on the secondary site. Update VolumeReplication CR replicationState state from primary to secondary in secondary site. On the primary site, verify the VolumeReplication status is marked as volume ready to use. Once the volume is marked to ready to use, change the replicationState state from secondary to primary in primary site. Scale up the applications again on the primary site.","title":"RBD Asynchronous DR Failover and Failback"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-async-disaster-recovery-failover-failback/#planned-migration-and-disaster-recovery","text":"Rook comes with the volume replication support, which allows users to perform disaster recovery and planned migration of clusters. The following document will help to track the procedure for failover and failback in case of a Disaster recovery or Planned migration use cases. Note The document assumes that RBD Mirroring is set up between the peer clusters. For information on rbd mirroring and how to set it up using rook, please refer to the rbd-mirroring guide .","title":"Planned Migration and Disaster Recovery"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-async-disaster-recovery-failover-failback/#planned-migration","text":"Info Use cases: Datacenter maintenance, technology refresh, disaster avoidance, etc.","title":"Planned Migration"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-async-disaster-recovery-failover-failback/#relocation","text":"The Relocation operation is the process of switching production to a backup facility(normally your recovery site) or vice versa. For relocation, access to the image on the primary site should be stopped. The image should now be made primary on the secondary cluster so that the access can be resumed there. Note Periodic or one-time backup of the application should be available for restore on the secondary site (cluster-2). Follow the below steps for planned migration of workload from the primary cluster to the secondary cluster: Scale down all the application pods which are using the mirrored PVC on the Primary Cluster. Take a backup of PVC and PV object from the primary cluster. This can be done using some backup tools like velero . Update VolumeReplication CR to set replicationState to secondary at the Primary Site. When the operator sees this change, it will pass the information down to the driver via GRPC request to mark the dataSource as secondary . If you are manually recreating the PVC and PV on the secondary cluster, remove the claimRef section in the PV objects. (See this for details) Recreate the storageclass, PVC, and PV objects on the secondary site. As you are creating the static binding between PVC and PV, a new PV won\u2019t be created here, the PVC will get bind to the existing PV. Create the VolumeReplicationClass on the secondary site. Create VolumeReplications for all the PVC\u2019s for which mirroring is enabled replicationState should be primary for all the PVC\u2019s on the secondary site. 
Check VolumeReplication CR status to verify if the image is marked primary on the secondary site. Once the Image is marked as primary , the PVC is now ready to be used. Now, we can scale up the applications to use the PVC. Warning In Async Disaster recovery use case, we don't get the complete data. We will only get the crash-consistent data based on the snapshot interval time.","title":"Relocation"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-async-disaster-recovery-failover-failback/#disaster-recovery","text":"Info Use cases: Natural disasters, Power failures, System failures, and crashes, etc. Note To effectively resume operations after a failover/relocation, backup of the kubernetes artifacts like deployment, PVC, PV, etc need to be created beforehand by the admin; so that the application can be restored on the peer cluster. For more information, see backup and restore .","title":"Disaster Recovery"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-async-disaster-recovery-failover-failback/#failover-abrupt-shutdown","text":"In case of Disaster recovery, create VolumeReplication CR at the Secondary Site. Since the connection to the Primary Site is lost, the operator automatically sends a GRPC request down to the driver to forcefully mark the dataSource as primary on the Secondary Site. If you are manually creating the PVC and PV on the secondary cluster, remove the claimRef section in the PV objects. (See this for details) Create the storageclass, PVC, and PV objects on the secondary site. As you are creating the static binding between PVC and PV, a new PV won\u2019t be created here, the PVC will get bind to the existing PV. Create the VolumeReplicationClass and VolumeReplication CR on the secondary site. Check VolumeReplication CR status to verify if the image is marked primary on the secondary site. Once the Image is marked as primary , the PVC is now ready to be used. Now, we can scale up the applications to use the PVC.","title":"Failover (abrupt shutdown)"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-async-disaster-recovery-failover-failback/#failback-post-disaster-recovery","text":"Once the failed cluster is recovered on the primary site and you want to failback from secondary site, follow the below steps: Scale down the running applications (if any) on the primary site. Ensure that all persistent volumes in use by the workload are no longer in use on the primary cluster. Update VolumeReplication CR replicationState from primary to secondary on the primary site. Scale down the applications on the secondary site. Update VolumeReplication CR replicationState state from primary to secondary in secondary site. On the primary site, verify the VolumeReplication status is marked as volume ready to use. Once the volume is marked to ready to use, change the replicationState state from secondary to primary in primary site. Scale up the applications again on the primary site.","title":"Failback (post-disaster recovery)"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/","text":"Disaster Recovery \u00b6 Disaster recovery (DR) is an organization's ability to react to and recover from an incident that negatively affects business operations. This plan comprises strategies for minimizing the consequences of a disaster, so an organization can continue to operate \u2013 or quickly resume the key operations. Thus, disaster recovery is one of the aspects of business continuity . One of the solutions, to achieve the same, is RBD mirroring . 
RBD Mirroring \u00b6 RBD mirroring is an asynchronous replication of RBD images between multiple Ceph clusters. This capability is available in two modes: Journal-based: Every write to the RBD image is first recorded to the associated journal before modifying the actual image. The remote cluster will read from this associated journal and replay the updates to its local image. Snapshot-based: This mode uses periodically scheduled or manually created RBD image mirror-snapshots to replicate crash-consistent RBD images between clusters. Note This document sheds light on rbd mirroring and how to set it up using rook. See also the topic on Failover and Failback Create RBD Pools \u00b6 In this section, we create specific RBD pools that are RBD mirroring enabled for use with the DR use case. Execute the following steps on each peer cluster to create mirror enabled pools: Create a RBD pool that is enabled for mirroring by adding the section spec.mirroring in the CephBlockPool CR: 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : mirrored-pool namespace : rook-ceph spec : replicated : size : 1 mirroring : enabled : true mode : image 1 kubectl create -f pool-mirrored.yaml Repeat the steps on the peer cluster. Note Pool name across the cluster peers must be the same for RBD replication to function. See the CephBlockPool documentation for more details. Note It is also feasible to edit existing pools and enable them for replication. Bootstrap Peers \u00b6 In order for the rbd-mirror daemon to discover its peer cluster, the peer must be registered and a user account must be created. The following steps enable bootstrapping peers to discover and authenticate to each other: For Bootstrapping a peer cluster its bootstrap secret is required. To determine the name of the secret that contains the bootstrap secret execute the following command on the remote cluster (cluster-2) 1 [cluster-2]$ kubectl get cephblockpool.ceph.rook.io/mirrored-pool -n rook-ceph -ojsonpath = '{.status.info.rbdMirrorBootstrapPeerSecretName}' Here, pool-peer-token-mirrored-pool is the desired bootstrap secret name. The secret pool-peer-token-mirrored-pool contains all the information related to the token and needs to be injected to the peer, to fetch the decoded secret: 1 2 [cluster-2]$ kubectl get secret -n rook-ceph pool-peer-token-mirrored-pool -o jsonpath = '{.data.token}' | base64 -d eyJmc2lkIjoiNGQ1YmNiNDAtNDY3YS00OWVkLThjMGEtOWVhOGJkNDY2OTE3IiwiY2xpZW50X2lkIjoicmJkLW1pcnJvci1wZWVyIiwia2V5IjoiQVFDZ3hmZGdxN013R0JBQWZzcUtCaGpZVjJUZDRxVzJYQm5kemc9PSIsIm1vbl9ob3N0IjoiW3YyOjE5Mi4xNjguMzkuMzY6MzMwMCx2MToxOTIuMTY4LjM5LjM2OjY3ODldIn0= With this Decoded value, create a secret on the primary site (cluster-1): 1 [cluster-1]$ kubectl -n rook-ceph create secret generic rbd-primary-site-secret --from-literal = token = eyJmc2lkIjoiNGQ1YmNiNDAtNDY3YS00OWVkLThjMGEtOWVhOGJkNDY2OTE3IiwiY2xpZW50X2lkIjoicmJkLW1pcnJvci1wZWVyIiwia2V5IjoiQVFDZ3hmZGdxN013R0JBQWZzcUtCaGpZVjJUZDRxVzJYQm5kemc9PSIsIm1vbl9ob3N0IjoiW3YyOjE5Mi4xNjguMzkuMzY6MzMwMCx2MToxOTIuMTY4LjM5LjM2OjY3ODldIn0 = --from-literal = pool = mirrored-pool This completes the bootstrap process for cluster-1 to be peered with cluster-2. Repeat the process switching cluster-2 in place of cluster-1, to complete the bootstrap process across both peer clusters. For more details, refer to the official rbd mirror documentation on how to create a bootstrap peer . Configure the RBDMirror Daemon \u00b6 Replication is handled by the rbd-mirror daemon. 
The rbd-mirror daemon is responsible for pulling image updates from the remote, peer cluster, and applying them to image within the local cluster. Creation of the rbd-mirror daemon(s) is done through the custom resource definitions (CRDs), as follows: Create mirror.yaml, to deploy the rbd-mirror daemon 1 2 3 4 5 6 7 8 apiVersion : ceph.rook.io/v1 kind : CephRBDMirror metadata : name : my-rbd-mirror namespace : rook-ceph spec : # the number of rbd-mirror daemons to deploy count : 1 Create the RBD mirror daemon 1 [cluster-1]$ kubectl create -f mirror.yaml -n rook-ceph Validate if rbd-mirror daemon pod is now up 1 2 [cluster-1]$ kubectl get pods -n rook-ceph rook-ceph-rbd-mirror-a-6985b47c8c-dpv4k 1/1 Running 0 10s Verify that daemon health is OK 1 2 kubectl get cephblockpools.ceph.rook.io mirrored-pool -n rook-ceph -o jsonpath='{.status.mirroringStatus.summary}' {\"daemon_health\":\"OK\",\"health\":\"OK\",\"image_health\":\"OK\",\"states\":{\"replaying\":1}} Repeat the above steps on the peer cluster. See the CephRBDMirror CRD for more details on the mirroring settings. Add mirroring peer information to RBD pools \u00b6 Each pool can have its own peer. To add the peer information, patch the already created mirroring enabled pool to update the CephBlockPool CRD. 1 [cluster-1]$ kubectl -n rook-ceph patch cephblockpool mirrored-pool --type merge -p '{\"spec\":{\"mirroring\":{\"peers\": {\"secretNames\": [\"rbd-primary-site-secret\"]}}}}' Create VolumeReplication CRDs \u00b6 Volume Replication Operator follows controller pattern and provides extended APIs for storage disaster recovery. The extended APIs are provided via Custom Resource Definition(CRD). Create the VolumeReplication CRDs on all the peer clusters. 1 2 kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.5.0/config/crd/bases/replication.storage.openshift.io_volumereplicationclasses.yaml kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.5.0/config/crd/bases/replication.storage.openshift.io_volumereplications.yaml Enable CSI Replication Sidecars \u00b6 To achieve RBD Mirroring, csi-omap-generator and csi-addons containers need to be deployed in the RBD provisioner pods, which are not enabled by default. Omap Generator : Omap generator is a sidecar container that when deployed with the CSI provisioner pod, generates the internal CSI omaps between the PV and the RBD image. This is required as static PVs are transferred across peer clusters in the DR use case, and hence is needed to preserve PVC to storage mappings. Volume Replication Operator : Volume Replication Operator is a kubernetes operator that provides common and reusable APIs for storage disaster recovery. The volume replication operation is supported by the CSIAddons It is based on csi-addons/spec specification and can be used by any storage provider. Execute the following steps on each peer cluster to enable the OMap generator and CSIADDONS sidecars: Edit the rook-ceph-operator-config configmap and add the following configurations 1 kubectl edit cm rook-ceph-operator-config -n rook-ceph Add the following properties if not present: 1 2 3 data : CSI_ENABLE_OMAP_GENERATOR : \"true\" CSI_ENABLE_CSIADDONS : \"true\" After updating the configmap with those settings, two new sidecars should now start automatically in the CSI provisioner pod. Repeat the steps on the peer cluster. 
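If you prefer a non-interactive change, the same two keys can be set with a single patch (a sketch equivalent to the edit above, assuming the operator runs in the rook-ceph namespace):

```console
kubectl patch cm rook-ceph-operator-config -n rook-ceph --type merge \
  -p '{"data":{"CSI_ENABLE_OMAP_GENERATOR":"true","CSI_ENABLE_CSIADDONS":"true"}}'
```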
Volume Replication Custom Resources \u00b6 VolumeReplication CRDs provide support for two custom resources: VolumeReplicationClass : VolumeReplicationClass is a cluster scoped resource that contains driver related configuration parameters. It holds the storage admin information required for the volume replication operator. VolumeReplication : VolumeReplication is a namespaced resource that contains references to storage object to be replicated and VolumeReplicationClass corresponding to the driver providing replication. Enable mirroring on a PVC \u00b6 Below guide assumes that we have a PVC (rbd-pvc) in BOUND state; created using StorageClass with Retain reclaimPolicy. 1 2 3 [cluster-1]$ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE rbd-pvc Bound pvc-65dc0aac-5e15-4474-90f4-7a3532c621ec 1Gi RWO csi-rbd-sc 44s Create a Volume Replication Class CR \u00b6 In this case, we create a Volume Replication Class on cluster-1 1 [cluster-1]$ kubectl apply -f deploy/examples/volume-replication-class.yaml Note The schedulingInterval can be specified in formats of minutes, hours or days using suffix m , h and d respectively. The optional schedulingStartTime can be specified using the ISO 8601 time format. Create a VolumeReplication CR \u00b6 Once VolumeReplicationClass is created, create a Volume Replication for the PVC which we intend to replicate to secondary cluster. 1 [cluster-1]$ kubectl apply -f deploy/examples/volume-replication.yaml Note VolumeReplication is a namespace scoped object. Thus, it should be created in the same namespace as of PVC. Checking Replication Status \u00b6 replicationState is the state of the volume being referenced. Possible values are primary, secondary, and resync. primary denotes that the volume is primary. secondary denotes that the volume is secondary. resync denotes that the volume needs to be resynced. To check VolumeReplication CR status: 1 [cluster-1]$ kubectl get volumereplication pvc-volumereplication -oyaml 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 ... spec : dataSource : apiGroup : \"\" kind : PersistentVolumeClaim name : rbd-pvc replicationState : primary volumeReplicationClass : rbd-volumereplicationclass status : conditions : - lastTransitionTime : \"2021-05-04T07:39:00Z\" message : \"\" observedGeneration : 1 reason : Promoted status : \"True\" type : Completed - lastTransitionTime : \"2021-05-04T07:39:00Z\" message : \"\" observedGeneration : 1 reason : Healthy status : \"False\" type : Degraded - lastTransitionTime : \"2021-05-04T07:39:00Z\" message : \"\" observedGeneration : 1 reason : NotResyncing status : \"False\" type : Resyncing lastCompletionTime : \"2021-05-04T07:39:00Z\" lastStartTime : \"2021-05-04T07:38:59Z\" message : volume is marked primary observedGeneration : 1 state : Primary Backup & Restore \u00b6 Note To effectively resume operations after a failover/relocation, backup of the kubernetes artifacts like deployment, PVC, PV, etc need to be created beforehand by the admin; so that the application can be restored on the peer cluster. Here, we take a backup of PVC and PV object on one site, so that they can be restored later to the peer cluster. 
Take backup on cluster-1 \u00b6 Take backup of the PVC rbd-pvc 1 [cluster-1]$ kubectl get pvc rbd-pvc -oyaml > pvc-backup.yaml Take a backup of the PV, corresponding to the PVC 1 [cluster-1]$ kubectl get pv/pvc-65dc0aac-5e15-4474-90f4-7a3532c621ec -oyaml > pv_backup.yaml Note We can also take backup using external tools like Velero . See velero documentation for more information. Restore the backup on cluster-2 \u00b6 Create storageclass on the secondary cluster 1 [cluster-2]$ kubectl create -f deploy/examples/csi/rbd/storageclass.yaml Create VolumeReplicationClass on the secondary cluster 1 2 [cluster-1]$ kubectl apply -f deploy/examples/volume-replication-class.yaml volumereplicationclass.replication.storage.openshift.io/rbd-volumereplicationclass created If Persistent Volumes and Claims are created manually on the secondary cluster, remove the claimRef on the backed up PV objects in yaml files; so that the PV can get bound to the new claim on the secondary cluster. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 ... spec : accessModes : - ReadWriteOnce capacity : storage : 1Gi claimRef : apiVersion : v1 kind : PersistentVolumeClaim name : rbd-pvc namespace : default resourceVersion : \"64252\" uid : 65dc0aac-5e15-4474-90f4-7a3532c621ec csi : ... Apply the Persistent Volume backup from the primary cluster 1 [cluster-2]$ kubectl create -f pv-backup.yaml Apply the Persistent Volume claim from the restored backup 1 [cluster-2]$ kubectl create -f pvc-backup.yaml 1 2 3 [cluster-2]$ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE rbd-pvc Bound pvc-65dc0aac-5e15-4474-90f4-7a3532c621ec 1Gi RWO rook-ceph-block 44s","title":"RBD Mirroring"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#disaster-recovery","text":"Disaster recovery (DR) is an organization's ability to react to and recover from an incident that negatively affects business operations. This plan comprises strategies for minimizing the consequences of a disaster, so an organization can continue to operate \u2013 or quickly resume the key operations. Thus, disaster recovery is one of the aspects of business continuity . One of the solutions, to achieve the same, is RBD mirroring .","title":"Disaster Recovery"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#rbd-mirroring","text":"RBD mirroring is an asynchronous replication of RBD images between multiple Ceph clusters. This capability is available in two modes: Journal-based: Every write to the RBD image is first recorded to the associated journal before modifying the actual image. The remote cluster will read from this associated journal and replay the updates to its local image. Snapshot-based: This mode uses periodically scheduled or manually created RBD image mirror-snapshots to replicate crash-consistent RBD images between clusters. Note This document sheds light on rbd mirroring and how to set it up using rook. See also the topic on Failover and Failback","title":"RBD Mirroring"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#create-rbd-pools","text":"In this section, we create specific RBD pools that are RBD mirroring enabled for use with the DR use case. 
Execute the following steps on each peer cluster to create mirror enabled pools: Create a RBD pool that is enabled for mirroring by adding the section spec.mirroring in the CephBlockPool CR: 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephBlockPool metadata : name : mirrored-pool namespace : rook-ceph spec : replicated : size : 1 mirroring : enabled : true mode : image 1 kubectl create -f pool-mirrored.yaml Repeat the steps on the peer cluster. Note Pool name across the cluster peers must be the same for RBD replication to function. See the CephBlockPool documentation for more details. Note It is also feasible to edit existing pools and enable them for replication.","title":"Create RBD Pools"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#bootstrap-peers","text":"In order for the rbd-mirror daemon to discover its peer cluster, the peer must be registered and a user account must be created. The following steps enable bootstrapping peers to discover and authenticate to each other: For Bootstrapping a peer cluster its bootstrap secret is required. To determine the name of the secret that contains the bootstrap secret execute the following command on the remote cluster (cluster-2) 1 [cluster-2]$ kubectl get cephblockpool.ceph.rook.io/mirrored-pool -n rook-ceph -ojsonpath = '{.status.info.rbdMirrorBootstrapPeerSecretName}' Here, pool-peer-token-mirrored-pool is the desired bootstrap secret name. The secret pool-peer-token-mirrored-pool contains all the information related to the token and needs to be injected to the peer, to fetch the decoded secret: 1 2 [cluster-2]$ kubectl get secret -n rook-ceph pool-peer-token-mirrored-pool -o jsonpath = '{.data.token}' | base64 -d eyJmc2lkIjoiNGQ1YmNiNDAtNDY3YS00OWVkLThjMGEtOWVhOGJkNDY2OTE3IiwiY2xpZW50X2lkIjoicmJkLW1pcnJvci1wZWVyIiwia2V5IjoiQVFDZ3hmZGdxN013R0JBQWZzcUtCaGpZVjJUZDRxVzJYQm5kemc9PSIsIm1vbl9ob3N0IjoiW3YyOjE5Mi4xNjguMzkuMzY6MzMwMCx2MToxOTIuMTY4LjM5LjM2OjY3ODldIn0= With this Decoded value, create a secret on the primary site (cluster-1): 1 [cluster-1]$ kubectl -n rook-ceph create secret generic rbd-primary-site-secret --from-literal = token = eyJmc2lkIjoiNGQ1YmNiNDAtNDY3YS00OWVkLThjMGEtOWVhOGJkNDY2OTE3IiwiY2xpZW50X2lkIjoicmJkLW1pcnJvci1wZWVyIiwia2V5IjoiQVFDZ3hmZGdxN013R0JBQWZzcUtCaGpZVjJUZDRxVzJYQm5kemc9PSIsIm1vbl9ob3N0IjoiW3YyOjE5Mi4xNjguMzkuMzY6MzMwMCx2MToxOTIuMTY4LjM5LjM2OjY3ODldIn0 = --from-literal = pool = mirrored-pool This completes the bootstrap process for cluster-1 to be peered with cluster-2. Repeat the process switching cluster-2 in place of cluster-1, to complete the bootstrap process across both peer clusters. For more details, refer to the official rbd mirror documentation on how to create a bootstrap peer .","title":"Bootstrap Peers"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#configure-the-rbdmirror-daemon","text":"Replication is handled by the rbd-mirror daemon. The rbd-mirror daemon is responsible for pulling image updates from the remote, peer cluster, and applying them to image within the local cluster. 
Creation of the rbd-mirror daemon(s) is done through the custom resource definitions (CRDs), as follows: Create mirror.yaml, to deploy the rbd-mirror daemon 1 2 3 4 5 6 7 8 apiVersion : ceph.rook.io/v1 kind : CephRBDMirror metadata : name : my-rbd-mirror namespace : rook-ceph spec : # the number of rbd-mirror daemons to deploy count : 1 Create the RBD mirror daemon 1 [cluster-1]$ kubectl create -f mirror.yaml -n rook-ceph Validate if rbd-mirror daemon pod is now up 1 2 [cluster-1]$ kubectl get pods -n rook-ceph rook-ceph-rbd-mirror-a-6985b47c8c-dpv4k 1/1 Running 0 10s Verify that daemon health is OK 1 2 kubectl get cephblockpools.ceph.rook.io mirrored-pool -n rook-ceph -o jsonpath='{.status.mirroringStatus.summary}' {\"daemon_health\":\"OK\",\"health\":\"OK\",\"image_health\":\"OK\",\"states\":{\"replaying\":1}} Repeat the above steps on the peer cluster. See the CephRBDMirror CRD for more details on the mirroring settings.","title":"Configure the RBDMirror Daemon"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#add-mirroring-peer-information-to-rbd-pools","text":"Each pool can have its own peer. To add the peer information, patch the already created mirroring enabled pool to update the CephBlockPool CRD. 1 [cluster-1]$ kubectl -n rook-ceph patch cephblockpool mirrored-pool --type merge -p '{\"spec\":{\"mirroring\":{\"peers\": {\"secretNames\": [\"rbd-primary-site-secret\"]}}}}'","title":"Add mirroring peer information to RBD pools"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#create-volumereplication-crds","text":"Volume Replication Operator follows controller pattern and provides extended APIs for storage disaster recovery. The extended APIs are provided via Custom Resource Definition(CRD). Create the VolumeReplication CRDs on all the peer clusters. 1 2 kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.5.0/config/crd/bases/replication.storage.openshift.io_volumereplicationclasses.yaml kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.5.0/config/crd/bases/replication.storage.openshift.io_volumereplications.yaml","title":"Create VolumeReplication CRDs"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#enable-csi-replication-sidecars","text":"To achieve RBD Mirroring, csi-omap-generator and csi-addons containers need to be deployed in the RBD provisioner pods, which are not enabled by default. Omap Generator : Omap generator is a sidecar container that when deployed with the CSI provisioner pod, generates the internal CSI omaps between the PV and the RBD image. This is required as static PVs are transferred across peer clusters in the DR use case, and hence is needed to preserve PVC to storage mappings. Volume Replication Operator : Volume Replication Operator is a kubernetes operator that provides common and reusable APIs for storage disaster recovery. The volume replication operation is supported by the CSIAddons It is based on csi-addons/spec specification and can be used by any storage provider. 
Execute the following steps on each peer cluster to enable the OMap generator and CSIADDONS sidecars: Edit the rook-ceph-operator-config configmap and add the following configurations 1 kubectl edit cm rook-ceph-operator-config -n rook-ceph Add the following properties if not present: 1 2 3 data : CSI_ENABLE_OMAP_GENERATOR : \"true\" CSI_ENABLE_CSIADDONS : \"true\" After updating the configmap with those settings, two new sidecars should now start automatically in the CSI provisioner pod. Repeat the steps on the peer cluster.","title":"Enable CSI Replication Sidecars"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#volume-replication-custom-resources","text":"VolumeReplication CRDs provide support for two custom resources: VolumeReplicationClass : VolumeReplicationClass is a cluster scoped resource that contains driver related configuration parameters. It holds the storage admin information required for the volume replication operator. VolumeReplication : VolumeReplication is a namespaced resource that contains references to storage object to be replicated and VolumeReplicationClass corresponding to the driver providing replication.","title":"Volume Replication Custom Resources"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#enable-mirroring-on-a-pvc","text":"Below guide assumes that we have a PVC (rbd-pvc) in BOUND state; created using StorageClass with Retain reclaimPolicy. 1 2 3 [cluster-1]$ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE rbd-pvc Bound pvc-65dc0aac-5e15-4474-90f4-7a3532c621ec 1Gi RWO csi-rbd-sc 44s","title":"Enable mirroring on a PVC"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#create-a-volume-replication-class-cr","text":"In this case, we create a Volume Replication Class on cluster-1 1 [cluster-1]$ kubectl apply -f deploy/examples/volume-replication-class.yaml Note The schedulingInterval can be specified in formats of minutes, hours or days using suffix m , h and d respectively. The optional schedulingStartTime can be specified using the ISO 8601 time format.","title":"Create a Volume Replication Class CR"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#create-a-volumereplication-cr","text":"Once VolumeReplicationClass is created, create a Volume Replication for the PVC which we intend to replicate to secondary cluster. 1 [cluster-1]$ kubectl apply -f deploy/examples/volume-replication.yaml Note VolumeReplication is a namespace scoped object. Thus, it should be created in the same namespace as of PVC.","title":"Create a VolumeReplication CR"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#checking-replication-status","text":"replicationState is the state of the volume being referenced. Possible values are primary, secondary, and resync. primary denotes that the volume is primary. secondary denotes that the volume is secondary. resync denotes that the volume needs to be resynced. To check VolumeReplication CR status: 1 [cluster-1]$ kubectl get volumereplication pvc-volumereplication -oyaml 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 ... 
spec : dataSource : apiGroup : \"\" kind : PersistentVolumeClaim name : rbd-pvc replicationState : primary volumeReplicationClass : rbd-volumereplicationclass status : conditions : - lastTransitionTime : \"2021-05-04T07:39:00Z\" message : \"\" observedGeneration : 1 reason : Promoted status : \"True\" type : Completed - lastTransitionTime : \"2021-05-04T07:39:00Z\" message : \"\" observedGeneration : 1 reason : Healthy status : \"False\" type : Degraded - lastTransitionTime : \"2021-05-04T07:39:00Z\" message : \"\" observedGeneration : 1 reason : NotResyncing status : \"False\" type : Resyncing lastCompletionTime : \"2021-05-04T07:39:00Z\" lastStartTime : \"2021-05-04T07:38:59Z\" message : volume is marked primary observedGeneration : 1 state : Primary","title":"Checking Replication Status"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#backup-restore","text":"Note To effectively resume operations after a failover/relocation, backup of the kubernetes artifacts like deployment, PVC, PV, etc need to be created beforehand by the admin; so that the application can be restored on the peer cluster. Here, we take a backup of PVC and PV object on one site, so that they can be restored later to the peer cluster.","title":"Backup & Restore"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#take-backup-on-cluster-1","text":"Take backup of the PVC rbd-pvc 1 [cluster-1]$ kubectl get pvc rbd-pvc -oyaml > pvc-backup.yaml Take a backup of the PV, corresponding to the PVC 1 [cluster-1]$ kubectl get pv/pvc-65dc0aac-5e15-4474-90f4-7a3532c621ec -oyaml > pv_backup.yaml Note We can also take backup using external tools like Velero . See velero documentation for more information.","title":"Take backup on cluster-1"},{"location":"Storage-Configuration/Block-Storage-RBD/rbd-mirroring/#restore-the-backup-on-cluster-2","text":"Create storageclass on the secondary cluster 1 [cluster-2]$ kubectl create -f deploy/examples/csi/rbd/storageclass.yaml Create VolumeReplicationClass on the secondary cluster 1 2 [cluster-1]$ kubectl apply -f deploy/examples/volume-replication-class.yaml volumereplicationclass.replication.storage.openshift.io/rbd-volumereplicationclass created If Persistent Volumes and Claims are created manually on the secondary cluster, remove the claimRef on the backed up PV objects in yaml files; so that the PV can get bound to the new claim on the secondary cluster. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 ... spec : accessModes : - ReadWriteOnce capacity : storage : 1Gi claimRef : apiVersion : v1 kind : PersistentVolumeClaim name : rbd-pvc namespace : default resourceVersion : \"64252\" uid : 65dc0aac-5e15-4474-90f4-7a3532c621ec csi : ... Apply the Persistent Volume backup from the primary cluster 1 [cluster-2]$ kubectl create -f pv-backup.yaml Apply the Persistent Volume claim from the restored backup 1 [cluster-2]$ kubectl create -f pvc-backup.yaml 1 2 3 [cluster-2]$ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE rbd-pvc Bound pvc-65dc0aac-5e15-4474-90f4-7a3532c621ec 1Gi RWO rook-ceph-block 44s","title":"Restore the backup on cluster-2"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/","text":"There are three CSI drivers integrated with Rook that will enable different scenarios: RBD: This block storage driver is optimized for RWO pod access where only one pod may access the storage. More information . CephFS: This file storage driver allows for RWX with one or more pods accessing the same storage. More information . 
NFS (experimental): This file storage driver allows creating NFS exports that can be mounted to pods, or the exports can be mounted directly via an NFS client from inside or outside the Kubernetes cluster. More information The Ceph Filesystem (CephFS) and RADOS Block Device (RBD) drivers are enabled automatically with the Rook operator. The NFS driver is disabled by default. All drivers will be started in the same namespace as the operator when the first CephCluster CR is created. Supported Versions \u00b6 The supported Ceph CSI version is 3.3.0 or greater with Rook. Refer to ceph csi releases for more information. Static Provisioning \u00b6 Both drivers also support the creation of static PV and static PVC from existing RBD image/CephFS volume. Refer to static PVC for more information. Configure CSI Drivers in non-default namespace \u00b6 If you've deployed the Rook operator in a namespace other than \"rook-ceph\", change the prefix in the provisioner to match the namespace you used. For example, if the Rook operator is running in the namespace \"my-namespace\" the provisioner value should be \"my-namespace.rbd.csi.ceph.com\". The same provisioner name needs to be set in both the storageclass and snapshotclass. Liveness Sidecar \u00b6 All CSI pods are deployed with a sidecar container that provides a prometheus metric for tracking if the CSI plugin is alive and running. These metrics are meant to be collected by prometheus but can be accesses through a GET request to a specific node ip. for example curl -X get http://[pod ip]:[liveness-port][liveness-path] 2>/dev/null | grep csi the expected output should be 1 2 3 4 $ curl -X GET http://10.109.65.142:9080/metrics 2 >/dev/null | grep csi # HELP csi_liveness Liveness Probe # TYPE csi_liveness gauge csi_liveness 1 Check the monitoring doc to see how to integrate CSI liveness and grpc metrics into ceph monitoring. Dynamically Expand Volume \u00b6 Prerequisite \u00b6 For filesystem resize to be supported for your Kubernetes cluster, the kubernetes version running in your cluster should be >= v1.15 and for block volume resize support the Kubernetes version should be >= v1.16. Also, ExpandCSIVolumes feature gate has to be enabled for the volume resize functionality to work. To expand the PVC the controlling StorageClass must have allowVolumeExpansion set to true . csi.storage.k8s.io/controller-expand-secret-name and csi.storage.k8s.io/controller-expand-secret-namespace values set in storageclass. Now expand the PVC by editing the PVC pvc.spec.resource.requests.storage to a higher values than the current size. Once PVC is expanded on backend and same is reflected size is reflected on application mountpoint, the status capacity pvc.status.capacity.storage of PVC will be updated to new size. RBD Mirroring \u00b6 To support RBD Mirroring, the CSI-Addons sidecar will be started in the RBD provisioner pod. The CSI-Addons supports the VolumeReplication operation. The volume replication controller provides common and reusable APIs for storage disaster recovery. It is based on csi-addons/spec specification and can be used by any storage provider. It follows the controller pattern and provides extended APIs for storage disaster recovery. The extended APIs are provided via Custom Resource Definitions (CRDs). Prerequisites \u00b6 Kubernetes version 1.21 or greater is required. 
Enable CSIAddons Sidecar \u00b6 To enable the CSIAddons sidecar and deploy the controller, please follow the steps below. Ephemeral volume support \u00b6 The generic ephemeral volume feature adds support for specifying PVCs in the volumes field to indicate a user would like to create a Volume as part of the pod spec. This feature requires the GenericEphemeralVolume feature gate to be enabled. For example: 1 2 3 4 5 6 7 8 9 10 11 12 13 kind : Pod apiVersion : v1 ... volumes : - name : mypvc ephemeral : volumeClaimTemplate : spec : accessModes : [ \"ReadWriteOnce\" ] storageClassName : \"rook-ceph-block\" resources : requests : storage : 1Gi A volume claim template is defined inside the pod spec which refers to a volume provisioned and used by the pod with its lifecycle. The volume is provisioned when the pod is spawned and destroyed when the pod is deleted. Refer to ephemeral-doc for more info. Also, see the example manifests for an RBD ephemeral volume and a CephFS ephemeral volume . CSI-Addons Controller \u00b6 The CSI-Addons Controller handles the requests from users to initiate an operation. Users create a CR that the controller inspects, and forwards a request to one or more CSI-Addons side-cars for execution. Deploying the controller \u00b6 Users can deploy the controller by running the following commands: 1 2 3 kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/crds.yaml kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/rbac.yaml kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/setup-controller.yaml This creates the required CRDs and configures permissions. Enable the CSI-Addons Sidecar \u00b6 To use the features provided by CSI-Addons, the csi-addons containers need to be deployed in the RBD provisioner and nodeplugin pods, which are not enabled by default. Execute the following command in the cluster to enable the CSI-Addons sidecar: Update the rook-ceph-operator-config configmap and patch the following configurations 1 kubectl patch cm rook-ceph-operator-config -nrook-ceph -p $'data:\\n \"CSI_ENABLE_CSIADDONS\": \"true\"' After enabling CSI_ENABLE_CSIADDONS in the configmap, a new sidecar container named csi-addons should start automatically in the RBD CSI provisioner and nodeplugin pods. Note Make sure the version of ceph-csi used is v3.5.0+ . CSI-ADDONS Operation \u00b6 CSI-Addons supports the following operations: Reclaim Space Creating a ReclaimSpaceJob Creating a ReclaimSpaceCronJob Annotating PersistentVolumeClaims Annotating Namespace Network Fencing Creating a NetworkFence Volume Replication Creating VolumeReplicationClass Creating VolumeReplication CR Enable RBD Encryption Support \u00b6 Ceph-CSI supports encrypting individual RBD PersistentVolumeClaims with LUKS encryption. More details can be found here with a full list of supported encryption configurations. A sample configmap can be found here . Note Rook also supports OSD encryption (see the encryptedDevice option here ). Using both RBD PVC encryption and OSD encryption together will lead to double encryption and may reduce read/write performance. Unlike OSD encryption, Ceph-CSI RBD PVC encryption can also be enabled on existing ceph clusters, and multiple kinds of encryption KMS can be used on the same ceph cluster via different storageclasses.
Following steps demonstrate how to enable support for encryption: Create the rook-ceph-csi-kms-config configmap with required encryption configuration in the same namespace where the Rook operator is deployed. An example is shown below: 1 2 3 4 5 6 7 8 9 10 11 12 13 apiVersion : v1 kind : ConfigMap metadata : name : rook-ceph-csi-kms-config namespace : rook-ceph data : config.json : |- { \"user-secret-metadata\": { \"encryptionKMSType\": \"metadata\", \"secretName\": \"storage-encryption-secret\" } } Update the rook-ceph-operator-config configmap and patch the following configurations 1 kubectl patch cm rook-ceph-operator-config -nrook-ceph -p $'data:\\n \"CSI_ENABLE_ENCRYPTION\": \"true\"' Create necessary resources (secrets, configmaps etc) as required by the encryption type. In this case, create storage-encryption-secret secret in the namespace of pvc as shown: 1 2 3 4 5 6 7 apiVersion : v1 kind : Secret metadata : name : storage-encryption-secret namespace : rook-ceph stringData : encryptionPassphrase : test-encryption Create a new storageclass with additional parameters encrypted: \"true\" and encryptionKMSID: \"\" . An example is show below: 1 2 3 4 5 6 7 8 9 apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-ceph-block-encrypted parameters : # additional parameters required for encryption encrypted : \"true\" encryptionKMSID : \"user-secret-metadata\" # ... PVCs created using the new storageclass will be encrypted. Enable Read affinity for RBD volumes \u00b6 Ceph CSI supports mapping RBD volumes with krbd options to allow serving reads from an OSD in proximity to the client, according to OSD locations defined in the CRUSH map and topology labels on nodes. Refer to the krbd-options for more details. Execute the following steps: Patch the rook-ceph-operator-config configmap using the following command. 1 kubectl patch cm rook-ceph-operator-config -nrook-ceph -p $'data:\\n \"CSI_ENABLE_READ_AFFINITY\": \"true\"' Add topology labels to the Kubernetes nodes. The same labels may be used as mentioned in the OSD topology topic. (optional) Rook will pass the labels mentioned in osd-topology as the default set of labels. This can overridden to supply custom labels by updating the CSI_CRUSH_LOCATION_LABELS value in the rook-ceph-operator-config configmap. Ceph CSI will extract the CRUSH location from the topology labels found on the node and pass it though krbd options during mapping RBD volumes. Note This requires kernel version 5.8 or higher.","title":"Ceph CSI Drivers"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#supported-versions","text":"The supported Ceph CSI version is 3.3.0 or greater with Rook. Refer to ceph csi releases for more information.","title":"Supported Versions"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#static-provisioning","text":"Both drivers also support the creation of static PV and static PVC from existing RBD image/CephFS volume. Refer to static PVC for more information.","title":"Static Provisioning"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#configure-csi-drivers-in-non-default-namespace","text":"If you've deployed the Rook operator in a namespace other than \"rook-ceph\", change the prefix in the provisioner to match the namespace you used. For example, if the Rook operator is running in the namespace \"my-namespace\" the provisioner value should be \"my-namespace.rbd.csi.ceph.com\". 
The same provisioner name needs to be set in both the storageclass and snapshotclass.","title":"Configure CSI Drivers in non-default namespace"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#liveness-sidecar","text":"All CSI pods are deployed with a sidecar container that provides a prometheus metric for tracking if the CSI plugin is alive and running. These metrics are meant to be collected by prometheus but can be accesses through a GET request to a specific node ip. for example curl -X get http://[pod ip]:[liveness-port][liveness-path] 2>/dev/null | grep csi the expected output should be 1 2 3 4 $ curl -X GET http://10.109.65.142:9080/metrics 2 >/dev/null | grep csi # HELP csi_liveness Liveness Probe # TYPE csi_liveness gauge csi_liveness 1 Check the monitoring doc to see how to integrate CSI liveness and grpc metrics into ceph monitoring.","title":"Liveness Sidecar"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#dynamically-expand-volume","text":"","title":"Dynamically Expand Volume"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#prerequisite","text":"For filesystem resize to be supported for your Kubernetes cluster, the kubernetes version running in your cluster should be >= v1.15 and for block volume resize support the Kubernetes version should be >= v1.16. Also, ExpandCSIVolumes feature gate has to be enabled for the volume resize functionality to work. To expand the PVC the controlling StorageClass must have allowVolumeExpansion set to true . csi.storage.k8s.io/controller-expand-secret-name and csi.storage.k8s.io/controller-expand-secret-namespace values set in storageclass. Now expand the PVC by editing the PVC pvc.spec.resource.requests.storage to a higher values than the current size. Once PVC is expanded on backend and same is reflected size is reflected on application mountpoint, the status capacity pvc.status.capacity.storage of PVC will be updated to new size.","title":"Prerequisite"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#rbd-mirroring","text":"To support RBD Mirroring, the CSI-Addons sidecar will be started in the RBD provisioner pod. The CSI-Addons supports the VolumeReplication operation. The volume replication controller provides common and reusable APIs for storage disaster recovery. It is based on csi-addons/spec specification and can be used by any storage provider. It follows the controller pattern and provides extended APIs for storage disaster recovery. The extended APIs are provided via Custom Resource Definitions (CRDs).","title":"RBD Mirroring"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#prerequisites","text":"Kubernetes version 1.21 or greater is required.","title":"Prerequisites"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#enable-csiaddons-sidecar","text":"To enable the CSIAddons sidecar and deploy the controller, Please follow the steps below","title":"Enable CSIAddons Sidecar"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#ephemeral-volume-support","text":"The generic ephemeral volume feature adds support for specifying PVCs in the volumes field to indicate a user would like to create a Volume as part of the pod spec. This feature requires the GenericEphemeralVolume feature gate to be enabled. For example: 1 2 3 4 5 6 7 8 9 10 11 12 13 kind : Pod apiVersion : v1 ... 
volumes : - name : mypvc ephemeral : volumeClaimTemplate : spec : accessModes : [ \"ReadWriteOnce\" ] storageClassName : \"rook-ceph-block\" resources : requests : storage : 1Gi A volume claim template is defined inside the pod spec which refers to a volume provisioned and used by the pod with its lifecycle. The volumes are provisioned when pod get spawned and destroyed at time of pod delete. Refer to ephemeral-doc for more info. Also, See the example manifests for an RBD ephemeral volume and a CephFS ephemeral volume .","title":"Ephemeral volume support"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#csi-addons-controller","text":"The CSI-Addons Controller handles the requests from users to initiate an operation. Users create a CR that the controller inspects, and forwards a request to one or more CSI-Addons side-cars for execution.","title":"CSI-Addons Controller"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#deploying-the-controller","text":"Users can deploy the controller by running the following commands: 1 2 3 kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/crds.yaml kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/rbac.yaml kubectl create -f https://raw.githubusercontent.com/csi-addons/kubernetes-csi-addons/v0.7.0/deploy/controller/setup-controller.yaml This creates the required crds and configure permissions.","title":"Deploying the controller"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#enable-the-csi-addons-sidecar","text":"To use the features provided by the CSI-Addons, the csi-addons containers need to be deployed in the RBD provisioner and nodeplugin pods, which are not enabled by default. Execute the following command in the cluster to enable the CSI-Addons sidecar: Update the rook-ceph-operator-config configmap and patch the following configurations 1 kubectl patch cm rook-ceph-operator-config -nrook-ceph -p $'data:\\n \"CSI_ENABLE_CSIADDONS\": \"true\"' After enabling CSI_ENABLE_CSIADDONS in the configmap, a new sidecar container with name csi-addons should now start automatically in the RBD CSI provisioner and nodeplugin pods. Note Make sure the version of ceph-csi used is v3.5.0+ .","title":"Enable the CSI-Addons Sidecar"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#csi-addons-operation","text":"CSI-Addons supports the following operations: Reclaim Space Creating a ReclaimSpaceJob Creating a ReclaimSpaceCronJob Annotating PersistentVolumeClaims Annotating Namespace Network Fencing Creating a NetworkFence Volume Replication Creating VolumeReplicationClass Creating VolumeReplication CR","title":"CSI-ADDONS Operation"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#enable-rbd-encryption-support","text":"Ceph-CSI supports encrypting individual RBD PersistentVolumeClaim with LUKS encryption. More details can be found here with full list of supported encryption configurations. A sample configmap can be found here . Note Rook also supports OSD encryption (see encryptedDevice option here ). Using both RBD PVC encryption and OSD encryption together will lead to double encryption and may reduce read/write performance. Unlike OSD encryption, existing ceph clusters can also enable Ceph-CSI RBD PVC encryption support and multiple kinds of encryption KMS can be used on the same ceph cluster using different storageclasses. 
Following steps demonstrate how to enable support for encryption: Create the rook-ceph-csi-kms-config configmap with required encryption configuration in the same namespace where the Rook operator is deployed. An example is shown below: 1 2 3 4 5 6 7 8 9 10 11 12 13 apiVersion : v1 kind : ConfigMap metadata : name : rook-ceph-csi-kms-config namespace : rook-ceph data : config.json : |- { \"user-secret-metadata\": { \"encryptionKMSType\": \"metadata\", \"secretName\": \"storage-encryption-secret\" } } Update the rook-ceph-operator-config configmap and patch the following configurations 1 kubectl patch cm rook-ceph-operator-config -nrook-ceph -p $'data:\\n \"CSI_ENABLE_ENCRYPTION\": \"true\"' Create necessary resources (secrets, configmaps etc) as required by the encryption type. In this case, create storage-encryption-secret secret in the namespace of pvc as shown: 1 2 3 4 5 6 7 apiVersion : v1 kind : Secret metadata : name : storage-encryption-secret namespace : rook-ceph stringData : encryptionPassphrase : test-encryption Create a new storageclass with additional parameters encrypted: \"true\" and encryptionKMSID: \"\" . An example is show below: 1 2 3 4 5 6 7 8 9 apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-ceph-block-encrypted parameters : # additional parameters required for encryption encrypted : \"true\" encryptionKMSID : \"user-secret-metadata\" # ... PVCs created using the new storageclass will be encrypted.","title":"Enable RBD Encryption Support"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-drivers/#enable-read-affinity-for-rbd-volumes","text":"Ceph CSI supports mapping RBD volumes with krbd options to allow serving reads from an OSD in proximity to the client, according to OSD locations defined in the CRUSH map and topology labels on nodes. Refer to the krbd-options for more details. Execute the following steps: Patch the rook-ceph-operator-config configmap using the following command. 1 kubectl patch cm rook-ceph-operator-config -nrook-ceph -p $'data:\\n \"CSI_ENABLE_READ_AFFINITY\": \"true\"' Add topology labels to the Kubernetes nodes. The same labels may be used as mentioned in the OSD topology topic. (optional) Rook will pass the labels mentioned in osd-topology as the default set of labels. This can overridden to supply custom labels by updating the CSI_CRUSH_LOCATION_LABELS value in the rook-ceph-operator-config configmap. Ceph CSI will extract the CRUSH location from the topology labels found on the node and pass it though krbd options during mapping RBD volumes. Note This requires kernel version 5.8 or higher.","title":"Enable Read affinity for RBD volumes"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/","text":"Prerequisites \u00b6 Rook officially supports v1 snapshots for Kubernetes v1.20+. Install the snapshot controller and snapshot v1 CRD as required. More info can be found here . Note If only Alpha snapshots are available, enable snapshotter in rook-ceph-operator-config or helm chart values.yaml , change the external-snapshotter image to registry.k8s.io/sig-storage/csi-snapshotter:v1.2.2 and refer to the alpha snapshots documentation VolumeSnapshot betav1 is deprecated in Kubernetes 1.20+ and removed in 1.24.0. If you still require betav1 snapshots, change the external-snapshotter image to registry.k8s.io/sig-storage/csi-snapshotter:v5.0.1 and refer to the betav1 snapshots documentation We also need a VolumeSnapshotClass for volume snapshot to work. 
The purpose of a VolumeSnapshotClass is defined in the kubernetes documentation . In short, as the documentation describes it: Info Just like StorageClass provides a way for administrators to describe the \"classes\" of storage they offer when provisioning a volume, VolumeSnapshotClass provides a way to describe the \"classes\" of storage when provisioning a volume snapshot. Upgrade Snapshot API \u00b6 If your Kubernetes version is updated to a newer version of the snapshot API, follow the upgrade guide here to upgrade from v1alpha1 to v1beta1, or v1beta1 to v1. RBD Snapshots \u00b6 VolumeSnapshotClass \u00b6 In VolumeSnapshotClass , the csi.storage.k8s.io/snapshotter-secret-name parameter should reference the name of the secret created for the rbdplugin and pool to reflect the Ceph pool name. Update the value of the clusterID field to match the namespace that Rook is running in. When Ceph CSI is deployed by Rook, the operator will automatically maintain a configmap whose contents will match this key. By default this is \"rook-ceph\". 1 kubectl create -f deploy/examples/csi/rbd/snapshotclass.yaml Volumesnapshot \u00b6 In snapshot , volumeSnapshotClassName should be the name of the VolumeSnapshotClass previously created. The persistentVolumeClaimName should be the name of the PVC which is already created by the RBD CSI driver. 1 kubectl create -f deploy/examples/csi/rbd/snapshot.yaml Verify RBD Snapshot Creation \u00b6 1 2 3 $ kubectl get volumesnapshotclass NAME DRIVER DELETIONPOLICY AGE csi-rbdplugin-snapclass rook-ceph.rbd.csi.ceph.com Delete 3h55m 1 2 3 $ kubectl get volumesnapshot NAME READYTOUSE SOURCEPVC SOURCESNAPSHOTCONTENT RESTORESIZE SNAPSHOTCLASS SNAPSHOTCONTENT CREATIONTIME AGE rbd-pvc-snapshot true rbd-pvc 1Gi csi-rbdplugin-snapclass snapcontent-79090db0-7c66-4b18-bf4a-634772c7cac7 3h50m 3h51m The snapshot will be ready to restore to a new PVC when the READYTOUSE field of the volumesnapshot is set to true. Restore the snapshot to a new PVC \u00b6 In pvc-restore , dataSource should be the name of the VolumeSnapshot previously created. The dataSource kind should be the VolumeSnapshot . Create a new PVC from the snapshot 1 kubectl create -f deploy/examples/csi/rbd/pvc-restore.yaml Verify RBD Clone PVC Creation \u00b6 1 2 3 4 $ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE rbd-pvc Bound pvc-84294e34-577a-11e9-b34f-525400581048 1Gi RWO rook-ceph-block 34m rbd-pvc-restore Bound pvc-575537bf-577f-11e9-b34f-525400581048 1Gi RWO rook-ceph-block 8s RBD snapshot resource Cleanup \u00b6 To clean your cluster of the resources created by this example, run the following: 1 2 3 kubectl delete -f deploy/examples/csi/rbd/pvc-restore.yaml kubectl delete -f deploy/examples/csi/rbd/snapshot.yaml kubectl delete -f deploy/examples/csi/rbd/snapshotclass.yaml CephFS Snapshots \u00b6 VolumeSnapshotClass \u00b6 In VolumeSnapshotClass , the csi.storage.k8s.io/snapshotter-secret-name parameter should reference the name of the secret created for the cephfsplugin. In the volumesnapshotclass, update the value of the clusterID field to match the namespace that Rook is running in. When Ceph CSI is deployed by Rook, the operator will automatically maintain a configmap whose contents will match this key. By default this is \"rook-ceph\". 1 kubectl create -f deploy/examples/csi/cephfs/snapshotclass.yaml VolumeSnapshot \u00b6 In snapshot , volumeSnapshotClassName should be the name of the VolumeSnapshotClass previously created. 
The persistentVolumeClaimName should be the name of the PVC which is already created by the CephFS CSI driver. 1 kubectl create -f deploy/examples/csi/cephfs/snapshot.yaml Verify CephFS Snapshot Creation \u00b6 1 2 3 $ kubectl get volumesnapshotclass NAME DRIVER DELETIONPOLICY AGE csi-cephfslugin-snapclass rook-ceph.cephfs.csi.ceph.com Delete 3h55m 1 2 3 $ kubectl get volumesnapshot NAME READYTOUSE SOURCEPVC SOURCESNAPSHOTCONTENT RESTORESIZE SNAPSHOTCLASS SNAPSHOTCONTENT CREATIONTIME AGE cephfs-pvc-snapshot true cephfs-pvc 1Gi csi-cephfsplugin-snapclass snapcontent-34476204-a14a-4d59-bfbc-2bbba695652c 3h50m 3h51m The snapshot will be ready to restore to a new PVC when READYTOUSE field of the volumesnapshot is set to true. Restore the snapshot to a new PVC \u00b6 In pvc-restore , dataSource should be the name of the VolumeSnapshot previously created. The dataSource kind should be the VolumeSnapshot . Create a new PVC from the snapshot 1 kubectl create -f deploy/examples/csi/cephfs/pvc-restore.yaml Verify CephFS Restore PVC Creation \u00b6 1 2 3 4 $ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE cephfs-pvc Bound pvc-74734901-577a-11e9-b34f-525400581048 1Gi RWX rook-cephfs 55m cephfs-pvc-restore Bound pvc-95308c75-6c93-4928-a551-6b5137192209 1Gi RWX rook-cephfs 34s CephFS snapshot resource Cleanup \u00b6 To clean your cluster of the resources created by this example, run the following: 1 2 3 kubectl delete -f deploy/examples/csi/cephfs/pvc-restore.yaml kubectl delete -f deploy/examples/csi/cephfs/snapshot.yaml kubectl delete -f deploy/examples/csi/cephfs/snapshotclass.yaml Limitations \u00b6 There is a limit of 400 snapshots per cephFS filesystem. The PVC cannot be deleted if it has snapshots. make sure all the snapshots on the PVC are deleted before you delete the PVC.","title":"Snapshots"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#prerequisites","text":"Rook officially supports v1 snapshots for Kubernetes v1.20+. Install the snapshot controller and snapshot v1 CRD as required. More info can be found here . Note If only Alpha snapshots are available, enable snapshotter in rook-ceph-operator-config or helm chart values.yaml , change the external-snapshotter image to registry.k8s.io/sig-storage/csi-snapshotter:v1.2.2 and refer to the alpha snapshots documentation VolumeSnapshot betav1 is deprecated in Kubernetes 1.20+ and removed in 1.24.0. If you still require betav1 snapshots, change the external-snapshotter image to registry.k8s.io/sig-storage/csi-snapshotter:v5.0.1 and refer to the betav1 snapshots documentation We also need a VolumeSnapshotClass for volume snapshot to work. The purpose of a VolumeSnapshotClass is defined in the kubernetes documentation . 
In short, as the documentation describes it: Info Just like StorageClass provides a way for administrators to describe the \"classes\" of storage they offer when provisioning a volume, VolumeSnapshotClass provides a way to describe the \"classes\" of storage when provisioning a volume snapshot.","title":"Prerequisites"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#upgrade-snapshot-api","text":"If your Kubernetes version is updated to a newer version of the snapshot API, follow the upgrade guide here to upgrade from v1alpha1 to v1beta1, or v1beta1 to v1.","title":"Upgrade Snapshot API"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#rbd-snapshots","text":"","title":"RBD Snapshots"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#volumesnapshotclass","text":"In VolumeSnapshotClass , the csi.storage.k8s.io/snapshotter-secret-name parameter should reference the name of the secret created for the rbdplugin and pool to reflect the Ceph pool name. Update the value of the clusterID field to match the namespace that Rook is running in. When Ceph CSI is deployed by Rook, the operator will automatically maintain a configmap whose contents will match this key. By default this is \"rook-ceph\". 1 kubectl create -f deploy/examples/csi/rbd/snapshotclass.yaml","title":"VolumeSnapshotClass"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#volumesnapshot","text":"In snapshot , volumeSnapshotClassName should be the name of the VolumeSnapshotClass previously created. The persistentVolumeClaimName should be the name of the PVC which is already created by the RBD CSI driver. 1 kubectl create -f deploy/examples/csi/rbd/snapshot.yaml","title":"Volumesnapshot"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#verify-rbd-snapshot-creation","text":"1 2 3 $ kubectl get volumesnapshotclass NAME DRIVER DELETIONPOLICY AGE csi-rbdplugin-snapclass rook-ceph.rbd.csi.ceph.com Delete 3h55m 1 2 3 $ kubectl get volumesnapshot NAME READYTOUSE SOURCEPVC SOURCESNAPSHOTCONTENT RESTORESIZE SNAPSHOTCLASS SNAPSHOTCONTENT CREATIONTIME AGE rbd-pvc-snapshot true rbd-pvc 1Gi csi-rbdplugin-snapclass snapcontent-79090db0-7c66-4b18-bf4a-634772c7cac7 3h50m 3h51m The snapshot will be ready to restore to a new PVC when the READYTOUSE field of the volumesnapshot is set to true.","title":"Verify RBD Snapshot Creation"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#restore-the-snapshot-to-a-new-pvc","text":"In pvc-restore , dataSource should be the name of the VolumeSnapshot previously created. The dataSource kind should be the VolumeSnapshot . 
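For reference, a restore PVC of the kind described here typically looks like the following sketch; the names rbd-pvc-restore and rbd-pvc-snapshot, the rook-ceph-block storage class, and the 1Gi size match the examples on this page, while the exact contents of deploy/examples/csi/rbd/pvc-restore.yaml may differ slightly.
apiVersion : v1
kind : PersistentVolumeClaim
metadata :
  name : rbd-pvc-restore
spec :
  storageClassName : rook-ceph-block
  dataSource :
    name : rbd-pvc-snapshot          # the VolumeSnapshot created earlier
    kind : VolumeSnapshot
    apiGroup : snapshot.storage.k8s.io
  accessModes :
    - ReadWriteOnce
  resources :
    requests :
      storage : 1Gi                  # must be at least the size of the source snapshot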
Create a new PVC from the snapshot 1 kubectl create -f deploy/examples/csi/rbd/pvc-restore.yaml","title":"Restore the snapshot to a new PVC"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#verify-rbd-clone-pvc-creation","text":"1 2 3 4 $ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE rbd-pvc Bound pvc-84294e34-577a-11e9-b34f-525400581048 1Gi RWO rook-ceph-block 34m rbd-pvc-restore Bound pvc-575537bf-577f-11e9-b34f-525400581048 1Gi RWO rook-ceph-block 8s","title":"Verify RBD Clone PVC Creation"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#rbd-snapshot-resource-cleanup","text":"To clean your cluster of the resources created by this example, run the following: 1 2 3 kubectl delete -f deploy/examples/csi/rbd/pvc-restore.yaml kubectl delete -f deploy/examples/csi/rbd/snapshot.yaml kubectl delete -f deploy/examples/csi/rbd/snapshotclass.yaml","title":"RBD snapshot resource Cleanup"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#cephfs-snapshots","text":"","title":"CephFS Snapshots"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#volumesnapshotclass_1","text":"In VolumeSnapshotClass , the csi.storage.k8s.io/snapshotter-secret-name parameter should reference the name of the secret created for the cephfsplugin. In the volumesnapshotclass, update the value of the clusterID field to match the namespace that Rook is running in. When Ceph CSI is deployed by Rook, the operator will automatically maintain a configmap whose contents will match this key. By default this is \"rook-ceph\". 1 kubectl create -f deploy/examples/csi/cephfs/snapshotclass.yaml","title":"VolumeSnapshotClass"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#volumesnapshot_1","text":"In snapshot , volumeSnapshotClassName should be the name of the VolumeSnapshotClass previously created. The persistentVolumeClaimName should be the name of the PVC which is already created by the CephFS CSI driver. 1 kubectl create -f deploy/examples/csi/cephfs/snapshot.yaml","title":"VolumeSnapshot"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#verify-cephfs-snapshot-creation","text":"1 2 3 $ kubectl get volumesnapshotclass NAME DRIVER DELETIONPOLICY AGE csi-cephfslugin-snapclass rook-ceph.cephfs.csi.ceph.com Delete 3h55m 1 2 3 $ kubectl get volumesnapshot NAME READYTOUSE SOURCEPVC SOURCESNAPSHOTCONTENT RESTORESIZE SNAPSHOTCLASS SNAPSHOTCONTENT CREATIONTIME AGE cephfs-pvc-snapshot true cephfs-pvc 1Gi csi-cephfsplugin-snapclass snapcontent-34476204-a14a-4d59-bfbc-2bbba695652c 3h50m 3h51m The snapshot will be ready to restore to a new PVC when READYTOUSE field of the volumesnapshot is set to true.","title":"Verify CephFS Snapshot Creation"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#restore-the-snapshot-to-a-new-pvc_1","text":"In pvc-restore , dataSource should be the name of the VolumeSnapshot previously created. The dataSource kind should be the VolumeSnapshot . 
Create a new PVC from the snapshot 1 kubectl create -f deploy/examples/csi/cephfs/pvc-restore.yaml","title":"Restore the snapshot to a new PVC"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#verify-cephfs-restore-pvc-creation","text":"1 2 3 4 $ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE cephfs-pvc Bound pvc-74734901-577a-11e9-b34f-525400581048 1Gi RWX rook-cephfs 55m cephfs-pvc-restore Bound pvc-95308c75-6c93-4928-a551-6b5137192209 1Gi RWX rook-cephfs 34s","title":"Verify CephFS Restore PVC Creation"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#cephfs-snapshot-resource-cleanup","text":"To clean your cluster of the resources created by this example, run the following: 1 2 3 kubectl delete -f deploy/examples/csi/cephfs/pvc-restore.yaml kubectl delete -f deploy/examples/csi/cephfs/snapshot.yaml kubectl delete -f deploy/examples/csi/cephfs/snapshotclass.yaml","title":"CephFS snapshot resource Cleanup"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-snapshot/#limitations","text":"There is a limit of 400 snapshots per CephFS filesystem. The PVC cannot be deleted if it has snapshots. Make sure all the snapshots on the PVC are deleted before you delete the PVC.","title":"Limitations"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/","text":"The CSI Volume Cloning feature adds support for specifying existing PVCs in the dataSource field to indicate a user would like to clone a Volume. A Clone is defined as a duplicate of an existing Kubernetes Volume that can be consumed as any standard Volume would be. The only difference is that upon provisioning, rather than creating a \"new\" empty Volume, the back end device creates an exact duplicate of the specified Volume. Refer to clone-doc for more info. RBD Volume Cloning \u00b6 Volume Clone Prerequisites \u00b6 Requires Kubernetes v1.16+ which supports volume clone. Ceph-csi driver v3.0.0+ which supports volume clone. Volume Cloning \u00b6 In pvc-clone , dataSource should be the name of the PVC which is already created by the RBD CSI driver. The dataSource kind should be the PersistentVolumeClaim and the storageclass should be the same as the source PVC . Create a new PVC Clone from the PVC 1 kubectl create -f deploy/examples/csi/rbd/pvc-clone.yaml Verify RBD volume Clone PVC Creation \u00b6 1 kubectl get pvc 1 2 3 NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE rbd-pvc Bound pvc-74734901-577a-11e9-b34f-525400581048 1Gi RWO rook-ceph-block 34m rbd-pvc-clone Bound pvc-70473135-577f-11e9-b34f-525400581048 1Gi RWO rook-ceph-block 8s RBD clone resource Cleanup \u00b6 To clean your cluster of the resources created by this example, run the following: 1 kubectl delete -f deploy/examples/csi/rbd/pvc-clone.yaml CephFS Volume Cloning \u00b6 Volume Clone Prerequisites \u00b6 Requires Kubernetes v1.16+ which supports volume clone. Ceph-csi driver v3.1.0+ which supports volume clone. Volume Cloning \u00b6 In pvc-clone , dataSource should be the name of the PVC which is already created by the CephFS CSI driver. The dataSource kind should be the PersistentVolumeClaim and the storageclass should be the same as the source PVC .
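As an illustration of the clone parameters just described, a CephFS pvc-clone manifest could look roughly like the following; the names cephfs-pvc and cephfs-pvc-clone, the rook-cephfs storage class, and the 1Gi RWX sizing are taken from the verification output below, and the actual deploy/examples/csi/cephfs/pvc-clone.yaml may differ in detail.
apiVersion : v1
kind : PersistentVolumeClaim
metadata :
  name : cephfs-pvc-clone
spec :
  storageClassName : rook-cephfs     # must match the storage class of the source PVC
  dataSource :
    name : cephfs-pvc                # the existing PVC to clone
    kind : PersistentVolumeClaim
  accessModes :
    - ReadWriteMany
  resources :
    requests :
      storage : 1Gi                  # must be at least the size of the source PVC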
Create a new PVC Clone from the PVC 1 kubectl create -f deploy/examples/csi/cephfs/pvc-clone.yaml Verify CephFS volume Clone PVC Creation \u00b6 1 kubectl get pvc 1 2 3 NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE cephfs-pvc Bound pvc-1ea51547-a88b-4ab0-8b4a-812caeaf025d 1Gi RWX rook-cephfs 39m cephfs-pvc-clone Bound pvc-b575bc35-d521-4c41-b4f9-1d733cd28fdf 1Gi RWX rook-cephfs 8s CephFS clone resource Cleanup \u00b6 To clean your cluster of the resources created by this example, run the following: 1 kubectl delete -f deploy/examples/csi/cephfs/pvc-clone.yaml","title":"Volume clone"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/#rbd-volume-cloning","text":"","title":"RBD Volume Cloning"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/#volume-clone-prerequisites","text":"Requires Kubernetes v1.16+ which supports volume clone. Ceph-csi diver v3.0.0+ which supports volume clone.","title":"Volume Clone Prerequisites"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/#volume-cloning","text":"In pvc-clone , dataSource should be the name of the PVC which is already created by RBD CSI driver. The dataSource kind should be the PersistentVolumeClaim and also storageclass should be same as the source PVC . Create a new PVC Clone from the PVC 1 kubectl create -f deploy/examples/csi/rbd/pvc-clone.yaml","title":"Volume Cloning"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/#verify-rbd-volume-clone-pvc-creation","text":"1 kubectl get pvc 1 2 3 NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE rbd-pvc Bound pvc-74734901-577a-11e9-b34f-525400581048 1Gi >RWO rook-ceph-block 34m rbd-pvc-clone Bound pvc-70473135-577f-11e9-b34f-525400581048 1Gi RWO rook-ceph-block 8s","title":"Verify RBD volume Clone PVC Creation"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/#rbd-clone-resource-cleanup","text":"To clean your cluster of the resources created by this example, run the following: 1 kubectl delete -f deploy/examples/csi/rbd/pvc-clone.yaml","title":"RBD clone resource Cleanup"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/#cephfs-volume-cloning","text":"","title":"CephFS Volume Cloning"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/#volume-clone-prerequisites_1","text":"Requires Kubernetes v1.16+ which supports volume clone. Ceph-csi diver v3.1.0+ which supports volume clone.","title":"Volume Clone Prerequisites"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/#volume-cloning_1","text":"In pvc-clone , dataSource should be the name of the PVC which is already created by CephFS CSI driver. The dataSource kind should be the PersistentVolumeClaim and also storageclass should be same as the source PVC . 
Create a new PVC Clone from the PVC 1 kubectl create -f deploy/examples/csi/cephfs/pvc-clone.yaml","title":"Volume Cloning"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/#verify-cephfs-volume-clone-pvc-creation","text":"1 kubectl get pvc 1 2 3 NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE cephfs-pvc Bound pvc-1ea51547-a88b-4ab0-8b4a-812caeaf025d 1Gi RWX rook-cephfs 39m cephfs-pvc-clone Bound pvc-b575bc35-d521-4c41-b4f9-1d733cd28fdf 1Gi RWX rook-cephfs 8s","title":"Verify CephFS volume Clone PVC Creation"},{"location":"Storage-Configuration/Ceph-CSI/ceph-csi-volume-clone/#cephfs-clone-resource-cleanup","text":"To clean your cluster of the resources created by this example, run the following: 1 kubectl delete -f deploy/examples/csi/cephfs/pvc-clone.yaml","title":"CephFS clone resource Cleanup"},{"location":"Storage-Configuration/Ceph-CSI/custom-images/","text":"By default, Rook will deploy the latest stable version of the Ceph CSI driver. Commonly, there is no need to change this default version that is deployed. For scenarios that require deploying a custom image (e.g. downstream releases), the defaults can be overridden with the following settings. The CSI configuration variables are found in the rook-ceph-operator-config ConfigMap. These settings can also be specified as environment variables on the operator deployment, though the configmap values will override the env vars if both are specified. 1 kubectl -n $ROOK_OPERATOR_NAMESPACE edit configmap rook-ceph-operator-config The default upstream images are included below, which you can change to your desired images. 1 2 3 4 5 6 7 ROOK_CSI_CEPH_IMAGE : \"quay.io/cephcsi/cephcsi:v3.9.0\" ROOK_CSI_REGISTRAR_IMAGE : \"registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.8.0\" ROOK_CSI_PROVISIONER_IMAGE : \"registry.k8s.io/sig-storage/csi-provisioner:v3.5.0\" ROOK_CSI_ATTACHER_IMAGE : \"registry.k8s.io/sig-storage/csi-attacher:v4.3.0\" ROOK_CSI_RESIZER_IMAGE : \"registry.k8s.io/sig-storage/csi-resizer:v1.8.0\" ROOK_CSI_SNAPSHOTTER_IMAGE : \"registry.k8s.io/sig-storage/csi-snapshotter:v6.2.2\" ROOK_CSIADDONS_IMAGE : \"quay.io/csiaddons/k8s-sidecar:v0.7.0\" Use private repository \u00b6 If image version is not passed along with the image name in any of the variables above, Rook will add the corresponding default version to that image. Example: if ROOK_CSI_CEPH_IMAGE: \"quay.io/private-repo/cephcsi\" is passed, Rook will add internal default version and consume it as \"quay.io/private-repo/cephcsi:v3.9.0\" . Use default images \u00b6 If you would like Rook to use the default upstream images, then you may simply remove all variables matching ROOK_CSI_*_IMAGE from the above ConfigMap and/or the operator deployment. Verifying updates \u00b6 You can use the below command to see the CSI images currently being used in the cluster. Note that not all images (like volumereplication-operator ) may be present in every cluster depending on which CSI features are enabled. 
1 kubectl --namespace rook-ceph get pod -o jsonpath='{range .items[*]}{range .spec.containers[*]}{.image}{\"\\n\"}' -l 'app in (csi-rbdplugin,csi-rbdplugin-provisioner,csi-cephfsplugin,csi-cephfsplugin-provisioner)' | sort | uniq The default images can also be found with each release in the images list","title":"Custom Images"},{"location":"Storage-Configuration/Ceph-CSI/custom-images/#use-private-repository","text":"If image version is not passed along with the image name in any of the variables above, Rook will add the corresponding default version to that image. Example: if ROOK_CSI_CEPH_IMAGE: \"quay.io/private-repo/cephcsi\" is passed, Rook will add internal default version and consume it as \"quay.io/private-repo/cephcsi:v3.9.0\" .","title":"Use private repository"},{"location":"Storage-Configuration/Ceph-CSI/custom-images/#use-default-images","text":"If you would like Rook to use the default upstream images, then you may simply remove all variables matching ROOK_CSI_*_IMAGE from the above ConfigMap and/or the operator deployment.","title":"Use default images"},{"location":"Storage-Configuration/Ceph-CSI/custom-images/#verifying-updates","text":"You can use the below command to see the CSI images currently being used in the cluster. Note that not all images (like volumereplication-operator ) may be present in every cluster depending on which CSI features are enabled. 1 kubectl --namespace rook-ceph get pod -o jsonpath='{range .items[*]}{range .spec.containers[*]}{.image}{\"\\n\"}' -l 'app in (csi-rbdplugin,csi-rbdplugin-provisioner,csi-cephfsplugin,csi-cephfsplugin-provisioner)' | sort | uniq The default images can also be found with each release in the images list","title":"Verifying updates"},{"location":"Storage-Configuration/Monitoring/ceph-dashboard/","text":"The dashboard is a very helpful tool to give you an overview of the status of your Ceph cluster, including overall health, status of the mon quorum, status of the mgr, osd, and other Ceph daemons, view pools and PG status, show logs for the daemons, and more. Rook makes it simple to enable the dashboard. Enable the Ceph Dashboard \u00b6 The dashboard can be enabled with settings in the CephCluster CRD. The CephCluster CRD must have the dashboard enabled setting set to true . This is the default setting in the example manifests. 1 2 3 4 [ ... ] spec : dashboard : enabled : true The Rook operator will enable the ceph-mgr dashboard module. A service object will be created to expose that port inside the Kubernetes cluster. Rook will enable port 8443 for https access. This example shows that port 8443 was configured. 1 2 3 4 $ kubectl -n rook-ceph get service NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-mgr ClusterIP 10.108.111.192  9283/TCP 3h rook-ceph-mgr-dashboard ClusterIP 10.110.113.240  8443/TCP 3h The first service is for reporting the Prometheus metrics , while the latter service is for the dashboard. If you are on a node in the cluster, you will be able to connect to the dashboard by using either the DNS name of the service at https://rook-ceph-mgr-dashboard-https:8443 or by connecting to the cluster IP, in this example at https://10.110.113.240:8443 . Login Credentials \u00b6 After you connect to the dashboard you will need to login for secure access. Rook creates a default user named admin and generates a secret called rook-ceph-dashboard-password in the namespace where the Rook Ceph cluster is running. 
To retrieve the generated password, you can run the following: 1 kubectl -n rook-ceph get secret rook-ceph-dashboard-password -o jsonpath=\"{['data']['password']}\" | base64 --decode && echo Configure the Dashboard \u00b6 The following dashboard configuration settings are supported: 1 2 3 4 5 spec : dashboard : urlPrefix : /ceph-dashboard port : 8443 ssl : true urlPrefix If you are accessing the dashboard via a reverse proxy, you may wish to serve it under a URL prefix. To get the dashboard to use hyperlinks that include your prefix, you can set the urlPrefix setting. port The port that the dashboard is served on may be changed from the default using the port setting. The corresponding K8s service exposing the port will automatically be updated. ssl The dashboard may be served without SSL (useful for when you deploy the dashboard behind a proxy already served using SSL) by setting the ssl option to be false. Visualization of 'Physical Disks' section in the dashboard \u00b6 Information about physical disks is available only in Rook host clusters . The Rook manager module is required by the dashboard to obtain the information about physical disks, but it is disabled by default. Before it is enabled, the dashboard 'Physical Disks' section will show an error message. To prepare the Rook manager module to be used in the dashboard, modify your Ceph Cluster CRD: 1 2 3 4 mgr : modules : - name : rook enabled : true And apply the changes: 1 $ kubectl apply -f cluster.yaml Once the Rook manager module is enabled as the orchestrator backend, there are two settings required for showing disk information: ROOK_ENABLE_DISCOVERY_DAEMON : Set to true to provide the dashboard the information about physical disks. The default is false . ROOK_DISCOVER_DEVICES_INTERVAL : The interval for changes to be refreshed in the set of physical disks in the cluster. The default is 60 minutes. Modify the operator.yaml, and apply the changes: 1 $ kubectl apply -f operator.yaml Viewing the Dashboard External to the Cluster \u00b6 Commonly you will want to view the dashboard from outside the cluster. For example, on a development machine with the cluster running inside minikube you will want to access the dashboard from the host. There are several ways to expose a service that will depend on the environment you are running in. You can use an Ingress Controller or other methods for exposing services such as NodePort, LoadBalancer, or ExternalIPs. Node Port \u00b6 The simplest way to expose the service in minikube or similar environment is using the NodePort to open a port on the VM that can be accessed by the host. To create a service with the NodePort, save this yaml as dashboard-external-https.yaml . 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 apiVersion : v1 kind : Service metadata : name : rook-ceph-mgr-dashboard-external-https namespace : rook-ceph labels : app : rook-ceph-mgr rook_cluster : rook-ceph spec : ports : - name : dashboard port : 8443 protocol : TCP targetPort : 8443 selector : app : rook-ceph-mgr rook_cluster : rook-ceph sessionAffinity : None type : NodePort Now create the service: 1 kubectl create -f dashboard-external-https.yaml You will see the new service rook-ceph-mgr-dashboard-external-https created: 1 2 3 4 5 $ kubectl -n rook-ceph get service NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-mgr ClusterIP 10.108.111.192  9283/TCP 4h rook-ceph-mgr-dashboard ClusterIP 10.110.113.240  8443/TCP 4h rook-ceph-mgr-dashboard-external-https NodePort 10.101.209.6  8443:31176/TCP 4h In this example, port 31176 will be opened to expose port 8443 from the ceph-mgr pod. Find the ip address of the VM. If using minikube, you can run minikube ip to find the ip address. Now you can enter the URL in your browser such as https://192.168.99.110:31176 and the dashboard will appear. Load Balancer \u00b6 If you have a cluster on a cloud provider that supports load balancers, you can create a service that is provisioned with a public hostname. The yaml is the same as dashboard-external-https.yaml except for the following property: 1 2 3 spec : [ ... ] type : LoadBalancer Now create the service: 1 kubectl create -f dashboard-loadbalancer.yaml You will see the new service rook-ceph-mgr-dashboard-loadbalancer created: 1 2 3 4 5 $ kubectl -n rook-ceph get service NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-mgr ClusterIP 172.30.11.40  9283/TCP 4h rook-ceph-mgr-dashboard ClusterIP 172.30.203.185  8443/TCP 4h rook-ceph-mgr-dashboard-loadbalancer LoadBalancer 172.30.27.242 a7f23e8e2839511e9b7a5122b08f2038-1251669398.us-east-1.elb.amazonaws.com 8443:32747/TCP 4h Now you can enter the URL in your browser such as https://a7f23e8e2839511e9b7a5122b08f2038-1251669398.us-east-1.elb.amazonaws.com:8443 and the dashboard will appear. Ingress Controller \u00b6 If you have a cluster with an nginx Ingress Controller and a Certificate Manager (e.g. cert-manager ) then you can create an Ingress like the one below. This example achieves four things: Exposes the dashboard on the Internet (using a reverse proxy) Issues a valid TLS Certificate for the specified domain name (using ACME ) Tells the reverse proxy that the dashboard itself uses HTTPS Tells the reverse proxy that the dashboard itself does not have a valid certificate (it is self-signed) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 apiVersion : networking.k8s.io/v1 kind : Ingress metadata : name : rook-ceph-mgr-dashboard namespace : rook-ceph annotations : kubernetes.io/tls-acme : \"true\" nginx.ingress.kubernetes.io/backend-protocol : \"HTTPS\" nginx.ingress.kubernetes.io/server-snippet : | proxy_ssl_verify off; spec : ingressClassName : \"nginx\" tls : - hosts : - rook-ceph.example.com secretName : rook-ceph.example.com rules : - host : rook-ceph.example.com http : paths : - path : / pathType : Prefix backend : service : name : rook-ceph-mgr-dashboard port : name : https-dashboard Customise the Ingress resource to match your cluster. Replace the example domain name rook-ceph.example.com with a domain name that will resolve to your Ingress Controller (creating the DNS entry if required). 
Now create the Ingress: 1 kubectl create -f dashboard-ingress-https.yaml You will see the new Ingress rook-ceph-mgr-dashboard created: 1 2 3 $ kubectl -n rook-ceph get ingress NAME HOSTS ADDRESS PORTS AGE rook-ceph-mgr-dashboard rook-ceph.example.com 80, 443 5m And the new Secret for the TLS certificate: 1 2 3 kubectl -n rook-ceph get secret rook-ceph.example.com NAME TYPE DATA AGE rook-ceph.example.com kubernetes.io/tls 2 4m You can now browse to https://rook-ceph.example.com/ to log into the dashboard.","title":"Ceph Dashboard"},{"location":"Storage-Configuration/Monitoring/ceph-dashboard/#enable-the-ceph-dashboard","text":"The dashboard can be enabled with settings in the CephCluster CRD. The CephCluster CRD must have the dashboard enabled setting set to true . This is the default setting in the example manifests. 1 2 3 4 [ ... ] spec : dashboard : enabled : true The Rook operator will enable the ceph-mgr dashboard module. A service object will be created to expose that port inside the Kubernetes cluster. Rook will enable port 8443 for https access. This example shows that port 8443 was configured. 1 2 3 4 $ kubectl -n rook-ceph get service NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-mgr ClusterIP 10.108.111.192  9283/TCP 3h rook-ceph-mgr-dashboard ClusterIP 10.110.113.240  8443/TCP 3h The first service is for reporting the Prometheus metrics , while the latter service is for the dashboard. If you are on a node in the cluster, you will be able to connect to the dashboard by using either the DNS name of the service at https://rook-ceph-mgr-dashboard-https:8443 or by connecting to the cluster IP, in this example at https://10.110.113.240:8443 .","title":"Enable the Ceph Dashboard"},{"location":"Storage-Configuration/Monitoring/ceph-dashboard/#login-credentials","text":"After you connect to the dashboard you will need to login for secure access. Rook creates a default user named admin and generates a secret called rook-ceph-dashboard-password in the namespace where the Rook Ceph cluster is running. To retrieve the generated password, you can run the following: 1 kubectl -n rook-ceph get secret rook-ceph-dashboard-password -o jsonpath=\"{['data']['password']}\" | base64 --decode && echo","title":"Login Credentials"},{"location":"Storage-Configuration/Monitoring/ceph-dashboard/#configure-the-dashboard","text":"The following dashboard configuration settings are supported: 1 2 3 4 5 spec : dashboard : urlPrefix : /ceph-dashboard port : 8443 ssl : true urlPrefix If you are accessing the dashboard via a reverse proxy, you may wish to serve it under a URL prefix. To get the dashboard to use hyperlinks that include your prefix, you can set the urlPrefix setting. port The port that the dashboard is served on may be changed from the default using the port setting. The corresponding K8s service exposing the port will automatically be updated. ssl The dashboard may be served without SSL (useful for when you deploy the dashboard behind a proxy already served using SSL) by setting the ssl option to be false.","title":"Configure the Dashboard"},{"location":"Storage-Configuration/Monitoring/ceph-dashboard/#visualization-of-physical-disks-section-in-the-dashboard","text":"Information about physical disks is available only in Rook host clusters . The Rook manager module is required by the dashboard to obtain the information about physical disks, but it is disabled by default. Before it is enabled, the dashboard 'Physical Disks' section will show an error message. 
To prepare the Rook manager module to be used in the dashboard, modify your Ceph Cluster CRD: 1 2 3 4 mgr : modules : - name : rook enabled : true And apply the changes: 1 $ kubectl apply -f cluster.yaml Once the Rook manager module is enabled as the orchestrator backend, there are two settings required for showing disk information: ROOK_ENABLE_DISCOVERY_DAEMON : Set to true to provide the dashboard the information about physical disks. The default is false . ROOK_DISCOVER_DEVICES_INTERVAL : The interval for changes to be refreshed in the set of physical disks in the cluster. The default is 60 minutes. Modify the operator.yaml, and apply the changes: 1 $ kubectl apply -f operator.yaml","title":"Visualization of 'Physical Disks' section in the dashboard"},{"location":"Storage-Configuration/Monitoring/ceph-dashboard/#viewing-the-dashboard-external-to-the-cluster","text":"Commonly you will want to view the dashboard from outside the cluster. For example, on a development machine with the cluster running inside minikube you will want to access the dashboard from the host. There are several ways to expose a service that will depend on the environment you are running in. You can use an Ingress Controller or other methods for exposing services such as NodePort, LoadBalancer, or ExternalIPs.","title":"Viewing the Dashboard External to the Cluster"},{"location":"Storage-Configuration/Monitoring/ceph-dashboard/#node-port","text":"The simplest way to expose the service in minikube or similar environment is using the NodePort to open a port on the VM that can be accessed by the host. To create a service with the NodePort, save this yaml as dashboard-external-https.yaml . 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 apiVersion : v1 kind : Service metadata : name : rook-ceph-mgr-dashboard-external-https namespace : rook-ceph labels : app : rook-ceph-mgr rook_cluster : rook-ceph spec : ports : - name : dashboard port : 8443 protocol : TCP targetPort : 8443 selector : app : rook-ceph-mgr rook_cluster : rook-ceph sessionAffinity : None type : NodePort Now create the service: 1 kubectl create -f dashboard-external-https.yaml You will see the new service rook-ceph-mgr-dashboard-external-https created: 1 2 3 4 5 $ kubectl -n rook-ceph get service NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-mgr ClusterIP 10.108.111.192  9283/TCP 4h rook-ceph-mgr-dashboard ClusterIP 10.110.113.240  8443/TCP 4h rook-ceph-mgr-dashboard-external-https NodePort 10.101.209.6  8443:31176/TCP 4h In this example, port 31176 will be opened to expose port 8443 from the ceph-mgr pod. Find the ip address of the VM. If using minikube, you can run minikube ip to find the ip address. Now you can enter the URL in your browser such as https://192.168.99.110:31176 and the dashboard will appear.","title":"Node Port"},{"location":"Storage-Configuration/Monitoring/ceph-dashboard/#load-balancer","text":"If you have a cluster on a cloud provider that supports load balancers, you can create a service that is provisioned with a public hostname. The yaml is the same as dashboard-external-https.yaml except for the following property: 1 2 3 spec : [ ... 
] type : LoadBalancer Now create the service: 1 kubectl create -f dashboard-loadbalancer.yaml You will see the new service rook-ceph-mgr-dashboard-loadbalancer created: 1 2 3 4 5 $ kubectl -n rook-ceph get service NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-mgr ClusterIP 172.30.11.40  9283/TCP 4h rook-ceph-mgr-dashboard ClusterIP 172.30.203.185  8443/TCP 4h rook-ceph-mgr-dashboard-loadbalancer LoadBalancer 172.30.27.242 a7f23e8e2839511e9b7a5122b08f2038-1251669398.us-east-1.elb.amazonaws.com 8443:32747/TCP 4h Now you can enter the URL in your browser such as https://a7f23e8e2839511e9b7a5122b08f2038-1251669398.us-east-1.elb.amazonaws.com:8443 and the dashboard will appear.","title":"Load Balancer"},{"location":"Storage-Configuration/Monitoring/ceph-dashboard/#ingress-controller","text":"If you have a cluster with an nginx Ingress Controller and a Certificate Manager (e.g. cert-manager ) then you can create an Ingress like the one below. This example achieves four things: Exposes the dashboard on the Internet (using a reverse proxy) Issues a valid TLS Certificate for the specified domain name (using ACME ) Tells the reverse proxy that the dashboard itself uses HTTPS Tells the reverse proxy that the dashboard itself does not have a valid certificate (it is self-signed) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 apiVersion : networking.k8s.io/v1 kind : Ingress metadata : name : rook-ceph-mgr-dashboard namespace : rook-ceph annotations : kubernetes.io/tls-acme : \"true\" nginx.ingress.kubernetes.io/backend-protocol : \"HTTPS\" nginx.ingress.kubernetes.io/server-snippet : | proxy_ssl_verify off; spec : ingressClassName : \"nginx\" tls : - hosts : - rook-ceph.example.com secretName : rook-ceph.example.com rules : - host : rook-ceph.example.com http : paths : - path : / pathType : Prefix backend : service : name : rook-ceph-mgr-dashboard port : name : https-dashboard Customise the Ingress resource to match your cluster. Replace the example domain name rook-ceph.example.com with a domain name that will resolve to your Ingress Controller (creating the DNS entry if required). Now create the Ingress: 1 kubectl create -f dashboard-ingress-https.yaml You will see the new Ingress rook-ceph-mgr-dashboard created: 1 2 3 $ kubectl -n rook-ceph get ingress NAME HOSTS ADDRESS PORTS AGE rook-ceph-mgr-dashboard rook-ceph.example.com 80, 443 5m And the new Secret for the TLS certificate: 1 2 3 kubectl -n rook-ceph get secret rook-ceph.example.com NAME TYPE DATA AGE rook-ceph.example.com kubernetes.io/tls 2 4m You can now browse to https://rook-ceph.example.com/ to log into the dashboard.","title":"Ingress Controller"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/","text":"Each Rook Ceph cluster has some built in metrics collectors/exporters for monitoring with Prometheus . If you do not have Prometheus running, follow the steps below to enable monitoring of Rook. If your cluster already contains a Prometheus instance, it will automatically discover Rook's scrape endpoint using the standard prometheus.io/scrape and prometheus.io/port annotations. Attention This assumes that the Prometheus instances is searching all your Kubernetes namespaces for Pods with these annotations. If prometheus is already installed in a cluster, it may not be configured to watch for third-party service monitors such as for Rook. 
Normally you should be able to add the prometheus annotations prometheus.io/scrape=true and prometheus.io/port={port} and prometheus would automatically configure the scrape points and start gathering metrics. If prometheus isn't configured to do this, see the prometheus operator docs . Prometheus Operator \u00b6 First the Prometheus operator needs to be started in the cluster so it can watch for our requests to start monitoring Rook and respond by deploying the correct Prometheus pods and configuration. A full explanation can be found in the Prometheus operator repository on GitHub , but the quick instructions can be found here: 1 kubectl apply -f https://raw.githubusercontent.com/coreos/prometheus-operator/v0.40.0/bundle.yaml This will start the Prometheus operator, but before moving on, wait until the operator is in the Running state: 1 kubectl get pod Once the Prometheus operator is in the Running state, proceed to the next section to create a Prometheus instance. Prometheus Instances \u00b6 With the Prometheus operator running, we can create service monitors that will watch the Rook cluster. There are two sources for metrics collection: Prometheus manager module: It is responsible for exposing all metrics other than ceph daemons performance counters. Ceph exporter: It is responsible for exposing only ceph daemons performance counters as prometheus metrics. From the root of your locally cloned Rook repo, go the monitoring directory: 1 2 $ git clone --single-branch --branch master https://github.com/rook/rook.git cd rook/deploy/examples/monitoring Create the service monitor as well as the Prometheus server pod and service: 1 2 3 4 kubectl create -f service-monitor.yaml kubectl create -f exporter-service-monitor.yaml kubectl create -f prometheus.yaml kubectl create -f prometheus-service.yaml Ensure that the Prometheus server pod gets created and advances to the Running state before moving on: 1 kubectl -n rook-ceph get pod prometheus-rook-prometheus-0 Note It is not recommended to consume storage from the Ceph cluster for Prometheus. If the Ceph cluster fails, Prometheus would become unresponsive and thus not alert you of the failure. Prometheus Web Console \u00b6 Once the Prometheus server is running, you can open a web browser and go to the URL that is output from this command: 1 echo \"http://$(kubectl -n rook-ceph -o jsonpath={.status.hostIP} get pod prometheus-rook-prometheus-0):30900\" You should now see the Prometheus monitoring website. Click on Graph in the top navigation bar. In the dropdown that says insert metric at cursor , select any metric you would like to see, for example ceph_cluster_total_used_bytes Click on the Execute button. Below the Execute button, ensure the Graph tab is selected and you should now see a graph of your chosen metric over time. Prometheus Consoles \u00b6 You can find Prometheus Consoles for and from Ceph here: GitHub ceph/cephmetrics - dashboards/current directory . A guide to how you can write your own Prometheus consoles can be found on the official Prometheus site here: Prometheus.io Documentation - Console Templates . 
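If you prefer the command line to the web console, the same data can be pulled from Prometheus' HTTP API. A small sketch, reusing the NodePort URL construction from the Prometheus Web Console section and the example metric named there:

# query the example metric via the Prometheus HTTP API
PROM=http://$(kubectl -n rook-ceph -o jsonpath={.status.hostIP} get pod prometheus-rook-prometheus-0):30900
curl -s "${PROM}/api/v1/query?query=ceph_cluster_total_used_bytes"

The response is JSON containing the current value of ceph_cluster_total_used_bytes, which is convenient for quick checks or scripts without opening the console.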
Prometheus Alerts \u00b6 To enable the Ceph Prometheus alerts via the helm charts, set the following properties in values.yaml: rook-ceph chart: monitoring.enabled: true rook-ceph-cluster chart: monitoring.enabled: true monitoring.createPrometheusRules: true Alternatively, to enable the Ceph Prometheus alerts with example manifests follow these steps: Create the RBAC and prometheus rules: 1 2 kubectl create -f deploy/examples/monitoring/rbac.yaml kubectl create -f deploy/examples/monitoring/localrules.yaml Make following changes to your CephCluster object (e.g., cluster.yaml ). 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph [ ... ] spec : [ ... ] monitoring : enabled : true [ ... ] Deploy or update the CephCluster object. 1 kubectl apply -f cluster.yaml Note This expects the Prometheus Operator and a Prometheus instance to be pre-installed by the admin. Customize Alerts \u00b6 The Prometheus alerts can be customized with a post-processor using tools such as Kustomize . For example, first extract the helm chart: 1 helm template -f values.yaml rook-release/rook-ceph-cluster > cluster-chart.yaml Now create the desired customization configuration files. This simple example will show how to update the severity of a rule, add a label to a rule, and change the for time value. Create a file named kustomization.yaml: 1 2 3 4 5 6 7 8 9 patches : - path : modifications.yaml target : group : monitoring.coreos.com kind : PrometheusRule name : prometheus-ceph-rules version : v1 resources : - cluster-chart.yaml Create a file named modifications.yaml 1 2 3 4 5 6 7 8 - op : add path : /spec/groups/0/rules/0/labels value : my-label : foo severity : none - op : add path : /spec/groups/0/rules/0/for value : 15m Finally, run kustomize to update the desired prometheus rules: 1 2 kustomize build . > updated-chart.yaml kubectl create -f updated-chart.yaml Grafana Dashboards \u00b6 The dashboards have been created by @galexrt . For feedback on the dashboards please reach out to him on the Rook.io Slack . Note The dashboards are only compatible with Grafana 7.2.0 or higher. Also note that the dashboards are updated from time to time, to fix issues and improve them. The following Grafana dashboards are available: Ceph - Cluster Ceph - OSD (Single) Ceph - Pools Updates and Upgrades \u00b6 When updating Rook, there may be updates to RBAC for monitoring. It is easy to apply the changes with each update or upgrade. This should be done at the same time you update Rook common resources like common.yaml . 1 kubectl apply -f deploy/examples/monitoring/rbac.yaml Hint This is updated automatically if you are upgrading via the helm chart Teardown \u00b6 To clean up all the artifacts created by the monitoring walk-through, copy/paste the entire block below (note that errors about resources \"not found\" can be ignored): 1 2 3 4 kubectl delete -f service-monitor.yaml kubectl delete -f prometheus.yaml kubectl delete -f prometheus-service.yaml kubectl delete -f https://raw.githubusercontent.com/coreos/prometheus-operator/v0.40.0/bundle.yaml Then the rest of the instructions in the Prometheus Operator docs can be followed to finish cleaning up. Special Cases \u00b6 Tectonic Bare Metal \u00b6 Tectonic strongly discourages the tectonic-system Prometheus instance to be used outside their intentions, so you need to create a new Prometheus Operator yourself. After this you only need to create the service monitor as stated above. 
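Before moving on to the CSI liveness integration, the helm-based enablement from the Prometheus Alerts section above can be summarized as a minimal values.yaml sketch for the rook-ceph-cluster chart (everything else left at chart defaults; the rook-ceph operator chart only needs monitoring.enabled set to true):

# rook-ceph-cluster chart values.yaml (sketch)
monitoring:
  enabled: true                # deploy monitoring resources for the cluster
  createPrometheusRules: true  # also create the PrometheusRule containing the Ceph alerts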
CSI Liveness \u00b6 To integrate CSI liveness and grpc into ceph monitoring we will need to deploy a service and service monitor. 1 kubectl create -f csi-metrics-service-monitor.yaml This will create the service monitor to have prometheus monitor CSI Collecting RBD per-image IO statistics \u00b6 RBD per-image IO statistics collection is disabled by default. This can be enabled by setting enableRBDStats: true in the CephBlockPool spec. Prometheus does not need to be restarted after enabling it. Using custom label selectors in Prometheus \u00b6 If Prometheus needs to select specific resources, we can do so by injecting labels into these objects and using it as label selector. 1 2 3 4 5 6 7 8 9 10 11 12 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph [ ... ] spec : [ ... ] labels : monitoring : prometheus : k8s [ ... ] Horizontal Pod Scaling using Kubernetes Event-driven Autoscaling (KEDA) \u00b6 Using metrics exported from the Prometheus service, the horizontal pod scaling can use the custom metrics other than CPU and memory consumption. It can be done with help of Prometheus Scaler provided by the KEDA . See the KEDA deployment guide for details. Following is an example to autoscale RGW: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 apiVersion : keda.sh/v1alpha1 kind : ScaledObject metadata : name : rgw-scale namespace : rook-ceph spec : scaleTargetRef : kind : Deployment name : rook-ceph-rgw-my-store-a # deployment for the autoscaling minReplicaCount : 1 maxReplicaCount : 5 triggers : - type : prometheus metadata : serverAddress : http://rook-prometheus.rook-ceph.svc:9090 metricName : collecting_ceph_rgw_put query : | sum(rate(ceph_rgw_put[2m])) # prometheus query used for autoscaling threshold : \"90\" Warning During reconciliation of a CephObjectStore , the Rook Operator will reset the replica count for RGW which was set by horizontal pod scaler. The horizontal pod autoscaler will change the again once it re-evaluates the rule. This can result in a performance hiccup of several seconds after a reconciliation. This is briefly discussed (here)[ https://github.com/rook/rook/issues/10001 ]","title":"Prometheus Monitoring"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#prometheus-operator","text":"First the Prometheus operator needs to be started in the cluster so it can watch for our requests to start monitoring Rook and respond by deploying the correct Prometheus pods and configuration. A full explanation can be found in the Prometheus operator repository on GitHub , but the quick instructions can be found here: 1 kubectl apply -f https://raw.githubusercontent.com/coreos/prometheus-operator/v0.40.0/bundle.yaml This will start the Prometheus operator, but before moving on, wait until the operator is in the Running state: 1 kubectl get pod Once the Prometheus operator is in the Running state, proceed to the next section to create a Prometheus instance.","title":"Prometheus Operator"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#prometheus-instances","text":"With the Prometheus operator running, we can create service monitors that will watch the Rook cluster. There are two sources for metrics collection: Prometheus manager module: It is responsible for exposing all metrics other than ceph daemons performance counters. Ceph exporter: It is responsible for exposing only ceph daemons performance counters as prometheus metrics. 
From the root of your locally cloned Rook repo, go the monitoring directory: 1 2 $ git clone --single-branch --branch master https://github.com/rook/rook.git cd rook/deploy/examples/monitoring Create the service monitor as well as the Prometheus server pod and service: 1 2 3 4 kubectl create -f service-monitor.yaml kubectl create -f exporter-service-monitor.yaml kubectl create -f prometheus.yaml kubectl create -f prometheus-service.yaml Ensure that the Prometheus server pod gets created and advances to the Running state before moving on: 1 kubectl -n rook-ceph get pod prometheus-rook-prometheus-0 Note It is not recommended to consume storage from the Ceph cluster for Prometheus. If the Ceph cluster fails, Prometheus would become unresponsive and thus not alert you of the failure.","title":"Prometheus Instances"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#prometheus-web-console","text":"Once the Prometheus server is running, you can open a web browser and go to the URL that is output from this command: 1 echo \"http://$(kubectl -n rook-ceph -o jsonpath={.status.hostIP} get pod prometheus-rook-prometheus-0):30900\" You should now see the Prometheus monitoring website. Click on Graph in the top navigation bar. In the dropdown that says insert metric at cursor , select any metric you would like to see, for example ceph_cluster_total_used_bytes Click on the Execute button. Below the Execute button, ensure the Graph tab is selected and you should now see a graph of your chosen metric over time.","title":"Prometheus Web Console"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#prometheus-consoles","text":"You can find Prometheus Consoles for and from Ceph here: GitHub ceph/cephmetrics - dashboards/current directory . A guide to how you can write your own Prometheus consoles can be found on the official Prometheus site here: Prometheus.io Documentation - Console Templates .","title":"Prometheus Consoles"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#prometheus-alerts","text":"To enable the Ceph Prometheus alerts via the helm charts, set the following properties in values.yaml: rook-ceph chart: monitoring.enabled: true rook-ceph-cluster chart: monitoring.enabled: true monitoring.createPrometheusRules: true Alternatively, to enable the Ceph Prometheus alerts with example manifests follow these steps: Create the RBAC and prometheus rules: 1 2 kubectl create -f deploy/examples/monitoring/rbac.yaml kubectl create -f deploy/examples/monitoring/localrules.yaml Make following changes to your CephCluster object (e.g., cluster.yaml ). 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph [ ... ] spec : [ ... ] monitoring : enabled : true [ ... ] Deploy or update the CephCluster object. 1 kubectl apply -f cluster.yaml Note This expects the Prometheus Operator and a Prometheus instance to be pre-installed by the admin.","title":"Prometheus Alerts"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#customize-alerts","text":"The Prometheus alerts can be customized with a post-processor using tools such as Kustomize . For example, first extract the helm chart: 1 helm template -f values.yaml rook-release/rook-ceph-cluster > cluster-chart.yaml Now create the desired customization configuration files. This simple example will show how to update the severity of a rule, add a label to a rule, and change the for time value. 
Create a file named kustomization.yaml: 1 2 3 4 5 6 7 8 9 patches : - path : modifications.yaml target : group : monitoring.coreos.com kind : PrometheusRule name : prometheus-ceph-rules version : v1 resources : - cluster-chart.yaml Create a file named modifications.yaml 1 2 3 4 5 6 7 8 - op : add path : /spec/groups/0/rules/0/labels value : my-label : foo severity : none - op : add path : /spec/groups/0/rules/0/for value : 15m Finally, run kustomize to update the desired prometheus rules: 1 2 kustomize build . > updated-chart.yaml kubectl create -f updated-chart.yaml","title":"Customize Alerts"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#grafana-dashboards","text":"The dashboards have been created by @galexrt . For feedback on the dashboards please reach out to him on the Rook.io Slack . Note The dashboards are only compatible with Grafana 7.2.0 or higher. Also note that the dashboards are updated from time to time, to fix issues and improve them. The following Grafana dashboards are available: Ceph - Cluster Ceph - OSD (Single) Ceph - Pools","title":"Grafana Dashboards"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#updates-and-upgrades","text":"When updating Rook, there may be updates to RBAC for monitoring. It is easy to apply the changes with each update or upgrade. This should be done at the same time you update Rook common resources like common.yaml . 1 kubectl apply -f deploy/examples/monitoring/rbac.yaml Hint This is updated automatically if you are upgrading via the helm chart","title":"Updates and Upgrades"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#teardown","text":"To clean up all the artifacts created by the monitoring walk-through, copy/paste the entire block below (note that errors about resources \"not found\" can be ignored): 1 2 3 4 kubectl delete -f service-monitor.yaml kubectl delete -f prometheus.yaml kubectl delete -f prometheus-service.yaml kubectl delete -f https://raw.githubusercontent.com/coreos/prometheus-operator/v0.40.0/bundle.yaml Then the rest of the instructions in the Prometheus Operator docs can be followed to finish cleaning up.","title":"Teardown"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#special-cases","text":"","title":"Special Cases"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#tectonic-bare-metal","text":"Tectonic strongly discourages the tectonic-system Prometheus instance to be used outside their intentions, so you need to create a new Prometheus Operator yourself. After this you only need to create the service monitor as stated above.","title":"Tectonic Bare Metal"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#csi-liveness","text":"To integrate CSI liveness and grpc into ceph monitoring we will need to deploy a service and service monitor. 1 kubectl create -f csi-metrics-service-monitor.yaml This will create the service monitor to have prometheus monitor CSI","title":"CSI Liveness"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#collecting-rbd-per-image-io-statistics","text":"RBD per-image IO statistics collection is disabled by default. This can be enabled by setting enableRBDStats: true in the CephBlockPool spec. 
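As an illustration, a CephBlockPool sketch with this flag set; the pool name and replication settings here are placeholders rather than recommendations:

apiVersion: ceph.rook.io/v1
kind: CephBlockPool
metadata:
  name: replicapool
  namespace: rook-ceph
spec:
  failureDomain: host
  replicated:
    size: 3
  enableRBDStats: true  # expose per-image RBD IO statistics to Prometheus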
Prometheus does not need to be restarted after enabling it.","title":"Collecting RBD per-image IO statistics"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#using-custom-label-selectors-in-prometheus","text":"If Prometheus needs to select specific resources, we can do so by injecting labels into these objects and using it as label selector. 1 2 3 4 5 6 7 8 9 10 11 12 apiVersion : ceph.rook.io/v1 kind : CephCluster metadata : name : rook-ceph namespace : rook-ceph [ ... ] spec : [ ... ] labels : monitoring : prometheus : k8s [ ... ]","title":"Using custom label selectors in Prometheus"},{"location":"Storage-Configuration/Monitoring/ceph-monitoring/#horizontal-pod-scaling-using-kubernetes-event-driven-autoscaling-keda","text":"Using metrics exported from the Prometheus service, the horizontal pod scaling can use the custom metrics other than CPU and memory consumption. It can be done with help of Prometheus Scaler provided by the KEDA . See the KEDA deployment guide for details. Following is an example to autoscale RGW: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 apiVersion : keda.sh/v1alpha1 kind : ScaledObject metadata : name : rgw-scale namespace : rook-ceph spec : scaleTargetRef : kind : Deployment name : rook-ceph-rgw-my-store-a # deployment for the autoscaling minReplicaCount : 1 maxReplicaCount : 5 triggers : - type : prometheus metadata : serverAddress : http://rook-prometheus.rook-ceph.svc:9090 metricName : collecting_ceph_rgw_put query : | sum(rate(ceph_rgw_put[2m])) # prometheus query used for autoscaling threshold : \"90\" Warning During reconciliation of a CephObjectStore , the Rook Operator will reset the replica count for RGW which was set by horizontal pod scaler. The horizontal pod autoscaler will change the again once it re-evaluates the rule. This can result in a performance hiccup of several seconds after a reconciliation. This is briefly discussed (here)[ https://github.com/rook/rook/issues/10001 ]","title":"Horizontal Pod Scaling using Kubernetes Event-driven Autoscaling (KEDA)"},{"location":"Storage-Configuration/NFS/nfs-advanced/","text":"All CephNFS daemons are configured using shared RADOS objects stored in a Ceph pool named .nfs . Users can modify the configuration object for each CephNFS cluster if they wish to customize the configuration. Changing configuration of the .nfs pool \u00b6 By default, Rook creates the .nfs pool with Ceph's default configuration. If you wish to change the configuration of this pool (for example to change its failure domain or replication factor), you can create a CephBlockPool with the spec.name field set to .nfs . This pool must be replicated and cannot be erasure coded. deploy/examples/nfs.yaml contains a sample for reference. Adding custom NFS-Ganesha config file changes \u00b6 Ceph uses NFS-Ganesha servers. The config file format for these objects is documented in the NFS-Ganesha project . Use Ceph's rados tool from the toolbox to interact with the configuration object. The below command will get you started by dumping the contents of the config object to stdout. The output will look something like the example shown if you have already created two exports as documented above. It is best not to modify any of the export objects created by Ceph so as not to cause errors with Ceph's export management. 1 2 3 $ rados --pool  --namespace  get conf-nfs. - % url \"rados:////export-1\" % url \"rados:////export-2\" rados ls and rados put are other commands you will want to work with the other shared configuration objects. 
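As a sketch only: assuming a CephNFS named my-nfs whose shared objects live in the .nfs pool under a RADOS namespace matching the cluster name, the workflow from the toolbox could look like the following. The namespace and object names are assumptions following the conf-nfs convention shown above, not verified output:

# list the shared configuration objects for this CephNFS cluster
rados --pool .nfs --namespace my-nfs ls
# dump the config object to a local file, edit it, then write it back
rados --pool .nfs --namespace my-nfs get conf-nfs.my-nfs conf-nfs.txt
rados --pool .nfs --namespace my-nfs put conf-nfs.my-nfs conf-nfs.txt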
Of note, it is possible to pre-populate the NFS configuration and export objects prior to creating CephNFS server clusters. Creating NFS export over RGW \u00b6 Warning RGW NFS export is experimental for the moment. It is not recommended for scenario of modifying existing content. For creating an NFS export over RGW(CephObjectStore) storage backend, the below command can be used. This creates an export for the /testrgw pseudo path on an existing bucket bkt4exp as an example. You could use /testrgw pseudo for nfs mount operation afterwards. 1 ceph nfs export create rgw my-nfs /testrgw bkt4exp","title":"Advanced configuration"},{"location":"Storage-Configuration/NFS/nfs-advanced/#changing-configuration-of-the-nfs-pool","text":"By default, Rook creates the .nfs pool with Ceph's default configuration. If you wish to change the configuration of this pool (for example to change its failure domain or replication factor), you can create a CephBlockPool with the spec.name field set to .nfs . This pool must be replicated and cannot be erasure coded. deploy/examples/nfs.yaml contains a sample for reference.","title":"Changing configuration of the .nfs pool"},{"location":"Storage-Configuration/NFS/nfs-advanced/#adding-custom-nfs-ganesha-config-file-changes","text":"Ceph uses NFS-Ganesha servers. The config file format for these objects is documented in the NFS-Ganesha project . Use Ceph's rados tool from the toolbox to interact with the configuration object. The below command will get you started by dumping the contents of the config object to stdout. The output will look something like the example shown if you have already created two exports as documented above. It is best not to modify any of the export objects created by Ceph so as not to cause errors with Ceph's export management. 1 2 3 $ rados --pool  --namespace  get conf-nfs. - % url \"rados:////export-1\" % url \"rados:////export-2\" rados ls and rados put are other commands you will want to work with the other shared configuration objects. Of note, it is possible to pre-populate the NFS configuration and export objects prior to creating CephNFS server clusters.","title":"Adding custom NFS-Ganesha config file changes"},{"location":"Storage-Configuration/NFS/nfs-advanced/#creating-nfs-export-over-rgw","text":"Warning RGW NFS export is experimental for the moment. It is not recommended for scenario of modifying existing content. For creating an NFS export over RGW(CephObjectStore) storage backend, the below command can be used. This creates an export for the /testrgw pseudo path on an existing bucket bkt4exp as an example. You could use /testrgw pseudo for nfs mount operation afterwards. 1 ceph nfs export create rgw my-nfs /testrgw bkt4exp","title":"Creating NFS export over RGW"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/","text":"Attention This feature is experimental and will not support upgrades to future versions. For this section, we will refer to Rook's deployment examples in the deploy/examples directory. Enabling the CSI drivers \u00b6 The Ceph CSI NFS provisioner and driver require additional RBAC to operate. Apply the deploy/examples/csi/nfs/rbac.yaml manifest to deploy the additional resources. Rook will only deploy the Ceph CSI NFS provisioner and driver components when the ROOK_CSI_ENABLE_NFS config is set to \"true\" in the rook-ceph-operator-config configmap. Change the value in your manifest, or patch the resource as below. 
1 kubectl --namespace rook-ceph patch configmap rook-ceph-operator-config --type merge --patch '{\"data\":{\"ROOK_CSI_ENABLE_NFS\": \"true\"}}' Note The rook-ceph operator Helm chart will deploy the required RBAC and enable the driver components if csi.nfs.enabled is set to true . Creating NFS exports via PVC \u00b6 Prerequisites \u00b6 In order to create NFS exports via the CSI driver, you must first create a CephFilesystem to serve as the underlying storage for the exports, and you must create a CephNFS to run an NFS server that will expose the exports. RGWs cannot be used for the CSI driver. From the examples, filesystem.yaml creates a CephFilesystem called myfs , and nfs.yaml creates an NFS server called my-nfs . You may need to enable or disable the Ceph orchestrator. Follow the same steps documented above based on your Ceph version and desires. You must also create a storage class. Ceph CSI is designed to support any arbitrary Ceph cluster, but we are focused here only on Ceph clusters deployed by Rook. Let's take a look at a portion of the example storage class found at deploy/examples/csi/nfs/storageclass.yaml and break down how the values are determined. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-nfs provisioner : rook-ceph.nfs.csi.ceph.com # [1] parameters : nfsCluster : my-nfs # [2] server : rook-ceph-nfs-my-nfs-a # [3] clusterID : rook-ceph # [4] fsName : myfs # [5] pool : myfs-replicated # [6] # [7] (entire csi.storage.k8s.io/* section immediately below) csi.storage.k8s.io/provisioner-secret-name : rook-csi-cephfs-provisioner csi.storage.k8s.io/provisioner-secret-namespace : rook-ceph csi.storage.k8s.io/controller-expand-secret-name : rook-csi-cephfs-provisioner csi.storage.k8s.io/controller-expand-secret-namespace : rook-ceph csi.storage.k8s.io/node-stage-secret-name : rook-csi-cephfs-node csi.storage.k8s.io/node-stage-secret-namespace : rook-ceph # ... some fields omitted ... provisioner : rook-ceph .nfs.csi.ceph.com because rook-ceph is the namespace where the CephCluster is installed nfsCluster : my-nfs because this is the name of the CephNFS server : rook-ceph-nfs- my-nfs -a because Rook creates this Kubernetes Service for the CephNFS named my-nfs clusterID : rook-ceph because this is the namespace where the CephCluster is installed fsName : myfs because this is the name of the CephFilesystem used to back the NFS exports pool : myfs - replicated because myfs is the name of the CephFilesystem defined in fsName and because replicated is the name of a data pool defined in the CephFilesystem csi.storage.k8s.io/* : note that these values are shared with the Ceph CSI CephFS provisioner Creating a PVC \u00b6 See deploy/examples/csi/nfs/pvc.yaml for an example of how to create a PVC that will create an NFS export. The export will be created and a PV created for the PVC immediately, even without a Pod to mount the PVC. Attaching an export to a pod \u00b6 See deploy/examples/csi/nfs/pod.yaml for an example of how a PVC can be connected to an application pod. Connecting to an export directly \u00b6 After a PVC is created successfully, the share parameter set on the resulting PV contains the share path which can be used as the export path when mounting the export manually . In the example below /0001-0009-rook-ceph-0000000000000001-55c910f9-a1af-11ed-9772-1a471870b2f5 is the export path. 
1 2 $ kubectl get pv pvc-b559f225-de79-451b-a327-3dbec1f95a1c -o jsonpath = '{.spec.csi.volumeAttributes}' /0001-0009-rook-ceph-0000000000000001-55c910f9-a1af-11ed-9772-1a471870b2f5 Taking snapshots of NFS exports \u00b6 NFS export PVCs can be snapshotted and later restored to new PVCs. Creating snapshots \u00b6 First, create a VolumeSnapshotClass as in the example here . The csi.storage.k8s.io/snapshotter-secret-name parameter should reference the name of the secret created for the cephfsplugin here . 1 kubectl create -f deploy/examples/csi/nfs/snapshotclass.yaml In snapshot , volumeSnapshotClassName should be the name of the VolumeSnapshotClass previously created. The persistentVolumeClaimName should be the name of the PVC which is already created by the NFS CSI driver. 1 kubectl create -f deploy/examples/csi/nfs/snapshot.yaml Verifying snapshots \u00b6 1 2 3 $ kubectl get volumesnapshotclass NAME DRIVER DELETIONPOLICY AGE csi-nfslugin-snapclass rook-ceph.nfs.csi.ceph.com Delete 3h55m 1 2 3 $ kubectl get volumesnapshot NAME READYTOUSE SOURCEPVC SOURCESNAPSHOTCONTENT RESTORESIZE SNAPSHOTCLASS SNAPSHOTCONTENT CREATIONTIME AGE nfs-pvc-snapshot true nfs-pvc 1Gi csi-nfsplugin-snapclass snapcontent-34476204-a14a-4d59-bfbc-2bbba695652c 3h50m 3h51m The snapshot will be ready to restore to a new PVC when READYTOUSE field of the volumesnapshot is set to true. Restoring snapshot to a new PVC \u00b6 In pvc-restore , dataSource name should be the name of the VolumeSnapshot previously created. The dataSource kind should be \"VolumeSnapshot\". Create a new PVC from the snapshot. 1 kubectl create -f deploy/examples/csi/nfs/pvc-restore.yaml Verifying restored PVC Creation \u00b6 1 2 3 4 $ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE nfs-pvc Bound pvc-74734901-577a-11e9-b34f-525400581048 1Gi RWX rook-nfs 55m nfs-pvc-restore Bound pvc-95308c75-6c93-4928-a551-6b5137192209 1Gi RWX rook-nfs 34s Cleaning up snapshot resource \u00b6 To clean your cluster of the resources created by this example, run the following: 1 2 3 kubectl delete -f deploy/examples/csi/nfs/pvc-restore.yaml kubectl delete -f deploy/examples/csi/nfs/snapshot.yaml kubectl delete -f deploy/examples/csi/nfs/snapshotclass.yaml Cloning NFS exports \u00b6 Creating clones \u00b6 In pvc-clone , dataSource should be the name of the PVC which is already created by NFS CSI driver. The dataSource kind should be \"PersistentVolumeClaim\" and also storageclass should be same as the source PVC. Create a new PVC Clone from the PVC as in the example here . 1 kubectl create -f deploy/examples/csi/nfs/pvc-clone.yaml Verifying a cloned PVC \u00b6 1 kubectl get pvc 1 2 3 NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE nfs-pvc Bound pvc-1ea51547-a88b-4ab0-8b4a-812caeaf025d 1Gi RWX rook-nfs 39m nfs-pvc-clone Bound pvc-b575bc35-d521-4c41-b4f9-1d733cd28fdf 1Gi RWX rook-nfs 8s Cleaning up clone resources \u00b6 To clean your cluster of the resources created by this example, run the following: 1 kubectl delete -f deploy/examples/csi/nfs/pvc-clone.yaml","title":"CSI provisioner and driver"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#enabling-the-csi-drivers","text":"The Ceph CSI NFS provisioner and driver require additional RBAC to operate. Apply the deploy/examples/csi/nfs/rbac.yaml manifest to deploy the additional resources. Rook will only deploy the Ceph CSI NFS provisioner and driver components when the ROOK_CSI_ENABLE_NFS config is set to \"true\" in the rook-ceph-operator-config configmap. 
Change the value in your manifest, or patch the resource as below. 1 kubectl --namespace rook-ceph patch configmap rook-ceph-operator-config --type merge --patch '{\"data\":{\"ROOK_CSI_ENABLE_NFS\": \"true\"}}' Note The rook-ceph operator Helm chart will deploy the required RBAC and enable the driver components if csi.nfs.enabled is set to true .","title":"Enabling the CSI drivers"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#creating-nfs-exports-via-pvc","text":"","title":"Creating NFS exports via PVC"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#prerequisites","text":"In order to create NFS exports via the CSI driver, you must first create a CephFilesystem to serve as the underlying storage for the exports, and you must create a CephNFS to run an NFS server that will expose the exports. RGWs cannot be used for the CSI driver. From the examples, filesystem.yaml creates a CephFilesystem called myfs , and nfs.yaml creates an NFS server called my-nfs . You may need to enable or disable the Ceph orchestrator. Follow the same steps documented above based on your Ceph version and desires. You must also create a storage class. Ceph CSI is designed to support any arbitrary Ceph cluster, but we are focused here only on Ceph clusters deployed by Rook. Let's take a look at a portion of the example storage class found at deploy/examples/csi/nfs/storageclass.yaml and break down how the values are determined. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-nfs provisioner : rook-ceph.nfs.csi.ceph.com # [1] parameters : nfsCluster : my-nfs # [2] server : rook-ceph-nfs-my-nfs-a # [3] clusterID : rook-ceph # [4] fsName : myfs # [5] pool : myfs-replicated # [6] # [7] (entire csi.storage.k8s.io/* section immediately below) csi.storage.k8s.io/provisioner-secret-name : rook-csi-cephfs-provisioner csi.storage.k8s.io/provisioner-secret-namespace : rook-ceph csi.storage.k8s.io/controller-expand-secret-name : rook-csi-cephfs-provisioner csi.storage.k8s.io/controller-expand-secret-namespace : rook-ceph csi.storage.k8s.io/node-stage-secret-name : rook-csi-cephfs-node csi.storage.k8s.io/node-stage-secret-namespace : rook-ceph # ... some fields omitted ... provisioner : rook-ceph .nfs.csi.ceph.com because rook-ceph is the namespace where the CephCluster is installed nfsCluster : my-nfs because this is the name of the CephNFS server : rook-ceph-nfs- my-nfs -a because Rook creates this Kubernetes Service for the CephNFS named my-nfs clusterID : rook-ceph because this is the namespace where the CephCluster is installed fsName : myfs because this is the name of the CephFilesystem used to back the NFS exports pool : myfs - replicated because myfs is the name of the CephFilesystem defined in fsName and because replicated is the name of a data pool defined in the CephFilesystem csi.storage.k8s.io/* : note that these values are shared with the Ceph CSI CephFS provisioner","title":"Prerequisites"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#creating-a-pvc","text":"See deploy/examples/csi/nfs/pvc.yaml for an example of how to create a PVC that will create an NFS export. 
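A minimal sketch of such a claim, reusing the rook-nfs storage class from the example above (the claim name and size are illustrative):

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: nfs-pvc
spec:
  storageClassName: rook-nfs
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Gi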
The export will be created and a PV created for the PVC immediately, even without a Pod to mount the PVC.","title":"Creating a PVC"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#attaching-an-export-to-a-pod","text":"See deploy/examples/csi/nfs/pod.yaml for an example of how a PVC can be connected to an application pod.","title":"Attaching an export to a pod"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#connecting-to-an-export-directly","text":"After a PVC is created successfully, the share parameter set on the resulting PV contains the share path which can be used as the export path when mounting the export manually . In the example below /0001-0009-rook-ceph-0000000000000001-55c910f9-a1af-11ed-9772-1a471870b2f5 is the export path. 1 2 $ kubectl get pv pvc-b559f225-de79-451b-a327-3dbec1f95a1c -o jsonpath = '{.spec.csi.volumeAttributes}' /0001-0009-rook-ceph-0000000000000001-55c910f9-a1af-11ed-9772-1a471870b2f5","title":"Connecting to an export directly"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#taking-snapshots-of-nfs-exports","text":"NFS export PVCs can be snapshotted and later restored to new PVCs.","title":"Taking snapshots of NFS exports"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#creating-snapshots","text":"First, create a VolumeSnapshotClass as in the example here . The csi.storage.k8s.io/snapshotter-secret-name parameter should reference the name of the secret created for the cephfsplugin here . 1 kubectl create -f deploy/examples/csi/nfs/snapshotclass.yaml In snapshot , volumeSnapshotClassName should be the name of the VolumeSnapshotClass previously created. The persistentVolumeClaimName should be the name of the PVC which is already created by the NFS CSI driver. 1 kubectl create -f deploy/examples/csi/nfs/snapshot.yaml","title":"Creating snapshots"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#verifying-snapshots","text":"1 2 3 $ kubectl get volumesnapshotclass NAME DRIVER DELETIONPOLICY AGE csi-nfslugin-snapclass rook-ceph.nfs.csi.ceph.com Delete 3h55m 1 2 3 $ kubectl get volumesnapshot NAME READYTOUSE SOURCEPVC SOURCESNAPSHOTCONTENT RESTORESIZE SNAPSHOTCLASS SNAPSHOTCONTENT CREATIONTIME AGE nfs-pvc-snapshot true nfs-pvc 1Gi csi-nfsplugin-snapclass snapcontent-34476204-a14a-4d59-bfbc-2bbba695652c 3h50m 3h51m The snapshot will be ready to restore to a new PVC when READYTOUSE field of the volumesnapshot is set to true.","title":"Verifying snapshots"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#restoring-snapshot-to-a-new-pvc","text":"In pvc-restore , dataSource name should be the name of the VolumeSnapshot previously created. The dataSource kind should be \"VolumeSnapshot\". Create a new PVC from the snapshot. 
1 kubectl create -f deploy/examples/csi/nfs/pvc-restore.yaml","title":"Restoring snapshot to a new PVC"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#verifying-restored-pvc-creation","text":"1 2 3 4 $ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE nfs-pvc Bound pvc-74734901-577a-11e9-b34f-525400581048 1Gi RWX rook-nfs 55m nfs-pvc-restore Bound pvc-95308c75-6c93-4928-a551-6b5137192209 1Gi RWX rook-nfs 34s","title":"Verifying restored PVC Creation"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#cleaning-up-snapshot-resource","text":"To clean your cluster of the resources created by this example, run the following: 1 2 3 kubectl delete -f deploy/examples/csi/nfs/pvc-restore.yaml kubectl delete -f deploy/examples/csi/nfs/snapshot.yaml kubectl delete -f deploy/examples/csi/nfs/snapshotclass.yaml","title":"Cleaning up snapshot resource"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#cloning-nfs-exports","text":"","title":"Cloning NFS exports"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#creating-clones","text":"In pvc-clone , dataSource should be the name of the PVC which is already created by NFS CSI driver. The dataSource kind should be \"PersistentVolumeClaim\" and also storageclass should be same as the source PVC. Create a new PVC Clone from the PVC as in the example here . 1 kubectl create -f deploy/examples/csi/nfs/pvc-clone.yaml","title":"Creating clones"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#verifying-a-cloned-pvc","text":"1 kubectl get pvc 1 2 3 NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE nfs-pvc Bound pvc-1ea51547-a88b-4ab0-8b4a-812caeaf025d 1Gi RWX rook-nfs 39m nfs-pvc-clone Bound pvc-b575bc35-d521-4c41-b4f9-1d733cd28fdf 1Gi RWX rook-nfs 8s","title":"Verifying a cloned PVC"},{"location":"Storage-Configuration/NFS/nfs-csi-driver/#cleaning-up-clone-resources","text":"To clean your cluster of the resources created by this example, run the following: 1 kubectl delete -f deploy/examples/csi/nfs/pvc-clone.yaml","title":"Cleaning up clone resources"},{"location":"Storage-Configuration/NFS/nfs-security/","text":"Rook provides security for CephNFS server clusters through two high-level features: user ID mapping and user authentication . Attention All features in this document are experimental and may not support upgrades to future versions. Attention Some configurations of these features may break the ability to mount NFS storage to pods via PVCs . The NFS CSI driver may not be able to mount exports for pods when ID mapping is configured. User ID mapping \u00b6 User ID mapping allows the NFS server to map connected NFS client IDs to a different user domain, allowing NFS clients to be associated with a particular user in your organization. For example, users stored in LDAP can be associated with NFS users and vice versa. ID mapping via SSSD \u00b6 SSSD is the System Security Services Daemon. It can be used to provide user ID mapping from a number of sources including LDAP, Active Directory, and FreeIPA. Currently, only LDAP has been tested. Attention The Ceph container image must have the sssd-client package installed to support SSSD. This package is included in quay.io/ceph/ceph in v17.2.4 and newer. For older Ceph versions you may build your own Ceph image which adds RUN yum install sssd-client && yum clean all . SSSD configuration \u00b6 SSSD requires a configuration file in order to configure its connection to the user ID mapping system (e.g., LDAP). 
The file follows the sssd.conf format documented in its man pages . Methods of providing the configuration file are documented in the NFS CRD security section . Recommendations: - The SSSD sidecar only requires the namespace switch (a.k.a. \"nsswitch\" or \"nss\"). We recommend enabling only the nss service to lower CPU usage. - NFS-Ganesha does not require user enumeration. We recommend leaving this option unset or setting enumerate = false to speed up lookups and reduce RAM usage. - NFS exports created via documented methods do not require listing all members of groups. We recommend setting ignore_group_members = true to speed up LDAP lookups. Only customized exports that set manage_gids need to consider this option. A sample sssd.conf file is shown below. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 [sssd] # Only the nss service is required for the SSSD sidecar. services = nss domains = default config_file_version = 2 [nss] filter_users = root [domain/default] id_provider = ldap ldap_uri = ldap://server-address.example.net ldap_search_base = dc=example,dc=net ldap_default_bind_dn = cn=admin,dc=example,dc=net ldap_default_authtok_type = password ldap_default_authtok = my-password ldap_user_search_base = ou=users,dc=example,dc=net ldap_group_search_base = ou=groups,dc=example,dc=net ldap_access_filter = memberOf=cn=rook,ou=groups,dc=example,dc=net # recommended options for speeding up LDAP lookups: enumerate = false ignore_group_members = true The SSSD configuration file may be omitted from the CephNFS spec if desired. In this case, Rook will not set /etc/sssd/sssd.conf in any way. This allows you to manage the sssd.conf file yourself however you wish. For example, you may build it into your custom Ceph container image, or use the Vault agent injector to securely add the file via annotations on the CephNFS spec (passed to the NFS server pods). User authentication \u00b6 User authentication allows NFS clients and the Rook CephNFS servers to authenticate with each other to ensure security. Authentication through Kerberos \u00b6 Kerberos is the authentication mechanism natively supported by NFS-Ganesha. With NFSv4, individual users are authenticated and not merely client machines. Kerberos configuration \u00b6 Kerberos authentication requires configuration files in order for the NFS-Ganesha server to authenticate to the Kerberos server (KDC). The requirements are two-parted: 1. one or more kerberos configuration files that configures the connection to the Kerberos server. This file follows the krb5.conf format documented in its man pages . 2. a keytab file that provides credentials for the service principal that NFS-Ganesha will use to authenticate with the Kerberos server. 3. a kerberos domain name which will be used to map kerberos credentials to uid/gid domain name that NFS-Ganesha will use to authenticate with the Methods of providing the configuration files are documented in the NFS CRD security section . Recommendations: - Rook configures Kerberos to log to stderr. We suggest removing logging sections from config files to avoid consuming unnecessary disk space from logging to files. A sample Kerberos config file is shown below. 1 2 3 4 5 6 7 8 9 10 11 12 [libdefaults] default_realm = EXAMPLE.NET [realms] EXAMPLE.NET = { kdc = kdc.example.net:88 admin_server = kdc.example.net:749 } [domain_realm] .example.net = EXAMPLE.NET example.net = EXAMPLE.NET The Kerberos config files ( configFiles ) may be omitted from the Ceph NFS spec if desired. 
In this case, Rook will not add any config files to /etc/krb5.conf.rook/ , but it will still configure Kerberos to load any config files it finds there. This allows you to manage these files yourself however you wish. Similarly, the keytab file ( keytabFile ) may be omitted from the CephNFS spec if desired. In this case, Rook will not set /etc/krb5.keytab in any way. This allows you to manage the krb5.keytab file yourself however you wish. As an example for either of the above cases, you may build files into your custom Ceph container image or use the Vault agent injector to securely add files via annotations on the CephNFS spec (passed to the NFS server pods). NFS service principals \u00b6 The Kerberos service principal used by Rook's CephNFS servers to authenticate with the Kerberos server is built up from 3 components: 1. the configured from spec.security.kerberos.principalName that acts as the service name 2. the hostname of the server on which NFS-Ganesha is running which is in turn built up from the namespace and name of the CephNFS resource, joined by a hyphen. e.g., rooknamespace-nfsname 3. the realm as configured by the kerberos config file(s) from spec.security.kerberos.configFiles The full service principal name is constructed as /-@ . For ease of scaling up or down CephNFS clusters, this principal is used for all servers in the CephNFS cluster. Users must add this service principal to their Kerberos server configuration. Example For a CephNFS named \"fileshare\" in the \"business-unit\" Kubernetes namespace that has a principalName of \"sales-apac\" and where the Kerberos realm is \"EXAMPLE.NET\", the full principal name will be sales-apac/business-unit-fileshare@EXAMPLE.NET . Advanced spec.security.kerberos.principalName corresponds directly to NFS-Ganesha's NFS_KRB5:PrincipalName config. See the NFS-Ganesha wiki for more details. Kerberos domain name \u00b6 The kerberos domain name is used to setup the domain name in /etc/idmapd.conf. This domain name is used by idmap to map the kerberos credential to the user uid/gid. Without this configured, NFS-Ganesha will be unable to map the Kerberos principal to an uid/gid and will instead use the configured anonuid/anongid (default: -2) when accessing the local filesystem.","title":"Security"},{"location":"Storage-Configuration/NFS/nfs-security/#user-id-mapping","text":"User ID mapping allows the NFS server to map connected NFS client IDs to a different user domain, allowing NFS clients to be associated with a particular user in your organization. For example, users stored in LDAP can be associated with NFS users and vice versa.","title":"User ID mapping"},{"location":"Storage-Configuration/NFS/nfs-security/#id-mapping-via-sssd","text":"SSSD is the System Security Services Daemon. It can be used to provide user ID mapping from a number of sources including LDAP, Active Directory, and FreeIPA. Currently, only LDAP has been tested. Attention The Ceph container image must have the sssd-client package installed to support SSSD. This package is included in quay.io/ceph/ceph in v17.2.4 and newer. For older Ceph versions you may build your own Ceph image which adds RUN yum install sssd-client && yum clean all .","title":"ID mapping via SSSD"},{"location":"Storage-Configuration/NFS/nfs-security/#sssd-configuration","text":"SSSD requires a configuration file in order to configure its connection to the user ID mapping system (e.g., LDAP). The file follows the sssd.conf format documented in its man pages . 
Methods of providing the configuration file are documented in the NFS CRD security section . Recommendations: - The SSSD sidecar only requires the namespace switch (a.k.a. \"nsswitch\" or \"nss\"). We recommend enabling only the nss service to lower CPU usage. - NFS-Ganesha does not require user enumeration. We recommend leaving this option unset or setting enumerate = false to speed up lookups and reduce RAM usage. - NFS exports created via documented methods do not require listing all members of groups. We recommend setting ignore_group_members = true to speed up LDAP lookups. Only customized exports that set manage_gids need to consider this option. A sample sssd.conf file is shown below. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 [sssd] # Only the nss service is required for the SSSD sidecar. services = nss domains = default config_file_version = 2 [nss] filter_users = root [domain/default] id_provider = ldap ldap_uri = ldap://server-address.example.net ldap_search_base = dc=example,dc=net ldap_default_bind_dn = cn=admin,dc=example,dc=net ldap_default_authtok_type = password ldap_default_authtok = my-password ldap_user_search_base = ou=users,dc=example,dc=net ldap_group_search_base = ou=groups,dc=example,dc=net ldap_access_filter = memberOf=cn=rook,ou=groups,dc=example,dc=net # recommended options for speeding up LDAP lookups: enumerate = false ignore_group_members = true The SSSD configuration file may be omitted from the CephNFS spec if desired. In this case, Rook will not set /etc/sssd/sssd.conf in any way. This allows you to manage the sssd.conf file yourself however you wish. For example, you may build it into your custom Ceph container image, or use the Vault agent injector to securely add the file via annotations on the CephNFS spec (passed to the NFS server pods).","title":"SSSD configuration"},{"location":"Storage-Configuration/NFS/nfs-security/#user-authentication","text":"User authentication allows NFS clients and the Rook CephNFS servers to authenticate with each other to ensure security.","title":"User authentication"},{"location":"Storage-Configuration/NFS/nfs-security/#authentication-through-kerberos","text":"Kerberos is the authentication mechanism natively supported by NFS-Ganesha. With NFSv4, individual users are authenticated and not merely client machines.","title":"Authentication through Kerberos"},{"location":"Storage-Configuration/NFS/nfs-security/#kerberos-configuration","text":"Kerberos authentication requires configuration files in order for the NFS-Ganesha server to authenticate to the Kerberos server (KDC). The requirements are two-parted: 1. one or more kerberos configuration files that configures the connection to the Kerberos server. This file follows the krb5.conf format documented in its man pages . 2. a keytab file that provides credentials for the service principal that NFS-Ganesha will use to authenticate with the Kerberos server. 3. a kerberos domain name which will be used to map kerberos credentials to uid/gid domain name that NFS-Ganesha will use to authenticate with the Methods of providing the configuration files are documented in the NFS CRD security section . Recommendations: - Rook configures Kerberos to log to stderr. We suggest removing logging sections from config files to avoid consuming unnecessary disk space from logging to files. A sample Kerberos config file is shown below. 
1 2 3 4 5 6 7 8 9 10 11 12 [libdefaults] default_realm = EXAMPLE.NET [realms] EXAMPLE.NET = { kdc = kdc.example.net:88 admin_server = kdc.example.net:749 } [domain_realm] .example.net = EXAMPLE.NET example.net = EXAMPLE.NET The Kerberos config files ( configFiles ) may be omitted from the Ceph NFS spec if desired. In this case, Rook will not add any config files to /etc/krb5.conf.rook/ , but it will still configure Kerberos to load any config files it finds there. This allows you to manage these files yourself however you wish. Similarly, the keytab file ( keytabFile ) may be omitted from the CephNFS spec if desired. In this case, Rook will not set /etc/krb5.keytab in any way. This allows you to manage the krb5.keytab file yourself however you wish. As an example for either of the above cases, you may build files into your custom Ceph container image or use the Vault agent injector to securely add files via annotations on the CephNFS spec (passed to the NFS server pods).","title":"Kerberos configuration"},{"location":"Storage-Configuration/NFS/nfs-security/#nfs-service-principals","text":"The Kerberos service principal used by Rook's CephNFS servers to authenticate with the Kerberos server is built up from 3 components: 1. the configured from spec.security.kerberos.principalName that acts as the service name 2. the hostname of the server on which NFS-Ganesha is running which is in turn built up from the namespace and name of the CephNFS resource, joined by a hyphen. e.g., rooknamespace-nfsname 3. the realm as configured by the kerberos config file(s) from spec.security.kerberos.configFiles The full service principal name is constructed as /-@ . For ease of scaling up or down CephNFS clusters, this principal is used for all servers in the CephNFS cluster. Users must add this service principal to their Kerberos server configuration. Example For a CephNFS named \"fileshare\" in the \"business-unit\" Kubernetes namespace that has a principalName of \"sales-apac\" and where the Kerberos realm is \"EXAMPLE.NET\", the full principal name will be sales-apac/business-unit-fileshare@EXAMPLE.NET . Advanced spec.security.kerberos.principalName corresponds directly to NFS-Ganesha's NFS_KRB5:PrincipalName config. See the NFS-Ganesha wiki for more details.","title":"NFS service principals"},{"location":"Storage-Configuration/NFS/nfs-security/#kerberos-domain-name","text":"The kerberos domain name is used to setup the domain name in /etc/idmapd.conf. This domain name is used by idmap to map the kerberos credential to the user uid/gid. Without this configured, NFS-Ganesha will be unable to map the Kerberos principal to an uid/gid and will instead use the configured anonuid/anongid (default: -2) when accessing the local filesystem.","title":"Kerberos domain name"},{"location":"Storage-Configuration/NFS/nfs/","text":"NFS storage can be mounted with read/write permission from multiple pods. NFS storage may be especially useful for leveraging an existing Rook cluster to provide NFS storage for legacy applications that assume an NFS client connection. Such applications may not have been migrated to Kubernetes or might not yet support PVCs. Rook NFS storage can provide access to the same network filesystem storage from within the Kubernetes cluster via PVC while simultaneously providing access via direct client connection from within or outside of the Kubernetes cluster. Warning Simultaneous access to NFS storage from Pods and from from external clients complicates NFS user ID mapping significantly. 
Client IDs mapped from external clients will not be the same as the IDs associated with the NFS CSI driver, which mount exports for Kubernetes pods. Warning Due to a number of Ceph issues and changes, Rook officially only supports Ceph v16.2.7 or higher for CephNFS. If you are using an earlier version, upgrade your Ceph version following the advice given in Rook's v1.9 NFS docs . Note CephNFSes support NFSv4.1+ access only. Serving earlier protocols inhibits responsiveness after a server restart. Prerequisites \u00b6 This guide assumes you have created a Rook cluster as explained in the main quickstart guide as well as a Ceph filesystem which will act as the backing storage for NFS. Many samples reference the CephNFS and CephFilesystem example manifests here and here . Creating an NFS cluster \u00b6 Create the NFS cluster by specifying the desired settings documented for the NFS CRD . Creating Exports \u00b6 When a CephNFS is first created, all NFS daemons within the CephNFS cluster will share a configuration with no exports defined. When creating an export, it is necessary to specify the CephFilesystem which will act as the backing storage for the NFS export. RADOS Gateways (RGWs), provided by CephObjectStores , can also be used as backing storage for NFS exports if desired. Using the Ceph Dashboard \u00b6 Exports can be created via the Ceph dashboard as well. To enable and use the Ceph dashboard in Rook, see here . Using the Ceph CLI \u00b6 The Ceph CLI can be used from the Rook toolbox pod to create and manage NFS exports. To do so, first ensure the necessary Ceph mgr modules are enabled, if necessary, and that the Ceph orchestrator backend is set to Rook. Enable the Ceph orchestrator if necessary \u00b6 Required for Ceph v16.2.7 and below Optional for Ceph v16.2.8 and above Must be disabled for Ceph v17.2.1 due to a Ceph regression 1 2 3 ceph mgr module enable rook ceph mgr module enable nfs ceph orch set backend rook Ceph's NFS CLI can create NFS exports that are backed by CephFS (a CephFilesystem) or Ceph Object Gateway (a CephObjectStore). cluster_id or cluster-name in the Ceph NFS docs normally refers to the name of the NFS cluster, which is the CephNFS name in the Rook context. For creating an NFS export for the CephNFS and CephFilesystem example manifests, the below command can be used. This creates an export for the /test pseudo path. 1 ceph nfs export create cephfs my-nfs /test myfs The below command will list the current NFS exports for the example CephNFS cluster, which will give the output shown for the current example. 1 2 3 4 $ ceph nfs export ls my-nfs [ \"/test\" ] The simple /test export's info can be listed as well. Notice from the example that only NFS protocol v4 via TCP is supported. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 $ ceph nfs export info my-nfs /test { \"export_id\": 1, \"path\": \"/\", \"cluster_id\": \"my-nfs\", \"pseudo\": \"/test\", \"access_type\": \"RW\", \"squash\": \"none\", \"security_label\": true, \"protocols\": [ 4 ], \"transports\": [ \"TCP\" ], \"fsal\": { \"name\": \"CEPH\", \"user_id\": \"nfs.my-nfs.1\", \"fs_name\": \"myfs\" }, \"clients\": [] } If you are done managing NFS exports and don't need the Ceph orchestrator module enabled for anything else, it may be preferable to disable the Rook and NFS mgr modules to free up a small amount of RAM in the Ceph mgr Pod. 1 2 ceph orch set backend \"\" ceph mgr module disable rook Mounting exports \u00b6 Each CephNFS server has a unique Kubernetes Service. 
This is because NFS clients can't readily handle NFS failover. CephNFS services are named with the pattern rook-ceph-nfs--  is a unique letter ID (e.g., a, b, c, etc.) for a given NFS server. For example, rook-ceph-nfs-my-nfs-a . For each NFS client, choose an NFS service to use for the connection. With NFS v4, you can mount an export by its path using a mount command like below. You can mount all exports at once by omitting the export path and leaving the directory as just / . 1 mount -t nfs4 -o proto=tcp :/  Exposing the NFS server outside of the Kubernetes cluster \u00b6 Use a LoadBalancer Service to expose an NFS server (and its exports) outside of the Kubernetes cluster. The Service's endpoint can be used as the NFS service address when mounting the export manually . We provide an example Service here: deploy/examples/nfs-load-balancer.yaml . NFS Security \u00b6 Security options for NFS are documented here . Ceph CSI NFS provisioner and NFS CSI driver \u00b6 The NFS CSI provisioner and driver are documented here Advanced configuration \u00b6 Advanced NFS configuration is documented here Known issues \u00b6 Known issues are documented on the NFS CRD page .","title":"NFS Storage Overview"},{"location":"Storage-Configuration/NFS/nfs/#prerequisites","text":"This guide assumes you have created a Rook cluster as explained in the main quickstart guide as well as a Ceph filesystem which will act as the backing storage for NFS. Many samples reference the CephNFS and CephFilesystem example manifests here and here .","title":"Prerequisites"},{"location":"Storage-Configuration/NFS/nfs/#creating-an-nfs-cluster","text":"Create the NFS cluster by specifying the desired settings documented for the NFS CRD .","title":"Creating an NFS cluster"},{"location":"Storage-Configuration/NFS/nfs/#creating-exports","text":"When a CephNFS is first created, all NFS daemons within the CephNFS cluster will share a configuration with no exports defined. When creating an export, it is necessary to specify the CephFilesystem which will act as the backing storage for the NFS export. RADOS Gateways (RGWs), provided by CephObjectStores , can also be used as backing storage for NFS exports if desired.","title":"Creating Exports"},{"location":"Storage-Configuration/NFS/nfs/#using-the-ceph-dashboard","text":"Exports can be created via the Ceph dashboard as well. To enable and use the Ceph dashboard in Rook, see here .","title":"Using the Ceph Dashboard"},{"location":"Storage-Configuration/NFS/nfs/#using-the-ceph-cli","text":"The Ceph CLI can be used from the Rook toolbox pod to create and manage NFS exports. To do so, first ensure the necessary Ceph mgr modules are enabled, if necessary, and that the Ceph orchestrator backend is set to Rook.","title":"Using the Ceph CLI"},{"location":"Storage-Configuration/NFS/nfs/#enable-the-ceph-orchestrator-if-necessary","text":"Required for Ceph v16.2.7 and below Optional for Ceph v16.2.8 and above Must be disabled for Ceph v17.2.1 due to a Ceph regression 1 2 3 ceph mgr module enable rook ceph mgr module enable nfs ceph orch set backend rook Ceph's NFS CLI can create NFS exports that are backed by CephFS (a CephFilesystem) or Ceph Object Gateway (a CephObjectStore). cluster_id or cluster-name in the Ceph NFS docs normally refers to the name of the NFS cluster, which is the CephNFS name in the Rook context. For creating an NFS export for the CephNFS and CephFilesystem example manifests, the below command can be used. This creates an export for the /test pseudo path. 
1 ceph nfs export create cephfs my-nfs /test myfs The below command will list the current NFS exports for the example CephNFS cluster, which will give the output shown for the current example. 1 2 3 4 $ ceph nfs export ls my-nfs [ \"/test\" ] The simple /test export's info can be listed as well. Notice from the example that only NFS protocol v4 via TCP is supported. 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 $ ceph nfs export info my-nfs /test { \"export_id\": 1, \"path\": \"/\", \"cluster_id\": \"my-nfs\", \"pseudo\": \"/test\", \"access_type\": \"RW\", \"squash\": \"none\", \"security_label\": true, \"protocols\": [ 4 ], \"transports\": [ \"TCP\" ], \"fsal\": { \"name\": \"CEPH\", \"user_id\": \"nfs.my-nfs.1\", \"fs_name\": \"myfs\" }, \"clients\": [] } If you are done managing NFS exports and don't need the Ceph orchestrator module enabled for anything else, it may be preferable to disable the Rook and NFS mgr modules to free up a small amount of RAM in the Ceph mgr Pod. 1 2 ceph orch set backend \"\" ceph mgr module disable rook","title":"Enable the Ceph orchestrator if necessary"},{"location":"Storage-Configuration/NFS/nfs/#mounting-exports","text":"Each CephNFS server has a unique Kubernetes Service. This is because NFS clients can't readily handle NFS failover. CephNFS services are named with the pattern rook-ceph-nfs--  is a unique letter ID (e.g., a, b, c, etc.) for a given NFS server. For example, rook-ceph-nfs-my-nfs-a . For each NFS client, choose an NFS service to use for the connection. With NFS v4, you can mount an export by its path using a mount command like below. You can mount all exports at once by omitting the export path and leaving the directory as just / . 1 mount -t nfs4 -o proto=tcp :/ ","title":"Mounting exports"},{"location":"Storage-Configuration/NFS/nfs/#exposing-the-nfs-server-outside-of-the-kubernetes-cluster","text":"Use a LoadBalancer Service to expose an NFS server (and its exports) outside of the Kubernetes cluster. The Service's endpoint can be used as the NFS service address when mounting the export manually . We provide an example Service here: deploy/examples/nfs-load-balancer.yaml .","title":"Exposing the NFS server outside of the Kubernetes cluster"},{"location":"Storage-Configuration/NFS/nfs/#nfs-security","text":"Security options for NFS are documented here .","title":"NFS Security"},{"location":"Storage-Configuration/NFS/nfs/#ceph-csi-nfs-provisioner-and-nfs-csi-driver","text":"The NFS CSI provisioner and driver are documented here","title":"Ceph CSI NFS provisioner and NFS CSI driver"},{"location":"Storage-Configuration/NFS/nfs/#advanced-configuration","text":"Advanced NFS configuration is documented here","title":"Advanced configuration"},{"location":"Storage-Configuration/NFS/nfs/#known-issues","text":"Known issues are documented on the NFS CRD page .","title":"Known issues"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-claim/","text":"Rook supports the creation of new buckets and access to existing buckets via two custom resources: an Object Bucket Claim (OBC) is custom resource which requests a bucket (new or existing) and is described by a Custom Resource Definition (CRD) shown below. an Object Bucket (OB) is a custom resource automatically generated when a bucket is provisioned. It is a global resource, typically not visible to non-admin users, and contains information specific to the bucket. It is described by an OB CRD, also shown below. 
An OBC references a storage class which is created by an administrator. The storage class defines whether the bucket requested is a new bucket or an existing bucket. It also defines the bucket retention policy. Users request a new or existing bucket by creating an OBC which is shown below. The ceph provisioner detects the OBC and creates a new bucket or grants access to an existing bucket, depending on the storage class referenced in the OBC. It also generates a Secret which provides credentials to access the bucket, and a ConfigMap which contains the bucket's endpoint. Application pods consume the information in the Secret and ConfigMap to access the bucket. Note that to make the provisioner watch only the cluster namespace, set ROOK_OBC_WATCH_OPERATOR_NAMESPACE to true in the operator manifest; otherwise it watches all namespaces. Example \u00b6 OBC Custom Resource \u00b6 1 2 3 4 5 6 7 8 9 10 11 12 apiVersion : objectbucket.io/v1alpha1 kind : ObjectBucketClaim metadata : name : ceph-bucket [1] namespace : rook-ceph [2] spec : bucketName : [ 3 ] generateBucketName : photo-booth [4] storageClassName : rook-ceph-bucket [5] additionalConfig : [ 6 ] maxObjects : \"1000\" maxSize : \"2G\" name of the ObjectBucketClaim . This name becomes the name of the Secret and ConfigMap. namespace (optional) of the ObjectBucketClaim , which is also the namespace of the ConfigMap and Secret. bucketName name of the bucket . Not recommended for new buckets since names must be unique within an entire object store. generateBucketName value becomes the prefix for a randomly generated name; if supplied, then bucketName must be empty. If both bucketName and generateBucketName are supplied then bucketName has precedence and generateBucketName is ignored. If both bucketName and generateBucketName are blank or omitted then the storage class is expected to contain the name of an existing bucket. It's an error if all three bucket related names are blank or omitted. storageClassName the StorageClass which names the bucket provisioner and the object store, and specifies the bucket retention policy. additionalConfig is an optional list of key-value pairs used to define attributes specific to the bucket being provisioned by this OBC. This information is typically tuned to a particular bucket provisioner and may limit application portability. Options supported: maxObjects : The maximum number of objects in the bucket maxSize : The maximum size of the bucket; note that the minimum recommended value is 4K. OBC Custom Resource after Bucket Provisioning \u00b6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 apiVersion : objectbucket.io/v1alpha1 kind : ObjectBucketClaim metadata : creationTimestamp : \"2019-10-18T09:54:01Z\" generation : 2 name : ceph-bucket namespace : default [1] resourceVersion : \"559491\" spec : ObjectBucketName : obc-default-ceph-bucket [2] additionalConfig : null bucketName : photo-booth-c1178d61-1517-431f-8408-ec4c9fa50bee [3] storageClassName : rook-ceph-bucket [4] status : phase : Bound [5] namespace where the OBC was created. ObjectBucketName the generated OB name, created from the namespace and the OBC name. the generated (in this case), unique bucket name for the new bucket. name of the storage class the OBC was created from. phases of bucket creation: Pending : the operator is processing the request. Bound : the operator finished processing the request and linked the OBC and OB Released : the OB has been deleted, leaving the OBC unclaimed but unavailable. Failed : not currently set. 
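As a quick check after provisioning (this is not part of the upstream example), the OBC phase and the generated ConfigMap and Secret can be inspected with kubectl. The commands below assume the ceph-bucket OBC shown above was created in the rook-ceph namespace: 1 2 3 kubectl -n rook-ceph get objectbucketclaim ceph-bucket -o jsonpath='{.status.phase}' kubectl -n rook-ceph get configmap ceph-bucket -o jsonpath='{.data.BUCKET_NAME}' kubectl -n rook-ceph get secret ceph-bucket -o jsonpath='{.data.AWS_ACCESS_KEY_ID}' | base64 -d The ConfigMap and Secret keys are the same BUCKET_* and AWS_* values that are consumed as environment variables by the application pod shown next. 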
App Pod \u00b6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 apiVersion : v1 kind : Pod metadata : name : app-pod namespace : dev-user spec : containers : - name : mycontainer image : redis envFrom : [ 1 ] - configMapRef : name : ceph-bucket [2] - secretRef : name : ceph-bucket [3] use env: if mapping of the defined key names to the env var names used by the app is needed. makes available to the pod as env variables: BUCKET_HOST , BUCKET_PORT , BUCKET_NAME makes available to the pod as env variables: AWS_ACCESS_KEY_ID , AWS_SECRET_ACCESS_KEY StorageClass \u00b6 1 2 3 4 5 6 7 8 9 10 11 12 apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-ceph-bucket labels : aws-s3/object [1] provisioner : rook-ceph.ceph.rook.io/bucket [2] parameters : [ 3 ] objectStoreName : my-store objectStoreNamespace : rook-ceph bucketName : ceph-bucket [4] reclaimPolicy : Delete [5] label (optional) here associates this StorageClass to a specific provisioner. provisioner responsible for handling OBCs referencing this StorageClass . all parameter required. bucketName is required for access to existing buckets but is omitted when provisioning new buckets. Unlike greenfield provisioning, the brownfield bucket name appears in the StorageClass , not the OBC . rook-ceph provisioner decides how to treat the reclaimPolicy when an OBC is deleted for the bucket. See explanation as specified in Kubernetes Delete = physically delete the bucket. Retain = do not physically delete the bucket.","title":"Bucket Claim"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-claim/#example","text":"","title":"Example"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-claim/#obc-custom-resource","text":"1 2 3 4 5 6 7 8 9 10 11 12 apiVersion : objectbucket.io/v1alpha1 kind : ObjectBucketClaim metadata : name : ceph-bucket [1] namespace : rook-ceph [2] spec : bucketName : [ 3 ] generateBucketName : photo-booth [4] storageClassName : rook-ceph-bucket [5] additionalConfig : [ 6 ] maxObjects : \"1000\" maxSize : \"2G\" name of the ObjectBucketClaim . This name becomes the name of the Secret and ConfigMap. namespace (optional) of the ObjectBucketClaim , which is also the namespace of the ConfigMap and Secret. bucketName name of the bucket . Not recommended for new buckets since names must be unique within an entire object store. generateBucketName value becomes the prefix for a randomly generated name, if supplied then bucketName must be empty. If both bucketName and generateBucketName are supplied then BucketName has precedence and GenerateBucketName is ignored. If both bucketName and generateBucketName are blank or omitted then the storage class is expected to contain the name of an existing bucket. It's an error if all three bucket related names are blank or omitted. storageClassName which defines the StorageClass which contains the names of the bucket provisioner, the object-store and specifies the bucket retention policy. additionalConfig is an optional list of key-value pairs used to define attributes specific to the bucket being provisioned by this OBC. This information is typically tuned to a particular bucket provisioner and may limit application portability. 
Options supported: maxObjects : The maximum number of objects in the bucket maxSize : The maximum size of the bucket, please note minimum recommended value is 4K.","title":"OBC Custom Resource"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-claim/#obc-custom-resource-after-bucket-provisioning","text":"1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 apiVersion : objectbucket.io/v1alpha1 kind : ObjectBucketClaim metadata : creationTimestamp : \"2019-10-18T09:54:01Z\" generation : 2 name : ceph-bucket namespace : default [1] resourceVersion : \"559491\" spec : ObjectBucketName : obc-default-ceph-bucket [2] additionalConfig : null bucketName : photo-booth-c1178d61-1517-431f-8408-ec4c9fa50bee [3] storageClassName : rook-ceph-bucket [4] status : phase : Bound [5] namespace where OBC got created. ObjectBucketName generated OB name created using name space and OBC name. the generated (in this case), unique bucket name for the new bucket. name of the storage class from OBC got created. phases of bucket creation: Pending : the operator is processing the request. Bound : the operator finished processing the request and linked the OBC and OB Released : the OB has been deleted, leaving the OBC unclaimed but unavailable. Failed : not currently set.","title":"OBC Custom Resource after Bucket Provisioning"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-claim/#app-pod","text":"1 2 3 4 5 6 7 8 9 10 11 12 13 14 apiVersion : v1 kind : Pod metadata : name : app-pod namespace : dev-user spec : containers : - name : mycontainer image : redis envFrom : [ 1 ] - configMapRef : name : ceph-bucket [2] - secretRef : name : ceph-bucket [3] use env: if mapping of the defined key names to the env var names used by the app is needed. makes available to the pod as env variables: BUCKET_HOST , BUCKET_PORT , BUCKET_NAME makes available to the pod as env variables: AWS_ACCESS_KEY_ID , AWS_SECRET_ACCESS_KEY","title":"App Pod"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-claim/#storageclass","text":"1 2 3 4 5 6 7 8 9 10 11 12 apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-ceph-bucket labels : aws-s3/object [1] provisioner : rook-ceph.ceph.rook.io/bucket [2] parameters : [ 3 ] objectStoreName : my-store objectStoreNamespace : rook-ceph bucketName : ceph-bucket [4] reclaimPolicy : Delete [5] label (optional) here associates this StorageClass to a specific provisioner. provisioner responsible for handling OBCs referencing this StorageClass . all parameter required. bucketName is required for access to existing buckets but is omitted when provisioning new buckets. Unlike greenfield provisioning, the brownfield bucket name appears in the StorageClass , not the OBC . rook-ceph provisioner decides how to treat the reclaimPolicy when an OBC is deleted for the bucket. See explanation as specified in Kubernetes Delete = physically delete the bucket. Retain = do not physically delete the bucket.","title":"StorageClass"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-notifications/","text":"Rook supports the creation of bucket notifications via two custom resources: a CephBucketNotification is a custom resource the defines: topic, events and filters of a bucket notification, and is described by a Custom Resource Definition (CRD) shown below. Bucket notifications are associated with a bucket by setting labels on the Object Bucket claim (OBC). 
See the Ceph documentation for detailed information: Bucket Notifications - Ceph Object Gateway - Ceph Documentation . a CephBucketTopic is a custom resource which represents a bucket notification topic and is described by a CRD shown below. A bucket notification topic represents an endpoint (or a \"topic\" inside this endpoint) to which bucket notifications could be sent. Notifications \u00b6 A CephBucketNotification defines what bucket actions trigger the notification and which topic to send notifications to. A CephBucketNotification may also define a filter, based on the object's name and other object attributes. Notifications can be associated with buckets created via ObjectBucketClaims by adding labels to an ObjectBucketClaim with the following format: 1 bucket-notification- :  The CephBucketTopic, CephBucketNotification and ObjectBucketClaim must all belong to the same namespace. If a bucket was created manually (not via an ObjectBucketClaim), notifications on this bucket should also be created manually. However, topics in these notifications may reference topics that were created via CephBucketTopic resources. Topics \u00b6 A CephBucketTopic represents an endpoint (of types: Kafka, AMQP0.9.1 or HTTP), or a specific resource inside this endpoint (e.g., a Kafka or an AMQP topic, or a specific URI in an HTTP server). The CephBucketTopic also holds any additional info needed for a CephObjectStore's RADOS Gateways (RGW) to connect to the endpoint. Topics don't belong to a specific bucket or notification. Notifications from multiple buckets may be sent to the same topic, and one bucket (via multiple CephBucketNotifications) may send notifications to multiple topics. Notification Reliability and Delivery \u00b6 Notifications may be sent synchronously, as part of the operation that triggered them. In this mode, the operation is acknowledged only after the notification is sent to the topic\u2019s configured endpoint, which means that the round trip time of the notification is added to the latency of the operation itself. The original triggering operation will still be considered successful even if the notification fails with an error, cannot be delivered, or times out. Notifications may also be sent asynchronously. They will be committed into persistent storage and then asynchronously sent to the topic\u2019s configured endpoint. In this case, the only latency added to the original operation is the time needed to commit the notification to persistent storage. If the notification fails with an error, cannot be delivered, or times out, it will be retried until successfully acknowledged. 
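As an illustration of how the two resources fit together, a minimal CephBucketNotification referencing a topic could look like the sketch below. The names my-notification and my-topic are placeholders, and the exact set of supported fields and event names should be verified against the CephBucketNotification CRD reference: 1 2 3 4 5 6 7 8 9 10 apiVersion : ceph.rook.io/v1 kind : CephBucketNotification metadata : name : my-notification namespace : my-app-space # same namespace as the CephBucketTopic and the OBC spec : topic : my-topic # name of the CephBucketTopic to publish to events : - s3:ObjectCreated:Put - s3:ObjectRemoved:Delete A complete CephBucketTopic example follows in the next section. 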
Example \u00b6 CephBucketTopic Custom Resource \u00b6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 apiVersion : ceph.rook.io/v1 kind : CephBucketTopic metadata : name : my-topic [1] namespace : my-app-space [2] spec : objectStoreName : my-store [3] objectStoreNamespace : rook-ceph [4] opaqueData : my@email.com [5] persistent : false [6] endpoint : [ 7 ] http : [ 8 ] uri : http://my-notification-endpoint:8080 # uri: http://my-notification-endpoint:8080/my-topic # uri: https://my-notification-endpoint:8443 disableVerifySSL : true [9] sendCloudEvents : false [10] # amqp: [11] # uri: amqp://my-rabbitmq-service:5672 # uri: amqp://my-rabbitmq-service:5672/vhost1 # uri: amqps://user@password:my-rabbitmq-service:5672 # disableVerifySSL: true [12] # ackLevel: broker [13] # exchange: my-exchange [14] # kafka: [15] # uri: kafka://my-kafka-service:9092 # disableVerifySSL: true [16] # ackLevel: broker [17] # useSSL: false [18] name of the CephBucketTopic In case of AMQP endpoint, the name is used for the AMQP topic (\u201crouting key\u201d for a topic exchange) In case of Kafka endpoint, the name is used as the Kafka topic namespace (optional) of the CephBucketTopic . Should match the namespace of the CephBucketNotification associated with this CephBucketTopic, and the OBC with the label referencing the CephBucketNotification objectStoreName is the name of the object store in which the topic should be created. This must be the same object store used for the buckets associated with the notifications referencing this topic. objectStoreNamespace is the namespace of the object store in which the topic should be created opaqueData (optional) is added to all notifications triggered by a notifications associated with the topic persistent (optional) indicates whether notifications to this endpoint are persistent (=asynchronous) or sent synchronously (\u201cfalse\u201d by default) endpoint to which to send the notifications to. Exactly one of the endpoints must be defined: http , amqp , kafka http (optional) hold the spec for an HTTP endpoint. The format of the URI would be: http[s]://[:][/] port defaults to: 80/443 for HTTP/S accordingly disableVerifySSL indicates whether the RGW is going to verify the SSL certificate of the HTTP server in case HTTPS is used (\"false\" by default) sendCloudEvents : (optional) send the notifications with the CloudEvents header . Supported for Ceph Quincy (v17) or newer (\"false\" by default) amqp (optional) hold the spec for an AMQP endpoint. The format of the URI would be: amqp[s]://[:@][:][/] port defaults to: 5672/5671 for AMQP/S accordingly user/password defaults to: guest/guest user/password may only be provided if HTTPS is used with the RGW. If not, topic creation request will be rejected vhost defaults to: \u201c/\u201d disableVerifySSL (optional) indicates whether the RGW is going to verify the SSL certificate of the AMQP server in case AMQPS is used (\"false\" by default) ackLevel (optional) indicates what kind of ack the RGW is waiting for after sending the notifications: \u201cnone\u201d: message is considered \u201cdelivered\u201d if sent to broker \u201cbroker\u201d: message is considered \u201cdelivered\u201d if acked by broker (default) \u201croutable\u201d: message is considered \u201cdelivered\u201d if broker can route to a consumer exchange in the AMQP broker that would route the notifications. Different topics pointing to the same endpoint must use the same exchange kafka (optional) hold the spec for a Kafka endpoint. 
The format of the URI would be: kafka://[:@][: :  # are ignored by the operator's bucket notifications provisioning mechanism some-label : some-value # the following label adds notifications to this bucket bucket-notification-my-notification : my-notification bucket-notification-another-notification : another-notification spec : generateBucketName : ceph-bkt storageClassName : rook-ceph-delete-bucket","title":"Object Bucket Notifications"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-notifications/#notifications","text":"A CephBucketNotification defines what bucket actions trigger the notification and which topic to send notifications to. A CephBucketNotification may also define a filter, based on the object's name and other object attributes. Notifications can be associated with buckets created via ObjectBucketClaims by adding labels to an ObjectBucketClaim with the following format: 1 bucket-notification- :  The CephBucketTopic, CephBucketNotification and ObjectBucketClaim must all belong to the same namespace. If a bucket was created manually (not via an ObjectBucketClaim), notifications on this bucket should also be created manually. However, topics in these notifications may reference topics that were created via CephBucketTopic resources.","title":"Notifications"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-notifications/#topics","text":"A CephBucketTopic represents an endpoint (of types: Kafka, AMQP0.9.1 or HTTP), or a specific resource inside this endpoint (e.g a Kafka or an AMQP topic, or a specific URI in an HTTP server). The CephBucketTopic also holds any additional info needed for a CephObjectStore's RADOS Gateways (RGW) to connect to the endpoint. Topics don't belong to a specific bucket or notification. Notifications from multiple buckets may be sent to the same topic, and one bucket (via multiple CephBucketNotifications) may send notifications to multiple topics.","title":"Topics"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-notifications/#notification-reliability-and-delivery","text":"Notifications may be sent synchronously, as part of the operation that triggered them. In this mode, the operation is acknowledged only after the notification is sent to the topic\u2019s configured endpoint, which means that the round trip time of the notification is added to the latency of the operation itself. The original triggering operation will still be considered as successful even if the notification fail with an error, cannot be delivered or times out. Notifications may also be sent asynchronously. They will be committed into persistent storage and then asynchronously sent to the topic\u2019s configured endpoint. In this case, the only latency added to the original operation is of committing the notification to persistent storage. 
If the notification fail with an error, cannot be delivered or times out, it will be retried until successfully acknowledged.","title":"Notification Reliability and Delivery"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-notifications/#example","text":"","title":"Example"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-bucket-notifications/#cephbuckettopic-custom-resource","text":"1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 apiVersion : ceph.rook.io/v1 kind : CephBucketTopic metadata : name : my-topic [1] namespace : my-app-space [2] spec : objectStoreName : my-store [3] objectStoreNamespace : rook-ceph [4] opaqueData : my@email.com [5] persistent : false [6] endpoint : [ 7 ] http : [ 8 ] uri : http://my-notification-endpoint:8080 # uri: http://my-notification-endpoint:8080/my-topic # uri: https://my-notification-endpoint:8443 disableVerifySSL : true [9] sendCloudEvents : false [10] # amqp: [11] # uri: amqp://my-rabbitmq-service:5672 # uri: amqp://my-rabbitmq-service:5672/vhost1 # uri: amqps://user@password:my-rabbitmq-service:5672 # disableVerifySSL: true [12] # ackLevel: broker [13] # exchange: my-exchange [14] # kafka: [15] # uri: kafka://my-kafka-service:9092 # disableVerifySSL: true [16] # ackLevel: broker [17] # useSSL: false [18] name of the CephBucketTopic In case of AMQP endpoint, the name is used for the AMQP topic (\u201crouting key\u201d for a topic exchange) In case of Kafka endpoint, the name is used as the Kafka topic namespace (optional) of the CephBucketTopic . Should match the namespace of the CephBucketNotification associated with this CephBucketTopic, and the OBC with the label referencing the CephBucketNotification objectStoreName is the name of the object store in which the topic should be created. This must be the same object store used for the buckets associated with the notifications referencing this topic. objectStoreNamespace is the namespace of the object store in which the topic should be created opaqueData (optional) is added to all notifications triggered by a notifications associated with the topic persistent (optional) indicates whether notifications to this endpoint are persistent (=asynchronous) or sent synchronously (\u201cfalse\u201d by default) endpoint to which to send the notifications to. Exactly one of the endpoints must be defined: http , amqp , kafka http (optional) hold the spec for an HTTP endpoint. The format of the URI would be: http[s]://[:][/] port defaults to: 80/443 for HTTP/S accordingly disableVerifySSL indicates whether the RGW is going to verify the SSL certificate of the HTTP server in case HTTPS is used (\"false\" by default) sendCloudEvents : (optional) send the notifications with the CloudEvents header . Supported for Ceph Quincy (v17) or newer (\"false\" by default) amqp (optional) hold the spec for an AMQP endpoint. The format of the URI would be: amqp[s]://[:@][:][/] port defaults to: 5672/5671 for AMQP/S accordingly user/password defaults to: guest/guest user/password may only be provided if HTTPS is used with the RGW. 
If not, topic creation request will be rejected vhost defaults to: \u201c/\u201d disableVerifySSL (optional) indicates whether the RGW is going to verify the SSL certificate of the AMQP server in case AMQPS is used (\"false\" by default) ackLevel (optional) indicates what kind of ack the RGW is waiting for after sending the notifications: \u201cnone\u201d: message is considered \u201cdelivered\u201d if sent to broker \u201cbroker\u201d: message is considered \u201cdelivered\u201d if acked by broker (default) \u201croutable\u201d: message is considered \u201cdelivered\u201d if broker can route to a consumer exchange in the AMQP broker that would route the notifications. Different topics pointing to the same endpoint must use the same exchange kafka (optional) hold the spec for a Kafka endpoint. The format of the URI would be: kafka://[:@][: :  # are ignored by the operator's bucket notifications provisioning mechanism some-label : some-value # the following label adds notifications to this bucket bucket-notification-my-notification : my-notification bucket-notification-another-notification : another-notification spec : generateBucketName : ceph-bkt storageClassName : rook-ceph-delete-bucket","title":"OBC Custom Resource"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/","text":"Multisite is a feature of Ceph that allows object stores to replicate their data over multiple Ceph clusters. Multisite also allows object stores to be independent and isolated from other object stores in a cluster. When a ceph-object-store is created without the zone section; a realm, zone group, and zone is created with the same name as the ceph-object-store. Since it is the only ceph-object-store in the realm, the data in the ceph-object-store remain independent and isolated from others on the same cluster. When a ceph-object-store is created with the zone section, the ceph-object-store will join a custom created zone, zone group, and realm each with a different names than its own. This allows the ceph-object-store to replicate its data over multiple Ceph clusters. To review core multisite concepts please read the ceph-multisite design overview . Prerequisites \u00b6 This guide assumes a Rook cluster as explained in the Quickstart . Creating Object Multisite \u00b6 If an admin wants to set up multisite on a Rook Ceph cluster, the following resources must be created: A realm A zonegroup A zone A ceph object store with the zone section object-multisite.yaml in the examples directory can be used to create the multisite CRDs. 1 kubectl create -f object-multisite.yaml The first zone group created in a realm is the master zone group. The first zone created in a zone group is the master zone. When a non-master zone or non-master zone group is created, the zone group or zone is not in the Ceph Radosgw Multisite Period until an object-store is created in that zone (and zone group). The zone will create the pools for the object-store(s) that are in the zone to use. When one of the multisite CRs (realm, zone group, zone) is deleted the underlying ceph realm/zone group/zone is not deleted, neither are the pools created by the zone. See the \"Multisite Cleanup\" section for more information. For more information on the multisite CRDs, see the related CRDs: - CephObjectRealm - CephObjectZoneGroup - CephObjectZone Pulling a Realm \u00b6 If an admin wants to sync data from another cluster, the admin needs to pull a realm on a Rook Ceph cluster from another Rook Ceph (or Ceph) cluster. 
To begin doing this, the admin needs 2 pieces of information: An endpoint from the realm being pulled from The access key and the system key of the system user from the realm being pulled from. Getting the Pull Endpoint \u00b6 To pull a Ceph realm from a remote Ceph cluster, an endpoint must be added to the CephObjectRealm's pull section in the spec . This endpoint must be from the master zone in the master zone group of that realm. If an admin does not know of an endpoint that fits this criteria, the admin can find such an endpoint on the remote Ceph cluster (via the tool box if it is a Rook Ceph Cluster) by running: 1 2 3 4 5 6 $ radosgw-admin zonegroup get --rgw-realm = $REALM_NAME --rgw-zonegroup = $MASTER_ZONEGROUP_NAME { ... \"endpoints\": [http://10.17.159.77:80], ... } A list of endpoints in the master zone group in the master zone is in the endpoints section of the JSON output of the zonegoup get command. This endpoint must also be resolvable from the new Rook Ceph cluster. To test this run the curl command on the endpoint: 1 2 $ curl -L http://10.17.159.77:80 anonymous Finally add the endpoint to the pull section of the CephObjectRealm's spec. The CephObjectRealm should have the same name as the CephObjectRealm/Ceph realm it is pulling from. 1 2 3 4 5 6 7 8 apiVersion : ceph.rook.io/v1 kind : CephObjectRealm metadata : name : realm-a namespace : rook-ceph spec : pull : endpoint : http://10.17.159.77:80 Getting Realm Access Key and Secret Key \u00b6 The access key and secret key of the system user are keys that allow other Ceph clusters to pull the realm of the system user. Getting the Realm Access Key and Secret Key from the Rook Ceph Cluster \u00b6 System User for Multisite \u00b6 When an admin creates a ceph-object-realm a system user automatically gets created for the realm with an access key and a secret key. This system user has the name \"$REALM_NAME-system-user\". For the example if realm name is realm-a , then uid for the system user is \"realm-a-system-user\". These keys for the user are exported as a kubernetes secret called \"$REALM_NAME-keys\" (ex: realm-a-keys). This system user used by RGW internally for the data replication. Getting keys from k8s secret \u00b6 To get these keys from the cluster the realm was originally created on, run: 1 kubectl -n $ORIGINAL_CLUSTER_NAMESPACE get secrets realm-a-keys -o yaml > realm-a-keys.yaml Edit the realm-a-keys.yaml file, and change the namespace with the namespace that the new Rook Ceph cluster exists in. Then create a kubernetes secret on the pulling Rook Ceph cluster with the same secrets yaml file. 1 kubectl create -f realm-a-keys.yaml Getting the Realm Access Key and Secret Key from a Non Rook Ceph Cluster \u00b6 The access key and the secret key of the system user can be found in the output of running the following command on a non-rook ceph cluster: 1 radosgw-admin user info --uid=\"realm-a-system-user\" 1 2 3 4 5 6 7 8 9 10 11 { ... \"keys\" : [ { \"user\" : \"realm-a-system-user\" \"access_key\" : \"aSw4blZIKV9nKEU5VC0=\" \"secret_key\" : \"JSlDXFt5TlgjSV9QOE9XUndrLiI5JEo9YDBsJg==\" , } ], ... } Then base64 encode the each of the keys and create a .yaml file for the Kubernetes secret from the following template. Only the access-key , secret-key , and namespace sections need to be replaced. 
1 2 3 4 5 6 7 8 9 apiVersion : v1 data : access-key : YVN3NGJsWklLVjluS0VVNVZDMD0= secret-key : SlNsRFhGdDVUbGdqU1Y5UU9FOVhVbmRyTGlJNUpFbzlZREJzSmc9PQ== kind : Secret metadata : name : realm-a-keys namespace : $NEW_ROOK_CLUSTER_NAMESPACE type : kubernetes.io/rook Finally, create a kubernetes secret on the pulling Rook Ceph cluster with the new secrets yaml file. 1 kubectl create -f realm-a-keys.yaml Pulling a Realm on a New Rook Ceph Cluster \u00b6 Once the admin knows the endpoint and the secret for the keys has been created, the admin should create: A CephObjectRealm matching to the realm on the other Ceph cluster, with an endpoint as described above. A CephObjectZoneGroup matching the master zone group name or the master CephObjectZoneGroup from the cluster the realm was pulled from. A CephObjectZone referring to the CephObjectZoneGroup created above. A CephObjectStore referring to the new CephObjectZone resource. object-multisite-pull-realm.yaml (with changes) in the examples directory can be used to create the multisite CRDs. 1 kubectl create -f object-multisite-pull-realm.yaml Scaling a Multisite \u00b6 Scaling the number of gateways that run the synchronization thread to 2 or more can increase the latency of the replication of each S3 object. The recommended way to scale a mutisite configuration is to dissociate the gateway dedicated to the synchronization from gateways that serve clients. The two types of gateways can be deployed by creating two CephObjectStores associated with the same CephObjectZone. The objectstore that deploys the gateway dedicated to the synchronization must have spec.gateway.instances set to 1 , while the objectstore that deploys the client gateways have multiple replicas and should disable the synchronization thread on the gateways by setting spec.gateway.disableMultisiteSyncTraffic to true . 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 --- apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : replication namespace : rook-ceph spec : gateway : port : 80 instances : 1 disableMultisiteSyncTraffic : false zone : name : zone-a --- apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : clients namespace : rook-ceph spec : gateway : port : 80 instances : 5 disableMultisiteSyncTraffic : true zone : name : zone-a Multisite Cleanup \u00b6 Multisite configuration must be cleaned up by hand. Deleting a realm/zone group/zone CR will not delete the underlying Ceph realm, zone group, zone, or the pools associated with a zone. Realm Deletion \u00b6 Changes made to the resource's configuration or deletion of the resource are not reflected on the Ceph cluster. When the ceph-object-realm resource is deleted or modified, the realm is not deleted from the Ceph cluster. Realm deletion must be done via the toolbox. Deleting a Realm \u00b6 The Rook toolbox can modify the Ceph Multisite state via the radosgw-admin command. The following command, run via the toolbox, deletes the realm. 1 radosgw-admin realm delete --rgw-realm=realm-a Zone Group Deletion \u00b6 Changes made to the resource's configuration or deletion of the resource are not reflected on the Ceph cluster. When the ceph-object-zone group resource is deleted or modified, the zone group is not deleted from the Ceph cluster. Zone Group deletion must be done through the toolbox. Deleting a Zone Group \u00b6 The Rook toolbox can modify the Ceph Multisite state via the radosgw-admin command. The following command, run via the toolbox, deletes the zone group. 
1 2 radosgw-admin zonegroup delete --rgw-realm=realm-a --rgw-zonegroup=zone-group-a radosgw-admin period update --commit --rgw-realm=realm-a --rgw-zonegroup=zone-group-a Deleting and Reconfiguring the Ceph Object Zone \u00b6 Changes made to the resource's configuration or deletion of the resource are not reflected on the Ceph cluster. When the ceph-object-zone resource is deleted or modified, the zone is not deleted from the Ceph cluster. Zone deletion must be done through the toolbox. Changing the Master Zone \u00b6 The Rook toolbox can change the master zone in a zone group. 1 2 3 radosgw-admin zone modify --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a --master radosgw-admin zonegroup modify --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --master radosgw-admin period update --commit --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a Deleting Zone \u00b6 The Rook toolbox can modify the Ceph Multisite state via the radosgw-admin command. There are two scenarios possible when deleting a zone. The following commands, run via the toolbox, deletes the zone if there is only one zone in the zone group. 1 2 radosgw-admin zone delete --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a radosgw-admin period update --commit --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a In the other scenario, there are more than one zones in a zone group. Care must be taken when changing which zone is the master zone. Please read the following documentation before running the below commands: The following commands, run via toolboxes, remove the zone from the zone group first, then delete the zone. 1 2 3 4 radosgw-admin zonegroup rm --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a radosgw-admin period update --commit --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a radosgw-admin zone delete --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a radosgw-admin period update --commit --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a When a zone is deleted, the pools for that zone are not deleted. Deleting Pools for a Zone \u00b6 The Rook toolbox can delete pools. Deleting pools should be done with caution. The following documentation on pools should be read before deleting any pools. When a zone is created the following pools are created for each zone: 1 2 3 4 5 6 $ ZONE_NAME.rgw.control $ ZONE_NAME.rgw.meta $ ZONE_NAME.rgw.log $ ZONE_NAME.rgw.buckets.index $ ZONE_NAME.rgw.buckets.non-ec $ ZONE_NAME.rgw.buckets.data Here is an example command to delete the .rgw.buckets.data pool for zone-a. 1 ceph osd pool rm zone-a.rgw.buckets.data zone-a.rgw.buckets.data --yes-i-really-really-mean-it In this command the pool name must be mentioned twice for the pool to be removed. Removing an Object Store from a Zone \u00b6 When an object-store (created in a zone) is deleted, the endpoint for that object store is removed from that zone, via 1 kubectl delete -f object-store.yaml Removing object store(s) from the master zone of the master zone group should be done with caution. When all of these object-stores are deleted the period cannot be updated and that realm cannot be pulled. Configure an Existing Object Store for Multisite \u00b6 When an object store is configured by Rook, it internally creates a zone, zone group, and realm with the same name as the object store. To enable multisite, you will need to create the corresponding zone, zone group, and realm CRs with the same name as the object store. 
For example, to create multisite CRs for an object store named my-store : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 apiVersion : ceph.rook.io/v1 kind : CephObjectRealm metadata : name : my-store namespace : rook-ceph # namespace:cluster --- apiVersion : ceph.rook.io/v1 kind : CephObjectZoneGroup metadata : name : my-store namespace : rook-ceph # namespace:cluster spec : realm : my-store --- apiVersion : ceph.rook.io/v1 kind : CephObjectZone metadata : name : my-store namespace : rook-ceph # namespace:cluster spec : zoneGroup : my-store metadataPool : replicated : size : 3 dataPool : replicated : size : 3 preservePoolsOnDelete : false # recommended to set this value if ingress used for exposing rgw endpoints # customEndpoints: # - \"http://rgw-a.fqdn\" Now modify the existing CephObjectStore CR to exclude pool settings and add a reference to the zone. 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : my-store namespace : rook-ceph # namespace:cluster spec : gateway : port : 80 instances : 1 zone : name : my-store Using custom names \u00b6 If names different from the object store need to be set for the realm, zone, or zone group, first rename them in the backend via toolbox pod, then following the procedure above. 1 2 3 4 radosgw-admin realm rename --rgw-realm=my-store --realm-new-name= radosgw-admin zonegroup rename --rgw-zonegroup=my-store --zonegroup-new-name= --rgw-realm= radosgw-admin zone rename --rgw-zone=my-store --zone-new-name= --rgw-zonegroup= --rgw-realm= radosgw-admin period update --commit Important Renaming in the toolbox must be performed before creating the multisite CRs","title":"Object Store Multisite"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#prerequisites","text":"This guide assumes a Rook cluster as explained in the Quickstart .","title":"Prerequisites"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#creating-object-multisite","text":"If an admin wants to set up multisite on a Rook Ceph cluster, the following resources must be created: A realm A zonegroup A zone A ceph object store with the zone section object-multisite.yaml in the examples directory can be used to create the multisite CRDs. 1 kubectl create -f object-multisite.yaml The first zone group created in a realm is the master zone group. The first zone created in a zone group is the master zone. When a non-master zone or non-master zone group is created, the zone group or zone is not in the Ceph Radosgw Multisite Period until an object-store is created in that zone (and zone group). The zone will create the pools for the object-store(s) that are in the zone to use. When one of the multisite CRs (realm, zone group, zone) is deleted the underlying ceph realm/zone group/zone is not deleted, neither are the pools created by the zone. See the \"Multisite Cleanup\" section for more information. For more information on the multisite CRDs, see the related CRDs: - CephObjectRealm - CephObjectZoneGroup - CephObjectZone","title":"Creating Object Multisite"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#pulling-a-realm","text":"If an admin wants to sync data from another cluster, the admin needs to pull a realm on a Rook Ceph cluster from another Rook Ceph (or Ceph) cluster. 
To begin doing this, the admin needs 2 pieces of information: An endpoint from the realm being pulled from The access key and the system key of the system user from the realm being pulled from.","title":"Pulling a Realm"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#getting-the-pull-endpoint","text":"To pull a Ceph realm from a remote Ceph cluster, an endpoint must be added to the CephObjectRealm's pull section in the spec . This endpoint must be from the master zone in the master zone group of that realm. If an admin does not know of an endpoint that fits this criteria, the admin can find such an endpoint on the remote Ceph cluster (via the tool box if it is a Rook Ceph Cluster) by running: 1 2 3 4 5 6 $ radosgw-admin zonegroup get --rgw-realm = $REALM_NAME --rgw-zonegroup = $MASTER_ZONEGROUP_NAME { ... \"endpoints\": [http://10.17.159.77:80], ... } A list of endpoints in the master zone group in the master zone is in the endpoints section of the JSON output of the zonegoup get command. This endpoint must also be resolvable from the new Rook Ceph cluster. To test this run the curl command on the endpoint: 1 2 $ curl -L http://10.17.159.77:80 anonymous Finally add the endpoint to the pull section of the CephObjectRealm's spec. The CephObjectRealm should have the same name as the CephObjectRealm/Ceph realm it is pulling from. 1 2 3 4 5 6 7 8 apiVersion : ceph.rook.io/v1 kind : CephObjectRealm metadata : name : realm-a namespace : rook-ceph spec : pull : endpoint : http://10.17.159.77:80","title":"Getting the Pull Endpoint"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#getting-realm-access-key-and-secret-key","text":"The access key and secret key of the system user are keys that allow other Ceph clusters to pull the realm of the system user.","title":"Getting Realm Access Key and Secret Key"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#getting-the-realm-access-key-and-secret-key-from-the-rook-ceph-cluster","text":"","title":"Getting the Realm Access Key and Secret Key from the Rook Ceph Cluster"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#system-user-for-multisite","text":"When an admin creates a ceph-object-realm a system user automatically gets created for the realm with an access key and a secret key. This system user has the name \"$REALM_NAME-system-user\". For the example if realm name is realm-a , then uid for the system user is \"realm-a-system-user\". These keys for the user are exported as a kubernetes secret called \"$REALM_NAME-keys\" (ex: realm-a-keys). This system user used by RGW internally for the data replication.","title":"System User for Multisite"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#getting-keys-from-k8s-secret","text":"To get these keys from the cluster the realm was originally created on, run: 1 kubectl -n $ORIGINAL_CLUSTER_NAMESPACE get secrets realm-a-keys -o yaml > realm-a-keys.yaml Edit the realm-a-keys.yaml file, and change the namespace with the namespace that the new Rook Ceph cluster exists in. Then create a kubernetes secret on the pulling Rook Ceph cluster with the same secrets yaml file. 
1 kubectl create -f realm-a-keys.yaml","title":"Getting keys from k8s secret"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#getting-the-realm-access-key-and-secret-key-from-a-non-rook-ceph-cluster","text":"The access key and the secret key of the system user can be found in the output of running the following command on a non-rook ceph cluster: 1 radosgw-admin user info --uid=\"realm-a-system-user\" 1 2 3 4 5 6 7 8 9 10 11 { ... \"keys\" : [ { \"user\" : \"realm-a-system-user\" \"access_key\" : \"aSw4blZIKV9nKEU5VC0=\" \"secret_key\" : \"JSlDXFt5TlgjSV9QOE9XUndrLiI5JEo9YDBsJg==\" , } ], ... } Then base64 encode the each of the keys and create a .yaml file for the Kubernetes secret from the following template. Only the access-key , secret-key , and namespace sections need to be replaced. 1 2 3 4 5 6 7 8 9 apiVersion : v1 data : access-key : YVN3NGJsWklLVjluS0VVNVZDMD0= secret-key : SlNsRFhGdDVUbGdqU1Y5UU9FOVhVbmRyTGlJNUpFbzlZREJzSmc9PQ== kind : Secret metadata : name : realm-a-keys namespace : $NEW_ROOK_CLUSTER_NAMESPACE type : kubernetes.io/rook Finally, create a kubernetes secret on the pulling Rook Ceph cluster with the new secrets yaml file. 1 kubectl create -f realm-a-keys.yaml","title":"Getting the Realm Access Key and Secret Key from a Non Rook Ceph Cluster"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#pulling-a-realm-on-a-new-rook-ceph-cluster","text":"Once the admin knows the endpoint and the secret for the keys has been created, the admin should create: A CephObjectRealm matching to the realm on the other Ceph cluster, with an endpoint as described above. A CephObjectZoneGroup matching the master zone group name or the master CephObjectZoneGroup from the cluster the realm was pulled from. A CephObjectZone referring to the CephObjectZoneGroup created above. A CephObjectStore referring to the new CephObjectZone resource. object-multisite-pull-realm.yaml (with changes) in the examples directory can be used to create the multisite CRDs. 1 kubectl create -f object-multisite-pull-realm.yaml","title":"Pulling a Realm on a New Rook Ceph Cluster"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#scaling-a-multisite","text":"Scaling the number of gateways that run the synchronization thread to 2 or more can increase the latency of the replication of each S3 object. The recommended way to scale a mutisite configuration is to dissociate the gateway dedicated to the synchronization from gateways that serve clients. The two types of gateways can be deployed by creating two CephObjectStores associated with the same CephObjectZone. The objectstore that deploys the gateway dedicated to the synchronization must have spec.gateway.instances set to 1 , while the objectstore that deploys the client gateways have multiple replicas and should disable the synchronization thread on the gateways by setting spec.gateway.disableMultisiteSyncTraffic to true . 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 --- apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : replication namespace : rook-ceph spec : gateway : port : 80 instances : 1 disableMultisiteSyncTraffic : false zone : name : zone-a --- apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : clients namespace : rook-ceph spec : gateway : port : 80 instances : 5 disableMultisiteSyncTraffic : true zone : name : zone-a","title":"Scaling a Multisite"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#multisite-cleanup","text":"Multisite configuration must be cleaned up by hand. Deleting a realm/zone group/zone CR will not delete the underlying Ceph realm, zone group, zone, or the pools associated with a zone.","title":"Multisite Cleanup"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#realm-deletion","text":"Changes made to the resource's configuration or deletion of the resource are not reflected on the Ceph cluster. When the ceph-object-realm resource is deleted or modified, the realm is not deleted from the Ceph cluster. Realm deletion must be done via the toolbox.","title":"Realm Deletion"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#deleting-a-realm","text":"The Rook toolbox can modify the Ceph Multisite state via the radosgw-admin command. The following command, run via the toolbox, deletes the realm. 1 radosgw-admin realm delete --rgw-realm=realm-a","title":"Deleting a Realm"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#zone-group-deletion","text":"Changes made to the resource's configuration or deletion of the resource are not reflected on the Ceph cluster. When the ceph-object-zone group resource is deleted or modified, the zone group is not deleted from the Ceph cluster. Zone Group deletion must be done through the toolbox.","title":"Zone Group Deletion"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#deleting-a-zone-group","text":"The Rook toolbox can modify the Ceph Multisite state via the radosgw-admin command. The following command, run via the toolbox, deletes the zone group. 1 2 radosgw-admin zonegroup delete --rgw-realm=realm-a --rgw-zonegroup=zone-group-a radosgw-admin period update --commit --rgw-realm=realm-a --rgw-zonegroup=zone-group-a","title":"Deleting a Zone Group"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#deleting-and-reconfiguring-the-ceph-object-zone","text":"Changes made to the resource's configuration or deletion of the resource are not reflected on the Ceph cluster. When the ceph-object-zone resource is deleted or modified, the zone is not deleted from the Ceph cluster. Zone deletion must be done through the toolbox.","title":"Deleting and Reconfiguring the Ceph Object Zone"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#changing-the-master-zone","text":"The Rook toolbox can change the master zone in a zone group. 
1 2 3 radosgw-admin zone modify --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a --master radosgw-admin zonegroup modify --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --master radosgw-admin period update --commit --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a","title":"Changing the Master Zone"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#deleting-zone","text":"The Rook toolbox can modify the Ceph Multisite state via the radosgw-admin command. There are two scenarios possible when deleting a zone. The following commands, run via the toolbox, deletes the zone if there is only one zone in the zone group. 1 2 radosgw-admin zone delete --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a radosgw-admin period update --commit --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a In the other scenario, there are more than one zones in a zone group. Care must be taken when changing which zone is the master zone. Please read the following documentation before running the below commands: The following commands, run via toolboxes, remove the zone from the zone group first, then delete the zone. 1 2 3 4 radosgw-admin zonegroup rm --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a radosgw-admin period update --commit --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a radosgw-admin zone delete --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a radosgw-admin period update --commit --rgw-realm=realm-a --rgw-zonegroup=zone-group-a --rgw-zone=zone-a When a zone is deleted, the pools for that zone are not deleted.","title":"Deleting Zone"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#deleting-pools-for-a-zone","text":"The Rook toolbox can delete pools. Deleting pools should be done with caution. The following documentation on pools should be read before deleting any pools. When a zone is created the following pools are created for each zone: 1 2 3 4 5 6 $ ZONE_NAME.rgw.control $ ZONE_NAME.rgw.meta $ ZONE_NAME.rgw.log $ ZONE_NAME.rgw.buckets.index $ ZONE_NAME.rgw.buckets.non-ec $ ZONE_NAME.rgw.buckets.data Here is an example command to delete the .rgw.buckets.data pool for zone-a. 1 ceph osd pool rm zone-a.rgw.buckets.data zone-a.rgw.buckets.data --yes-i-really-really-mean-it In this command the pool name must be mentioned twice for the pool to be removed.","title":"Deleting Pools for a Zone"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#removing-an-object-store-from-a-zone","text":"When an object-store (created in a zone) is deleted, the endpoint for that object store is removed from that zone, via 1 kubectl delete -f object-store.yaml Removing object store(s) from the master zone of the master zone group should be done with caution. When all of these object-stores are deleted the period cannot be updated and that realm cannot be pulled.","title":"Removing an Object Store from a Zone"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#configure-an-existing-object-store-for-multisite","text":"When an object store is configured by Rook, it internally creates a zone, zone group, and realm with the same name as the object store. To enable multisite, you will need to create the corresponding zone, zone group, and realm CRs with the same name as the object store. 
For example, to create multisite CRs for an object store named my-store : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 apiVersion : ceph.rook.io/v1 kind : CephObjectRealm metadata : name : my-store namespace : rook-ceph # namespace:cluster --- apiVersion : ceph.rook.io/v1 kind : CephObjectZoneGroup metadata : name : my-store namespace : rook-ceph # namespace:cluster spec : realm : my-store --- apiVersion : ceph.rook.io/v1 kind : CephObjectZone metadata : name : my-store namespace : rook-ceph # namespace:cluster spec : zoneGroup : my-store metadataPool : replicated : size : 3 dataPool : replicated : size : 3 preservePoolsOnDelete : false # recommended to set this value if ingress used for exposing rgw endpoints # customEndpoints: # - \"http://rgw-a.fqdn\" Now modify the existing CephObjectStore CR to exclude pool settings and add a reference to the zone. 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : my-store namespace : rook-ceph # namespace:cluster spec : gateway : port : 80 instances : 1 zone : name : my-store","title":"Configure an Existing Object Store for Multisite"},{"location":"Storage-Configuration/Object-Storage-RGW/ceph-object-multisite/#using-custom-names","text":"If names different from the object store need to be set for the realm, zone, or zone group, first rename them in the backend via toolbox pod, then following the procedure above. 1 2 3 4 radosgw-admin realm rename --rgw-realm=my-store --realm-new-name= radosgw-admin zonegroup rename --rgw-zonegroup=my-store --zonegroup-new-name= --rgw-realm= radosgw-admin zone rename --rgw-zone=my-store --zone-new-name= --rgw-zonegroup= --rgw-realm= radosgw-admin period update --commit Important Renaming in the toolbox must be performed before creating the multisite CRs","title":"Using custom names"},{"location":"Storage-Configuration/Object-Storage-RGW/cosi/","text":"The Ceph COSI driver provisions buckets for object storage. This document instructs on enabling the driver and consuming a bucket from a sample application. Note The Ceph COSI driver is currently in experimental mode. Prerequisites \u00b6 COSI requires: 1. A running Rook object store 2. COSI controller Deploy the COSI controller with these commands: 1 2 kubectl apply -k github.com/kubernetes-sigs/container-object-storage-interface-api kubectl apply -k github.com/kubernetes-sigs/container-object-storage-interface-controller Ceph COSI Driver \u00b6 The Ceph COSI driver will be started when the CephCOSIDriver CR is created and when the first CephObjectStore is created. 1 2 3 4 5 6 7 apiVersion : ceph.rook.io/v1 kind : CephCOSIDriver metadata : name : ceph-cosi-driver namespace : rook-ceph spec : deploymentStrategy : \"Auto\" 1 2 cd deploy/examples/cosi kubectl create -f cephcosidriver.yaml The driver is created in the same namespace as Rook operator. Admin Operations \u00b6 Create a Ceph Object Store User \u00b6 Create a CephObjectStoreUser to be used by the COSI driver for provisioning buckets. 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephObjectStoreUser metadata : name : cosi namespace : rook-ceph spec : displayName : \"cosi user\" store : my-store capabilities : bucket : \"*\" user : \"*\" 1 kubectl create -f cosi-user.yaml Above step will be automated in future by the Rook operator. Create a BucketClass and BucketAccessClass \u00b6 The BucketClass and BucketAccessClass are CRDs defined by COSI. The BucketClass defines the storage class for the bucket. 
The BucketAccessClass defines the access class for the bucket. The BucketClass and BucketAccessClass are defined as below: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 kind : BucketClass apiVersion : objectstorage.k8s.io/v1alpha1 metadata : name : sample-bcc driverName : ceph.objectstorage.k8s.io deletionPolicy : Delete parameters : objectStoreUserSecretName : rook-ceph-object-user-my-store-cosi objectStoreUserSecretNamespace : rook-ceph --- kind : BucketAccessClass apiVersion : objectstorage.k8s.io/v1alpha1 metadata : name : sample-bac driverName : ceph.objectstorage.k8s.io authenticationType : KEY parameters : objectStoreUserSecretName : rook-ceph-object-user-my-store-cosi objectStoreUserSecretNamespace : rook-ceph 1 kubectl create -f bucketclass.yaml -f bucketaccessclass.yaml The objectStoreUserSecretName and objectStoreUserSecretNamespace are the name and namespace of the CephObjectStoreUser created in the previous step. User Operations \u00b6 Create a Bucket \u00b6 To create a bucket, use the BucketClass to pointing the required object store and then define BucketClaim request as below: 1 2 3 4 5 6 7 8 9 kind : BucketClaim apiVersion : objectstorage.k8s.io/v1alpha1 metadata : name : sample-bc namespace : default # any namespace can be used spec : bucketClassName : sample-bcc protocols : - s3 1 kubectl create -f bucketclaim.yaml Bucket Access \u00b6 Define access to the bucket by creating the BucketAccess resource: 1 2 3 4 5 6 7 8 9 10 11 kind : BucketAccess apiVersion : objectstorage.k8s.io/v1alpha1 metadata : name : sample-access namespace : default # any namespace can be used spec : bucketAccessClassName : sample-bac bucketClaimName : sample-bc protocol : s3 # Change to the name of the secret where access details are stored credentialsSecretName : sample-secret-name 1 kubectl create -f bucketaccess.yaml The secret will be created which contains the access details for the bucket in JSON format in the namespace of BucketAccess: 1 kubectl get secret sample-secret-name -o yaml 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 { \"metadata\" : { \"name\" : \"bc-81733d1a-ac7a-4759-96f3-fbcc07c0cee9\" , \"creationTimestamp\" : null }, \"spec\" : { \"bucketName\" : \"sample-bcc1fc94b04-6011-45e0-a3d8-b6a093055783\" , \"authenticationType\" : \"KEY\" , \"secretS3\" : { \"endpoint\" : \"http://rook-ceph-rgw-my-store.rook-ceph.svc:80\" , \"region\" : \"us-east\" , \"accessKeyID\" : \"LI2LES8QMR9GB5SZLB02\" , \"accessSecretKey\" : \"s0WAmcn8N1eIBgNV0mjCwZWQmJiCF4B0SAzbhYCL\" }, \"secretAzure\" : null , \"protocols\" : [ \"s3\" ] } } Consuming the Bucket via secret \u00b6 To access the bucket from an application pod, mount the secret for accessing the bucket: 1 2 3 4 5 6 7 8 9 10 11 volumes : - name : cosi-secrets secret : # Set the name of the secret from the BucketAccess secretName : sample-secret-name spec : containers : - name : sample-app volumeMounts : - name : cosi-secrets mountPath : /data/cosi The Secret will be mounted in the pod in the path: /data/cosi/BucketInfo . The app must parse the JSON object to load the bucket connection details. Another approach is the json data can be parsed by the application to access the bucket via init container. 
Following is a sample init container which parses the json data and creates a file with the access details: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 set -e jsonfile = %s if [ -d \" $jsonfile \" ] ; then export ENDPOINT = $( jq -r '.spec.secretS3.endpoint' $jsonfile ) export BUCKET = $( jq -r '.spec.bucketName' $jsonfile ) export AWS_ACCESS_KEY_ID = $( jq -r '.spec.secretS3.accessKeyID' $jsonfile ) export AWS_SECRET_ACCESS_KEY = $( jq -r '.spec.secretS3.accessSecretKey' $jsonfile ) fi else echo \"Error: $jsonfile does not exist\" exit 1 fi 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 apiVersion : v1 kind : Pod metadata : name : sample-app namespace : rook-ceph spec : containers : - name : sample-app image : busybox command : [ \"/bin/sh\" , \"-c\" , \"sleep 3600\" ] volumeMounts : - name : cosi-secrets mountPath : /data/cosi initContainers : - name : init-cosi image : busybox command : [ \"/bin/sh\" , \"-c\" , \"setup-aws-credentials /data/cosi/BucketInfo/credentials\" ] volumeMounts : - name : cosi-secrets mountPath : /data/cosi volumes : - name : cosi-secrets secret : # Set the name of the secret from the BucketAccess secretName : sample-secret-name","title":"Container Object Storage Interface (COSI)"},{"location":"Storage-Configuration/Object-Storage-RGW/cosi/#prerequisites","text":"COSI requires: 1. A running Rook object store 2. COSI controller Deploy the COSI controller with these commands: 1 2 kubectl apply -k github.com/kubernetes-sigs/container-object-storage-interface-api kubectl apply -k github.com/kubernetes-sigs/container-object-storage-interface-controller","title":"Prerequisites"},{"location":"Storage-Configuration/Object-Storage-RGW/cosi/#ceph-cosi-driver","text":"The Ceph COSI driver will be started when the CephCOSIDriver CR is created and when the first CephObjectStore is created. 1 2 3 4 5 6 7 apiVersion : ceph.rook.io/v1 kind : CephCOSIDriver metadata : name : ceph-cosi-driver namespace : rook-ceph spec : deploymentStrategy : \"Auto\" 1 2 cd deploy/examples/cosi kubectl create -f cephcosidriver.yaml The driver is created in the same namespace as Rook operator.","title":"Ceph COSI Driver"},{"location":"Storage-Configuration/Object-Storage-RGW/cosi/#admin-operations","text":"","title":"Admin Operations"},{"location":"Storage-Configuration/Object-Storage-RGW/cosi/#create-a-ceph-object-store-user","text":"Create a CephObjectStoreUser to be used by the COSI driver for provisioning buckets. 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephObjectStoreUser metadata : name : cosi namespace : rook-ceph spec : displayName : \"cosi user\" store : my-store capabilities : bucket : \"*\" user : \"*\" 1 kubectl create -f cosi-user.yaml Above step will be automated in future by the Rook operator.","title":"Create a Ceph Object Store User"},{"location":"Storage-Configuration/Object-Storage-RGW/cosi/#create-a-bucketclass-and-bucketaccessclass","text":"The BucketClass and BucketAccessClass are CRDs defined by COSI. The BucketClass defines the storage class for the bucket. The BucketAccessClass defines the access class for the bucket. 
The BucketClass and BucketAccessClass are defined as below: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 kind : BucketClass apiVersion : objectstorage.k8s.io/v1alpha1 metadata : name : sample-bcc driverName : ceph.objectstorage.k8s.io deletionPolicy : Delete parameters : objectStoreUserSecretName : rook-ceph-object-user-my-store-cosi objectStoreUserSecretNamespace : rook-ceph --- kind : BucketAccessClass apiVersion : objectstorage.k8s.io/v1alpha1 metadata : name : sample-bac driverName : ceph.objectstorage.k8s.io authenticationType : KEY parameters : objectStoreUserSecretName : rook-ceph-object-user-my-store-cosi objectStoreUserSecretNamespace : rook-ceph 1 kubectl create -f bucketclass.yaml -f bucketaccessclass.yaml The objectStoreUserSecretName and objectStoreUserSecretNamespace are the name and namespace of the CephObjectStoreUser created in the previous step.","title":"Create a BucketClass and BucketAccessClass"},{"location":"Storage-Configuration/Object-Storage-RGW/cosi/#user-operations","text":"","title":"User Operations"},{"location":"Storage-Configuration/Object-Storage-RGW/cosi/#create-a-bucket","text":"To create a bucket, use the BucketClass to pointing the required object store and then define BucketClaim request as below: 1 2 3 4 5 6 7 8 9 kind : BucketClaim apiVersion : objectstorage.k8s.io/v1alpha1 metadata : name : sample-bc namespace : default # any namespace can be used spec : bucketClassName : sample-bcc protocols : - s3 1 kubectl create -f bucketclaim.yaml","title":"Create a Bucket"},{"location":"Storage-Configuration/Object-Storage-RGW/cosi/#bucket-access","text":"Define access to the bucket by creating the BucketAccess resource: 1 2 3 4 5 6 7 8 9 10 11 kind : BucketAccess apiVersion : objectstorage.k8s.io/v1alpha1 metadata : name : sample-access namespace : default # any namespace can be used spec : bucketAccessClassName : sample-bac bucketClaimName : sample-bc protocol : s3 # Change to the name of the secret where access details are stored credentialsSecretName : sample-secret-name 1 kubectl create -f bucketaccess.yaml The secret will be created which contains the access details for the bucket in JSON format in the namespace of BucketAccess: 1 kubectl get secret sample-secret-name -o yaml 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 { \"metadata\" : { \"name\" : \"bc-81733d1a-ac7a-4759-96f3-fbcc07c0cee9\" , \"creationTimestamp\" : null }, \"spec\" : { \"bucketName\" : \"sample-bcc1fc94b04-6011-45e0-a3d8-b6a093055783\" , \"authenticationType\" : \"KEY\" , \"secretS3\" : { \"endpoint\" : \"http://rook-ceph-rgw-my-store.rook-ceph.svc:80\" , \"region\" : \"us-east\" , \"accessKeyID\" : \"LI2LES8QMR9GB5SZLB02\" , \"accessSecretKey\" : \"s0WAmcn8N1eIBgNV0mjCwZWQmJiCF4B0SAzbhYCL\" }, \"secretAzure\" : null , \"protocols\" : [ \"s3\" ] } }","title":"Bucket Access"},{"location":"Storage-Configuration/Object-Storage-RGW/cosi/#consuming-the-bucket-via-secret","text":"To access the bucket from an application pod, mount the secret for accessing the bucket: 1 2 3 4 5 6 7 8 9 10 11 volumes : - name : cosi-secrets secret : # Set the name of the secret from the BucketAccess secretName : sample-secret-name spec : containers : - name : sample-app volumeMounts : - name : cosi-secrets mountPath : /data/cosi The Secret will be mounted in the pod in the path: /data/cosi/BucketInfo . The app must parse the JSON object to load the bucket connection details. Another approach is the json data can be parsed by the application to access the bucket via init container. 
Following is a sample init container which parses the json data and creates a file with the access details: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 set -e jsonfile = %s if [ -d \" $jsonfile \" ] ; then export ENDPOINT = $( jq -r '.spec.secretS3.endpoint' $jsonfile ) export BUCKET = $( jq -r '.spec.bucketName' $jsonfile ) export AWS_ACCESS_KEY_ID = $( jq -r '.spec.secretS3.accessKeyID' $jsonfile ) export AWS_SECRET_ACCESS_KEY = $( jq -r '.spec.secretS3.accessSecretKey' $jsonfile ) fi else echo \"Error: $jsonfile does not exist\" exit 1 fi 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 apiVersion : v1 kind : Pod metadata : name : sample-app namespace : rook-ceph spec : containers : - name : sample-app image : busybox command : [ \"/bin/sh\" , \"-c\" , \"sleep 3600\" ] volumeMounts : - name : cosi-secrets mountPath : /data/cosi initContainers : - name : init-cosi image : busybox command : [ \"/bin/sh\" , \"-c\" , \"setup-aws-credentials /data/cosi/BucketInfo/credentials\" ] volumeMounts : - name : cosi-secrets mountPath : /data/cosi volumes : - name : cosi-secrets secret : # Set the name of the secret from the BucketAccess secretName : sample-secret-name","title":"Consuming the Bucket via secret"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/","text":"Object storage exposes an S3 API to the storage cluster for applications to put and get data. Prerequisites \u00b6 This guide assumes a Rook cluster as explained in the Quickstart . Configure an Object Store \u00b6 Rook has the ability to either deploy an object store in Kubernetes or to connect to an external RGW service. Most commonly, the object store will be configured locally by Rook. Alternatively, if you have an existing Ceph cluster with Rados Gateways, see the external section to consume it from Rook. Create a Local Object Store \u00b6 The below sample will create a CephObjectStore that starts the RGW service in the cluster with an S3 API. Note This sample requires at least 3 bluestore OSDs , with each OSD located on a different node . The OSDs must be located on different nodes, because the failureDomain is set to host and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). See the Object Store CRD , for more detail on the settings available for a CephObjectStore . 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : my-store namespace : rook-ceph spec : metadataPool : failureDomain : host replicated : size : 3 dataPool : failureDomain : host erasureCoded : dataChunks : 2 codingChunks : 1 preservePoolsOnDelete : true gateway : sslCertificateRef : port : 80 # securePort: 443 instances : 1 After the CephObjectStore is created, the Rook operator will then create all the pools and other resources necessary to start the service. This may take a minute to complete. Create an object store: 1 kubectl create -f object.yaml To confirm the object store is configured, wait for the RGW pod(s) to start: 1 kubectl -n rook-ceph get pod -l app=rook-ceph-rgw Connect to an External Object Store \u00b6 Rook can connect to existing RGW gateways to work in conjunction with the external mode of the CephCluster CRD. 
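The radosgw-admin commands below are run against the existing external Ceph cluster, for example from a host that holds its admin keyring. As a sketch, once the user exists you can print the keys that the Kubernetes secret in the following step needs (assuming jq is available on that host):
radosgw-admin user info --uid=rgw-admin-ops-user | jq -r '.keys[0].access_key, .keys[0].secret_key'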
First, create a rgw-admin-ops-user user in the Ceph cluster with the necessary caps: 1 radosgw-admin user create --uid=rgw-admin-ops-user --display-name=\"RGW Admin Ops User\" --caps=\"buckets=*;users=*;usage=read;metadata=read;zone=read\" --rgw-realm= --rgw-zonegroup= --rgw-zone= The rgw-admin-ops-user user is required by the Rook operator to manage buckets and users via the admin ops and s3 api. The multisite configuration needs to be specified only if the admin sets up multisite for RGW. Then create a secret with the user credentials: 1 kubectl -n rook-ceph create secret generic --type=\"kubernetes.io/rook\" rgw-admin-ops-user --from-literal=accessKey= --from-literal=secretKey= If you have an external CephCluster CR, you can instruct Rook to consume external gateways with the following: 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : external-store namespace : rook-ceph spec : gateway : port : 8080 externalRgwEndpoints : - ip : 192.168.39.182 # hostname: example.com Use the existing object-external.yaml file. Even though multiple endpoints can be specified, it is recommend to use only one endpoint. This endpoint is randomly added to configmap of OBC and secret of the cephobjectstoreuser . Rook never guarantees the randomly picked endpoint is a working one or not. If there are multiple endpoints, please add load balancer in front of them and use the load balancer endpoint in the externalRgwEndpoints list. When ready, the message in the cephobjectstore status similar to this one: 1 2 3 kubectl -n rook-ceph get cephobjectstore external-store NAME PHASE external-store Ready Any pod from your cluster can now access this endpoint: 1 2 $ curl 10 .100.28.138:8080 anonymous Create a Bucket \u00b6 Info This document is a guide for creating bucket with an Object Bucket Claim (OBC). To create a bucket with the experimental COSI Driver, see the COSI documentation . Now that the object store is configured, next we need to create a bucket where a client can read and write objects. A bucket can be created by defining a storage class, similar to the pattern used by block and file storage. First, define the storage class that will allow object clients to create a bucket. The storage class defines the object storage system, the bucket retention policy, and other properties required by the administrator. Save the following as storageclass-bucket-delete.yaml (the example is named as such due to the Delete reclaim policy). 1 2 3 4 5 6 7 8 9 10 apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-ceph-bucket # Change \"rook-ceph\" provisioner prefix to match the operator namespace if needed provisioner : rook-ceph.ceph.rook.io/bucket reclaimPolicy : Delete parameters : objectStoreName : my-store objectStoreNamespace : rook-ceph If you\u2019ve deployed the Rook operator in a namespace other than rook-ceph , change the prefix in the provisioner to match the namespace you used. For example, if the Rook operator is running in the namespace my-namespace the provisioner value should be my-namespace.ceph.rook.io/bucket . 1 kubectl create -f storageclass-bucket-delete.yaml Based on this storage class, an object client can now request a bucket by creating an Object Bucket Claim (OBC). When the OBC is created, the Rook bucket provisioner will create a new bucket. Notice that the OBC references the storage class that was created above. 
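If you want to confirm the bucket StorageClass was registered before creating any claims, an optional check is:
kubectl get storageclass rook-ceph-bucket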
Save the following as object-bucket-claim-delete.yaml (the example is named as such due to the Delete reclaim policy): 1 2 3 4 5 6 7 apiVersion : objectbucket.io/v1alpha1 kind : ObjectBucketClaim metadata : name : ceph-bucket spec : generateBucketName : ceph-bkt storageClassName : rook-ceph-bucket 1 kubectl create -f object-bucket-claim-delete.yaml Now that the claim is created, the operator will create the bucket as well as generate other artifacts to enable access to the bucket. A secret and ConfigMap are created with the same name as the OBC and in the same namespace. The secret contains credentials used by the application pod to access the bucket. The ConfigMap contains bucket endpoint information and is also consumed by the pod. See the Object Bucket Claim Documentation for more details on the CephObjectBucketClaims . Client Connections \u00b6 The following commands extract key pieces of information from the secret and configmap:\" 1 2 3 4 5 6 # config-map, secret, OBC will part of default if no specific name space mentioned export AWS_HOST=$(kubectl -n default get cm ceph-bucket -o jsonpath='{.data.BUCKET_HOST}') export PORT=$(kubectl -n default get cm ceph-bucket -o jsonpath='{.data.BUCKET_PORT}') export BUCKET_NAME=$(kubectl -n default get cm ceph-bucket -o jsonpath='{.data.BUCKET_NAME}') export AWS_ACCESS_KEY_ID=$(kubectl -n default get secret ceph-bucket -o jsonpath='{.data.AWS_ACCESS_KEY_ID}' | base64 --decode) export AWS_SECRET_ACCESS_KEY=$(kubectl -n default get secret ceph-bucket -o jsonpath='{.data.AWS_SECRET_ACCESS_KEY}' | base64 --decode) Consume the Object Storage \u00b6 Now that you have the object store configured and a bucket created, you can consume the object storage from an S3 client. This section will guide you through testing the connection to the CephObjectStore and uploading and downloading from it. Run the following commands after you have connected to the Rook toolbox . Connection Environment Variables \u00b6 To simplify the s3 client commands, you will want to set the four environment variables for use by your client (ie. inside the toolbox). See above for retrieving the variables for a bucket created by an ObjectBucketClaim . 1 2 3 4 export AWS_HOST= export PORT= export AWS_ACCESS_KEY_ID= export AWS_SECRET_ACCESS_KEY= Host : The DNS host name where the rgw service is found in the cluster. Assuming you are using the default rook-ceph cluster, it will be rook-ceph-rgw-my-store.rook-ceph.svc . Port : The endpoint where the rgw service is listening. Run kubectl -n rook-ceph get svc rook-ceph-rgw-my-store , to get the port. Access key : The user's access_key as printed above Secret key : The user's secret_key as printed above The variables for the user generated in this example might be: 1 2 3 4 export AWS_HOST=rook-ceph-rgw-my-store.rook-ceph.svc export PORT=80 export AWS_ACCESS_KEY_ID=XEZDB3UJ6X7HVBE7X7MA export AWS_SECRET_ACCESS_KEY=7yGIZON7EhFORz0I40BFniML36D2rl8CQQ5kXU6l The access key and secret key can be retrieved as described in the section above on client connections or below in the section creating a user if you are not creating the buckets with an ObjectBucketClaim . Configure s5cmd \u00b6 To test the CephObjectStore , set the object store credentials in the toolbox pod that contains the s5cmd tool. Important The default toolbox.yaml does not contain the s5cmd. The toolbox must be started with the rook operator image (toolbox-operator-image), which does contain s5cmd. 
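The first command below is run from your workstation to create that toolbox; the mkdir and cat steps are then run from a shell inside the toolbox pod, for example (assuming the deployment name created by that manifest):
kubectl -n rook-ceph exec -it deploy/rook-ceph-tools-operator-image -- bash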
1 2 3 4 5 6 7 kubectl create -f deploy/examples/toolbox-operator-image.yaml mkdir ~/.aws cat > ~/.aws/credentials << EOF [default] aws_access_key_id = ${AWS_ACCESS_KEY_ID} aws_secret_access_key = ${AWS_SECRET_ACCESS_KEY} EOF PUT or GET an object \u00b6 Upload a file to the newly created bucket 1 2 echo \"Hello Rook\" > /tmp/rookObj s5cmd --endpoint-url http://$AWS_HOST:$PORT cp /tmp/rookObj s3://$BUCKET_NAME Download and verify the file from the bucket 1 2 s5cmd --endpoint-url http://$AWS_HOST:$PORT cp s3://$BUCKET_NAME/rookObj /tmp/rookObj-download cat /tmp/rookObj-download Monitoring health \u00b6 Rook configures health probes on the deployment created for CephObjectStore gateways. Refer to the CRD document for information about configuring the probes and monitoring the deployment status. Access External to the Cluster \u00b6 Rook sets up the object storage so pods will have access internal to the cluster. If your applications are running outside the cluster, you will need to setup an external service through a NodePort . First, note the service that exposes RGW internal to the cluster. We will leave this service intact and create a new service for external access. 1 2 3 $ kubectl -n rook-ceph get service rook-ceph-rgw-my-store NAME CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-rgw-my-store 10.3.0.177  80/TCP 2m Save the external service as rgw-external.yaml : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 apiVersion : v1 kind : Service metadata : name : rook-ceph-rgw-my-store-external namespace : rook-ceph labels : app : rook-ceph-rgw rook_cluster : rook-ceph rook_object_store : my-store spec : ports : - name : rgw port : 80 protocol : TCP targetPort : 80 selector : app : rook-ceph-rgw rook_cluster : rook-ceph rook_object_store : my-store sessionAffinity : None type : NodePort Now create the external service. 1 kubectl create -f rgw-external.yaml See both rgw services running and notice what port the external service is running on: 1 2 3 4 $ kubectl -n rook-ceph get service rook-ceph-rgw-my-store rook-ceph-rgw-my-store-external NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-rgw-my-store ClusterIP 10.104.82.228  80/TCP 4m rook-ceph-rgw-my-store-external NodePort 10.111.113.237  80:31536/TCP 39s Internally the rgw service is running on port 80 . The external port in this case is 31536 . Now you can access the CephObjectStore from anywhere! All you need is the hostname for any machine in the cluster, the external port, and the user credentials. Create a User \u00b6 If you need to create an independent set of user credentials to access the S3 endpoint, create a CephObjectStoreUser . The user will be used to connect to the RGW service in the cluster using the S3 API. The user will be independent of any object bucket claims that you might have created in the earlier instructions in this document. See the Object Store User CRD for more detail on the settings available for a CephObjectStoreUser . 1 2 3 4 5 6 7 8 apiVersion : ceph.rook.io/v1 kind : CephObjectStoreUser metadata : name : my-user namespace : rook-ceph spec : store : my-store displayName : \"my display name\" When the CephObjectStoreUser is created, the Rook operator will then create the RGW user on the specified CephObjectStore and store the Access Key and Secret Key in a kubernetes secret in the same namespace as the CephObjectStoreUser . 
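Save the CephObjectStoreUser manifest above as object-user.yaml so that the create command below can reference it.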
1 2 # Create the object store user kubectl create -f object-user.yaml 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 # To confirm the object store user is configured, describe the secret $ kubectl -n rook-ceph describe secret rook-ceph-object-user-my-store-my-user Name: rook-ceph-object-user-my-store-my-user Namespace: rook-ceph Labels: app=rook-ceph-rgw rook_cluster=rook-ceph rook_object_store=my-store Annotations:  Type: kubernetes.io/rook Data ==== AccessKey: 20 bytes SecretKey: 40 bytes The AccessKey and SecretKey data fields can be mounted in a pod as an environment variable. More information on consuming kubernetes secrets can be found in the K8s secret documentation To directly retrieve the secrets: 1 2 kubectl -n rook-ceph get secret rook-ceph-object-user-my-store-my-user -o jsonpath='{.data.AccessKey}' | base64 --decode kubectl -n rook-ceph get secret rook-ceph-object-user-my-store-my-user -o jsonpath='{.data.SecretKey}' | base64 --decode Object Multisite \u00b6 Multisite is a feature of Ceph that allows object stores to replicate its data over multiple Ceph clusters. Multisite also allows object stores to be independent and isolated from other object stores in a cluster. For more information on multisite please read the ceph multisite overview for how to run it.","title":"Object Storage Overview"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#prerequisites","text":"This guide assumes a Rook cluster as explained in the Quickstart .","title":"Prerequisites"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#configure-an-object-store","text":"Rook has the ability to either deploy an object store in Kubernetes or to connect to an external RGW service. Most commonly, the object store will be configured locally by Rook. Alternatively, if you have an existing Ceph cluster with Rados Gateways, see the external section to consume it from Rook.","title":"Configure an Object Store"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#create-a-local-object-store","text":"The below sample will create a CephObjectStore that starts the RGW service in the cluster with an S3 API. Note This sample requires at least 3 bluestore OSDs , with each OSD located on a different node . The OSDs must be located on different nodes, because the failureDomain is set to host and the erasureCoded chunk settings require at least 3 different OSDs (2 dataChunks + 1 codingChunks ). See the Object Store CRD , for more detail on the settings available for a CephObjectStore . 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : my-store namespace : rook-ceph spec : metadataPool : failureDomain : host replicated : size : 3 dataPool : failureDomain : host erasureCoded : dataChunks : 2 codingChunks : 1 preservePoolsOnDelete : true gateway : sslCertificateRef : port : 80 # securePort: 443 instances : 1 After the CephObjectStore is created, the Rook operator will then create all the pools and other resources necessary to start the service. This may take a minute to complete. 
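While waiting, you can watch the CephObjectStore phase from another terminal; a minimal sketch assuming the default namespace and store name:
kubectl -n rook-ceph get cephobjectstore my-store -w
The PHASE column reports Ready once provisioning has finished.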
Create an object store: 1 kubectl create -f object.yaml To confirm the object store is configured, wait for the RGW pod(s) to start: 1 kubectl -n rook-ceph get pod -l app=rook-ceph-rgw","title":"Create a Local Object Store"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#connect-to-an-external-object-store","text":"Rook can connect to existing RGW gateways to work in conjunction with the external mode of the CephCluster CRD. First, create a rgw-admin-ops-user user in the Ceph cluster with the necessary caps: 1 radosgw-admin user create --uid=rgw-admin-ops-user --display-name=\"RGW Admin Ops User\" --caps=\"buckets=*;users=*;usage=read;metadata=read;zone=read\" --rgw-realm= --rgw-zonegroup= --rgw-zone= The rgw-admin-ops-user user is required by the Rook operator to manage buckets and users via the admin ops and s3 api. The multisite configuration needs to be specified only if the admin sets up multisite for RGW. Then create a secret with the user credentials: 1 kubectl -n rook-ceph create secret generic --type=\"kubernetes.io/rook\" rgw-admin-ops-user --from-literal=accessKey= --from-literal=secretKey= If you have an external CephCluster CR, you can instruct Rook to consume external gateways with the following: 1 2 3 4 5 6 7 8 9 10 11 apiVersion : ceph.rook.io/v1 kind : CephObjectStore metadata : name : external-store namespace : rook-ceph spec : gateway : port : 8080 externalRgwEndpoints : - ip : 192.168.39.182 # hostname: example.com Use the existing object-external.yaml file. Even though multiple endpoints can be specified, it is recommend to use only one endpoint. This endpoint is randomly added to configmap of OBC and secret of the cephobjectstoreuser . Rook never guarantees the randomly picked endpoint is a working one or not. If there are multiple endpoints, please add load balancer in front of them and use the load balancer endpoint in the externalRgwEndpoints list. When ready, the message in the cephobjectstore status similar to this one: 1 2 3 kubectl -n rook-ceph get cephobjectstore external-store NAME PHASE external-store Ready Any pod from your cluster can now access this endpoint: 1 2 $ curl 10 .100.28.138:8080 anonymous","title":"Connect to an External Object Store"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#create-a-bucket","text":"Info This document is a guide for creating bucket with an Object Bucket Claim (OBC). To create a bucket with the experimental COSI Driver, see the COSI documentation . Now that the object store is configured, next we need to create a bucket where a client can read and write objects. A bucket can be created by defining a storage class, similar to the pattern used by block and file storage. First, define the storage class that will allow object clients to create a bucket. The storage class defines the object storage system, the bucket retention policy, and other properties required by the administrator. Save the following as storageclass-bucket-delete.yaml (the example is named as such due to the Delete reclaim policy). 1 2 3 4 5 6 7 8 9 10 apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-ceph-bucket # Change \"rook-ceph\" provisioner prefix to match the operator namespace if needed provisioner : rook-ceph.ceph.rook.io/bucket reclaimPolicy : Delete parameters : objectStoreName : my-store objectStoreNamespace : rook-ceph If you\u2019ve deployed the Rook operator in a namespace other than rook-ceph , change the prefix in the provisioner to match the namespace you used. 
For example, if the Rook operator is running in the namespace my-namespace the provisioner value should be my-namespace.ceph.rook.io/bucket . 1 kubectl create -f storageclass-bucket-delete.yaml Based on this storage class, an object client can now request a bucket by creating an Object Bucket Claim (OBC). When the OBC is created, the Rook bucket provisioner will create a new bucket. Notice that the OBC references the storage class that was created above. Save the following as object-bucket-claim-delete.yaml (the example is named as such due to the Delete reclaim policy): 1 2 3 4 5 6 7 apiVersion : objectbucket.io/v1alpha1 kind : ObjectBucketClaim metadata : name : ceph-bucket spec : generateBucketName : ceph-bkt storageClassName : rook-ceph-bucket 1 kubectl create -f object-bucket-claim-delete.yaml Now that the claim is created, the operator will create the bucket as well as generate other artifacts to enable access to the bucket. A secret and ConfigMap are created with the same name as the OBC and in the same namespace. The secret contains credentials used by the application pod to access the bucket. The ConfigMap contains bucket endpoint information and is also consumed by the pod. See the Object Bucket Claim Documentation for more details on the CephObjectBucketClaims .","title":"Create a Bucket"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#client-connections","text":"The following commands extract key pieces of information from the secret and configmap:\" 1 2 3 4 5 6 # config-map, secret, OBC will part of default if no specific name space mentioned export AWS_HOST=$(kubectl -n default get cm ceph-bucket -o jsonpath='{.data.BUCKET_HOST}') export PORT=$(kubectl -n default get cm ceph-bucket -o jsonpath='{.data.BUCKET_PORT}') export BUCKET_NAME=$(kubectl -n default get cm ceph-bucket -o jsonpath='{.data.BUCKET_NAME}') export AWS_ACCESS_KEY_ID=$(kubectl -n default get secret ceph-bucket -o jsonpath='{.data.AWS_ACCESS_KEY_ID}' | base64 --decode) export AWS_SECRET_ACCESS_KEY=$(kubectl -n default get secret ceph-bucket -o jsonpath='{.data.AWS_SECRET_ACCESS_KEY}' | base64 --decode)","title":"Client Connections"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#consume-the-object-storage","text":"Now that you have the object store configured and a bucket created, you can consume the object storage from an S3 client. This section will guide you through testing the connection to the CephObjectStore and uploading and downloading from it. Run the following commands after you have connected to the Rook toolbox .","title":"Consume the Object Storage"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#connection-environment-variables","text":"To simplify the s3 client commands, you will want to set the four environment variables for use by your client (ie. inside the toolbox). See above for retrieving the variables for a bucket created by an ObjectBucketClaim . 1 2 3 4 export AWS_HOST= export PORT= export AWS_ACCESS_KEY_ID= export AWS_SECRET_ACCESS_KEY= Host : The DNS host name where the rgw service is found in the cluster. Assuming you are using the default rook-ceph cluster, it will be rook-ceph-rgw-my-store.rook-ceph.svc . Port : The endpoint where the rgw service is listening. Run kubectl -n rook-ceph get svc rook-ceph-rgw-my-store , to get the port. 
Access key : The user's access_key as printed above Secret key : The user's secret_key as printed above The variables for the user generated in this example might be: 1 2 3 4 export AWS_HOST=rook-ceph-rgw-my-store.rook-ceph.svc export PORT=80 export AWS_ACCESS_KEY_ID=XEZDB3UJ6X7HVBE7X7MA export AWS_SECRET_ACCESS_KEY=7yGIZON7EhFORz0I40BFniML36D2rl8CQQ5kXU6l The access key and secret key can be retrieved as described in the section above on client connections or below in the section creating a user if you are not creating the buckets with an ObjectBucketClaim .","title":"Connection Environment Variables"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#configure-s5cmd","text":"To test the CephObjectStore , set the object store credentials in the toolbox pod that contains the s5cmd tool. Important The default toolbox.yaml does not contain the s5cmd. The toolbox must be started with the rook operator image (toolbox-operator-image), which does contain s5cmd. 1 2 3 4 5 6 7 kubectl create -f deploy/examples/toolbox-operator-image.yaml mkdir ~/.aws cat > ~/.aws/credentials << EOF [default] aws_access_key_id = ${AWS_ACCESS_KEY_ID} aws_secret_access_key = ${AWS_SECRET_ACCESS_KEY} EOF","title":"Configure s5cmd"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#put-or-get-an-object","text":"Upload a file to the newly created bucket 1 2 echo \"Hello Rook\" > /tmp/rookObj s5cmd --endpoint-url http://$AWS_HOST:$PORT cp /tmp/rookObj s3://$BUCKET_NAME Download and verify the file from the bucket 1 2 s5cmd --endpoint-url http://$AWS_HOST:$PORT cp s3://$BUCKET_NAME/rookObj /tmp/rookObj-download cat /tmp/rookObj-download","title":"PUT or GET an object"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#monitoring-health","text":"Rook configures health probes on the deployment created for CephObjectStore gateways. Refer to the CRD document for information about configuring the probes and monitoring the deployment status.","title":"Monitoring health"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#access-external-to-the-cluster","text":"Rook sets up the object storage so pods will have access internal to the cluster. If your applications are running outside the cluster, you will need to setup an external service through a NodePort . First, note the service that exposes RGW internal to the cluster. We will leave this service intact and create a new service for external access. 1 2 3 $ kubectl -n rook-ceph get service rook-ceph-rgw-my-store NAME CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-rgw-my-store 10.3.0.177  80/TCP 2m Save the external service as rgw-external.yaml : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 apiVersion : v1 kind : Service metadata : name : rook-ceph-rgw-my-store-external namespace : rook-ceph labels : app : rook-ceph-rgw rook_cluster : rook-ceph rook_object_store : my-store spec : ports : - name : rgw port : 80 protocol : TCP targetPort : 80 selector : app : rook-ceph-rgw rook_cluster : rook-ceph rook_object_store : my-store sessionAffinity : None type : NodePort Now create the external service. 
1 kubectl create -f rgw-external.yaml See both rgw services running and notice what port the external service is running on: 1 2 3 4 $ kubectl -n rook-ceph get service rook-ceph-rgw-my-store rook-ceph-rgw-my-store-external NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-rgw-my-store ClusterIP 10.104.82.228  80/TCP 4m rook-ceph-rgw-my-store-external NodePort 10.111.113.237  80:31536/TCP 39s Internally the rgw service is running on port 80 . The external port in this case is 31536 . Now you can access the CephObjectStore from anywhere! All you need is the hostname for any machine in the cluster, the external port, and the user credentials.","title":"Access External to the Cluster"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#create-a-user","text":"If you need to create an independent set of user credentials to access the S3 endpoint, create a CephObjectStoreUser . The user will be used to connect to the RGW service in the cluster using the S3 API. The user will be independent of any object bucket claims that you might have created in the earlier instructions in this document. See the Object Store User CRD for more detail on the settings available for a CephObjectStoreUser . 1 2 3 4 5 6 7 8 apiVersion : ceph.rook.io/v1 kind : CephObjectStoreUser metadata : name : my-user namespace : rook-ceph spec : store : my-store displayName : \"my display name\" When the CephObjectStoreUser is created, the Rook operator will then create the RGW user on the specified CephObjectStore and store the Access Key and Secret Key in a kubernetes secret in the same namespace as the CephObjectStoreUser . 1 2 # Create the object store user kubectl create -f object-user.yaml 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 # To confirm the object store user is configured, describe the secret $ kubectl -n rook-ceph describe secret rook-ceph-object-user-my-store-my-user Name: rook-ceph-object-user-my-store-my-user Namespace: rook-ceph Labels: app=rook-ceph-rgw rook_cluster=rook-ceph rook_object_store=my-store Annotations:  Type: kubernetes.io/rook Data ==== AccessKey: 20 bytes SecretKey: 40 bytes The AccessKey and SecretKey data fields can be mounted in a pod as an environment variable. More information on consuming kubernetes secrets can be found in the K8s secret documentation To directly retrieve the secrets: 1 2 kubectl -n rook-ceph get secret rook-ceph-object-user-my-store-my-user -o jsonpath='{.data.AccessKey}' | base64 --decode kubectl -n rook-ceph get secret rook-ceph-object-user-my-store-my-user -o jsonpath='{.data.SecretKey}' | base64 --decode","title":"Create a User"},{"location":"Storage-Configuration/Object-Storage-RGW/object-storage/#object-multisite","text":"Multisite is a feature of Ceph that allows object stores to replicate its data over multiple Ceph clusters. Multisite also allows object stores to be independent and isolated from other object stores in a cluster. For more information on multisite please read the ceph multisite overview for how to run it.","title":"Object Multisite"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-mirroring/","text":"Ceph filesystem mirroring is a process of asynchronous replication of snapshots to a remote CephFS file system. Snapshots are synchronized by mirroring snapshot data followed by creating a snapshot with the same name (for a given directory on the remote file system) as the snapshot being synchronized. It is generally useful when planning for Disaster Recovery. 
Mirroring is for clusters that are geographically distributed and stretching a single cluster is not possible due to high latencies. Prerequisites \u00b6 This guide assumes you have created a Rook cluster as explained in the main quickstart guide Create the Filesystem with Mirroring enabled \u00b6 The following will enable mirroring on the filesystem: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 apiVersion : ceph.rook.io/v1 kind : CephFilesystem metadata : name : myfs namespace : rook-ceph spec : metadataPool : failureDomain : host replicated : size : 3 dataPools : - name : replicated failureDomain : host replicated : size : 3 preserveFilesystemOnDelete : true metadataServer : activeCount : 1 activeStandby : true mirroring : enabled : true # list of Kubernetes Secrets containing the peer token # for more details see: https://docs.ceph.com/en/latest/dev/cephfs-mirroring/#bootstrap-peers # Add the secret name if it already exists else specify the empty list here. peers : secretNames : #- secondary-cluster-peer # specify the schedule(s) on which snapshots should be taken # see the official syntax here https://docs.ceph.com/en/latest/cephfs/snap-schedule/#add-and-remove-schedules snapshotSchedules : - path : / interval : 24h # daily snapshots # The startTime should be mentioned in the format YYYY-MM-DDTHH:MM:SS # If startTime is not specified, then by default the start time is considered as midnight UTC. # see usage here https://docs.ceph.com/en/latest/cephfs/snap-schedule/#usage # startTime: 2022-07-15T11:55:00 # manage retention policies # see syntax duration here https://docs.ceph.com/en/latest/cephfs/snap-schedule/#add-and-remove-retention-policies snapshotRetention : - path : / duration : \"h 24\" Create the cephfs-mirror daemon \u00b6 Launch the rook-ceph-fs-mirror pod on the source storage cluster, which deploys the cephfs-mirror daemon in the cluster: 1 kubectl create -f deploy/examples/filesystem-mirror.yaml Please refer to Filesystem Mirror CRD for more information. Configuring mirroring peers \u00b6 Once mirroring is enabled, Rook will by default create its own bootstrap peer token so that it can be used by another cluster. The bootstrap peer token can be found in a Kubernetes Secret. The name of the Secret is present in the Status field of the CephFilesystem CR: 1 2 3 status : info : fsMirrorBootstrapPeerSecretName : fs-peer-token-myfs This secret can then be fetched like so: 1 2 # kubectl get secret -n rook-ceph fs-peer-token-myfs -o jsonpath = '{.data.token}' | base64 -d eyJmc2lkIjoiOTFlYWUwZGQtMDZiMS00ZDJjLTkxZjMtMTMxMWM5ZGYzODJiIiwiY2xpZW50X2lkIjoicmJkLW1pcnJvci1wZWVyIiwia2V5IjoiQVFEN1psOWZ3V1VGRHhBQWdmY0gyZi8xeUhYeGZDUTU5L1N0NEE9PSIsIm1vbl9ob3N0IjoiW3YyOjEwLjEwMS4xOC4yMjM6MzMwMCx2MToxMC4xMDEuMTguMjIzOjY3ODldIn0= Import the token in the Destination cluster \u00b6 The decoded secret must be saved in a file before importing. 1 # ceph fs snapshot mirror peer_bootstrap import   See the CephFS mirror documentation on how to add a bootstrap peer . Further refer to CephFS mirror documentation to configure a directory for snapshot mirroring . Verify that the snapshots have synced \u00b6 To check the mirror daemon status , please run the following command from the toolbox pod. 
For example : 1 # ceph fs snapshot mirror daemon status | jq 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 [ { \"daemon_id\" : 906790 , \"filesystems\" : [ { \"filesystem_id\" : 1 , \"name\" : \"myfs\" , \"directory_count\" : 1 , \"peers\" : [ { \"uuid\" : \"a24a3366-8130-4d55-aada-95fa9d3ff94d\" , \"remote\" : { \"client_name\" : \"client.mirror\" , \"cluster_name\" : \"91046889-a6aa-4f74-9fb0-f7bb111666b4\" , \"fs_name\" : \"myfs\" }, \"stats\" : { \"failure_count\" : 0 , \"recovery_count\" : 0 } } ] } ] } ] Please refer to the --admin-daemon socket commands from the CephFS mirror documentation to verify mirror status and peer synchronization status and run the commands from the rook-ceph-fs-mirror pod: 1 # kubectl -n rook-ceph exec -it deploy/rook-ceph-fs-mirror -- bash Fetch the ceph-client.fs-mirror daemon admin socket file from the /var/run/ceph directory: 1 # ls -lhsa /var/run/ceph/ 1 # ceph --admin-daemon /var/run/ceph/ceph-client.fs-mirror.1.93989418120648.asok fs mirror status myfs@1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 { \"rados_inst\" : \"X.X.X.X:0/2286593433\" , \"peers\" : { \"a24a3366-8130-4d55-aada-95fa9d3ff94d\" : { \"remote\" : { \"client_name\" : \"client.mirror\" , \"cluster_name\" : \"91046889-a6aa-4f74-9fb0-f7bb111666b4\" , \"fs_name\" : \"myfs\" } } }, \"snap_dirs\" : { \"dir_count\" : 1 } } For getting peer synchronization status : 1 # ceph --admin-daemon /var/run/ceph/ceph-client.fs-mirror.1.93989418120648.asok fs mirror peer status myfs@1 a24a3366-8130-4d55-aada-95fa9d3ff94d 1 2 3 4 5 6 7 8 9 10 11 12 { \"/volumes/_nogroup/subvol-1\" : { \"state\" : \"idle\" , \"last_synced_snap\" : { \"id\" : 4 , \"name\" : \"snap2\" }, \"snaps_synced\" : 0 , \"snaps_deleted\" : 0 , \"snaps_renamed\" : 0 } }","title":"Filesystem Mirroring"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-mirroring/#prerequisites","text":"This guide assumes you have created a Rook cluster as explained in the main quickstart guide","title":"Prerequisites"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-mirroring/#create-the-filesystem-with-mirroring-enabled","text":"The following will enable mirroring on the filesystem: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 apiVersion : ceph.rook.io/v1 kind : CephFilesystem metadata : name : myfs namespace : rook-ceph spec : metadataPool : failureDomain : host replicated : size : 3 dataPools : - name : replicated failureDomain : host replicated : size : 3 preserveFilesystemOnDelete : true metadataServer : activeCount : 1 activeStandby : true mirroring : enabled : true # list of Kubernetes Secrets containing the peer token # for more details see: https://docs.ceph.com/en/latest/dev/cephfs-mirroring/#bootstrap-peers # Add the secret name if it already exists else specify the empty list here. peers : secretNames : #- secondary-cluster-peer # specify the schedule(s) on which snapshots should be taken # see the official syntax here https://docs.ceph.com/en/latest/cephfs/snap-schedule/#add-and-remove-schedules snapshotSchedules : - path : / interval : 24h # daily snapshots # The startTime should be mentioned in the format YYYY-MM-DDTHH:MM:SS # If startTime is not specified, then by default the start time is considered as midnight UTC. 
# see usage here https://docs.ceph.com/en/latest/cephfs/snap-schedule/#usage # startTime: 2022-07-15T11:55:00 # manage retention policies # see syntax duration here https://docs.ceph.com/en/latest/cephfs/snap-schedule/#add-and-remove-retention-policies snapshotRetention : - path : / duration : \"h 24\"","title":"Create the Filesystem with Mirroring enabled"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-mirroring/#create-the-cephfs-mirror-daemon","text":"Launch the rook-ceph-fs-mirror pod on the source storage cluster, which deploys the cephfs-mirror daemon in the cluster: 1 kubectl create -f deploy/examples/filesystem-mirror.yaml Please refer to Filesystem Mirror CRD for more information.","title":"Create the cephfs-mirror daemon"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-mirroring/#configuring-mirroring-peers","text":"Once mirroring is enabled, Rook will by default create its own bootstrap peer token so that it can be used by another cluster. The bootstrap peer token can be found in a Kubernetes Secret. The name of the Secret is present in the Status field of the CephFilesystem CR: 1 2 3 status : info : fsMirrorBootstrapPeerSecretName : fs-peer-token-myfs This secret can then be fetched like so: 1 2 # kubectl get secret -n rook-ceph fs-peer-token-myfs -o jsonpath = '{.data.token}' | base64 -d eyJmc2lkIjoiOTFlYWUwZGQtMDZiMS00ZDJjLTkxZjMtMTMxMWM5ZGYzODJiIiwiY2xpZW50X2lkIjoicmJkLW1pcnJvci1wZWVyIiwia2V5IjoiQVFEN1psOWZ3V1VGRHhBQWdmY0gyZi8xeUhYeGZDUTU5L1N0NEE9PSIsIm1vbl9ob3N0IjoiW3YyOjEwLjEwMS4xOC4yMjM6MzMwMCx2MToxMC4xMDEuMTguMjIzOjY3ODldIn0=","title":"Configuring mirroring peers"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-mirroring/#import-the-token-in-the-destination-cluster","text":"The decoded secret must be saved in a file before importing. 1 # ceph fs snapshot mirror peer_bootstrap import   See the CephFS mirror documentation on how to add a bootstrap peer . Further refer to CephFS mirror documentation to configure a directory for snapshot mirroring .","title":"Import the token in the Destination cluster"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-mirroring/#verify-that-the-snapshots-have-synced","text":"To check the mirror daemon status , please run the following command from the toolbox pod. 
For example : 1 # ceph fs snapshot mirror daemon status | jq 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 [ { \"daemon_id\" : 906790 , \"filesystems\" : [ { \"filesystem_id\" : 1 , \"name\" : \"myfs\" , \"directory_count\" : 1 , \"peers\" : [ { \"uuid\" : \"a24a3366-8130-4d55-aada-95fa9d3ff94d\" , \"remote\" : { \"client_name\" : \"client.mirror\" , \"cluster_name\" : \"91046889-a6aa-4f74-9fb0-f7bb111666b4\" , \"fs_name\" : \"myfs\" }, \"stats\" : { \"failure_count\" : 0 , \"recovery_count\" : 0 } } ] } ] } ] Please refer to the --admin-daemon socket commands from the CephFS mirror documentation to verify mirror status and peer synchronization status and run the commands from the rook-ceph-fs-mirror pod: 1 # kubectl -n rook-ceph exec -it deploy/rook-ceph-fs-mirror -- bash Fetch the ceph-client.fs-mirror daemon admin socket file from the /var/run/ceph directory: 1 # ls -lhsa /var/run/ceph/ 1 # ceph --admin-daemon /var/run/ceph/ceph-client.fs-mirror.1.93989418120648.asok fs mirror status myfs@1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 { \"rados_inst\" : \"X.X.X.X:0/2286593433\" , \"peers\" : { \"a24a3366-8130-4d55-aada-95fa9d3ff94d\" : { \"remote\" : { \"client_name\" : \"client.mirror\" , \"cluster_name\" : \"91046889-a6aa-4f74-9fb0-f7bb111666b4\" , \"fs_name\" : \"myfs\" } } }, \"snap_dirs\" : { \"dir_count\" : 1 } } For getting peer synchronization status : 1 # ceph --admin-daemon /var/run/ceph/ceph-client.fs-mirror.1.93989418120648.asok fs mirror peer status myfs@1 a24a3366-8130-4d55-aada-95fa9d3ff94d 1 2 3 4 5 6 7 8 9 10 11 12 { \"/volumes/_nogroup/subvol-1\" : { \"state\" : \"idle\" , \"last_synced_snap\" : { \"id\" : 4 , \"name\" : \"snap2\" }, \"snaps_synced\" : 0 , \"snaps_deleted\" : 0 , \"snaps_renamed\" : 0 } }","title":"Verify that the snapshots have synced"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/","text":"A filesystem storage (also named shared filesystem) can be mounted with read/write permission from multiple pods. This may be useful for applications which can be clustered using a shared filesystem. This example runs a shared filesystem for the kube-registry . Prerequisites \u00b6 This guide assumes you have created a Rook cluster as explained in the main quickstart guide Multiple Filesystems Support \u00b6 Multiple filesystems are supported as of the Ceph Pacific release. Create the Filesystem \u00b6 Create the filesystem by specifying the desired settings for the metadata pool, data pools, and metadata server in the CephFilesystem CRD. In this example we create the metadata pool with replication of three and a single data pool with replication of three. For more options, see the documentation on creating shared filesystems . Save this shared filesystem definition as filesystem.yaml : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 apiVersion : ceph.rook.io/v1 kind : CephFilesystem metadata : name : myfs namespace : rook-ceph spec : metadataPool : replicated : size : 3 dataPools : - name : replicated replicated : size : 3 preserveFilesystemOnDelete : true metadataServer : activeCount : 1 activeStandby : true The Rook operator will create all the pools and other resources necessary to start the service. This may take a minute to complete. 1 2 3 # Create the filesystem kubectl create -f filesystem.yaml [...] 
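You can also check the CephFilesystem resource itself while the pools and MDS daemons are being created; a minimal sketch assuming the default namespace:
kubectl -n rook-ceph get cephfilesystem myfs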
To confirm the filesystem is configured, wait for the mds pods to start: 1 2 3 4 $ kubectl -n rook-ceph get pod -l app = rook-ceph-mds NAME READY STATUS RESTARTS AGE rook-ceph-mds-myfs-7d59fdfcf4-h8kw9 1/1 Running 0 12s rook-ceph-mds-myfs-7d59fdfcf4-kgkjp 1/1 Running 0 12s To see detailed status of the filesystem, start and connect to the Rook toolbox . A new line will be shown with ceph status for the mds service. In this example, there is one active instance of MDS which is up, with one MDS instance in standby-replay mode in case of failover. 1 2 3 4 $ ceph status [...] services: mds: myfs-1/1/1 up {[myfs:0]=mzw58b=up:active}, 1 up:standby-replay Provision Storage \u00b6 Before Rook can start provisioning storage, a StorageClass needs to be created based on the filesystem. This is needed for Kubernetes to interoperate with the CSI driver to create persistent volumes. Save this storage class definition as storageclass.yaml : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-cephfs # Change \"rook-ceph\" provisioner prefix to match the operator namespace if needed provisioner : rook-ceph.cephfs.csi.ceph.com parameters : # clusterID is the namespace where the rook cluster is running # If you change this namespace, also change the namespace below where the secret namespaces are defined clusterID : rook-ceph # CephFS filesystem name into which the volume shall be created fsName : myfs # Ceph pool into which the volume shall be created # Required for provisionVolume: \"true\" pool : myfs-replicated # The secrets contain Ceph admin credentials. These are generated automatically by the operator # in the same namespace as the cluster. csi.storage.k8s.io/provisioner-secret-name : rook-csi-cephfs-provisioner csi.storage.k8s.io/provisioner-secret-namespace : rook-ceph csi.storage.k8s.io/controller-expand-secret-name : rook-csi-cephfs-provisioner csi.storage.k8s.io/controller-expand-secret-namespace : rook-ceph csi.storage.k8s.io/node-stage-secret-name : rook-csi-cephfs-node csi.storage.k8s.io/node-stage-secret-namespace : rook-ceph reclaimPolicy : Delete If you've deployed the Rook operator in a namespace other than \"rook-ceph\" as is common change the prefix in the provisioner to match the namespace you used. For example, if the Rook operator is running in \"rook-op\" the provisioner value should be \"rook-op.rbd.csi.ceph.com\". Create the storage class. 1 kubectl create -f deploy/examples/csi/cephfs/storageclass.yaml Quotas \u00b6 Attention The CephFS CSI driver uses quotas to enforce the PVC size requested. Only newer kernels support CephFS quotas (kernel version of at least 4.17). If you require quotas to be enforced and the kernel driver does not support it, you can disable the kernel driver and use the FUSE client. This can be done by setting CSI_FORCE_CEPHFS_KERNEL_CLIENT: false in the operator deployment ( operator.yaml ). However, it is important to know that when the FUSE client is enabled, there is an issue that during upgrade the application pods will be disconnected from the mount and will need to be restarted. See the upgrade guide for more details. Consume the Shared Filesystem: K8s Registry Sample \u00b6 As an example, we will start the kube-registry pod with the shared filesystem as the backing store. 
Save the following spec as kube-registry.yaml : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 apiVersion : v1 kind : PersistentVolumeClaim metadata : name : cephfs-pvc namespace : kube-system spec : accessModes : - ReadWriteMany resources : requests : storage : 1Gi storageClassName : rook-cephfs --- apiVersion : apps/v1 kind : Deployment metadata : name : kube-registry namespace : kube-system labels : k8s-app : kube-registry kubernetes.io/cluster-service : \"true\" spec : replicas : 3 selector : matchLabels : k8s-app : kube-registry template : metadata : labels : k8s-app : kube-registry kubernetes.io/cluster-service : \"true\" spec : containers : - name : registry image : registry:2 imagePullPolicy : Always resources : limits : cpu : 100m memory : 100Mi env : # Configuration reference: https://docs.docker.com/registry/configuration/ - name : REGISTRY_HTTP_ADDR value : :5000 - name : REGISTRY_HTTP_SECRET value : \"Ple4seCh4ngeThisN0tAVerySecretV4lue\" - name : REGISTRY_STORAGE_FILESYSTEM_ROOTDIRECTORY value : /var/lib/registry volumeMounts : - name : image-store mountPath : /var/lib/registry ports : - containerPort : 5000 name : registry protocol : TCP livenessProbe : httpGet : path : / port : registry readinessProbe : httpGet : path : / port : registry volumes : - name : image-store persistentVolumeClaim : claimName : cephfs-pvc readOnly : false Create the Kube registry deployment: 1 kubectl create -f deploy/examples/csi/cephfs/kube-registry.yaml You now have a docker registry which is HA with persistent storage. Kernel Version Requirement \u00b6 If the Rook cluster has more than one filesystem and the application pod is scheduled to a node with kernel version older than 4.7, inconsistent results may arise since kernels older than 4.7 do not support specifying filesystem namespaces. Consume the Shared Filesystem: Toolbox \u00b6 Once you have pushed an image to the registry (see the instructions to expose and use the kube-registry), verify that kube-registry is using the filesystem that was configured above by mounting the shared filesystem in the toolbox pod. See the Direct Filesystem topic for more details. Consume the Shared Filesystem across namespaces \u00b6 A PVC that you create using the rook-cephfs storageClass can be shared between different Pods simultaneously, either read-write or read-only, but is restricted to a single namespace (a PVC is a namespace-scoped resource, so you cannot use it in another one). However there are some use cases where you want to share the content from a CephFS-based PVC among different Pods in different namespaces, for a shared library for example, or a collaboration workspace between applications running in different namespaces. You can do that using the following recipe. Shared volume creation \u00b6 In the rook namespace, create a copy of the secret rook-csi-cephfs-node , name it rook-csi-cephfs-node-user . 
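One way to create that copy is to export the existing secret, rename it, and re-create it (a sketch, assuming the cluster namespace rook used in this example; adjust the namespace to match your cluster, and note that the key renaming described next still has to be done):

```bash
# Export the node secret, strip server-managed fields, rename it, and re-create it
kubectl -n rook get secret rook-csi-cephfs-node -o json \
  | jq 'del(.metadata.resourceVersion, .metadata.uid, .metadata.creationTimestamp, .metadata.ownerReferences) | .metadata.name = "rook-csi-cephfs-node-user"' \
  | kubectl apply -f -
```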
Edit your new secret, changing the name of the keys (keep the value as it is): adminID -> userID adminKey -> userKey Create the PVC you want to share, for example: 1 2 3 4 5 6 7 8 9 10 11 12 13 kind : PersistentVolumeClaim apiVersion : v1 metadata : name : base-pvc namespace : first-namespace spec : accessModes : - ReadWriteMany resources : requests : storage : 100Gi storageClassName : rook-cephfs volumeMode : Filesystem The corresponding PV that is created will have all the necessary info to connect to the CephFS volume (all non-necessary information are removed here): 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 kind : PersistentVolume apiVersion : v1 metadata : name : pvc-a02dd277-cb26-4c1e-9434-478ebc321e22 annotations : pv.kubernetes.io/provisioned-by : rook.cephfs.csi.ceph.com finalizers : - kubernetes.io/pv-protection spec : capacity : storage : 100Gi csi : driver : rook.cephfs.csi.ceph.com volumeHandle : >- 0001-0011-rook-0000000000000001-8a528de0-e274-11ec-b069-0a580a800213 volumeAttributes : clusterID : rook fsName : rook-cephfilesystem storage.kubernetes.io/csiProvisionerIdentity : 1654174264855-8081-rook.cephfs.csi.ceph.com subvolumeName : csi-vol-8a528de0-e274-11ec-b069-0a580a800213 subvolumePath : >- /volumes/csi/csi-vol-8a528de0-e274-11ec-b069-0a580a800213/da98fb83-fff3-485a-a0a9-57c227cb67ec nodeStageSecretRef : name : rook-csi-cephfs-node namespace : rook controllerExpandSecretRef : name : rook-csi-cephfs-provisioner namespace : rook accessModes : - ReadWriteMany claimRef : kind : PersistentVolumeClaim namespace : first-namespace name : base-pvc apiVersion : v1 resourceVersion : '49728' persistentVolumeReclaimPolicy : Retain storageClassName : rook-cephfs volumeMode : Filesystem On this PV, change the persistentVolumeReclaimPolicy parameter to Retain to avoid it from being deleted when you will delete PVCs. Don't forget to change it back to Delete when you want to remove the shared volume (see full procedure in the next section). Copy the YAML content of the PV, and create a new static PV with the same information and some modifications. From the original YAML, you must: Modify the original name. To keep track, the best solution is to append to the original name the namespace name where you want your new PV. In this example newnamespace . Modify the volumeHandle. Again append the targeted namespace. Add the staticVolume: \"true\" entry to the volumeAttributes. Add the rootPath entry to the volumeAttributes, with the same content as subvolumePath . In the nodeStageSecretRef section, change the name to point to the secret you created earlier, rook-csi-cephfs-node-user . 
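A convenient starting point is to dump the existing PV to a file and edit that copy (a sketch using the example PV name from above):

```bash
# Export the dynamically provisioned PV as a template for the new static PV
kubectl get pv pvc-a02dd277-cb26-4c1e-9434-478ebc321e22 -o yaml > static-pv.yaml
```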
Remove the unnecessary information before applying the YAML (claimRef, managedFields,...): Your YAML should look like this: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 kind : PersistentVolume apiVersion : v1 metadata : name : pvc-a02dd277-cb26-4c1e-9434-478ebc321e22-newnamespace spec : capacity : storage : 100Gi csi : driver : rook.cephfs.csi.ceph.com volumeHandle : >- 0001-0011-rook-0000000000000001-8a528de0-e274-11ec-b069-0a580a800213-newnamespace volumeAttributes : clusterID : rook fsName : rook-cephfilesystem storage.kubernetes.io/csiProvisionerIdentity : 1654174264855-8081-rook.cephfs.csi.ceph.com subvolumeName : csi-vol-8a528de0-e274-11ec-b069-0a580a800213 subvolumePath : >- /volumes/csi/csi-vol-8a528de0-e274-11ec-b069-0a580a800213/da98fb83-fff3-485a-a0a9-57c227cb67ec rootPath : >- /volumes/csi/csi-vol-8a528de0-e274-11ec-b069-0a580a800213/da98fb83-fff3-485a-a0a9-57c227cb67ec staticVolume : \"true\" nodeStageSecretRef : name : rook-csi-cephfs-node namespace : rook accessModes : - ReadWriteMany persistentVolumeReclaimPolicy : Retain storageClassName : rook-cephfs volumeMode : Filesystem In a new or other namespace, create a new PVC that will use this new PV you created. You simply have to point to it in the volumeName parameter. Make sure you enter the same size as the original PVC!: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 kind : PersistentVolumeClaim apiVersion : v1 metadata : name : second-pvc namespace : newnamespace finalizers : - kubernetes.io/pvc-protection spec : accessModes : - ReadWriteMany resources : requests : storage : 100Gi volumeName : pvc-a02dd277-cb26-4c1e-9434-478ebc321e22-newnamespace storageClassName : rook-cephfs volumeMode : Filesystem You have now access to the same CephFS subvolume from different PVCs in different namespaces. Redo the previous steps (copy PV with a new name, create a PVC pointing to it) in each namespace you want to use this subvolume. Note : the new PVCs/PVs we have created are static. Therefore CephCSI does not support snapshots, clones, resizing or delete operations for them. If those operations are required, you must make them on the original PVC. Shared volume removal \u00b6 As the same CephFS volume is used by different PVCs/PVs, you must proceed very orderly to remove it properly. Delete the static PVCs in the different namespaces, but keep the original one! Delete the corresponding static PVs that should now have been marked as \"Released\". Again, don't delete the original one yet! Edit the original PV, changing back the persistentVolumeReclaimPolicy from Retain to Delete . Delete the original PVC. It will now properly delete the original PV, as well as the subvolume in CephFS. Pending Issue \u00b6 Due to this bug , the global mount for a Volume that is mounted multiple times on the same node will not be unmounted. This does not result in any particular problem, apart from polluting the logs with unmount error messages, or having many different mounts hanging if you create and delete many shared PVCs, or you don't really use them. Until this issue is solved, either on the Rook or Kubelet side, you can always manually unmount the unwanted hanging global mounts on the nodes: Log onto each node where the volume has been mounted. Check for hanging mounts using their volumeHandle . Unmount the unwanted volumes. Teardown \u00b6 To clean up all the artifacts created by the filesystem demo: 1 kubectl delete -f kube-registry.yaml To delete the filesystem components and backing data, delete the Filesystem CRD. 
Warning Data will be deleted if preserveFilesystemOnDelete=false**. 1 kubectl -n rook-ceph delete cephfilesystem myfs Note: If the \"preserveFilesystemOnDelete\" filesystem attribute is set to true, the above command won't delete the filesystem. Recreating the same CRD will reuse the existing filesystem. Advanced Example: Erasure Coded Filesystem \u00b6 The Ceph filesystem example can be found here: Ceph Shared Filesystem - Samples - Erasure Coded .","title":"Filesystem Storage Overview"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#prerequisites","text":"This guide assumes you have created a Rook cluster as explained in the main quickstart guide","title":"Prerequisites"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#multiple-filesystems-support","text":"Multiple filesystems are supported as of the Ceph Pacific release.","title":"Multiple Filesystems Support"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#create-the-filesystem","text":"Create the filesystem by specifying the desired settings for the metadata pool, data pools, and metadata server in the CephFilesystem CRD. In this example we create the metadata pool with replication of three and a single data pool with replication of three. For more options, see the documentation on creating shared filesystems . Save this shared filesystem definition as filesystem.yaml : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 apiVersion : ceph.rook.io/v1 kind : CephFilesystem metadata : name : myfs namespace : rook-ceph spec : metadataPool : replicated : size : 3 dataPools : - name : replicated replicated : size : 3 preserveFilesystemOnDelete : true metadataServer : activeCount : 1 activeStandby : true The Rook operator will create all the pools and other resources necessary to start the service. This may take a minute to complete. 1 2 3 # Create the filesystem kubectl create -f filesystem.yaml [...] To confirm the filesystem is configured, wait for the mds pods to start: 1 2 3 4 $ kubectl -n rook-ceph get pod -l app = rook-ceph-mds NAME READY STATUS RESTARTS AGE rook-ceph-mds-myfs-7d59fdfcf4-h8kw9 1/1 Running 0 12s rook-ceph-mds-myfs-7d59fdfcf4-kgkjp 1/1 Running 0 12s To see detailed status of the filesystem, start and connect to the Rook toolbox . A new line will be shown with ceph status for the mds service. In this example, there is one active instance of MDS which is up, with one MDS instance in standby-replay mode in case of failover. 1 2 3 4 $ ceph status [...] services: mds: myfs-1/1/1 up {[myfs:0]=mzw58b=up:active}, 1 up:standby-replay","title":"Create the Filesystem"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#provision-storage","text":"Before Rook can start provisioning storage, a StorageClass needs to be created based on the filesystem. This is needed for Kubernetes to interoperate with the CSI driver to create persistent volumes. 
Save this storage class definition as storageclass.yaml : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 apiVersion : storage.k8s.io/v1 kind : StorageClass metadata : name : rook-cephfs # Change \"rook-ceph\" provisioner prefix to match the operator namespace if needed provisioner : rook-ceph.cephfs.csi.ceph.com parameters : # clusterID is the namespace where the rook cluster is running # If you change this namespace, also change the namespace below where the secret namespaces are defined clusterID : rook-ceph # CephFS filesystem name into which the volume shall be created fsName : myfs # Ceph pool into which the volume shall be created # Required for provisionVolume: \"true\" pool : myfs-replicated # The secrets contain Ceph admin credentials. These are generated automatically by the operator # in the same namespace as the cluster. csi.storage.k8s.io/provisioner-secret-name : rook-csi-cephfs-provisioner csi.storage.k8s.io/provisioner-secret-namespace : rook-ceph csi.storage.k8s.io/controller-expand-secret-name : rook-csi-cephfs-provisioner csi.storage.k8s.io/controller-expand-secret-namespace : rook-ceph csi.storage.k8s.io/node-stage-secret-name : rook-csi-cephfs-node csi.storage.k8s.io/node-stage-secret-namespace : rook-ceph reclaimPolicy : Delete If you've deployed the Rook operator in a namespace other than \"rook-ceph\" as is common change the prefix in the provisioner to match the namespace you used. For example, if the Rook operator is running in \"rook-op\" the provisioner value should be \"rook-op.rbd.csi.ceph.com\". Create the storage class. 1 kubectl create -f deploy/examples/csi/cephfs/storageclass.yaml","title":"Provision Storage"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#quotas","text":"Attention The CephFS CSI driver uses quotas to enforce the PVC size requested. Only newer kernels support CephFS quotas (kernel version of at least 4.17). If you require quotas to be enforced and the kernel driver does not support it, you can disable the kernel driver and use the FUSE client. This can be done by setting CSI_FORCE_CEPHFS_KERNEL_CLIENT: false in the operator deployment ( operator.yaml ). However, it is important to know that when the FUSE client is enabled, there is an issue that during upgrade the application pods will be disconnected from the mount and will need to be restarted. See the upgrade guide for more details.","title":"Quotas"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#consume-the-shared-filesystem-k8s-registry-sample","text":"As an example, we will start the kube-registry pod with the shared filesystem as the backing store. 
Save the following spec as kube-registry.yaml : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 apiVersion : v1 kind : PersistentVolumeClaim metadata : name : cephfs-pvc namespace : kube-system spec : accessModes : - ReadWriteMany resources : requests : storage : 1Gi storageClassName : rook-cephfs --- apiVersion : apps/v1 kind : Deployment metadata : name : kube-registry namespace : kube-system labels : k8s-app : kube-registry kubernetes.io/cluster-service : \"true\" spec : replicas : 3 selector : matchLabels : k8s-app : kube-registry template : metadata : labels : k8s-app : kube-registry kubernetes.io/cluster-service : \"true\" spec : containers : - name : registry image : registry:2 imagePullPolicy : Always resources : limits : cpu : 100m memory : 100Mi env : # Configuration reference: https://docs.docker.com/registry/configuration/ - name : REGISTRY_HTTP_ADDR value : :5000 - name : REGISTRY_HTTP_SECRET value : \"Ple4seCh4ngeThisN0tAVerySecretV4lue\" - name : REGISTRY_STORAGE_FILESYSTEM_ROOTDIRECTORY value : /var/lib/registry volumeMounts : - name : image-store mountPath : /var/lib/registry ports : - containerPort : 5000 name : registry protocol : TCP livenessProbe : httpGet : path : / port : registry readinessProbe : httpGet : path : / port : registry volumes : - name : image-store persistentVolumeClaim : claimName : cephfs-pvc readOnly : false Create the Kube registry deployment: 1 kubectl create -f deploy/examples/csi/cephfs/kube-registry.yaml You now have a docker registry which is HA with persistent storage.","title":"Consume the Shared Filesystem: K8s Registry Sample"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#kernel-version-requirement","text":"If the Rook cluster has more than one filesystem and the application pod is scheduled to a node with kernel version older than 4.7, inconsistent results may arise since kernels older than 4.7 do not support specifying filesystem namespaces.","title":"Kernel Version Requirement"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#consume-the-shared-filesystem-toolbox","text":"Once you have pushed an image to the registry (see the instructions to expose and use the kube-registry), verify that kube-registry is using the filesystem that was configured above by mounting the shared filesystem in the toolbox pod. See the Direct Filesystem topic for more details.","title":"Consume the Shared Filesystem: Toolbox"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#consume-the-shared-filesystem-across-namespaces","text":"A PVC that you create using the rook-cephfs storageClass can be shared between different Pods simultaneously, either read-write or read-only, but is restricted to a single namespace (a PVC is a namespace-scoped resource, so you cannot use it in another one). However there are some use cases where you want to share the content from a CephFS-based PVC among different Pods in different namespaces, for a shared library for example, or a collaboration workspace between applications running in different namespaces. 
You can do that using the following recipe.","title":"Consume the Shared Filesystem across namespaces"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#shared-volume-creation","text":"In the rook namespace, create a copy of the secret rook-csi-cephfs-node , name it rook-csi-cephfs-node-user . Edit your new secret, changing the name of the keys (keep the value as it is): adminID -> userID adminKey -> userKey Create the PVC you want to share, for example: 1 2 3 4 5 6 7 8 9 10 11 12 13 kind : PersistentVolumeClaim apiVersion : v1 metadata : name : base-pvc namespace : first-namespace spec : accessModes : - ReadWriteMany resources : requests : storage : 100Gi storageClassName : rook-cephfs volumeMode : Filesystem The corresponding PV that is created will have all the necessary info to connect to the CephFS volume (all non-necessary information are removed here): 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 kind : PersistentVolume apiVersion : v1 metadata : name : pvc-a02dd277-cb26-4c1e-9434-478ebc321e22 annotations : pv.kubernetes.io/provisioned-by : rook.cephfs.csi.ceph.com finalizers : - kubernetes.io/pv-protection spec : capacity : storage : 100Gi csi : driver : rook.cephfs.csi.ceph.com volumeHandle : >- 0001-0011-rook-0000000000000001-8a528de0-e274-11ec-b069-0a580a800213 volumeAttributes : clusterID : rook fsName : rook-cephfilesystem storage.kubernetes.io/csiProvisionerIdentity : 1654174264855-8081-rook.cephfs.csi.ceph.com subvolumeName : csi-vol-8a528de0-e274-11ec-b069-0a580a800213 subvolumePath : >- /volumes/csi/csi-vol-8a528de0-e274-11ec-b069-0a580a800213/da98fb83-fff3-485a-a0a9-57c227cb67ec nodeStageSecretRef : name : rook-csi-cephfs-node namespace : rook controllerExpandSecretRef : name : rook-csi-cephfs-provisioner namespace : rook accessModes : - ReadWriteMany claimRef : kind : PersistentVolumeClaim namespace : first-namespace name : base-pvc apiVersion : v1 resourceVersion : '49728' persistentVolumeReclaimPolicy : Retain storageClassName : rook-cephfs volumeMode : Filesystem On this PV, change the persistentVolumeReclaimPolicy parameter to Retain to avoid it from being deleted when you will delete PVCs. Don't forget to change it back to Delete when you want to remove the shared volume (see full procedure in the next section). Copy the YAML content of the PV, and create a new static PV with the same information and some modifications. From the original YAML, you must: Modify the original name. To keep track, the best solution is to append to the original name the namespace name where you want your new PV. In this example newnamespace . Modify the volumeHandle. Again append the targeted namespace. Add the staticVolume: \"true\" entry to the volumeAttributes. Add the rootPath entry to the volumeAttributes, with the same content as subvolumePath . In the nodeStageSecretRef section, change the name to point to the secret you created earlier, rook-csi-cephfs-node-user . 
Remove the unnecessary information before applying the YAML (claimRef, managedFields,...): Your YAML should look like this: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 kind : PersistentVolume apiVersion : v1 metadata : name : pvc-a02dd277-cb26-4c1e-9434-478ebc321e22-newnamespace spec : capacity : storage : 100Gi csi : driver : rook.cephfs.csi.ceph.com volumeHandle : >- 0001-0011-rook-0000000000000001-8a528de0-e274-11ec-b069-0a580a800213-newnamespace volumeAttributes : clusterID : rook fsName : rook-cephfilesystem storage.kubernetes.io/csiProvisionerIdentity : 1654174264855-8081-rook.cephfs.csi.ceph.com subvolumeName : csi-vol-8a528de0-e274-11ec-b069-0a580a800213 subvolumePath : >- /volumes/csi/csi-vol-8a528de0-e274-11ec-b069-0a580a800213/da98fb83-fff3-485a-a0a9-57c227cb67ec rootPath : >- /volumes/csi/csi-vol-8a528de0-e274-11ec-b069-0a580a800213/da98fb83-fff3-485a-a0a9-57c227cb67ec staticVolume : \"true\" nodeStageSecretRef : name : rook-csi-cephfs-node namespace : rook accessModes : - ReadWriteMany persistentVolumeReclaimPolicy : Retain storageClassName : rook-cephfs volumeMode : Filesystem In a new or other namespace, create a new PVC that will use this new PV you created. You simply have to point to it in the volumeName parameter. Make sure you enter the same size as the original PVC!: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 kind : PersistentVolumeClaim apiVersion : v1 metadata : name : second-pvc namespace : newnamespace finalizers : - kubernetes.io/pvc-protection spec : accessModes : - ReadWriteMany resources : requests : storage : 100Gi volumeName : pvc-a02dd277-cb26-4c1e-9434-478ebc321e22-newnamespace storageClassName : rook-cephfs volumeMode : Filesystem You have now access to the same CephFS subvolume from different PVCs in different namespaces. Redo the previous steps (copy PV with a new name, create a PVC pointing to it) in each namespace you want to use this subvolume. Note : the new PVCs/PVs we have created are static. Therefore CephCSI does not support snapshots, clones, resizing or delete operations for them. If those operations are required, you must make them on the original PVC.","title":"Shared volume creation"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#shared-volume-removal","text":"As the same CephFS volume is used by different PVCs/PVs, you must proceed very orderly to remove it properly. Delete the static PVCs in the different namespaces, but keep the original one! Delete the corresponding static PVs that should now have been marked as \"Released\". Again, don't delete the original one yet! Edit the original PV, changing back the persistentVolumeReclaimPolicy from Retain to Delete . Delete the original PVC. It will now properly delete the original PV, as well as the subvolume in CephFS.","title":"Shared volume removal"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#pending-issue","text":"Due to this bug , the global mount for a Volume that is mounted multiple times on the same node will not be unmounted. This does not result in any particular problem, apart from polluting the logs with unmount error messages, or having many different mounts hanging if you create and delete many shared PVCs, or you don't really use them. Until this issue is solved, either on the Rook or Kubelet side, you can always manually unmount the unwanted hanging global mounts on the nodes: Log onto each node where the volume has been mounted. 
Check for hanging mounts using their volumeHandle . Unmount the unwanted volumes.","title":"Pending Issue"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#teardown","text":"To clean up all the artifacts created by the filesystem demo: 1 kubectl delete -f kube-registry.yaml To delete the filesystem components and backing data, delete the Filesystem CRD. Warning Data will be deleted if preserveFilesystemOnDelete=false . 1 kubectl -n rook-ceph delete cephfilesystem myfs Note: If the \"preserveFilesystemOnDelete\" filesystem attribute is set to true, the above command won't delete the filesystem. Recreating the same CRD will reuse the existing filesystem.","title":"Teardown"},{"location":"Storage-Configuration/Shared-Filesystem-CephFS/filesystem-storage/#advanced-example-erasure-coded-filesystem","text":"The Ceph filesystem example can be found here: Ceph Shared Filesystem - Samples - Erasure Coded .","title":"Advanced Example: Erasure Coded Filesystem"},{"location":"Troubleshooting/ceph-common-issues/","text":"Many of these problem cases are hard to summarize down to a short phrase that adequately describes the problem. Each problem will start with a bulleted list of symptoms. Keep in mind that not all symptoms may apply, depending on the configuration of Rook. If the majority of the symptoms are seen, there is a fair chance you are experiencing that problem. If the problem is not resolved after trying the suggestions found on this page, the Rook team is very happy to help you troubleshoot the issue in their Slack channel. Once you have registered for the Rook Slack , proceed to the #ceph channel to ask for assistance. See also the CSI Troubleshooting Guide . Troubleshooting Techniques \u00b6 There are two main categories of information you will need to investigate issues in the cluster: Kubernetes status and logs documented here Ceph cluster status (see upcoming Ceph tools section) Ceph Tools \u00b6 After you verify the basic health of the running pods, you will next want to run Ceph tools to check the status of the storage components. There are two ways to run the Ceph tools, either in the Rook toolbox or inside other Rook pods that are already running. Logs on a specific node to find why a PVC is failing to mount See the log collection topic for a script that will help you gather the logs Other artifacts: The monitors that are expected to be in quorum: kubectl -n  get configmap rook-ceph-mon-endpoints -o yaml | grep data Tools in the Rook Toolbox \u00b6 The rook-ceph-tools pod provides a simple environment to run Ceph tools. Once the pod is up and running, connect to the pod to execute Ceph commands to evaluate the current state of the cluster. 1 kubectl -n rook-ceph exec -it $(kubectl -n rook-ceph get pod -l \"app=rook-ceph-tools\" -o jsonpath='{.items[*].metadata.name}') bash Ceph Commands \u00b6 Here are some common commands to troubleshoot a Ceph cluster: ceph status ceph osd status ceph osd df ceph osd utilization ceph osd pool stats ceph osd tree ceph pg stat The first two status commands provide the overall cluster health. The normal state for cluster operations is HEALTH_OK, but the cluster will still function when it is in a HEALTH_WARN state. If the cluster is in a WARN state, it may enter the HEALTH_ERROR state, at which point all disk I/O operations are halted. If a HEALTH_WARN state is observed, then one should take action to prevent the cluster from halting when it enters the HEALTH_ERROR state. 
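These commands can also be run non-interactively through the toolbox deployment, for example (assuming the default rook-ceph namespace):

```bash
kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph status
kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph osd df
```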
There are many Ceph sub-commands to look at and manipulate Ceph objects, well beyond the scope this document. See the Ceph documentation for more details of gathering information about the health of the cluster. In addition, there are other helpful hints and some best practices located in the Advanced Configuration section . Of particular note, there are scripts for collecting logs and gathering OSD information there. Cluster failing to service requests \u00b6 Symptoms \u00b6 Execution of the ceph command hangs PersistentVolumes are not being created Large amount of slow requests are blocking Large amount of stuck requests are blocking One or more MONs are restarting periodically Investigation \u00b6 Create a rook-ceph-tools pod to investigate the current state of Ceph. Here is an example of what one might see. In this case the ceph status command would just hang so a CTRL-C needed to be sent. 1 2 3 4 kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- ceph status ceph status ^CCluster connection interrupted or timed out Another indication is when one or more of the MON pods restart frequently. Note the 'mon107' that has only been up for 16 minutes in the following output. 1 2 3 4 5 6 7 8 $ kubectl -n rook-ceph get all -o wide --show-all NAME READY STATUS RESTARTS AGE IP NODE po/rook-ceph-mgr0-2487684371-gzlbq 1/1 Running 0 17h 192.168.224.46 k8-host-0402 po/rook-ceph-mon107-p74rj 1/1 Running 0 16m 192.168.224.28 k8-host-0402 rook-ceph-mon1-56fgm 1/1 Running 0 2d 192.168.91.135 k8-host-0404 rook-ceph-mon2-rlxcd 1/1 Running 0 2d 192.168.123.33 k8-host-0403 rook-ceph-osd-bg2vj 1/1 Running 0 2d 192.168.91.177 k8-host-0404 rook-ceph-osd-mwxdm 1/1 Running 0 2d 192.168.123.31 k8-host-0403 Solution \u00b6 What is happening here is that the MON pods are restarting and one or more of the Ceph daemons are not getting configured with the proper cluster information. This is commonly the result of not specifying a value for dataDirHostPath in your Cluster CRD. The dataDirHostPath setting specifies a path on the local host for the Ceph daemons to store configuration and data. Setting this to a path like /var/lib/rook , reapplying your Cluster CRD and restarting all the Ceph daemons (MON, MGR, OSD, RGW) should solve this problem. After the Ceph daemons have been restarted, it is advisable to restart the rook-tools pod . Monitors are the only pods running \u00b6 Symptoms \u00b6 Rook operator is running Either a single mon starts or the mons start very slowly (at least several minutes apart) The crash-collector pods are crashing No mgr, osd, or other daemons are created except the CSI driver Investigation \u00b6 When the operator is starting a cluster, the operator will start one mon at a time and check that they are healthy before continuing to bring up all three mons. If the first mon is not detected healthy, the operator will continue to check until it is healthy. If the first mon fails to start, a second and then a third mon may attempt to start. However, they will never form quorum and the orchestration will be blocked from proceeding. The crash-collector pods will be blocked from starting until the mons have formed quorum the first time. There are several common causes for the mons failing to form quorum: The operator pod does not have network connectivity to the mon pod(s). The network may be configured incorrectly. One or more mon pods are in running state, but the operator log shows they are not able to form quorum A mon is using configuration from a previous installation. 
See the cleanup guide for cleaning the previous cluster. A firewall may be blocking the ports required for the Ceph mons to form quorum. Ensure ports 6789 and 3300 are enabled. See the Ceph networking guide for more details. There may be MTU mismatch between different networking components. Some networks may be more susceptible to mismatch than others. If Kubernetes CNI or hosts enable jumbo frames (MTU 9000), Ceph will use large packets to maximize network bandwidth. If other parts of the networking chain don't support jumbo frames, this could result in lost or rejected packets unexpectedly. Operator fails to connect to the mon \u00b6 First look at the logs of the operator to confirm if it is able to connect to the mons. 1 kubectl -n rook-ceph logs -l app=rook-ceph-operator Likely you will see an error similar to the following that the operator is timing out when connecting to the mon. The last command is ceph mon_status , followed by a timeout message five minutes later. 1 2 3 4 5 6 2018-01-21 21:47:32.375833 I | exec: Running command: ceph mon_status --cluster=rook --conf=/var/lib/rook/rook-ceph/rook.config --keyring=/var/lib/rook/rook-ceph/client.admin.keyring --format json --out-file /tmp/442263890 2018-01-21 21:52:35.370533 I | exec: 2018-01-21 21:52:35.071462 7f96a3b82700 0 monclient(hunting): authenticate timed out after 300 2018-01-21 21:52:35.071462 7f96a3b82700 0 monclient(hunting): authenticate timed out after 300 2018-01-21 21:52:35.071524 7f96a3b82700 0 librados: client.admin authentication error (110) Connection timed out 2018-01-21 21:52:35.071524 7f96a3b82700 0 librados: client.admin authentication error (110) Connection timed out [errno 110] error connecting to the cluster The error would appear to be an authentication error, but it is misleading. The real issue is a timeout. Solution \u00b6 If you see the timeout in the operator log, verify if the mon pod is running (see the next section). If the mon pod is running, check the network connectivity between the operator pod and the mon pod. A common issue is that the CNI is not configured correctly. To verify the network connectivity: Get the endpoint for a mon Curl the mon from the operator pod For example, this command will curl the first mon from the operator: 1 2 $ kubectl -n rook-ceph exec deploy/rook-ceph-operator -- curl $( kubectl -n rook-ceph get svc -l app = rook-ceph-mon -o jsonpath = '{.items[0].spec.clusterIP}' ) :3300 2 >/dev/null ceph v2 If \"ceph v2\" is printed to the console, the connection was successful. If the command does not respond or otherwise fails, the network connection cannot be established. Failing mon pod \u00b6 Second we need to verify if the mon pod started successfully. 1 2 3 $ kubectl -n rook-ceph get pod -l app = rook-ceph-mon NAME READY STATUS RESTARTS AGE rook-ceph-mon-a-69fb9c78cd-58szd 1/1 CrashLoopBackOff 2 47s If the mon pod is failing as in this example, you will need to look at the mon pod status or logs to determine the cause. If the pod is in a crash loop backoff state, you should see the reason by describing the pod. 1 2 3 4 5 6 7 8 # The pod shows a termination status that the keyring does not match the existing keyring $ kubectl -n rook-ceph describe pod -l mon = rook-ceph-mon0 ... Last State: Terminated Reason: Error Message: The keyring does not match the existing keyring in /var/lib/rook/rook-ceph-mon0/data/keyring. You may need to delete the contents of dataDirHostPath on the host from a previous deployment. ... 
See the solution in the next section regarding cleaning up the dataDirHostPath on the nodes. Solution \u00b6 This is a common problem reinitializing the Rook cluster when the local directory used for persistence has not been purged. This directory is the dataDirHostPath setting in the cluster CRD and is typically set to /var/lib/rook . To fix the issue you will need to delete all components of Rook and then delete the contents of /var/lib/rook (or the directory specified by dataDirHostPath ) on each of the hosts in the cluster. Then when the cluster CRD is applied to start a new cluster, the rook-operator should start all the pods as expected. Caution Deleting the dataDirHostPath folder is destructive to the storage. Only delete the folder if you are trying to permanently purge the Rook cluster. See the Cleanup Guide for more details. PVCs stay in pending state \u00b6 Symptoms \u00b6 When you create a PVC based on a rook storage class, it stays pending indefinitely For the Wordpress example, you might see two PVCs in pending state. 1 2 3 4 $ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE mysql-pv-claim Pending rook-ceph-block 8s wp-pv-claim Pending rook-ceph-block 16s Investigation \u00b6 There are two common causes for the PVCs staying in pending state: There are no OSDs in the cluster The CSI provisioner pod is not running or is not responding to the request to provision the storage Confirm if there are OSDs \u00b6 To confirm if you have OSDs in your cluster, connect to the Rook Toolbox and run the ceph status command. You should see that you have at least one OSD up and in . The minimum number of OSDs required depends on the replicated.size setting in the pool created for the storage class. In a \"test\" cluster, only one OSD is required (see storageclass-test.yaml ). In the production storage class example ( storageclass.yaml ), three OSDs would be required. 1 2 3 4 5 6 7 8 9 $ ceph status cluster: id: a0452c76-30d9-4c1a-a948-5d8405f19a7c health: HEALTH_OK services: mon: 3 daemons, quorum a,b,c (age 11m) mgr: a(active, since 10m) osd: 1 osds: 1 up (since 46s), 1 in (since 109m) OSD Prepare Logs \u00b6 If you don't see the expected number of OSDs, let's investigate why they weren't created. On each node where Rook looks for OSDs to configure, you will see an \"osd prepare\" pod. 1 2 3 $ kubectl -n rook-ceph get pod -l app = rook-ceph-osd-prepare NAME ... READY STATUS RESTARTS AGE rook-ceph-osd-prepare-minikube-9twvk 0/2 Completed 0 30m See the section on why OSDs are not getting created to investigate the logs. CSI Driver \u00b6 The CSI driver may not be responding to the requests. Look in the logs of the CSI provisioner pod to see if there are any errors during the provisioning. There are two provisioner pods: 1 kubectl -n rook-ceph get pod -l app=csi-rbdplugin-provisioner Get the logs of each of the pods. One of them should be the \"leader\" and be responding to requests. 1 kubectl -n rook-ceph logs csi-cephfsplugin-provisioner-d77bb49c6-q9hwq csi-provisioner See also the CSI Troubleshooting Guide . Operator unresponsiveness \u00b6 Lastly, if you have OSDs up and in , the next step is to confirm the operator is responding to the requests. Look in the Operator pod logs around the time when the PVC was created to confirm if the request is being raised. If the operator does not show requests to provision the block image, the operator may be stuck on some other operation. In this case, restart the operator pod to get things going again. 
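Restarting the operator is simply a matter of deleting its pod; the deployment recreates it automatically (assuming the default rook-ceph namespace):

```bash
# A new operator pod is started automatically when the current one is deleted
kubectl -n rook-ceph delete pod -l app=rook-ceph-operator
```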
Solution \u00b6 If the \"osd prepare\" logs didn't give you enough clues about why the OSDs were not being created, please review your cluster.yaml configuration. The common misconfigurations include: If useAllDevices: true , Rook expects to find local devices attached to the nodes. If no devices are found, no OSDs will be created. If useAllDevices: false , OSDs will only be created if deviceFilter is specified. Only local devices attached to the nodes will be configurable by Rook. In other words, the devices must show up under /dev . The devices must not have any partitions or filesystems on them. Rook will only configure raw devices. Partitions are not yet supported. OSD pods are failing to start \u00b6 Symptoms \u00b6 OSD pods are failing to start You have started a cluster after tearing down another cluster Investigation \u00b6 When an OSD starts, the device or directory will be configured for consumption. If there is an error with the configuration, the pod will crash and you will see the CrashLoopBackoff status for the pod. Look in the osd pod logs for an indication of the failure. 1 2 $ kubectl -n rook-ceph logs rook-ceph-osd-fl8fs ... One common case for failure is that you have re-deployed a test cluster and some state may remain from a previous deployment. If your cluster is larger than a few nodes, you may get lucky enough that the monitors were able to start and form quorum. However, now the OSDs pods may fail to start due to the old state. Looking at the OSD pod logs you will see an error about the file already existing. 1 2 3 4 5 6 7 $ kubectl -n rook-ceph logs rook-ceph-osd-fl8fs ... 2017-10-31 20:13:11.187106 I | mkfs-osd0: 2017-10-31 20:13:11.186992 7f0059d62e00 -1 bluestore(/var/lib/rook/osd0) _read_fsid unparsable uuid 2017-10-31 20:13:11.187208 I | mkfs-osd0: 2017-10-31 20:13:11.187026 7f0059d62e00 -1 bluestore(/var/lib/rook/osd0) _setup_block_symlink_or_file failed to create block symlink to /dev/disk/by-partuuid/651153ba-2dfc-4231-ba06-94759e5ba273: (17) File exists 2017-10-31 20:13:11.187233 I | mkfs-osd0: 2017-10-31 20:13:11.187038 7f0059d62e00 -1 bluestore(/var/lib/rook/osd0) mkfs failed, (17) File exists 2017-10-31 20:13:11.187254 I | mkfs-osd0: 2017-10-31 20:13:11.187042 7f0059d62e00 -1 OSD::mkfs: ObjectStore::mkfs failed with error (17) File exists 2017-10-31 20:13:11.187275 I | mkfs-osd0: 2017-10-31 20:13:11.187121 7f0059d62e00 -1 ** ERROR: error creating empty object store in /var/lib/rook/osd0: (17) File exists Solution \u00b6 If the error is from the file that already exists, this is a common problem reinitializing the Rook cluster when the local directory used for persistence has not been purged. This directory is the dataDirHostPath setting in the cluster CRD and is typically set to /var/lib/rook . To fix the issue you will need to delete all components of Rook and then delete the contents of /var/lib/rook (or the directory specified by dataDirHostPath ) on each of the hosts in the cluster. Then when the cluster CRD is applied to start a new cluster, the rook-operator should start all the pods as expected. OSD pods are not created on my devices \u00b6 Symptoms \u00b6 No OSD pods are started in the cluster Devices are not configured with OSDs even though specified in the Cluster CRD One OSD pod is started on each node instead of multiple pods for each device Investigation \u00b6 First, ensure that you have specified the devices correctly in the CRD. 
The Cluster CRD has several ways to specify the devices that are to be consumed by the Rook storage: useAllDevices: true : Rook will consume all devices it determines to be available deviceFilter : Consume all devices that match this regular expression devices : Explicit list of device names on each node to consume Second, if Rook determines that a device is not available (has existing partitions or a formatted filesystem), Rook will skip consuming the devices. If Rook is not starting OSDs on the devices you expect, Rook may have skipped it for this reason. To see if a device was skipped, view the OSD preparation log on the node where the device was skipped. Note that it is completely normal and expected for OSD prepare pod to be in the completed state. After the job is complete, Rook leaves the pod around in case the logs need to be investigated. 1 2 3 4 5 6 # Get the prepare pods in the cluster $ kubectl -n rook-ceph get pod -l app = rook-ceph-osd-prepare NAME READY STATUS RESTARTS AGE rook-ceph-osd-prepare-node1-fvmrp 0/1 Completed 0 18m rook-ceph-osd-prepare-node2-w9xv9 0/1 Completed 0 22m rook-ceph-osd-prepare-node3-7rgnv 0/1 Completed 0 22m 1 2 3 # view the logs for the node of interest in the \"provision\" container $ kubectl -n rook-ceph logs rook-ceph-osd-prepare-node1-fvmrp provision [...] Here are some key lines to look for in the log: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 # A device will be skipped if Rook sees it has partitions or a filesystem 2019-05-30 19:02:57.353171 W | cephosd: skipping device sda that is in use 2019-05-30 19:02:57.452168 W | skipping device \"sdb5\": [\"Used by ceph-disk\"] # Other messages about a disk being unusable by ceph include: Insufficient space (<5GB) on vgs Insufficient space (<5GB) LVM detected Has BlueStore device label locked read-only # A device is going to be configured 2019-05-30 19:02:57.535598 I | cephosd: device sdc to be configured by ceph-volume # For each device configured you will see a report printed to the log 2019-05-30 19:02:59.844642 I | Type Path LV Size % of device 2019-05-30 19:02:59.844651 I | ---------------------------------------------------------------------------------------------------- 2019-05-30 19:02:59.844677 I | [data] /dev/sdc 7.00 GB 100% Solution \u00b6 Either update the CR with the correct settings, or clean the partitions or filesystem from your devices. To clean devices from a previous install see the cleanup guide . After the settings are updated or the devices are cleaned, trigger the operator to analyze the devices again by restarting the operator. Each time the operator starts, it will ensure all the desired devices are configured. The operator does automatically deploy OSDs in most scenarios, but an operator restart will cover any scenarios that the operator doesn't detect automatically. 1 2 3 # Restart the operator to ensure devices are configured. A new pod will automatically be started when the current operator pod is deleted. $ kubectl -n rook-ceph delete pod -l app = rook-ceph-operator [...] Node hangs after reboot \u00b6 This issue is fixed in Rook v1.3 or later. Symptoms \u00b6 After issuing a reboot command, node never returned online Only a power cycle helps Investigation \u00b6 On a node running a pod with a Ceph persistent volume 1 2 3 4 mount | grep rbd # _netdev mount option is absent, also occurs for cephfs # OS is not aware PV is mounted over network /dev/rbdx on ... 
(rw,relatime, ..., noquota) When the reboot command is issued, network interfaces are terminated before disks are unmounted. This results in the node hanging as repeated attempts to unmount Ceph persistent volumes fail with the following error: 1 libceph: connect [monitor-ip]:6789 error -101 Solution \u00b6 The node needs to be drained before reboot. After the successful drain, the node can be rebooted as usual. Because the kubectl drain command automatically marks the node as unschedulable (the kubectl cordon effect), the node needs to be uncordoned once it's back online. Drain the node: 1 kubectl drain  --ignore-daemonsets --delete-local-data Uncordon the node: 1 kubectl uncordon  Using multiple shared filesystems (CephFS) is attempted on a kernel version older than 4.7 \u00b6 Symptoms \u00b6 More than one shared filesystem (CephFS) has been created in the cluster A pod attempts to mount any other shared filesystem besides the first one that was created The pod incorrectly gets the first filesystem mounted instead of the intended filesystem Solution \u00b6 The only solution to this problem is to upgrade your kernel to 4.7 or higher. This is due to a mount flag added in kernel version 4.7 which allows choosing the filesystem by name. For additional info on the kernel version requirement for multiple shared filesystems (CephFS), see Filesystem - Kernel version requirement . Set debug log level for all Ceph daemons \u00b6 You can set a given log level and apply it to all the Ceph daemons at the same time. For this, make sure the toolbox pod is running, then determine the level you want (between 0 and 20). You can find the list of all subsystems and their default values in the official Ceph logging and debug guide . Be careful when increasing the level as it will produce very verbose logs. Assuming you want a log level of 1, you will run: 1 2 3 4 $ kubectl -n rook-ceph exec deploy/rook-ceph-tools -- set-ceph-debug-level 1 ceph config set global debug_context 1 ceph config set global debug_lockdep 1 [...] Once you are done debugging, you can revert all the debug flags to their default values by running the following: 1 kubectl -n rook-ceph exec deploy/rook-ceph-tools -- set-ceph-debug-level default Activate log to file for a particular Ceph daemon \u00b6 There are cases where looking at the Kubernetes logs is not enough, for various reasons; to name a few: not everyone is familiar with Kubernetes logging and may expect to find logs in traditional directories logs may be lost (buffer limit from the log engine) and thus are no longer retrievable from Kubernetes So for each daemon, dataDirHostPath is used to store logs, if logging is activated. Rook will bindmount dataDirHostPath for every pod. Let's say you want to enable logging for mon.a , but only for this daemon. Using the toolbox, or from inside the operator, run: 1 ceph config set mon.a log_to_file true This will activate logging to the filesystem; you will be able to find the logs in dataDirHostPath/$NAMESPACE/log , so typically this would mean /var/lib/rook/rook-ceph/log . You don't need to restart the pod; the effect is immediate. To disable logging to file, simply set log_to_file to false . A worker node using RBD devices hangs up \u00b6 Symptoms \u00b6 There is no progress on I/O from/to one of the RBD devices ( /dev/rbd* or /dev/nbd* ). After that, the whole worker node hangs up. Investigation \u00b6 This happens when the following conditions are satisfied: The problematic RBD device and the corresponding OSDs are co-located. 
There is an XFS filesystem on top of this device. In addition, when this problem happens, you can see the following messages in dmesg . 1 2 3 4 5 6 $ dmesg ... [51717.039319] INFO: task kworker/2:1:5938 blocked for more than 120 seconds. [51717.039361] Not tainted 4.15.0-72-generic #81-Ubuntu [51717.039388] \"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\" disables this message. ... This is the so-called hung_task problem, which means there is a deadlock in the kernel. For more detail, please refer to the corresponding issue comment . Solution \u00b6 This problem will be solved by the following two fixes. Linux kernel: A minor feature that is introduced by this commit . It will be included in Linux v5.6. Ceph: A fix that uses the above-mentioned kernel feature. The Ceph community will probably discuss this fix after releasing Linux v5.6. You can bypass this problem by using ext4 or any other filesystem rather than XFS. The filesystem type can be specified with csi.storage.k8s.io/fstype in the StorageClass resource. Too few PGs per OSD warning is shown \u00b6 Symptoms \u00b6 ceph status shows a \"too few PGs per OSD\" warning as follows. 1 2 3 4 5 6 $ ceph status cluster: id: fd06d7c3-5c5c-45ca-bdea-1cf26b783065 health: HEALTH_WARN too few PGs per OSD (16 < min 30) [...] Solution \u00b6 The meaning of this warning is explained in the document . However, in many cases it is benign. For more information, please see the blog entry . Please refer to Configuring Pools if you want to determine the proper pg_num for your pools and change these values. LVM metadata can be corrupted with OSD on LV-backed PVC \u00b6 Symptoms \u00b6 There is a critical flaw in OSD on LV-backed PVC. LVM metadata can be corrupted if both the host and OSD container modify it simultaneously. For example, the administrator might modify it on the host, while the OSD initialization process in a container could modify it too. In addition, if lvmetad is running, the chance of occurrence is higher. In this case, a change to the LVM metadata made in the OSD container is not reflected in the host's LVM metadata cache for a while. If you still decide to configure an OSD on LVM, please keep the following in mind to reduce the probability of this issue. Solution \u00b6 Disable lvmetad. Avoid configuring LVs from the host. In addition, don't touch the VGs and physical volumes that back these LVs. Avoid incrementing the count field of storageClassDeviceSets and creating a new LV that backs an OSD at the same time. You can check whether the LV tags exist with the command: sudo lvs -o lv_name,lv_tags . If the lv_tags field is empty for an LV that backs an OSD, that OSD has encountered the problem. In this case, please retire this OSD or replace it with a new OSD before restarting. This problem doesn't happen in newly created LV-backed PVCs because the OSD container no longer modifies the LVM metadata. The existing lvm mode OSDs continue to work even after you upgrade Rook. However, using raw mode OSDs is recommended because of the above-mentioned problem. You can replace the existing OSDs with raw mode OSDs by retiring them and adding new OSDs one by one. See the documents Remove an OSD and Add an OSD on a PVC . 
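For reference, the LV tag check mentioned above can be run directly on the node that hosts the LVs (a minimal sketch):

```bash
# An empty lv_tags field for an LV that backs an OSD indicates the problem described above
sudo lvs -o lv_name,lv_tags
```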
OSD prepare job fails due to low aio-max-nr setting \u00b6 If the Kernel is configured with a low aio-max-nr setting , the OSD prepare job might fail with the following error: 1 exec: stderr: 2020-09-17T00:30:12.145+0000 7f0c17632f40 -1 bdev(0x56212de88700 /var/lib/ceph/osd/ceph-0//block) _aio_start io_setup(2) failed with EAGAIN; try increasing /proc/sys/fs/aio-max-nr To overcome this, you need to increase the value of fs.aio-max-nr of your sysctl configuration (typically /etc/sysctl.conf ). You can do this with your favorite configuration management system. Alternatively, you can have a DaemonSet to apply the configuration for you on all your nodes. Unexpected partitions created \u00b6 Symptoms \u00b6 Users running Rook versions v1.6.0-v1.6.7 may observe unwanted OSDs on partitions that appear unexpectedly and seemingly randomly, which can corrupt existing OSDs. Unexpected partitions are created on host disks that are used by Ceph OSDs. This happens more often on SSDs than HDDs and usually only on disks that are 875GB or larger. Many tools like lsblk , blkid , udevadm , and parted will not show a partition table type for the partition. Newer versions of blkid are generally able to recognize the type as \"atari\". The underlying issue causing this is Atari partition (sometimes identified as AHDI) support in the Linux kernel. Atari partitions have very relaxed specifications compared to other partition types, and it is relatively easy for random data written to a disk to appear as an Atari partition to the Linux kernel. Ceph's Bluestore OSDs have an anecdotally high probability of writing data on to disks that can appear to the kernel as an Atari partition. Below is an example of lsblk output from a node where phantom Atari partitions are present. Note that sdX1 is never present for the phantom partitions, and sdX2 is 48G on all disks. sdX3 is a variable size and may not always be present. It is possible for sdX4 to appear, though it is an anecdotally rare event. 1 2 3 4 5 6 7 8 9 10 11 # lsblk NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT sdb 8:16 0 3T 0 disk \u251c\u2500sdb2 8:18 0 48G 0 part \u2514\u2500sdb3 8:19 0 6.1M 0 part sdc 8:32 0 3T 0 disk \u251c\u2500sdc2 8:34 0 48G 0 part \u2514\u2500sdc3 8:35 0 6.2M 0 part sdd 8:48 0 3T 0 disk \u251c\u2500sdd2 8:50 0 48G 0 part \u2514\u2500sdd3 8:51 0 6.3M 0 part You can see GitHub rook/rook - Issue 7940 unexpected partition on disks >= 1TB (atari partitions) for more detailed information and discussion. Solution \u00b6 Recover from corruption (v1.6.0-v1.6.7) \u00b6 If you are using Rook v1.6, you must first update to v1.6.8 or higher to avoid further incidents of OSD corruption caused by these Atari partitions. An old workaround suggested using deviceFilter: ^sd[a-z]+$ , but this still results in unexpected partitions. Rook will merely stop creating new OSDs on the partitions. It does not fix a related issue that ceph-volume that is unaware of the Atari partition problem. Users who used this workaround are still at risk for OSD failures in the future. To resolve the issue, immediately update to v1.6.8 or higher. After the update, no corruption should occur on OSDs created in the future. Next, to get back to a healthy Ceph cluster state, focus on one corrupted disk at a time and remove all OSDs on each corrupted disk one disk at a time. As an example, you may have /dev/sdb with two unexpected partitions ( /dev/sdb2 and /dev/sdb3 ) as well as a second corrupted disk /dev/sde with one unexpected partition ( /dev/sde2 ). 
First, remove the OSDs associated with /dev/sdb , /dev/sdb2 , and /dev/sdb3 . There might be only one, or up to 3 OSDs depending on how your system was affected. Again see the OSD management doc . Use dd to wipe the first sectors of the partitions followed by the disk itself. E.g., dd if=/dev/zero of=/dev/sdb2 bs=1M dd if=/dev/zero of=/dev/sdb3 bs=1M dd if=/dev/zero of=/dev/sdb bs=1M Then wipe clean /dev/sdb to prepare it for a new OSD. See the teardown document for details. After this, scale up the Rook operator to deploy a new OSD to /dev/sdb . This will allow Ceph to use /dev/sdb for data recovery and replication while the next OSDs are removed. Now Repeat steps 1-4 for /dev/sde and /dev/sde2 , and continue for any other corrupted disks. If your Rook cluster does not have any critical data stored in it, it may be simpler to uninstall Rook completely and redeploy with v1.6.8 or higher. Operator environment variables are ignored \u00b6 Symptoms \u00b6 Configuration settings passed as environment variables do not take effect as expected. For example, the discover daemonset is not created, even though ROOK_ENABLE_DISCOVERY_DAEMON=\"true\" is set. Investigation \u00b6 Inspect the rook-ceph-operator-config ConfigMap for conflicting settings. The ConfigMap takes precedence over the environment. The ConfigMap must exist , even if all actual configuration is supplied through the environment. Look for lines with the op-k8sutil prefix in the operator logs. These lines detail the final values, and source, of the different configuration variables. Verify that both of the following messages are present in the operator logs: 1 2 rook-ceph-operator-config-controller successfully started rook-ceph-operator-config-controller done reconciling Solution \u00b6 If it does not exist, create an empty ConfigMap: 1 2 3 4 5 6 kind : ConfigMap apiVersion : v1 metadata : name : rook-ceph-operator-config namespace : rook-ceph # namespace:operator data : {} If the ConfigMap exists, remove any keys that you wish to configure through the environment.","title":"Ceph Common Issues"},{"location":"Troubleshooting/ceph-common-issues/#troubleshooting-techniques","text":"There are two main categories of information you will need to investigate issues in the cluster: Kubernetes status and logs documented here Ceph cluster status (see upcoming Ceph tools section)","title":"Troubleshooting Techniques"},{"location":"Troubleshooting/ceph-common-issues/#ceph-tools","text":"After you verify the basic health of the running pods, next you will want to run Ceph tools for status of the storage components. There are two ways to run the Ceph tools, either in the Rook toolbox or inside other Rook pods that are already running. Logs on a specific node to find why a PVC is failing to mount See the log collection topic for a script that will help you gather the logs Other artifacts: The monitors that are expected to be in quorum: kubectl -n  get configmap rook-ceph-mon-endpoints -o yaml | grep data","title":"Ceph Tools"},{"location":"Troubleshooting/ceph-common-issues/#tools-in-the-rook-toolbox","text":"The rook-ceph-tools pod provides a simple environment to run Ceph tools. Once the pod is up and running, connect to the pod to execute Ceph commands to evaluate that current state of the cluster. 
1 kubectl -n rook-ceph exec -it $(kubectl -n rook-ceph get pod -l \"app=rook-ceph-tools\" -o jsonpath='{.items[*].metadata.name}') bash","title":"Tools in the Rook Toolbox"},{"location":"Troubleshooting/ceph-common-issues/#ceph-commands","text":"Here are some common commands to troubleshoot a Ceph cluster: ceph status ceph osd status ceph osd df ceph osd utilization ceph osd pool stats ceph osd tree ceph pg stat The first two status commands provide the overall cluster health. The normal state for cluster operations is HEALTH_OK, but the cluster will still function when it is in a HEALTH_WARN state. If you are in a WARN state, then the cluster is in a condition where it may enter the HEALTH_ERROR state, at which point all disk I/O operations are halted. If a HEALTH_WARN state is observed, then one should take action to prevent the cluster from halting when it enters the HEALTH_ERROR state. There are many Ceph sub-commands to look at and manipulate Ceph objects, well beyond the scope of this document. See the Ceph documentation for more details on gathering information about the health of the cluster. In addition, there are other helpful hints and some best practices located in the Advanced Configuration section . Of particular note, there are scripts for collecting logs and gathering OSD information there.","title":"Ceph Commands"},{"location":"Troubleshooting/ceph-common-issues/#cluster-failing-to-service-requests","text":"","title":"Cluster failing to service requests"},{"location":"Troubleshooting/ceph-common-issues/#symptoms","text":"Execution of the ceph command hangs PersistentVolumes are not being created A large number of slow requests are blocking A large number of stuck requests are blocking One or more MONs are restarting periodically","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#investigation","text":"Create a rook-ceph-tools pod to investigate the current state of Ceph. Here is an example of what one might see. In this case the ceph status command would just hang, so a CTRL-C needed to be sent. 1 2 3 4 kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- ceph status ceph status ^CCluster connection interrupted or timed out Another indication is when one or more of the MON pods restart frequently. Note the 'mon107' that has only been up for 16 minutes in the following output. 1 2 3 4 5 6 7 8 $ kubectl -n rook-ceph get all -o wide --show-all NAME READY STATUS RESTARTS AGE IP NODE po/rook-ceph-mgr0-2487684371-gzlbq 1/1 Running 0 17h 192.168.224.46 k8-host-0402 po/rook-ceph-mon107-p74rj 1/1 Running 0 16m 192.168.224.28 k8-host-0402 rook-ceph-mon1-56fgm 1/1 Running 0 2d 192.168.91.135 k8-host-0404 rook-ceph-mon2-rlxcd 1/1 Running 0 2d 192.168.123.33 k8-host-0403 rook-ceph-osd-bg2vj 1/1 Running 0 2d 192.168.91.177 k8-host-0404 rook-ceph-osd-mwxdm 1/1 Running 0 2d 192.168.123.31 k8-host-0403","title":"Investigation"},{"location":"Troubleshooting/ceph-common-issues/#solution","text":"What is happening here is that the MON pods are restarting and one or more of the Ceph daemons are not getting configured with the proper cluster information. This is commonly the result of not specifying a value for dataDirHostPath in your Cluster CRD. The dataDirHostPath setting specifies a path on the local host for the Ceph daemons to store configuration and data. Setting this to a path like /var/lib/rook , reapplying your Cluster CRD, and restarting all the Ceph daemons (MON, MGR, OSD, RGW) should solve this problem.
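For reference, a minimal sketch of where this setting lives in the CephCluster CR (only the relevant fields are shown; keep the rest of your existing spec unchanged):
apiVersion: ceph.rook.io/v1
kind: CephCluster
metadata:
  name: rook-ceph
  namespace: rook-ceph
spec:
  # Path on each host where the Ceph daemons persist configuration and data
  dataDirHostPath: /var/lib/rook
  # ... the rest of your existing cluster spec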
After the Ceph daemons have been restarted, it is advisable to restart the rook-tools pod .","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#monitors-are-the-only-pods-running","text":"","title":"Monitors are the only pods running"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_1","text":"Rook operator is running Either a single mon starts or the mons start very slowly (at least several minutes apart) The crash-collector pods are crashing No mgr, osd, or other daemons are created except the CSI driver","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#investigation_1","text":"When the operator is starting a cluster, the operator will start one mon at a time and check that they are healthy before continuing to bring up all three mons. If the first mon is not detected healthy, the operator will continue to check until it is healthy. If the first mon fails to start, a second and then a third mon may attempt to start. However, they will never form quorum and the orchestration will be blocked from proceeding. The crash-collector pods will be blocked from starting until the mons have formed quorum the first time. There are several common causes for the mons failing to form quorum: The operator pod does not have network connectivity to the mon pod(s). The network may be configured incorrectly. One or more mon pods are in running state, but the operator log shows they are not able to form quorum A mon is using configuration from a previous installation. See the cleanup guide for cleaning the previous cluster. A firewall may be blocking the ports required for the Ceph mons to form quorum. Ensure ports 6789 and 3300 are enabled. See the Ceph networking guide for more details. There may be MTU mismatch between different networking components. Some networks may be more susceptible to mismatch than others. If Kubernetes CNI or hosts enable jumbo frames (MTU 9000), Ceph will use large packets to maximize network bandwidth. If other parts of the networking chain don't support jumbo frames, this could result in lost or rejected packets unexpectedly.","title":"Investigation"},{"location":"Troubleshooting/ceph-common-issues/#operator-fails-to-connect-to-the-mon","text":"First look at the logs of the operator to confirm if it is able to connect to the mons. 1 kubectl -n rook-ceph logs -l app=rook-ceph-operator Likely you will see an error similar to the following that the operator is timing out when connecting to the mon. The last command is ceph mon_status , followed by a timeout message five minutes later. 1 2 3 4 5 6 2018-01-21 21:47:32.375833 I | exec: Running command: ceph mon_status --cluster=rook --conf=/var/lib/rook/rook-ceph/rook.config --keyring=/var/lib/rook/rook-ceph/client.admin.keyring --format json --out-file /tmp/442263890 2018-01-21 21:52:35.370533 I | exec: 2018-01-21 21:52:35.071462 7f96a3b82700 0 monclient(hunting): authenticate timed out after 300 2018-01-21 21:52:35.071462 7f96a3b82700 0 monclient(hunting): authenticate timed out after 300 2018-01-21 21:52:35.071524 7f96a3b82700 0 librados: client.admin authentication error (110) Connection timed out 2018-01-21 21:52:35.071524 7f96a3b82700 0 librados: client.admin authentication error (110) Connection timed out [errno 110] error connecting to the cluster The error would appear to be an authentication error, but it is misleading. 
The real issue is a timeout.","title":"Operator fails to connect to the mon"},{"location":"Troubleshooting/ceph-common-issues/#solution_1","text":"If you see the timeout in the operator log, verify if the mon pod is running (see the next section). If the mon pod is running, check the network connectivity between the operator pod and the mon pod. A common issue is that the CNI is not configured correctly. To verify the network connectivity: Get the endpoint for a mon Curl the mon from the operator pod For example, this command will curl the first mon from the operator: 1 2 $ kubectl -n rook-ceph exec deploy/rook-ceph-operator -- curl $( kubectl -n rook-ceph get svc -l app = rook-ceph-mon -o jsonpath = '{.items[0].spec.clusterIP}' ) :3300 2 >/dev/null ceph v2 If \"ceph v2\" is printed to the console, the connection was successful. If the command does not respond or otherwise fails, the network connection cannot be established.","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#failing-mon-pod","text":"Second we need to verify if the mon pod started successfully. 1 2 3 $ kubectl -n rook-ceph get pod -l app = rook-ceph-mon NAME READY STATUS RESTARTS AGE rook-ceph-mon-a-69fb9c78cd-58szd 1/1 CrashLoopBackOff 2 47s If the mon pod is failing as in this example, you will need to look at the mon pod status or logs to determine the cause. If the pod is in a crash loop backoff state, you should see the reason by describing the pod. 1 2 3 4 5 6 7 8 # The pod shows a termination status that the keyring does not match the existing keyring $ kubectl -n rook-ceph describe pod -l mon = rook-ceph-mon0 ... Last State: Terminated Reason: Error Message: The keyring does not match the existing keyring in /var/lib/rook/rook-ceph-mon0/data/keyring. You may need to delete the contents of dataDirHostPath on the host from a previous deployment. ... See the solution in the next section regarding cleaning up the dataDirHostPath on the nodes.","title":"Failing mon pod"},{"location":"Troubleshooting/ceph-common-issues/#solution_2","text":"This is a common problem reinitializing the Rook cluster when the local directory used for persistence has not been purged. This directory is the dataDirHostPath setting in the cluster CRD and is typically set to /var/lib/rook . To fix the issue you will need to delete all components of Rook and then delete the contents of /var/lib/rook (or the directory specified by dataDirHostPath ) on each of the hosts in the cluster. Then when the cluster CRD is applied to start a new cluster, the rook-operator should start all the pods as expected. Caution Deleting the dataDirHostPath folder is destructive to the storage. Only delete the folder if you are trying to permanently purge the Rook cluster. See the Cleanup Guide for more details.","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#pvcs-stay-in-pending-state","text":"","title":"PVCs stay in pending state"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_2","text":"When you create a PVC based on a rook storage class, it stays pending indefinitely For the Wordpress example, you might see two PVCs in pending state. 
1 2 3 4 $ kubectl get pvc NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE mysql-pv-claim Pending rook-ceph-block 8s wp-pv-claim Pending rook-ceph-block 16s","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#investigation_2","text":"There are two common causes for the PVCs staying in pending state: There are no OSDs in the cluster The CSI provisioner pod is not running or is not responding to the request to provision the storage","title":"Investigation"},{"location":"Troubleshooting/ceph-common-issues/#confirm-if-there-are-osds","text":"To confirm if you have OSDs in your cluster, connect to the Rook Toolbox and run the ceph status command. You should see that you have at least one OSD up and in . The minimum number of OSDs required depends on the replicated.size setting in the pool created for the storage class. In a \"test\" cluster, only one OSD is required (see storageclass-test.yaml ). In the production storage class example ( storageclass.yaml ), three OSDs would be required. 1 2 3 4 5 6 7 8 9 $ ceph status cluster: id: a0452c76-30d9-4c1a-a948-5d8405f19a7c health: HEALTH_OK services: mon: 3 daemons, quorum a,b,c (age 11m) mgr: a(active, since 10m) osd: 1 osds: 1 up (since 46s), 1 in (since 109m)","title":"Confirm if there are OSDs"},{"location":"Troubleshooting/ceph-common-issues/#osd-prepare-logs","text":"If you don't see the expected number of OSDs, let's investigate why they weren't created. On each node where Rook looks for OSDs to configure, you will see an \"osd prepare\" pod. 1 2 3 $ kubectl -n rook-ceph get pod -l app = rook-ceph-osd-prepare NAME ... READY STATUS RESTARTS AGE rook-ceph-osd-prepare-minikube-9twvk 0/2 Completed 0 30m See the section on why OSDs are not getting created to investigate the logs.","title":"OSD Prepare Logs"},{"location":"Troubleshooting/ceph-common-issues/#csi-driver","text":"The CSI driver may not be responding to the requests. Look in the logs of the CSI provisioner pod to see if there are any errors during the provisioning. There are two provisioner pods: 1 kubectl -n rook-ceph get pod -l app=csi-rbdplugin-provisioner Get the logs of each of the pods. One of them should be the \"leader\" and be responding to requests. 1 kubectl -n rook-ceph logs csi-cephfsplugin-provisioner-d77bb49c6-q9hwq csi-provisioner See also the CSI Troubleshooting Guide .","title":"CSI Driver"},{"location":"Troubleshooting/ceph-common-issues/#operator-unresponsiveness","text":"Lastly, if you have OSDs up and in , the next step is to confirm the operator is responding to the requests. Look in the Operator pod logs around the time when the PVC was created to confirm if the request is being raised. If the operator does not show requests to provision the block image, the operator may be stuck on some other operation. In this case, restart the operator pod to get things going again.","title":"Operator unresponsiveness"},{"location":"Troubleshooting/ceph-common-issues/#solution_3","text":"If the \"osd prepare\" logs didn't give you enough clues about why the OSDs were not being created, please review your cluster.yaml configuration. The common misconfigurations include: If useAllDevices: true , Rook expects to find local devices attached to the nodes. If no devices are found, no OSDs will be created. If useAllDevices: false , OSDs will only be created if deviceFilter is specified. Only local devices attached to the nodes will be configurable by Rook. In other words, the devices must show up under /dev . 
The devices must not have any partitions or filesystems on them. Rook will only configure raw devices. Partitions are not yet supported.","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#osd-pods-are-failing-to-start","text":"","title":"OSD pods are failing to start"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_3","text":"OSD pods are failing to start You have started a cluster after tearing down another cluster","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#investigation_3","text":"When an OSD starts, the device or directory will be configured for consumption. If there is an error with the configuration, the pod will crash and you will see the CrashLoopBackoff status for the pod. Look in the osd pod logs for an indication of the failure. 1 2 $ kubectl -n rook-ceph logs rook-ceph-osd-fl8fs ... One common case for failure is that you have re-deployed a test cluster and some state may remain from a previous deployment. If your cluster is larger than a few nodes, you may get lucky enough that the monitors were able to start and form quorum. However, now the OSDs pods may fail to start due to the old state. Looking at the OSD pod logs you will see an error about the file already existing. 1 2 3 4 5 6 7 $ kubectl -n rook-ceph logs rook-ceph-osd-fl8fs ... 2017-10-31 20:13:11.187106 I | mkfs-osd0: 2017-10-31 20:13:11.186992 7f0059d62e00 -1 bluestore(/var/lib/rook/osd0) _read_fsid unparsable uuid 2017-10-31 20:13:11.187208 I | mkfs-osd0: 2017-10-31 20:13:11.187026 7f0059d62e00 -1 bluestore(/var/lib/rook/osd0) _setup_block_symlink_or_file failed to create block symlink to /dev/disk/by-partuuid/651153ba-2dfc-4231-ba06-94759e5ba273: (17) File exists 2017-10-31 20:13:11.187233 I | mkfs-osd0: 2017-10-31 20:13:11.187038 7f0059d62e00 -1 bluestore(/var/lib/rook/osd0) mkfs failed, (17) File exists 2017-10-31 20:13:11.187254 I | mkfs-osd0: 2017-10-31 20:13:11.187042 7f0059d62e00 -1 OSD::mkfs: ObjectStore::mkfs failed with error (17) File exists 2017-10-31 20:13:11.187275 I | mkfs-osd0: 2017-10-31 20:13:11.187121 7f0059d62e00 -1 ** ERROR: error creating empty object store in /var/lib/rook/osd0: (17) File exists","title":"Investigation"},{"location":"Troubleshooting/ceph-common-issues/#solution_4","text":"If the error is from the file that already exists, this is a common problem reinitializing the Rook cluster when the local directory used for persistence has not been purged. This directory is the dataDirHostPath setting in the cluster CRD and is typically set to /var/lib/rook . To fix the issue you will need to delete all components of Rook and then delete the contents of /var/lib/rook (or the directory specified by dataDirHostPath ) on each of the hosts in the cluster. Then when the cluster CRD is applied to start a new cluster, the rook-operator should start all the pods as expected.","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#osd-pods-are-not-created-on-my-devices","text":"","title":"OSD pods are not created on my devices"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_4","text":"No OSD pods are started in the cluster Devices are not configured with OSDs even though specified in the Cluster CRD One OSD pod is started on each node instead of multiple pods for each device","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#investigation_4","text":"First, ensure that you have specified the devices correctly in the CRD. 
The Cluster CRD has several ways to specify the devices that are to be consumed by the Rook storage: useAllDevices: true : Rook will consume all devices it determines to be available deviceFilter : Consume all devices that match this regular expression devices : Explicit list of device names on each node to consume Second, if Rook determines that a device is not available (has existing partitions or a formatted filesystem), Rook will skip consuming the devices. If Rook is not starting OSDs on the devices you expect, Rook may have skipped it for this reason. To see if a device was skipped, view the OSD preparation log on the node where the device was skipped. Note that it is completely normal and expected for OSD prepare pod to be in the completed state. After the job is complete, Rook leaves the pod around in case the logs need to be investigated. 1 2 3 4 5 6 # Get the prepare pods in the cluster $ kubectl -n rook-ceph get pod -l app = rook-ceph-osd-prepare NAME READY STATUS RESTARTS AGE rook-ceph-osd-prepare-node1-fvmrp 0/1 Completed 0 18m rook-ceph-osd-prepare-node2-w9xv9 0/1 Completed 0 22m rook-ceph-osd-prepare-node3-7rgnv 0/1 Completed 0 22m 1 2 3 # view the logs for the node of interest in the \"provision\" container $ kubectl -n rook-ceph logs rook-ceph-osd-prepare-node1-fvmrp provision [...] Here are some key lines to look for in the log: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 # A device will be skipped if Rook sees it has partitions or a filesystem 2019-05-30 19:02:57.353171 W | cephosd: skipping device sda that is in use 2019-05-30 19:02:57.452168 W | skipping device \"sdb5\": [\"Used by ceph-disk\"] # Other messages about a disk being unusable by ceph include: Insufficient space (<5GB) on vgs Insufficient space (<5GB) LVM detected Has BlueStore device label locked read-only # A device is going to be configured 2019-05-30 19:02:57.535598 I | cephosd: device sdc to be configured by ceph-volume # For each device configured you will see a report printed to the log 2019-05-30 19:02:59.844642 I | Type Path LV Size % of device 2019-05-30 19:02:59.844651 I | ---------------------------------------------------------------------------------------------------- 2019-05-30 19:02:59.844677 I | [data] /dev/sdc 7.00 GB 100%","title":"Investigation"},{"location":"Troubleshooting/ceph-common-issues/#solution_5","text":"Either update the CR with the correct settings, or clean the partitions or filesystem from your devices. To clean devices from a previous install see the cleanup guide . After the settings are updated or the devices are cleaned, trigger the operator to analyze the devices again by restarting the operator. Each time the operator starts, it will ensure all the desired devices are configured. The operator does automatically deploy OSDs in most scenarios, but an operator restart will cover any scenarios that the operator doesn't detect automatically. 1 2 3 # Restart the operator to ensure devices are configured. A new pod will automatically be started when the current operator pod is deleted. 
$ kubectl -n rook-ceph delete pod -l app = rook-ceph-operator [...]","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#node-hangs-after-reboot","text":"This issue is fixed in Rook v1.3 or later.","title":"Node hangs after reboot"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_5","text":"After issuing a reboot command, node never returned online Only a power cycle helps","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#investigation_5","text":"On a node running a pod with a Ceph persistent volume 1 2 3 4 mount | grep rbd # _netdev mount option is absent, also occurs for cephfs # OS is not aware PV is mounted over network /dev/rbdx on ... (rw,relatime, ..., noquota) When the reboot command is issued, network interfaces are terminated before disks are unmounted. This results in the node hanging as repeated attempts to unmount Ceph persistent volumes fail with the following error: 1 libceph: connect [monitor-ip]:6789 error -101","title":"Investigation"},{"location":"Troubleshooting/ceph-common-issues/#solution_6","text":"The node needs to be drained before reboot. After the successful drain, the node can be rebooted as usual. Because kubectl drain command automatically marks the node as unschedulable ( kubectl cordon effect), the node needs to be uncordoned once it's back online. Drain the node: 1 kubectl drain  --ignore-daemonsets --delete-local-data Uncordon the node: 1 kubectl uncordon ","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#using-multiple-shared-filesystem-cephfs-is-attempted-on-a-kernel-version-older-than-47","text":"","title":"Using multiple shared filesystem (CephFS) is attempted on a kernel version older than 4.7"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_6","text":"More than one shared filesystem (CephFS) has been created in the cluster A pod attempts to mount any other shared filesystem besides the first one that was created The pod incorrectly gets the first filesystem mounted instead of the intended filesystem","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#solution_7","text":"The only solution to this problem is to upgrade your kernel to 4.7 or higher. This is due to a mount flag added in the kernel version 4.7 which allows to chose the filesystem by name. For additional info on the kernel version requirement for multiple shared filesystems (CephFS), see Filesystem - Kernel version requirement .","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#set-debug-log-level-for-all-ceph-daemons","text":"You can set a given log level and apply it to all the Ceph daemons at the same time. For this, make sure the toolbox pod is running, then determine the level you want (between 0 and 20). You can find the list of all subsystems and their default values in Ceph logging and debug official guide . Be careful when increasing the level as it will produce very verbose logs. Assuming you want a log level of 1, you will run: 1 2 3 4 $ kubectl -n rook-ceph exec deploy/rook-ceph-tools -- set-ceph-debug-level 1 ceph config set global debug_context 1 ceph config set global debug_lockdep 1 [...] 
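To confirm the new level took effect, one option is to dump the centrally stored configuration from the toolbox (a sketch; ceph config dump lists every option that has been set in the monitors' configuration database):
kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph config dump | grep debug_
Each debug_* subsystem should now show the level you chose.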
Once you are done debugging, you can revert all the debug flags to their default values by running the following: 1 kubectl -n rook-ceph exec deploy/rook-ceph-tools -- set-ceph-debug-level default","title":"Set debug log level for all Ceph daemons"},{"location":"Troubleshooting/ceph-common-issues/#activate-log-to-file-for-a-particular-ceph-daemon","text":"There are cases where looking at Kubernetes logs is not enough, for various reasons. Just to name a few: not everyone is familiar with Kubernetes logging and expects to find logs in traditional directories logs get eaten (buffer limit from the log engine) and thus cannot be retrieved from Kubernetes So for each daemon, dataDirHostPath is used to store logs, if logging is activated. Rook will bindmount dataDirHostPath for every pod. Let's say you want to enable logging for mon.a , but only for this daemon. Using the toolbox or from inside the operator, run: 1 ceph config set mon.a log_to_file true This will activate logging on the filesystem; you will be able to find logs in dataDirHostPath/$NAMESPACE/log , so typically this would mean /var/lib/rook/rook-ceph/log . You don't need to restart the pod; the effect will be immediate. To disable logging to file, simply set log_to_file to false .","title":"Activate log to file for a particular Ceph daemon"},{"location":"Troubleshooting/ceph-common-issues/#a-worker-node-using-rbd-devices-hangs-up","text":"","title":"A worker node using RBD devices hangs up"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_7","text":"There is no progress on I/O from/to one of the RBD devices ( /dev/rbd* or /dev/nbd* ). After that, the whole worker node hangs up.","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#investigation_6","text":"This happens when the following conditions are satisfied: The problematic RBD device and the corresponding OSDs are co-located. There is an XFS filesystem on top of this device. In addition, when this problem happens, you can see the following messages in dmesg . 1 2 3 4 5 6 $ dmesg ... [51717.039319] INFO: task kworker/2:1:5938 blocked for more than 120 seconds. [51717.039361] Not tainted 4.15.0-72-generic #81-Ubuntu [51717.039388] \"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\" disables this message. ... This is the so-called hung_task problem, which means that there is a deadlock in the kernel. For more detail, please refer to the corresponding issue comment .","title":"Investigation"},{"location":"Troubleshooting/ceph-common-issues/#solution_8","text":"This problem will be solved by the following two fixes. Linux kernel: A minor feature that is introduced by this commit . It will be included in Linux v5.6. Ceph: A fix that uses the above-mentioned kernel's feature. The Ceph community will probably discuss this fix after releasing Linux v5.6. You can bypass this problem by using ext4 or any other filesystem rather than XFS. The filesystem type can be specified with csi.storage.k8s.io/fstype in the StorageClass resource.","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#too-few-pgs-per-osd-warning-is-shown","text":"","title":"Too few PGs per OSD warning is shown"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_8","text":"ceph status shows a \"too few PGs per OSD\" warning as follows.
1 2 3 4 5 6 $ ceph status cluster: id: fd06d7c3-5c5c-45ca-bdea-1cf26b783065 health: HEALTH_WARN too few PGs per OSD (16 < min 30) [...]","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#solution_9","text":"The meaning of this warning is explained in the document . However, in many cases it is benign. For more information, please see the blog entry . Please refer to Configuring Pools if you want to know the proper pg_num of pools and change these values.","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#lvm-metadata-can-be-corrupted-with-osd-on-lv-backed-pvc","text":"","title":"LVM metadata can be corrupted with OSD on LV-backed PVC"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_9","text":"There is a critical flaw in OSD on LV-backed PVC. LVM metadata can be corrupted if both the host and OSD container modify it simultaneously. For example, the administrator might modify it on the host, while the OSD initialization process in a container could modify it too. In addition, if lvmetad is running, the possibility of occurrence gets higher. In this case, the change of LVM metadata in the OSD container is not reflected in the LVM metadata cache on the host for a while. If you still decide to configure an OSD on LVM, please keep the following in mind to reduce the probability of this issue.","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#solution_10","text":"Disable lvmetad. Avoid configuring LVs from the host. In addition, don't touch the VGs and physical volumes that back these LVs. Avoid incrementing the count field of storageClassDeviceSets and creating a new LV that backs an OSD at the same time. You can check whether the above-mentioned tag exists with the command: sudo lvs -o lv_name,lv_tags . If the lv_tags field is empty for an LV corresponding to an OSD, that OSD has encountered the problem. In this case, please retire this OSD or replace it with a new OSD before restarting. This problem doesn't happen in newly created LV-backed PVCs because the OSD container doesn't modify LVM metadata anymore. The existing lvm mode OSDs continue to work even after you upgrade Rook. However, using raw mode OSDs is recommended because of the above-mentioned problem. You can replace the existing OSDs with raw mode OSDs by retiring them and adding new OSDs one by one. See the documents Remove an OSD and Add an OSD on a PVC .","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#osd-prepare-job-fails-due-to-low-aio-max-nr-setting","text":"If the kernel is configured with a low aio-max-nr setting , the OSD prepare job might fail with the following error: 1 exec: stderr: 2020-09-17T00:30:12.145+0000 7f0c17632f40 -1 bdev(0x56212de88700 /var/lib/ceph/osd/ceph-0//block) _aio_start io_setup(2) failed with EAGAIN; try increasing /proc/sys/fs/aio-max-nr To overcome this, you need to increase the value of fs.aio-max-nr in your sysctl configuration (typically /etc/sysctl.conf ). You can do this with your favorite configuration management system.
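For example, a minimal sketch of doing this by hand on each node (the value 1048576 is only an illustration; size it for your OSD count):
# persist the setting across reboots
echo 'fs.aio-max-nr = 1048576' >> /etc/sysctl.conf
# apply it immediately on the running kernel
sysctl -w fs.aio-max-nr=1048576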
Alternatively, you can have a DaemonSet to apply the configuration for you on all your nodes.","title":"OSD prepare job fails due to low aio-max-nr setting"},{"location":"Troubleshooting/ceph-common-issues/#unexpected-partitions-created","text":"","title":"Unexpected partitions created"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_10","text":"Users running Rook versions v1.6.0-v1.6.7 may observe unwanted OSDs on partitions that appear unexpectedly and seemingly randomly, which can corrupt existing OSDs. Unexpected partitions are created on host disks that are used by Ceph OSDs. This happens more often on SSDs than HDDs and usually only on disks that are 875GB or larger. Many tools like lsblk , blkid , udevadm , and parted will not show a partition table type for the partition. Newer versions of blkid are generally able to recognize the type as \"atari\". The underlying issue causing this is Atari partition (sometimes identified as AHDI) support in the Linux kernel. Atari partitions have very relaxed specifications compared to other partition types, and it is relatively easy for random data written to a disk to appear as an Atari partition to the Linux kernel. Ceph's Bluestore OSDs have an anecdotally high probability of writing data on to disks that can appear to the kernel as an Atari partition. Below is an example of lsblk output from a node where phantom Atari partitions are present. Note that sdX1 is never present for the phantom partitions, and sdX2 is 48G on all disks. sdX3 is a variable size and may not always be present. It is possible for sdX4 to appear, though it is an anecdotally rare event. 1 2 3 4 5 6 7 8 9 10 11 # lsblk NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT sdb 8:16 0 3T 0 disk \u251c\u2500sdb2 8:18 0 48G 0 part \u2514\u2500sdb3 8:19 0 6.1M 0 part sdc 8:32 0 3T 0 disk \u251c\u2500sdc2 8:34 0 48G 0 part \u2514\u2500sdc3 8:35 0 6.2M 0 part sdd 8:48 0 3T 0 disk \u251c\u2500sdd2 8:50 0 48G 0 part \u2514\u2500sdd3 8:51 0 6.3M 0 part You can see GitHub rook/rook - Issue 7940 unexpected partition on disks >= 1TB (atari partitions) for more detailed information and discussion.","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#solution_11","text":"","title":"Solution"},{"location":"Troubleshooting/ceph-common-issues/#recover-from-corruption-v160-v167","text":"If you are using Rook v1.6, you must first update to v1.6.8 or higher to avoid further incidents of OSD corruption caused by these Atari partitions. An old workaround suggested using deviceFilter: ^sd[a-z]+$ , but this still results in unexpected partitions. Rook will merely stop creating new OSDs on the partitions. It does not fix a related issue that ceph-volume that is unaware of the Atari partition problem. Users who used this workaround are still at risk for OSD failures in the future. To resolve the issue, immediately update to v1.6.8 or higher. After the update, no corruption should occur on OSDs created in the future. Next, to get back to a healthy Ceph cluster state, focus on one corrupted disk at a time and remove all OSDs on each corrupted disk one disk at a time. As an example, you may have /dev/sdb with two unexpected partitions ( /dev/sdb2 and /dev/sdb3 ) as well as a second corrupted disk /dev/sde with one unexpected partition ( /dev/sde2 ). First, remove the OSDs associated with /dev/sdb , /dev/sdb2 , and /dev/sdb3 . There might be only one, or up to 3 OSDs depending on how your system was affected. Again see the OSD management doc . 
Use dd to wipe the first sectors of the partitions followed by the disk itself. E.g., dd if=/dev/zero of=/dev/sdb2 bs=1M dd if=/dev/zero of=/dev/sdb3 bs=1M dd if=/dev/zero of=/dev/sdb bs=1M Then wipe clean /dev/sdb to prepare it for a new OSD. See the teardown document for details. After this, scale up the Rook operator to deploy a new OSD to /dev/sdb . This will allow Ceph to use /dev/sdb for data recovery and replication while the next OSDs are removed. Now Repeat steps 1-4 for /dev/sde and /dev/sde2 , and continue for any other corrupted disks. If your Rook cluster does not have any critical data stored in it, it may be simpler to uninstall Rook completely and redeploy with v1.6.8 or higher.","title":"Recover from corruption (v1.6.0-v1.6.7)"},{"location":"Troubleshooting/ceph-common-issues/#operator-environment-variables-are-ignored","text":"","title":"Operator environment variables are ignored"},{"location":"Troubleshooting/ceph-common-issues/#symptoms_11","text":"Configuration settings passed as environment variables do not take effect as expected. For example, the discover daemonset is not created, even though ROOK_ENABLE_DISCOVERY_DAEMON=\"true\" is set.","title":"Symptoms"},{"location":"Troubleshooting/ceph-common-issues/#investigation_7","text":"Inspect the rook-ceph-operator-config ConfigMap for conflicting settings. The ConfigMap takes precedence over the environment. The ConfigMap must exist , even if all actual configuration is supplied through the environment. Look for lines with the op-k8sutil prefix in the operator logs. These lines detail the final values, and source, of the different configuration variables. Verify that both of the following messages are present in the operator logs: 1 2 rook-ceph-operator-config-controller successfully started rook-ceph-operator-config-controller done reconciling","title":"Investigation"},{"location":"Troubleshooting/ceph-common-issues/#solution_12","text":"If it does not exist, create an empty ConfigMap: 1 2 3 4 5 6 kind : ConfigMap apiVersion : v1 metadata : name : rook-ceph-operator-config namespace : rook-ceph # namespace:operator data : {} If the ConfigMap exists, remove any keys that you wish to configure through the environment.","title":"Solution"},{"location":"Troubleshooting/ceph-csi-common-issues/","text":"Issues when provisioning volumes with the Ceph CSI driver can happen for many reasons such as: Network connectivity between CSI pods and ceph Cluster health issues Slow operations Kubernetes issues Ceph-CSI configuration or bugs The following troubleshooting steps can help identify a number of issues. Block (RBD) \u00b6 If you are mounting block volumes (usually RWO), these are referred to as RBD volumes in Ceph. See the sections below for RBD if you are having block volume issues. Shared Filesystem (CephFS) \u00b6 If you are mounting shared filesystem volumes (usually RWX), these are referred to as CephFS volumes in Ceph. See the sections below for CephFS if you are having filesystem volume issues. Network Connectivity \u00b6 The Ceph monitors are the most critical component of the cluster to check first. 
Retrieve the mon endpoints from the services: 1 2 3 4 5 $ kubectl -n rook-ceph get svc -l app = rook-ceph-mon NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE rook-ceph-mon-a ClusterIP 10.104.165.31  6789/TCP,3300/TCP 18h rook-ceph-mon-b ClusterIP 10.97.244.93  6789/TCP,3300/TCP 21s rook-ceph-mon-c ClusterIP 10.99.248.163  6789/TCP,3300/TCP 8s If host networking is enabled in the CephCluster CR, you will instead need to find the node IPs for the hosts where the mons are running. The clusterIP is the mon IP and 3300 is the port that will be used by Ceph-CSI to connect to the ceph cluster. These endpoints must be accessible by all clients in the cluster, including the CSI driver. If you are seeing issues provisioning the PVC then you need to check the network connectivity from the provisioner pods. For CephFS PVCs, check network connectivity from the csi-cephfsplugin container of the csi-cephfsplugin-provisioner pods For Block PVCs, check network connectivity from the csi-rbdplugin container of the csi-rbdplugin-provisioner pods For redundancy, there are two provisioner pods for each type. Make sure to test connectivity from all provisioner pods. Connect to the provisioner pods and verify the connection to the mon endpoints such as the following: 1 2 3 4 5 6 # Connect to the csi-cephfsplugin container in the provisioner pod kubectl -n rook-ceph exec -ti deploy/csi-cephfsplugin-provisioner -c csi-cephfsplugin -- bash # Test the network connection to the mon endpoint curl 10.104.165.31:3300 2>/dev/null ceph v2 If you see the response \"ceph v2\", the connection succeeded. If there is no response then there is a network issue connecting to the ceph cluster. Check network connectivity for all monitor IP\u2019s and ports which are passed to ceph-csi. Ceph Health \u00b6 Sometimes an unhealthy Ceph cluster can contribute to the issues in creating or mounting the PVC. Check that your Ceph cluster is healthy by connecting to the Toolbox and running the ceph commands: 1 ceph health detail 1 HEALTH_OK Slow Operations \u00b6 Even slow ops in the ceph cluster can contribute to the issues. In the toolbox, make sure that no slow ops are present and the ceph cluster is healthy 1 2 3 4 5 6 $ ceph -s cluster: id: ba41ac93-3b55-4f32-9e06-d3d8c6ff7334 health: HEALTH_WARN 30 slow ops, oldest one blocked for 10624 sec, mon.a has slow ops [...] If Ceph is not healthy, check the following health for more clues: The Ceph monitor logs for errors The OSD logs for errors Disk Health Network Health Ceph Troubleshooting \u00b6 Check if the RBD Pool exists \u00b6 Make sure the pool you have specified in the storageclass.yaml exists in the ceph cluster. Suppose the pool name mentioned in the storageclass.yaml is replicapool . It can be verified to exist in the toolbox: 1 2 3 $ ceph osd lspools 1 device_health_metrics 2 replicapool If the pool is not in the list, create the CephBlockPool CR for the pool if you have not already. If you have already created the pool, check the Rook operator log for errors creating the pool. Check if the Filesystem exists \u00b6 For the shared filesystem (CephFS), check that the filesystem and pools you have specified in the storageclass.yaml exist in the Ceph cluster. Suppose the fsName name mentioned in the storageclass.yaml is myfs . It can be verified in the toolbox: 1 2 $ ceph fs ls name: myfs, metadata pool: myfs-metadata, data pools: [myfs-data0 ] Now verify the pool mentioned in the storageclass.yaml exists, such as the example myfs-data0 . 
1 2 3 4 5 ceph osd lspools 1 device_health_metrics 2 replicapool 3 myfs-metadata0 4 myfs-data0 The pool for the filesystem will have the suffix -data0 compared the filesystem name that is created by the CephFilesystem CR. subvolumegroups \u00b6 If the subvolumegroup is not specified in the ceph-csi configmap (where you have passed the ceph monitor information), Ceph-CSI creates the default subvolumegroup with the name csi. Verify that the subvolumegroup exists: 1 2 3 4 5 6 $ ceph fs subvolumegroup ls myfs [ { \"name\": \"csi\" } ] If you don\u2019t see any issues with your Ceph cluster, the following sections will start debugging the issue from the CSI side. Provisioning Volumes \u00b6 At times the issue can also exist in the Ceph-CSI or the sidecar containers used in Ceph-CSI. Ceph-CSI has included number of sidecar containers in the provisioner pods such as: csi-attacher , csi-resizer , csi-provisioner , csi-cephfsplugin , csi-snapshotter , and liveness-prometheus . The CephFS provisioner core CSI driver container name is csi-cephfsplugin as one of the container names. For the RBD (Block) provisioner you will see csi-rbdplugin as the container name. Here is a summary of the sidecar containers: csi-provisioner \u00b6 The external-provisioner is a sidecar container that dynamically provisions volumes by calling ControllerCreateVolume() and ControllerDeleteVolume() functions of CSI drivers. More details about external-provisioner can be found here. If there is an issue with PVC Create or Delete, check the logs of the csi-provisioner sidecar container. 1 kubectl -n rook-ceph logs deploy/csi-rbdplugin-provisioner -c csi-provisioner csi-resizer \u00b6 The CSI external-resizer is a sidecar container that watches the Kubernetes API server for PersistentVolumeClaim updates and triggers ControllerExpandVolume operations against a CSI endpoint if the user requested more storage on the PersistentVolumeClaim object. More details about external-provisioner can be found here. If any issue exists in PVC expansion you can check the logs of the csi-resizer sidecar container. 1 kubectl -n rook-ceph logs deploy/csi-rbdplugin-provisioner -c csi-resizer csi-snapshotter \u00b6 The CSI external-snapshotter sidecar only watches for VolumeSnapshotContent create/update/delete events. It will talk to ceph-csi containers to create or delete snapshots. More details about external-snapshotter can be found here . In Kubernetes 1.17 the volume snapshot feature was promoted to beta. In Kubernetes 1.20, the feature gate is enabled by default on standard Kubernetes deployments and cannot be turned off. Make sure you have installed the correct snapshotter CRD version. If you have not installed the snapshotter controller, see the Snapshots guide . 1 2 3 4 $ kubectl get crd | grep snapshot volumesnapshotclasses.snapshot.storage.k8s.io 2021-01-25T11:19:38Z volumesnapshotcontents.snapshot.storage.k8s.io 2021-01-25T11:19:39Z volumesnapshots.snapshot.storage.k8s.io 2021-01-25T11:19:40Z The above CRDs must have the matching version in your snapshotclass.yaml or snapshot.yaml . Otherwise, the VolumeSnapshot and VolumesnapshotContent will not be created. The snapshot controller is responsible for creating both VolumeSnapshot and VolumesnapshotContent object. If the objects are not getting created, you may need to check the logs of the snapshot-controller container. Rook only installs the snapshotter sidecar container, not the controller. 
It is recommended that Kubernetes distributors bundle and deploy the controller and CRDs as part of their Kubernetes cluster management process (independent of any CSI Driver). If your Kubernetes distribution does not bundle the snapshot controller, you may manually install these components. If any issue exists in the snapshot Create/Delete operation, you can check the logs of the csi-snapshotter sidecar container. 1 kubectl -n rook-ceph logs deploy/csi-rbdplugin-provisioner -c csi-snapshotter If you see an error about a volume already existing, such as: 1 2 GRPC error: rpc error: code = Aborted desc = an operation with the given Volume ID 0001-0009-rook-ceph-0000000000000001-8d0ba728-0e17-11eb-a680-ce6eecc894de already exists. The issue typically is in the Ceph cluster or network connectivity. If the issue is in provisioning the PVC, restarting the provisioner pods can help (for a CephFS issue, restart the csi-cephfsplugin-provisioner-xxxxxx pod; for RBD, restart the csi-rbdplugin-provisioner-xxxxxx pod). If the issue is in mounting the PVC, restart the csi-rbdplugin-xxxxx pod (for RBD) or the csi-cephfsplugin-xxxxx pod (for CephFS). Mounting the volume to application pods \u00b6 When a user requests to create an application pod with a PVC, there is a three-step process: CSI driver registration Create volume attachment object Stage and publish the volume csi-driver registration \u00b6 csi-cephfsplugin-xxxx or csi-rbdplugin-xxxx is a daemonset pod running on all the nodes where your application gets scheduled. If the plugin pods are not running on the node where your application is scheduled, this might cause the issue; make sure the plugin pods are always running. Each plugin pod has two important containers: driver-registrar and either csi-rbdplugin or csi-cephfsplugin . Sometimes there is also a liveness-prometheus container. driver-registrar \u00b6 The node-driver-registrar is a sidecar container that registers the CSI driver with Kubelet. More details can be found here . If any issue exists in attaching the PVC to the application pod, check the logs from the driver-registrar sidecar container in the plugin pod where your application pod is scheduled. 1 2 3 4 5 6 7 8 9 10 11 12 $ kubectl -n rook-ceph logs deploy/csi-rbdplugin -c driver-registrar [...] I0120 12:28:34.231761 124018 main.go:112] Version: v2.0.1 I0120 12:28:34.233910 124018 connection.go:151] Connecting to unix:///csi/csi.sock I0120 12:28:35.242469 124018 node_register.go:55] Starting Registration Server at: /registration/rook-ceph.rbd.csi.ceph.com-reg.sock I0120 12:28:35.243364 124018 node_register.go:64] Registration Server started at: /registration/rook-ceph.rbd.csi.ceph.com-reg.sock I0120 12:28:35.243673 124018 node_register.go:86] Skipping healthz server because port set to: 0 I0120 12:28:36.318482 124018 main.go:79] Received GetInfo call: &InfoRequest{} I0120 12:28:37.455211 124018 main.go:89] Received NotifyRegistrationStatus call: &RegistrationStatus{PluginRegistered:true,Error:,} E0121 05:19:28.658390 124018 connection.go:129] Lost connection to unix:///csi/csi.sock. E0125 07:11:42.926133 124018 connection.go:129] Lost connection to unix:///csi/csi.sock. [...] You should see the response RegistrationStatus{PluginRegistered:true,Error:,} in the logs to confirm that the plugin is registered with kubelet. If you see a \"driver not found\" error in the application pod describe output, restarting the csi-xxxxplugin-xxx pod on the node may help. Volume Attachment \u00b6 Each provisioner pod also has a sidecar container called csi-attacher .
csi-attacher \u00b6 The external-attacher is a sidecar container that attaches volumes to nodes by calling ControllerPublish and ControllerUnpublish functions of CSI drivers. It is necessary because the internal Attach/Detach controller running in Kubernetes controller-manager does not have any direct interfaces to CSI drivers. More details can be found here . If any issue exists in attaching the PVC to the application pod first check the volumeattachment object created and also log from csi-attacher sidecar container in provisioner pod. 1 2 3 $ kubectl get volumeattachment NAME ATTACHER PV NODE ATTACHED AGE csi-75903d8a902744853900d188f12137ea1cafb6c6f922ebc1c116fd58e950fc92 rook-ceph.cephfs.csi.ceph.com pvc-5c547d2a-fdb8-4cb2-b7fe-e0f30b88d454 minikube true 4m26s 1 kubectl logs po/csi-rbdplugin-provisioner-d857bfb5f-ddctl -c csi-attacher CephFS Stale operations \u00b6 Check for any stale mount commands on the csi-cephfsplugin-xxxx pod on the node where your application pod is scheduled. You need to exec in the csi-cephfsplugin-xxxx pod and grep for stale mount operators. Identify the csi-cephfsplugin-xxxx pod running on the node where your application is scheduled with kubectl get po -o wide and match the node names. 1 2 3 4 $ kubectl exec -it csi-cephfsplugin-tfk2g -c csi-cephfsplugin -- sh $ ps -ef | grep mount [...] root 67 60 0 11:55 pts/0 00:00:00 grep mount 1 2 3 4 ps -ef |grep ceph [...] root 1 0 0 Jan20 ? 00:00:26 /usr/local/bin/cephcsi --nodeid=minikube --type=cephfs --endpoint=unix:///csi/csi.sock --v=0 --nodeserver=true --drivername=rook-ceph.cephfs.csi.ceph.com --pidlimit=-1 --metricsport=9091 --forcecephkernelclient=true --metricspath=/metrics --enablegrpcmetrics=true root 69 60 0 11:55 pts/0 00:00:00 grep ceph If any commands are stuck check the dmesg logs from the node. Restarting the csi-cephfsplugin pod may also help sometimes. If you don\u2019t see any stuck messages, confirm the network connectivity, Ceph health, and slow ops. RBD Stale operations \u00b6 Check for any stale map/mkfs/mount commands on the csi-rbdplugin-xxxx pod on the node where your application pod is scheduled. You need to exec in the csi-rbdplugin-xxxx pod and grep for stale operators like ( rbd map, rbd unmap, mkfs, mount and umount ). Identify the csi-rbdplugin-xxxx pod running on the node where your application is scheduled with kubectl get po -o wide and match the node names. 1 2 3 4 $ kubectl exec -it csi-rbdplugin-vh8d5 -c csi-rbdplugin -- sh $ ps -ef | grep map [...] root 1297024 1296907 0 12:00 pts/0 00:00:00 grep map 1 2 3 4 5 $ ps -ef | grep mount [...] root 1824 1 0 Jan19 ? 00:00:00 /usr/sbin/rpc.mountd ceph 1041020 1040955 1 07:11 ? 00:03:43 ceph-mgr --fsid=ba41ac93-3b55-4f32-9e06-d3d8c6ff7334 --keyring=/etc/ceph/keyring-store/keyring --log-to-stderr=true --err-to-stderr=true --mon-cluster-log-to-stderr=true --log-stderr-prefix=debug --default-log-to-file=false --default-mon-cluster-log-to-file=false --mon-host=[v2:10.111.136.166:3300,v1:10.111.136.166:6789] --mon-initial-members=a --id=a --setuser=ceph --setgroup=ceph --client-mount-uid=0 --client-mount-gid=0 --foreground --public-addr=172.17.0.6 root 1297115 1296907 0 12:00 pts/0 00:00:00 grep mount 1 2 3 $ ps -ef | grep mkfs [...] root 1297291 1296907 0 12:00 pts/0 00:00:00 grep mkfs 1 2 3 $ ps -ef | grep umount [...] root 1298500 1296907 0 12:01 pts/0 00:00:00 grep umount 1 2 3 $ ps -ef | grep unmap [...] root 1298578 1296907 0 12:01 pts/0 00:00:00 grep unmap If any commands are stuck check the dmesg logs from the node. 
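A hedged shortcut for scanning those kernel logs (the grep pattern is only a suggestion; adjust it to your environment):
dmesg -T | grep -iE 'blocked for more than|libceph|rbd'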
Restarting the csi-rbdplugin pod may also help sometimes. If you don\u2019t see any stuck messages, confirm the network connectivity, Ceph health, and slow ops. dmesg logs \u00b6 Check the dmesg logs on the node where the PVC mounting is failing, or from the csi-rbdplugin container of the csi-rbdplugin-xxxx pod on that node. 1 dmesg RBD Commands \u00b6 If nothing else helps, get the last executed command from the ceph-csi pod logs and run it manually inside the provisioner or plugin pod to see if there are errors returned even if they couldn't be seen in the logs. 1 rbd ls --id=csi-rbd-node -m=10.111.136.166:6789 --key=AQDpIQhg+v83EhAAgLboWIbl+FL/nThJzoI3Fg== Where -m is one of the mon endpoints and the --key is the key used by the CSI driver for accessing the Ceph cluster. Node Loss \u00b6 When a node is lost, you will see application pods on the node stuck in the Terminating state while another pod is rescheduled and is in the ContainerCreating state. Important For clusters with Kubernetes version 1.26 or greater, see the improved automation to recover from the node loss. If using K8s 1.25 or older, continue with these instructions. Force deleting the pod \u00b6 To force delete the pod stuck in the Terminating state: 1 kubectl -n rook-ceph delete pod my-app-69cd495f9b-nl6hf --grace-period 0 --force After the force delete, wait for a timeout of about 8-10 minutes. If the pod is still not in the running state, continue with the next section to blocklist the node. Blocklisting a node \u00b6 To shorten the timeout, you can mark the node as \"blocklisted\" from the Rook toolbox so Rook can safely fail over the pod sooner. 1 2 $ ceph osd blocklist add <node_ip> # get the node IP you want to blocklist blocklisting <node_ip> After running the above command, within a few minutes the pod will be running. Removing a node blocklist \u00b6 After you are absolutely sure the node is permanently offline and that the node no longer needs to be blocklisted, remove the node from the blocklist. 1 2 $ ceph osd blocklist rm <node_ip> un-blocklisting <node_ip>","title":"CSI Common Issues"},{"location":"Troubleshooting/ceph-csi-common-issues/#block-rbd","text":"If you are mounting block volumes (usually RWO), these are referred to as RBD volumes in Ceph. See the sections below for RBD if you are having block volume issues.","title":"Block (RBD)"},{"location":"Troubleshooting/ceph-csi-common-issues/#shared-filesystem-cephfs","text":"If you are mounting shared filesystem volumes (usually RWX), these are referred to as CephFS volumes in Ceph. See the sections below for CephFS if you are having filesystem volume issues.","title":"Shared Filesystem (CephFS)"},{"location":"Troubleshooting/ceph-csi-common-issues/#network-connectivity","text":"The Ceph monitors are the most critical component of the cluster to check first.
If you are seeing issues provisioning the PVC then you need to check the network connectivity from the provisioner pods. For CephFS PVCs, check network connectivity from the csi-cephfsplugin container of the csi-cephfsplugin-provisioner pods For Block PVCs, check network connectivity from the csi-rbdplugin container of the csi-rbdplugin-provisioner pods For redundancy, there are two provisioner pods for each type. Make sure to test connectivity from all provisioner pods. Connect to the provisioner pods and verify the connection to the mon endpoints such as the following: 1 2 3 4 5 6 # Connect to the csi-cephfsplugin container in the provisioner pod kubectl -n rook-ceph exec -ti deploy/csi-cephfsplugin-provisioner -c csi-cephfsplugin -- bash # Test the network connection to the mon endpoint curl 10.104.165.31:3300 2>/dev/null ceph v2 If you see the response \"ceph v2\", the connection succeeded. If there is no response then there is a network issue connecting to the ceph cluster. Check network connectivity for all monitor IP\u2019s and ports which are passed to ceph-csi.","title":"Network Connectivity"},{"location":"Troubleshooting/ceph-csi-common-issues/#ceph-health","text":"Sometimes an unhealthy Ceph cluster can contribute to the issues in creating or mounting the PVC. Check that your Ceph cluster is healthy by connecting to the Toolbox and running the ceph commands: 1 ceph health detail 1 HEALTH_OK","title":"Ceph Health"},{"location":"Troubleshooting/ceph-csi-common-issues/#slow-operations","text":"Even slow ops in the ceph cluster can contribute to the issues. In the toolbox, make sure that no slow ops are present and the ceph cluster is healthy 1 2 3 4 5 6 $ ceph -s cluster: id: ba41ac93-3b55-4f32-9e06-d3d8c6ff7334 health: HEALTH_WARN 30 slow ops, oldest one blocked for 10624 sec, mon.a has slow ops [...] If Ceph is not healthy, check the following health for more clues: The Ceph monitor logs for errors The OSD logs for errors Disk Health Network Health","title":"Slow Operations"},{"location":"Troubleshooting/ceph-csi-common-issues/#ceph-troubleshooting","text":"","title":"Ceph Troubleshooting"},{"location":"Troubleshooting/ceph-csi-common-issues/#check-if-the-rbd-pool-exists","text":"Make sure the pool you have specified in the storageclass.yaml exists in the ceph cluster. Suppose the pool name mentioned in the storageclass.yaml is replicapool . It can be verified to exist in the toolbox: 1 2 3 $ ceph osd lspools 1 device_health_metrics 2 replicapool If the pool is not in the list, create the CephBlockPool CR for the pool if you have not already. If you have already created the pool, check the Rook operator log for errors creating the pool.","title":"Check if the RBD Pool exists"},{"location":"Troubleshooting/ceph-csi-common-issues/#check-if-the-filesystem-exists","text":"For the shared filesystem (CephFS), check that the filesystem and pools you have specified in the storageclass.yaml exist in the Ceph cluster. Suppose the fsName name mentioned in the storageclass.yaml is myfs . It can be verified in the toolbox: 1 2 $ ceph fs ls name: myfs, metadata pool: myfs-metadata, data pools: [myfs-data0 ] Now verify the pool mentioned in the storageclass.yaml exists, such as the example myfs-data0 . 
1 2 3 4 5 ceph osd lspools 1 device_health_metrics 2 replicapool 3 myfs-metadata0 4 myfs-data0 The pool for the filesystem will have the suffix -data0 compared to the filesystem name that is created by the CephFilesystem CR.","title":"Check if the Filesystem exists"},{"location":"Troubleshooting/ceph-csi-common-issues/#subvolumegroups","text":"If the subvolumegroup is not specified in the ceph-csi configmap (where you have passed the ceph monitor information), Ceph-CSI creates the default subvolumegroup with the name csi. Verify that the subvolumegroup exists: 1 2 3 4 5 6 $ ceph fs subvolumegroup ls myfs [ { \"name\": \"csi\" } ] If you don\u2019t see any issues with your Ceph cluster, the following sections will start debugging the issue from the CSI side.","title":"subvolumegroups"},{"location":"Troubleshooting/ceph-csi-common-issues/#provisioning-volumes","text":"At times the issue can also exist in Ceph-CSI or in the sidecar containers used by Ceph-CSI. Ceph-CSI includes a number of sidecar containers in the provisioner pods, such as: csi-attacher , csi-resizer , csi-provisioner , csi-cephfsplugin , csi-snapshotter , and liveness-prometheus . For the CephFS provisioner, the core CSI driver container is named csi-cephfsplugin . For the RBD (Block) provisioner you will see csi-rbdplugin as the container name. Here is a summary of the sidecar containers:","title":"Provisioning Volumes"},{"location":"Troubleshooting/ceph-csi-common-issues/#csi-provisioner","text":"The external-provisioner is a sidecar container that dynamically provisions volumes by calling ControllerCreateVolume() and ControllerDeleteVolume() functions of CSI drivers. More details about external-provisioner can be found here. If there is an issue with PVC Create or Delete, check the logs of the csi-provisioner sidecar container. 1 kubectl -n rook-ceph logs deploy/csi-rbdplugin-provisioner -c csi-provisioner","title":"csi-provisioner"},{"location":"Troubleshooting/ceph-csi-common-issues/#csi-resizer","text":"The CSI external-resizer is a sidecar container that watches the Kubernetes API server for PersistentVolumeClaim updates and triggers ControllerExpandVolume operations against a CSI endpoint if the user requested more storage on the PersistentVolumeClaim object. More details about external-resizer can be found here. If any issue exists in PVC expansion, check the logs of the csi-resizer sidecar container. 1 kubectl -n rook-ceph logs deploy/csi-rbdplugin-provisioner -c csi-resizer","title":"csi-resizer"},{"location":"Troubleshooting/ceph-csi-common-issues/#csi-snapshotter","text":"The CSI external-snapshotter sidecar only watches for VolumeSnapshotContent create/update/delete events. It will talk to ceph-csi containers to create or delete snapshots. More details about external-snapshotter can be found here . In Kubernetes 1.17 the volume snapshot feature was promoted to beta. In Kubernetes 1.20, the feature gate is enabled by default on standard Kubernetes deployments and cannot be turned off. Make sure you have installed the correct snapshotter CRD version. If you have not installed the snapshotter controller, see the Snapshots guide . 1 2 3 4 $ kubectl get crd | grep snapshot volumesnapshotclasses.snapshot.storage.k8s.io 2021-01-25T11:19:38Z volumesnapshotcontents.snapshot.storage.k8s.io 2021-01-25T11:19:39Z volumesnapshots.snapshot.storage.k8s.io 2021-01-25T11:19:40Z The above CRDs must have the matching version in your snapshotclass.yaml or snapshot.yaml . 
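One way to compare the versions is to read the versions served by the CRD and the apiVersion of your snapshot class. A sketch, assuming a VolumeSnapshotClass named csi-rbdplugin-snapclass (use the name from your own cluster):
kubectl get crd volumesnapshotclasses.snapshot.storage.k8s.io -o jsonpath='{.spec.versions[*].name}'
kubectl get volumesnapshotclass csi-rbdplugin-snapclass -o jsonpath='{.apiVersion}'
The apiVersion reported for the class (and for any VolumeSnapshot you create) must be one of the versions served by the CRDs.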
Otherwise, the VolumeSnapshot and VolumeSnapshotContent will not be created. The snapshot controller is responsible for creating both the VolumeSnapshot and VolumeSnapshotContent objects. If the objects are not getting created, you may need to check the logs of the snapshot-controller container. Rook only installs the snapshotter sidecar container, not the controller. It is recommended that Kubernetes distributors bundle and deploy the controller and CRDs as part of their Kubernetes cluster management process (independent of any CSI Driver). If your Kubernetes distribution does not bundle the snapshot controller, you may manually install these components. If any issue exists in the snapshot Create/Delete operation, check the logs of the csi-snapshotter sidecar container. 1 kubectl -n rook-ceph logs deploy/csi-rbdplugin-provisioner -c csi-snapshotter If you see an error about a volume already existing such as: 1 2 GRPC error: rpc error: code = Aborted desc = an operation with the given Volume ID 0001-0009-rook-ceph-0000000000000001-8d0ba728-0e17-11eb-a680-ce6eecc894de already exists. The issue typically is in the Ceph cluster or network connectivity. If the issue is in provisioning the PVC, restarting the provisioner pods can help: for CephFS, restart the csi-cephfsplugin-provisioner-xxxxxx pod; for RBD, restart the csi-rbdplugin-provisioner-xxxxxx pod. If the issue is in mounting the PVC, restart the csi-rbdplugin-xxxxx pod (for RBD) or the csi-cephfsplugin-xxxxx pod (for CephFS).","title":"csi-snapshotter"},{"location":"Troubleshooting/ceph-csi-common-issues/#mounting-the-volume-to-application-pods","text":"When a user requests to create the application pod with a PVC, there is a three-step process: CSI driver registration Create volume attachment object Stage and publish the volume","title":"Mounting the volume to application pods"},{"location":"Troubleshooting/ceph-csi-common-issues/#csi-driver-registration","text":"csi-cephfsplugin-xxxx or csi-rbdplugin-xxxx is a daemonset pod running on all the nodes where your application gets scheduled. If the plugin pods are not running on the node where your application is scheduled, that by itself can cause the issue, so make sure the plugin pods are always running. Each plugin pod has two important containers: driver-registrar and csi-rbdplugin or csi-cephfsplugin . Sometimes there is also a liveness-prometheus container.","title":"csi-driver registration"},{"location":"Troubleshooting/ceph-csi-common-issues/#driver-registrar","text":"The node-driver-registrar is a sidecar container that registers the CSI driver with Kubelet. More details can be found here . If any issue exists in attaching the PVC to the application pod, check the logs from the driver-registrar sidecar container in the plugin pod on the node where your application pod is scheduled. 1 2 3 4 5 6 7 8 9 10 11 12 $ kubectl -n rook-ceph logs deploy/csi-rbdplugin -c driver-registrar [...] 
I0120 12:28:34.231761 124018 main.go:112] Version: v2.0.1 I0120 12:28:34.233910 124018 connection.go:151] Connecting to unix:///csi/csi.sock I0120 12:28:35.242469 124018 node_register.go:55] Starting Registration Server at: /registration/rook-ceph.rbd.csi.ceph.com-reg.sock I0120 12:28:35.243364 124018 node_register.go:64] Registration Server started at: /registration/rook-ceph.rbd.csi.ceph.com-reg.sock I0120 12:28:35.243673 124018 node_register.go:86] Skipping healthz server because port set to: 0 I0120 12:28:36.318482 124018 main.go:79] Received GetInfo call: &InfoRequest{} I0120 12:28:37.455211 124018 main.go:89] Received NotifyRegistrationStatus call: &RegistrationStatus{PluginRegistered:true,Error:,} E0121 05:19:28.658390 124018 connection.go:129] Lost connection to unix:///csi/csi.sock. E0125 07:11:42.926133 124018 connection.go:129] Lost connection to unix:///csi/csi.sock. [...] You should see the response RegistrationStatus{PluginRegistered:true,Error:,} in the logs to confirm that the plugin is registered with kubelet. If you see a \"driver not found\" error in the application pod describe output, restarting the csi-xxxxplugin-xxx pod on the node may help.","title":"driver-registrar"},{"location":"Troubleshooting/ceph-csi-common-issues/#volume-attachment","text":"Each provisioner pod also has a sidecar container called csi-attacher .","title":"Volume Attachment"},{"location":"Troubleshooting/ceph-csi-common-issues/#csi-attacher","text":"The external-attacher is a sidecar container that attaches volumes to nodes by calling ControllerPublish and ControllerUnpublish functions of CSI drivers. It is necessary because the internal Attach/Detach controller running in the Kubernetes controller-manager does not have any direct interfaces to CSI drivers. More details can be found here . If any issue exists in attaching the PVC to the application pod, first check that the volumeattachment object was created, and also check the logs from the csi-attacher sidecar container in the provisioner pod. 1 2 3 $ kubectl get volumeattachment NAME ATTACHER PV NODE ATTACHED AGE csi-75903d8a902744853900d188f12137ea1cafb6c6f922ebc1c116fd58e950fc92 rook-ceph.cephfs.csi.ceph.com pvc-5c547d2a-fdb8-4cb2-b7fe-e0f30b88d454 minikube true 4m26s 1 kubectl logs po/csi-rbdplugin-provisioner-d857bfb5f-ddctl -c csi-attacher","title":"csi-attacher"},{"location":"Troubleshooting/ceph-csi-common-issues/#cephfs-stale-operations","text":"Check for any stale mount commands on the csi-cephfsplugin-xxxx pod on the node where your application pod is scheduled. You need to exec in the csi-cephfsplugin-xxxx pod and grep for stale mount operations. Identify the csi-cephfsplugin-xxxx pod running on the node where your application is scheduled with kubectl get po -o wide and match the node names. 1 2 3 4 $ kubectl exec -it csi-cephfsplugin-tfk2g -c csi-cephfsplugin -- sh $ ps -ef | grep mount [...] root 67 60 0 11:55 pts/0 00:00:00 grep mount 1 2 3 4 ps -ef |grep ceph [...] root 1 0 0 Jan20 ? 00:00:26 /usr/local/bin/cephcsi --nodeid=minikube --type=cephfs --endpoint=unix:///csi/csi.sock --v=0 --nodeserver=true --drivername=rook-ceph.cephfs.csi.ceph.com --pidlimit=-1 --metricsport=9091 --forcecephkernelclient=true --metricspath=/metrics --enablegrpcmetrics=true root 69 60 0 11:55 pts/0 00:00:00 grep ceph If any commands are stuck, check the dmesg logs from the node. Restarting the csi-cephfsplugin pod may also help sometimes. 
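To find which plugin pod to restart, filter the daemonset pods by the node the application runs on. A sketch, assuming the application pod is scheduled on a node named worker-1 (the pod name shown is only an example):
kubectl -n rook-ceph get pod -l app=csi-cephfsplugin -o wide --field-selector spec.nodeName=worker-1
kubectl -n rook-ceph delete pod csi-cephfsplugin-tfk2g
Deleting the plugin pod is generally safe because the daemonset controller recreates it on the same node right away.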
If you don\u2019t see any stuck messages, confirm the network connectivity, Ceph health, and slow ops.","title":"CephFS Stale operations"},{"location":"Troubleshooting/ceph-csi-common-issues/#rbd-stale-operations","text":"Check for any stale map/mkfs/mount commands on the csi-rbdplugin-xxxx pod on the node where your application pod is scheduled. You need to exec in the csi-rbdplugin-xxxx pod and grep for stale operators like ( rbd map, rbd unmap, mkfs, mount and umount ). Identify the csi-rbdplugin-xxxx pod running on the node where your application is scheduled with kubectl get po -o wide and match the node names. 1 2 3 4 $ kubectl exec -it csi-rbdplugin-vh8d5 -c csi-rbdplugin -- sh $ ps -ef | grep map [...] root 1297024 1296907 0 12:00 pts/0 00:00:00 grep map 1 2 3 4 5 $ ps -ef | grep mount [...] root 1824 1 0 Jan19 ? 00:00:00 /usr/sbin/rpc.mountd ceph 1041020 1040955 1 07:11 ? 00:03:43 ceph-mgr --fsid=ba41ac93-3b55-4f32-9e06-d3d8c6ff7334 --keyring=/etc/ceph/keyring-store/keyring --log-to-stderr=true --err-to-stderr=true --mon-cluster-log-to-stderr=true --log-stderr-prefix=debug --default-log-to-file=false --default-mon-cluster-log-to-file=false --mon-host=[v2:10.111.136.166:3300,v1:10.111.136.166:6789] --mon-initial-members=a --id=a --setuser=ceph --setgroup=ceph --client-mount-uid=0 --client-mount-gid=0 --foreground --public-addr=172.17.0.6 root 1297115 1296907 0 12:00 pts/0 00:00:00 grep mount 1 2 3 $ ps -ef | grep mkfs [...] root 1297291 1296907 0 12:00 pts/0 00:00:00 grep mkfs 1 2 3 $ ps -ef | grep umount [...] root 1298500 1296907 0 12:01 pts/0 00:00:00 grep umount 1 2 3 $ ps -ef | grep unmap [...] root 1298578 1296907 0 12:01 pts/0 00:00:00 grep unmap If any commands are stuck check the dmesg logs from the node. Restarting the csi-rbdplugin pod also may help sometimes. If you don\u2019t see any stuck messages, confirm the network connectivity, Ceph health, and slow ops.","title":"RBD Stale operations"},{"location":"Troubleshooting/ceph-csi-common-issues/#dmesg-logs","text":"Check the dmesg logs on the node where pvc mounting is failing or the csi-rbdplugin container of the csi-rbdplugin-xxxx pod on that node. 1 dmesg","title":"dmesg logs"},{"location":"Troubleshooting/ceph-csi-common-issues/#rbd-commands","text":"If nothing else helps, get the last executed command from the ceph-csi pod logs and run it manually inside the provisioner or plugin pod to see if there are errors returned even if they couldn't be seen in the logs. 1 rbd ls --id=csi-rbd-node -m=10.111.136.166:6789 --key=AQDpIQhg+v83EhAAgLboWIbl+FL/nThJzoI3Fg== Where -m is one of the mon endpoints and the --key is the key used by the CSI driver for accessing the Ceph cluster.","title":"RBD Commands"},{"location":"Troubleshooting/ceph-csi-common-issues/#node-loss","text":"When a node is lost, you will see application pods on the node stuck in the Terminating state while another pod is rescheduled and is in the ContainerCreating state. Important For clusters with Kubernetes version 1.26 or greater, see the improved automation to recover from the node loss. If using K8s 1.25 or older, continue with these instructions.","title":"Node Loss"},{"location":"Troubleshooting/ceph-csi-common-issues/#force-deleting-the-pod","text":"To force delete the pod stuck in the Terminating state: 1 kubectl -n rook-ceph delete pod my-app-69cd495f9b-nl6hf --grace-period 0 --force After the force delete, wait for a timeout of about 8-10 minutes. 
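While waiting, you can watch the progress of the replacement pod (a sketch; adjust the namespace and label selector to match your application):
kubectl get pod -l app=my-app -w
The replacement pod typically stays in the ContainerCreating state until the volume can be safely attached to the new node.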
If the pod still not in the running state, continue with the next section to blocklist the node.","title":"Force deleting the pod"},{"location":"Troubleshooting/ceph-csi-common-issues/#blocklisting-a-node","text":"To shorten the timeout, you can mark the node as \"blocklisted\" from the Rook toolbox so Rook can safely failover the pod sooner. 1 2 $ ceph osd blocklist add  # get the node IP you want to blocklist blocklisting  After running the above command within a few minutes the pod will be running.","title":"Blocklisting a node"},{"location":"Troubleshooting/ceph-csi-common-issues/#removing-a-node-blocklist","text":"After you are absolutely sure the node is permanently offline and that the node no longer needs to be blocklisted, remove the node from the blocklist. 1 2 $ ceph osd blocklist rm  un-blocklisting ","title":"Removing a node blocklist"},{"location":"Troubleshooting/ceph-toolbox/","text":"The Rook toolbox is a container with common tools used for rook debugging and testing. The toolbox is based on CentOS, so more tools of your choosing can be easily installed with yum . The toolbox can be run in two modes: Interactive : Start a toolbox pod where you can connect and execute Ceph commands from a shell One-time job : Run a script with Ceph commands and collect the results from the job log Hint Before running the toolbox you should have a running Rook cluster deployed (see the Quickstart Guide ). Note The toolbox is not necessary if you are using Krew plugin to execute Ceph commands. Interactive Toolbox \u00b6 The rook toolbox can run as a deployment in a Kubernetes cluster where you can connect and run arbitrary Ceph commands. Launch the rook-ceph-tools pod: 1 kubectl create -f deploy/examples/toolbox.yaml Wait for the toolbox pod to download its container and get to the running state: 1 kubectl -n rook-ceph rollout status deploy/rook-ceph-tools Once the rook-ceph-tools pod is running, you can connect to it with: 1 kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- bash All available tools in the toolbox are ready for your troubleshooting needs. Example : ceph status ceph osd status ceph df rados df When you are done with the toolbox, you can remove the deployment: 1 kubectl -n rook-ceph delete deploy/rook-ceph-tools Toolbox Job \u00b6 If you want to run Ceph commands as a one-time operation and collect the results later from the logs, you can run a script as a Kubernetes Job. The toolbox job will run a script that is embedded in the job spec. The script has the full flexibility of a bash script. In this example, the ceph status command is executed when the job is created. Create the toolbox job: 1 kubectl create -f deploy/examples/toolbox-job.yaml After the job completes, see the results of the script: 1 kubectl -n rook-ceph logs -l job-name=rook-ceph-toolbox-job","title":"Toolbox"},{"location":"Troubleshooting/ceph-toolbox/#interactive-toolbox","text":"The rook toolbox can run as a deployment in a Kubernetes cluster where you can connect and run arbitrary Ceph commands. Launch the rook-ceph-tools pod: 1 kubectl create -f deploy/examples/toolbox.yaml Wait for the toolbox pod to download its container and get to the running state: 1 kubectl -n rook-ceph rollout status deploy/rook-ceph-tools Once the rook-ceph-tools pod is running, you can connect to it with: 1 kubectl -n rook-ceph exec -it deploy/rook-ceph-tools -- bash All available tools in the toolbox are ready for your troubleshooting needs. 
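You can also run a single command in the toolbox without opening an interactive shell, which is handy for scripting. A sketch:
kubectl -n rook-ceph exec deploy/rook-ceph-tools -- ceph status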
Example : ceph status ceph osd status ceph df rados df When you are done with the toolbox, you can remove the deployment: 1 kubectl -n rook-ceph delete deploy/rook-ceph-tools","title":"Interactive Toolbox"},{"location":"Troubleshooting/ceph-toolbox/#toolbox-job","text":"If you want to run Ceph commands as a one-time operation and collect the results later from the logs, you can run a script as a Kubernetes Job. The toolbox job will run a script that is embedded in the job spec. The script has the full flexibility of a bash script. In this example, the ceph status command is executed when the job is created. Create the toolbox job: 1 kubectl create -f deploy/examples/toolbox-job.yaml After the job completes, see the results of the script: 1 kubectl -n rook-ceph logs -l job-name=rook-ceph-toolbox-job","title":"Toolbox Job"},{"location":"Troubleshooting/common-issues/","text":"To help troubleshoot your Rook clusters, here are some tips on what information will help solve the issues you might be seeing. If after trying the suggestions found on this page and the problem is not resolved, the Rook team is very happy to help you troubleshoot the issues in their Slack channel. Once you have registered for the Rook Slack , proceed to the General channel to ask for assistance. Ceph Common Issues \u00b6 For common issues specific to Ceph, see the Ceph Common Issues page. Troubleshooting Techniques \u00b6 Kubernetes status and logs are the main resources needed to investigate issues in any Rook cluster. Kubernetes Tools \u00b6 Kubernetes status is the first line of investigating when something goes wrong with the cluster. Here are a few artifacts that are helpful to gather: Rook pod status: kubectl get pod -n  -o wide e.g., kubectl get pod -n rook-ceph -o wide Logs for Rook pods Logs for the operator: kubectl logs -n  -l app= e.g., kubectl logs -n rook-ceph -l app=rook-ceph-operator Logs for a specific pod: kubectl logs -n   , or a pod using a label such as mon1: kubectl logs -n  -l  e.g., kubectl logs -n rook-ceph -l mon=a Logs on a specific node to find why a PVC is failing to mount: Connect to the node, then get kubelet logs (if your distro is using systemd): journalctl -u kubelet Pods with multiple containers For all containers, in order: kubectl -n  logs  --all-containers For a single container: kubectl -n  logs  -c  Logs for pods which are no longer running: kubectl -n  logs --previous  Some pods have specialized init containers, so you may need to look at logs for different containers within the pod. kubectl -n  logs  -c  Other Rook artifacts: kubectl -n  get all","title":"Common Issues"},{"location":"Troubleshooting/common-issues/#ceph-common-issues","text":"For common issues specific to Ceph, see the Ceph Common Issues page.","title":"Ceph Common Issues"},{"location":"Troubleshooting/common-issues/#troubleshooting-techniques","text":"Kubernetes status and logs are the main resources needed to investigate issues in any Rook cluster.","title":"Troubleshooting Techniques"},{"location":"Troubleshooting/common-issues/#kubernetes-tools","text":"Kubernetes status is the first line of investigating when something goes wrong with the cluster. 
Here are a few artifacts that are helpful to gather: Rook pod status: kubectl get pod -n  -o wide e.g., kubectl get pod -n rook-ceph -o wide Logs for Rook pods Logs for the operator: kubectl logs -n  -l app= e.g., kubectl logs -n rook-ceph -l app=rook-ceph-operator Logs for a specific pod: kubectl logs -n   , or a pod using a label such as mon1: kubectl logs -n  -l  e.g., kubectl logs -n rook-ceph -l mon=a Logs on a specific node to find why a PVC is failing to mount: Connect to the node, then get kubelet logs (if your distro is using systemd): journalctl -u kubelet Pods with multiple containers For all containers, in order: kubectl -n  logs  --all-containers For a single container: kubectl -n  logs  -c  Logs for pods which are no longer running: kubectl -n  logs --previous  Some pods have specialized init containers, so you may need to look at logs for different containers within the pod. kubectl -n  logs  -c  Other Rook artifacts: kubectl -n  get all","title":"Kubernetes Tools"},{"location":"Troubleshooting/direct-tools/","text":"Rook is designed with Kubernetes design principles from the ground up. This topic is going to escape the bounds of Kubernetes storage and show you how to use block and file storage directly from a pod without any of the Kubernetes magic. The purpose of this topic is to help you quickly test a new configuration, although it is not meant to be used in production. All of the benefits of Kubernetes storage including failover, detach, and attach will not be available. If your pod dies, your mount will die with it. Start the Direct Mount Pod \u00b6 To test mounting your Ceph volumes, start a pod with the necessary mounts. An example is provided in the examples test directory: 1 kubectl create -f deploy/examples/direct-mount.yaml After the pod is started, connect to it like this: 1 2 kubectl -n rook-ceph get pod -l app=rook-direct-mount $ kubectl -n rook-ceph exec -it  bash Block Storage Tools \u00b6 After you have created a pool as described in the Block Storage topic, you can create a block image and mount it directly in a pod. This example will show how the Ceph rbd volume can be mounted in the direct mount pod. Create the Direct Mount Pod . Create a volume image (10MB): 1 2 3 4 5 rbd create replicapool/test --size 10 rbd info replicapool/test # Disable the rbd features that are not in the kernel module rbd feature disable replicapool/test fast-diff deep-flatten object-map Map the block volume and format it and mount it: 1 2 3 4 5 6 7 8 9 10 11 12 13 # Map the rbd device. If the Direct Mount Pod was started with \"hostNetwork: false\" this hangs and you have to stop it with Ctrl-C, # however the command still succeeds ; see https://github.com/rook/rook/issues/2021 rbd map replicapool/test # Find the device name, such as rbd0 lsblk | grep rbd # Format the volume ( only do this the first time or you will lose data ) mkfs.ext4 -m0 /dev/rbd0 # Mount the block device mkdir /tmp/rook-volume mount /dev/rbd0 /tmp/rook-volume Write and read a file: 1 2 echo \"Hello Rook\" > /tmp/rook-volume/hello cat /tmp/rook-volume/hello Unmount the Block device \u00b6 Unmount the volume and unmap the kernel device: 1 2 umount /tmp/rook-volume rbd unmap /dev/rbd0 Shared Filesystem Tools \u00b6 After you have created a filesystem as described in the Shared Filesystem topic, you can mount the filesystem from multiple pods. The the other topic you may have mounted the filesystem already in the registry pod. Now we will mount the same filesystem in the Direct Mount pod. 
This is just a simple way to validate the Ceph filesystem and is not recommended for production Kubernetes pods. Follow Direct Mount Pod to start a pod with the necessary mounts and then proceed with the following commands after connecting to the pod. 1 2 3 4 5 6 7 8 9 10 11 12 # Create the directory mkdir /tmp/registry # Detect the mon endpoints and the user secret for the connection mon_endpoints=$(grep mon_host /etc/ceph/ceph.conf | awk '{print $3}') my_secret=$(grep key /etc/ceph/keyring | awk '{print $3}') # Mount the filesystem mount -t ceph -o mds_namespace=myfs,name=admin,secret=$my_secret $mon_endpoints:/ /tmp/registry # See your mounted filesystem df -h Now you should have a mounted filesystem. If you have pushed images to the registry you will see a directory called docker . 1 ls /tmp/registry Try writing and reading a file to the shared filesystem. 1 2 3 4 5 echo \"Hello Rook\" > /tmp/registry/hello cat /tmp/registry/hello # delete the file when you ' re done rm -f /tmp/registry/hello Unmount the Filesystem \u00b6 To unmount the shared filesystem from the Direct Mount Pod: 1 2 umount /tmp/registry rmdir /tmp/registry No data will be deleted by unmounting the filesystem.","title":"Direct Tools"},{"location":"Troubleshooting/direct-tools/#start-the-direct-mount-pod","text":"To test mounting your Ceph volumes, start a pod with the necessary mounts. An example is provided in the examples test directory: 1 kubectl create -f deploy/examples/direct-mount.yaml After the pod is started, connect to it like this: 1 2 kubectl -n rook-ceph get pod -l app=rook-direct-mount $ kubectl -n rook-ceph exec -it  bash","title":"Start the Direct Mount Pod"},{"location":"Troubleshooting/direct-tools/#block-storage-tools","text":"After you have created a pool as described in the Block Storage topic, you can create a block image and mount it directly in a pod. This example will show how the Ceph rbd volume can be mounted in the direct mount pod. Create the Direct Mount Pod . Create a volume image (10MB): 1 2 3 4 5 rbd create replicapool/test --size 10 rbd info replicapool/test # Disable the rbd features that are not in the kernel module rbd feature disable replicapool/test fast-diff deep-flatten object-map Map the block volume and format it and mount it: 1 2 3 4 5 6 7 8 9 10 11 12 13 # Map the rbd device. If the Direct Mount Pod was started with \"hostNetwork: false\" this hangs and you have to stop it with Ctrl-C, # however the command still succeeds ; see https://github.com/rook/rook/issues/2021 rbd map replicapool/test # Find the device name, such as rbd0 lsblk | grep rbd # Format the volume ( only do this the first time or you will lose data ) mkfs.ext4 -m0 /dev/rbd0 # Mount the block device mkdir /tmp/rook-volume mount /dev/rbd0 /tmp/rook-volume Write and read a file: 1 2 echo \"Hello Rook\" > /tmp/rook-volume/hello cat /tmp/rook-volume/hello","title":"Block Storage Tools"},{"location":"Troubleshooting/direct-tools/#unmount-the-block-device","text":"Unmount the volume and unmap the kernel device: 1 2 umount /tmp/rook-volume rbd unmap /dev/rbd0","title":"Unmount the Block device"},{"location":"Troubleshooting/direct-tools/#shared-filesystem-tools","text":"After you have created a filesystem as described in the Shared Filesystem topic, you can mount the filesystem from multiple pods. The the other topic you may have mounted the filesystem already in the registry pod. Now we will mount the same filesystem in the Direct Mount pod. 
This is just a simple way to validate the Ceph filesystem and is not recommended for production Kubernetes pods. Follow Direct Mount Pod to start a pod with the necessary mounts and then proceed with the following commands after connecting to the pod. 1 2 3 4 5 6 7 8 9 10 11 12 # Create the directory mkdir /tmp/registry # Detect the mon endpoints and the user secret for the connection mon_endpoints=$(grep mon_host /etc/ceph/ceph.conf | awk '{print $3}') my_secret=$(grep key /etc/ceph/keyring | awk '{print $3}') # Mount the filesystem mount -t ceph -o mds_namespace=myfs,name=admin,secret=$my_secret $mon_endpoints:/ /tmp/registry # See your mounted filesystem df -h Now you should have a mounted filesystem. If you have pushed images to the registry you will see a directory called docker . 1 ls /tmp/registry Try writing and reading a file to the shared filesystem. 1 2 3 4 5 echo \"Hello Rook\" > /tmp/registry/hello cat /tmp/registry/hello # delete the file when you ' re done rm -f /tmp/registry/hello","title":"Shared Filesystem Tools"},{"location":"Troubleshooting/direct-tools/#unmount-the-filesystem","text":"To unmount the shared filesystem from the Direct Mount Pod: 1 2 umount /tmp/registry rmdir /tmp/registry No data will be deleted by unmounting the filesystem.","title":"Unmount the Filesystem"},{"location":"Troubleshooting/disaster-recovery/","text":"Under extenuating circumstances, steps may be necessary to recover the cluster health. There are several types of recovery addressed in this document. Restoring Mon Quorum \u00b6 Under extenuating circumstances, the mons may lose quorum. If the mons cannot form quorum again, there is a manual procedure to get the quorum going again. The only requirement is that at least one mon is still healthy. The following steps will remove the unhealthy mons from quorum and allow you to form a quorum again with a single mon, then grow the quorum back to the original size. The Rook Krew Plugin has a command restore-quorum that will walk you through the mon quorum automated restoration process. If the name of the healthy mon is c , you would run the command: 1 kubectl rook-ceph mons restore-quorum c See the restore-quorum documentation for more details. Restoring CRDs After Deletion \u00b6 When the Rook CRDs are deleted, the Rook operator will respond to the deletion event to attempt to clean up the cluster resources. If any data appears present in the cluster, Rook will refuse to allow the resources to be deleted since the operator will refuse to remove the finalizer on the CRs until the underlying data is deleted. For more details, see the dependency design doc . While it is good that the CRs will not be deleted and the underlying Ceph data and daemons continue to be available, the CRs will be stuck indefinitely in a Deleting state in which the operator will not continue to ensure cluster health. Upgrades will be blocked, further updates to the CRs are prevented, and so on. Since Kubernetes does not allow undeleting resources, the following procedure will allow you to restore the CRs to their prior state without even necessarily suffering cluster downtime. Note In the following commands, the affected CephCluster resource is called rook-ceph . If yours is named differently, the commands will need to be adjusted. Scale down the operator. 1 kubectl -n rook-ceph scale --replicas=0 deploy/rook-ceph-operator Backup all Rook CRs and critical metadata 1 2 3 4 5 6 # Store the ` CephCluster ` CR settings. Also, save other Rook CRs that are in terminating state. 
kubectl -n rook-ceph get cephcluster rook-ceph -o yaml > cluster.yaml # Backup critical secrets and configmaps in case something goes wrong later in the procedure kubectl -n rook-ceph get secret -o yaml > secrets.yaml kubectl -n rook-ceph get configmap -o yaml > configmaps.yaml (Optional, if webhook is enabled) Delete the ValidatingWebhookConfiguration . This is the resource which connects Rook custom resources to the operator pod's validating webhook. Because the operator is unavailable, we must temporarily disable the valdiating webhook in order to make changes. 1 2 3 ```console kubectl delete ValidatingWebhookConfiguration rook-ceph-webhook ``` Remove the owner references from all critical Rook resources that were referencing the CephCluster CR. Programmatically determine all such resources, using this command: 1 2 3 4 5 6 # Determine the ` CephCluster ` UID ROOK_UID=$(kubectl -n rook-ceph get cephcluster rook-ceph -o 'jsonpath={.metadata.uid}') # List all secrets, configmaps, services, deployments, and PVCs with that ownership UID. RESOURCES=$(kubectl -n rook-ceph get secret,configmap,service,deployment,pvc -o jsonpath='{range .items[?(@.metadata.ownerReferences[*].uid==\"'\"$ROOK_UID\"'\")]}{.kind}{\"/\"}{.metadata.name}{\"\\n\"}{end}') # Show the collected resources. kubectl -n rook-ceph get $RESOURCES Verify that all critical resources are shown in the output. The critical resources are these: Secrets: rook-ceph-admin-keyring , rook-ceph-config , rook-ceph-mon , rook-ceph-mons-keyring ConfigMap: rook-ceph-mon-endpoints Services: rook-ceph-mon-* , rook-ceph-mgr-* Deployments: rook-ceph-mon-* , rook-ceph-osd-* , rook-ceph-mgr-* PVCs (if applicable): rook-ceph-mon-* and the OSD PVCs (named -* , for example set1-data-* ) For each listed resource, remove the ownerReferences metadata field, in order to unlink it from the deleting CephCluster CR. To do so programmatically, use the command: 1 2 3 for resource in $(kubectl -n rook-ceph get $RESOURCES -o name); do kubectl -n rook-ceph patch $resource -p '{\"metadata\": {\"ownerReferences\":null}}' done For a manual alternative, issue kubectl edit on each resource, and remove the block matching: 1 2 3 4 5 6 7 ownerReferences : - apiVersion : ceph.rook.io/v1 blockOwnerDeletion : true controller : true kind : ` CephCluster` name : rook-ceph uid :  Before completing this step, validate these things. Failing to do so could result in data loss. Confirm that cluster.yaml contains the CephCluster CR. Confirm all critical resources listed above have had the ownerReference to the CephCluster CR removed. Remove the finalizer from the CephCluster resource. This will cause the resource to be immediately deleted by Kubernetes. 1 kubectl -n rook-ceph patch cephcluster/rook-ceph --type json --patch='[ { \"op\": \"remove\", \"path\": \"/metadata/finalizers\" } ]' After the finalizer is removed, the CephCluster will be immediately deleted. If all owner references were properly removed, all ceph daemons will continue running and there will be no downtime. Create the CephCluster CR with the same settings as previously 1 2 # Use the same cluster settings as exported in step 2 . 
kubectl create -f cluster.yaml If there are other CRs in terminating state such as CephBlockPools, CephObjectStores, or CephFilesystems, follow the above steps as well for those CRs: Backup the CR Remove the finalizer and confirm the CR is deleted (the underlying Ceph resources will be preserved) Create the CR again Scale up the operator 1 kubectl -n rook-ceph scale --replicas=1 deploy/rook-ceph-operator Watch the operator log to confirm that the reconcile completes successfully. 1 kubectl -n rook-ceph logs -f deployment/rook-ceph-operator Adopt an existing Rook Ceph cluster into a new Kubernetes cluster \u00b6 Situations this section can help resolve: The Kubernetes environment underlying a running Rook Ceph cluster failed catastrophically, requiring a new Kubernetes environment in which the user wishes to recover the previous Rook Ceph cluster. The user wishes to migrate their existing Rook Ceph cluster to a new Kubernetes environment, and downtime can be tolerated. Prerequisites \u00b6 A working Kubernetes cluster to which we will migrate the previous Rook Ceph cluster. At least one Ceph mon db is in quorum, and sufficient number of Ceph OSD is up and in before disaster. The previous Rook Ceph cluster is not running. Overview for Steps below \u00b6 Start a new and clean Rook Ceph cluster, with old CephCluster CephBlockPool CephFilesystem CephNFS CephObjectStore . Shut the new cluster down when it has been created successfully. Replace ceph-mon data with that of the old cluster. Replace fsid in secrets/rook-ceph-mon with that of the old one. Fix monmap in ceph-mon db. Fix ceph mon auth key. Disable auth. Start the new cluster, watch it resurrect. Fix admin auth key, and enable auth. Restart cluster for the final time. Steps \u00b6 Assuming dataHostPathData is /var/lib/rook , and the CephCluster trying to adopt is named rook-ceph . Make sure the old Kubernetes cluster is completely torn down and the new Kubernetes cluster is up and running without Rook Ceph. Backup /var/lib/rook in all the Rook Ceph nodes to a different directory. Backups will be used later. Pick a /var/lib/rook/rook-ceph/rook-ceph.config from any previous Rook Ceph node and save the old cluster fsid from its content. Remove /var/lib/rook from all the Rook Ceph nodes. Add identical CephCluster descriptor to the new Kubernetes cluster, especially identical spec.storage.config and spec.storage.nodes , except mon.count , which should be set to 1 . Add identical CephFilesystem CephBlockPool CephNFS CephObjectStore descriptors (if any) to the new Kubernetes cluster. Install Rook Ceph in the new Kubernetes cluster. Watch the operator logs with kubectl -n rook-ceph logs -f rook-ceph-operator-xxxxxxx , and wait until the orchestration has settled. STATE : Now the cluster will have rook-ceph-mon-a , rook-ceph-mgr-a , and all the auxiliary pods up and running, and zero (hopefully) rook-ceph-osd-ID-xxxxxx running. ceph -s output should report 1 mon, 1 mgr running, and all of the OSDs down, all PGs are in unknown state. Rook should not start any OSD daemon since all devices belongs to the old cluster (which have a different fsid ). Run kubectl -n rook-ceph exec -it rook-ceph-mon-a-xxxxxxxx bash to enter the rook-ceph-mon-a pod, 1 2 mon-a# cat /etc/ceph/keyring-store/keyring # save this keyring content for later use mon-a# exit Stop the Rook operator by running kubectl -n rook-ceph edit deploy/rook-ceph-operator and set replicas to 0 . 
Stop cluster daemons by running kubectl -n rook-ceph delete deploy/X where X is every deployment in namespace rook-ceph , except rook-ceph-operator and rook-ceph-tools . Save the rook-ceph-mon-a address with kubectl -n rook-ceph get cm/rook-ceph-mon-endpoints -o yaml in the new Kubernetes cluster for later use. SSH to the host where rook-ceph-mon-a in the new Kubernetes cluster resides. Remove /var/lib/rook/mon-a Pick a healthy rook-ceph-mon-ID directory ( /var/lib/rook/mon-ID ) in the previous backup, copy to /var/lib/rook/mon-a . ID is any healthy mon node ID of the old cluster. Replace /var/lib/rook/mon-a/keyring with the saved keyring, preserving only the [mon.] section, remove [client.admin] section. Run docker run -it --rm -v /var/lib/rook:/var/lib/rook ceph/ceph:v14.2.1-20190430 bash . The Docker image tag should match the Ceph version used in the Rook cluster. The /etc/ceph/ceph.conf file needs to exist for ceph-mon to work. 1 2 3 4 5 6 7 8 9 10 11 12 13 touch /etc/ceph/ceph.conf cd /var/lib/rook ceph-mon --extract-monmap monmap --mon-data ./mon-a/data # Extract monmap from old ceph-mon db and save as monmap monmaptool --print monmap # Print the monmap content, which reflects the old cluster ceph-mon configuration. monmaptool --rm a monmap # Delete `a` from monmap. monmaptool --rm b monmap # Repeat, and delete `b` from monmap. monmaptool --rm c monmap # Repeat this pattern until all the old ceph-mons are removed monmaptool --rm d monmap monmaptool --rm e monmap monmaptool --addv a [v2:10.77.2.216:3300,v1:10.77.2.216:6789] monmap # Replace it with the rook-ceph-mon-a address you got from previous command. ceph-mon --inject-monmap monmap --mon-data ./mon-a/data # Replace monmap in ceph-mon db with our modified version. rm monmap exit Tell Rook to run as old cluster by running kubectl -n rook-ceph edit secret/rook-ceph-mon and changing fsid to the original fsid . Note that the fsid is base64 encoded and must not contain a trailing carriage return. For example: 1 echo -n a811f99a-d865-46b7-8f2c-f94c064e4356 | base64 # Replace with the fsid from your old cluster. Disable authentication by running kubectl -n rook-ceph edit cm/rook-config-override and adding content below: 1 2 3 4 5 6 7 data : config : | [global] auth cluster required = none auth service required = none auth client required = none auth supported = none Bring the Rook Ceph operator back online by running kubectl -n rook-ceph edit deploy/rook-ceph-operator and set replicas to 1 . Watch the operator logs with kubectl -n rook-ceph logs -f rook-ceph-operator-xxxxxxx , and wait until the orchestration has settled. STATE : Now the new cluster should be up and running with authentication disabled. ceph -s should report 1 mon & 1 mgr & all of the OSDs up and running, and all PGs in either active or degraded state. Run kubectl -n rook-ceph exec -it rook-ceph-tools-XXXXXXX bash to enter tools pod: 1 2 3 4 vi key # [ paste keyring content saved before, preserving only ` [ client admin ] ` section ] ceph auth import -i key rm key Re-enable authentication by running kubectl -n rook-ceph edit cm/rook-config-override and removing auth configuration added in previous steps. Stop the Rook operator by running kubectl -n rook-ceph edit deploy/rook-ceph-operator and set replicas to 0 . Shut down entire new cluster by running kubectl -n rook-ceph delete deploy/X where X is every deployment in namespace rook-ceph , except rook-ceph-operator and rook-ceph-tools , again. This time OSD daemons are present and should be removed too. 
Bring the Rook Ceph operator back online by running kubectl -n rook-ceph edit deploy/rook-ceph-operator and set replicas to 1 . Watch the operator logs with kubectl -n rook-ceph logs -f rook-ceph-operator-xxxxxxx , and wait until the orchestration has settled. STATE : Now the new cluster should be up and running with authentication enabled. ceph -s output should not change much comparing to previous steps. Backing up and restoring a cluster based on PVCs into a new Kubernetes cluster \u00b6 It is possible to migrate/restore an rook/ceph cluster from an existing Kubernetes cluster to a new one without resorting to SSH access or ceph tooling. This allows doing the migration using standard kubernetes resources only. This guide assumes the following: You have a CephCluster that uses PVCs to persist mon and osd data. Let's call it the \"old cluster\" You can restore the PVCs as-is in the new cluster. Usually this is done by taking regular snapshots of the PVC volumes and using a tool that can re-create PVCs from these snapshots in the underlying cloud provider. Velero is one such tool. You have regular backups of the secrets and configmaps in the rook-ceph namespace. Velero provides this functionality too. Do the following in the new cluster: Stop the rook operator by scaling the deployment rook-ceph-operator down to zero: kubectl -n rook-ceph scale deployment rook-ceph-operator --replicas 0 and deleting the other deployments. An example command to do this is k -n rook-ceph delete deployment -l operator!=rook Restore the rook PVCs to the new cluster. Copy the keyring and fsid secrets from the old cluster: rook-ceph-mgr-a-keyring , rook-ceph-mon , rook-ceph-mons-keyring , rook-ceph-osd-0-keyring , ... Delete mon services and copy them from the old cluster: rook-ceph-mon-a , rook-ceph-mon-b , ... Note that simply re-applying won't work because the goal here is to restore the clusterIP in each service and this field is immutable in Service resources. Copy the endpoints configmap from the old cluster: rook-ceph-mon-endpoints Scale the rook operator up again : kubectl -n rook-ceph scale deployment rook-ceph-operator --replicas 1 Wait until the reconciliation is over. Restoring the Rook cluster after the Rook namespace is deleted \u00b6 When the rook-ceph namespace is accidentally deleted, the good news is that the cluster can be restored. With the content in the directory dataDirHostPath and the original OSD disks, the ceph cluster could be restored with this guide. You need to manually create a ConfigMap and a Secret to make it work. The information required for the ConfigMap and Secret can be found in the dataDirHostPath directory. The first resource is the secret named rook-ceph-mon as seen in this example below: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 apiVersion : v1 data : ceph-secret : QVFCZ0h6VmorcVNhSGhBQXVtVktNcjcrczNOWW9Oa2psYkErS0E9PQ== ceph-username : Y2xpZW50LmFkbWlu fsid : M2YyNzE4NDEtNjE4OC00N2MxLWIzZmQtOTBmZDRmOTc4Yzc2 mon-secret : QVFCZ0h6VmorcVNhSGhBQXVtVktNcjcrczNOWW9Oa2psYkErS0E9PQ== kind : Secret metadata : finalizers : - ceph.rook.io/disaster-protection name : rook-ceph-mon namespace : rook-ceph ownerReferences : null type : kubernetes.io/rook The values for the secret can be found in $dataDirHostPath/rook-ceph/client.admin.keyring and $dataDirHostPath/rook-ceph/rook-ceph.config . - ceph-secret and mon-secret are to be filled with the client.admin 's keyring contents. - ceph-username : set to the string client.admin - fsid : set to the original ceph cluster id. 
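For example, the raw values can be read from those files before encoding them. A sketch, assuming dataDirHostPath is /var/lib/rook (adjust the paths to your cluster):
grep 'key = ' /var/lib/rook/rook-ceph/client.admin.keyring | awk '{print $3}' # value for ceph-secret and mon-secret
grep 'fsid' /var/lib/rook/rook-ceph/rook-ceph.config | awk '{print $3}' # value for fsid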
All the fields in data section need to be encoded in base64. Coding could be done like this: 1 echo -n \"string to code\" | base64 -i - Now save the secret as rook-ceph-mon.yaml , to be created later in the restore. The second resource is the configmap named rook-ceph-mon-endpoints as seen in this example below: 1 2 3 4 5 6 7 8 9 10 11 12 13 apiVersion : v1 data : csi-cluster-config-json : '[{\"clusterID\":\"rook-ceph\",\"monitors\":[\"169.169.241.153:6789\",\"169.169.82.57:6789\",\"169.169.7.81:6789\"],\"namespace\":\"\"}]' data : k=169.169.241.153:6789,m=169.169.82.57:6789,o=169.169.7.81:6789 mapping : '{\"node\":{\"k\":{\"Name\":\"10.138.55.111\",\"Hostname\":\"10.138.55.111\",\"Address\":\"10.138.55.111\"},\"m\":{\"Name\":\"10.138.55.120\",\"Hostname\":\"10.138.55.120\",\"Address\":\"10.138.55.120\"},\"o\":{\"Name\":\"10.138.55.112\",\"Hostname\":\"10.138.55.112\",\"Address\":\"10.138.55.112\"}}}' maxMonId : \"15\" kind : ConfigMap metadata : finalizers : - ceph.rook.io/disaster-protection name : rook-ceph-mon-endpoints namespace : rook-ceph ownerReferences : null The Monitor's service IPs are kept in the monitor data store and you need to create them by original ones. After you create this configmap with the original service IPs, the rook operator will create the correct services for you with IPs matching in the monitor data store. Along with monitor ids, their service IPs and mapping relationship of them can be found in dataDirHostPath/rook-ceph/rook-ceph.config, for example: 1 2 3 4 [global] fsid = 3f271841-6188-47c1-b3fd-90fd4f978c76 mon initial members = m o k mon host = [v2:169.169.82.57:3300,v1:169.169.82.57:6789],[v2:169.169.7.81:3300,v1:169.169.7.81:6789],[v2:169.169.241.153:3300,v1:169.169.241.153:6789] mon initial members and mon host are holding sequences of monitors' id and IP respectively; the sequence are going in the same order among monitors as a result you can tell which monitors have which service IP addresses. Modify your rook-ceph-mon-endpoints.yaml on fields csi-cluster-config-json and data based on the understanding of rook-ceph.config above. The field mapping tells rook where to schedule monitor's pods. you could search in dataDirHostPath in all Ceph cluster hosts for mon-m,mon-o,mon-k . If you find mon-m in host 10.138.55.120 , you should fill 10.138.55.120 in field mapping for m . Others are the same. Update the maxMonId to be the max numeric ID of the highest monitor ID. For example, 15 is the 0-based ID for mon o . Now save this configmap in the file rook-ceph-mon-endpoints.yaml, to be created later in the restore. Now that you have the info for the secret and the configmap, you are ready to restore the running cluster. Deploy Rook Ceph using the YAML files or Helm, with the same settings you had previously. 1 kubectl create -f crds.yaml -f common.yaml -f operator.yaml After the operator is running, create the configmap and secret you have just crafted: 1 kubectl create -f rook-ceph-mon.yaml -f rook-ceph-mon-endpoints.yaml Create your Ceph cluster CR (if possible, with the same settings as existed previously): 1 kubectl create -f cluster.yaml Now your Rook Ceph cluster should be running again.","title":"Disaster Recovery"},{"location":"Troubleshooting/disaster-recovery/#restoring-mon-quorum","text":"Under extenuating circumstances, the mons may lose quorum. If the mons cannot form quorum again, there is a manual procedure to get the quorum going again. The only requirement is that at least one mon is still healthy. 
The following steps will remove the unhealthy mons from quorum and allow you to form a quorum again with a single mon, then grow the quorum back to the original size. The Rook Krew Plugin has a command restore-quorum that will walk you through the mon quorum automated restoration process. If the name of the healthy mon is c , you would run the command: 1 kubectl rook-ceph mons restore-quorum c See the restore-quorum documentation for more details.","title":"Restoring Mon Quorum"},{"location":"Troubleshooting/disaster-recovery/#restoring-crds-after-deletion","text":"When the Rook CRDs are deleted, the Rook operator will respond to the deletion event to attempt to clean up the cluster resources. If any data appears present in the cluster, Rook will refuse to allow the resources to be deleted since the operator will refuse to remove the finalizer on the CRs until the underlying data is deleted. For more details, see the dependency design doc . While it is good that the CRs will not be deleted and the underlying Ceph data and daemons continue to be available, the CRs will be stuck indefinitely in a Deleting state in which the operator will not continue to ensure cluster health. Upgrades will be blocked, further updates to the CRs are prevented, and so on. Since Kubernetes does not allow undeleting resources, the following procedure will allow you to restore the CRs to their prior state without even necessarily suffering cluster downtime. Note In the following commands, the affected CephCluster resource is called rook-ceph . If yours is named differently, the commands will need to be adjusted. Scale down the operator. 1 kubectl -n rook-ceph scale --replicas=0 deploy/rook-ceph-operator Backup all Rook CRs and critical metadata 1 2 3 4 5 6 # Store the ` CephCluster ` CR settings. Also, save other Rook CRs that are in terminating state. kubectl -n rook-ceph get cephcluster rook-ceph -o yaml > cluster.yaml # Backup critical secrets and configmaps in case something goes wrong later in the procedure kubectl -n rook-ceph get secret -o yaml > secrets.yaml kubectl -n rook-ceph get configmap -o yaml > configmaps.yaml (Optional, if webhook is enabled) Delete the ValidatingWebhookConfiguration . This is the resource which connects Rook custom resources to the operator pod's validating webhook. Because the operator is unavailable, we must temporarily disable the valdiating webhook in order to make changes. 1 2 3 ```console kubectl delete ValidatingWebhookConfiguration rook-ceph-webhook ``` Remove the owner references from all critical Rook resources that were referencing the CephCluster CR. Programmatically determine all such resources, using this command: 1 2 3 4 5 6 # Determine the ` CephCluster ` UID ROOK_UID=$(kubectl -n rook-ceph get cephcluster rook-ceph -o 'jsonpath={.metadata.uid}') # List all secrets, configmaps, services, deployments, and PVCs with that ownership UID. RESOURCES=$(kubectl -n rook-ceph get secret,configmap,service,deployment,pvc -o jsonpath='{range .items[?(@.metadata.ownerReferences[*].uid==\"'\"$ROOK_UID\"'\")]}{.kind}{\"/\"}{.metadata.name}{\"\\n\"}{end}') # Show the collected resources. kubectl -n rook-ceph get $RESOURCES Verify that all critical resources are shown in the output. 
The critical resources are these: Secrets: rook-ceph-admin-keyring , rook-ceph-config , rook-ceph-mon , rook-ceph-mons-keyring ConfigMap: rook-ceph-mon-endpoints Services: rook-ceph-mon-* , rook-ceph-mgr-* Deployments: rook-ceph-mon-* , rook-ceph-osd-* , rook-ceph-mgr-* PVCs (if applicable): rook-ceph-mon-* and the OSD PVCs (named -* , for example set1-data-* ) For each listed resource, remove the ownerReferences metadata field, in order to unlink it from the deleting CephCluster CR. To do so programmatically, use the command: 1 2 3 for resource in $(kubectl -n rook-ceph get $RESOURCES -o name); do kubectl -n rook-ceph patch $resource -p '{\"metadata\": {\"ownerReferences\":null}}' done For a manual alternative, issue kubectl edit on each resource, and remove the block matching: 1 2 3 4 5 6 7 ownerReferences : - apiVersion : ceph.rook.io/v1 blockOwnerDeletion : true controller : true kind : ` CephCluster` name : rook-ceph uid :  Before completing this step, validate these things. Failing to do so could result in data loss. Confirm that cluster.yaml contains the CephCluster CR. Confirm all critical resources listed above have had the ownerReference to the CephCluster CR removed. Remove the finalizer from the CephCluster resource. This will cause the resource to be immediately deleted by Kubernetes. 1 kubectl -n rook-ceph patch cephcluster/rook-ceph --type json --patch='[ { \"op\": \"remove\", \"path\": \"/metadata/finalizers\" } ]' After the finalizer is removed, the CephCluster will be immediately deleted. If all owner references were properly removed, all ceph daemons will continue running and there will be no downtime. Create the CephCluster CR with the same settings as previously 1 2 # Use the same cluster settings as exported in step 2 . kubectl create -f cluster.yaml If there are other CRs in terminating state such as CephBlockPools, CephObjectStores, or CephFilesystems, follow the above steps as well for those CRs: Backup the CR Remove the finalizer and confirm the CR is deleted (the underlying Ceph resources will be preserved) Create the CR again Scale up the operator 1 kubectl -n rook-ceph scale --replicas=1 deploy/rook-ceph-operator Watch the operator log to confirm that the reconcile completes successfully. 1 kubectl -n rook-ceph logs -f deployment/rook-ceph-operator","title":"Restoring CRDs After Deletion"},{"location":"Troubleshooting/disaster-recovery/#adopt-an-existing-rook-ceph-cluster-into-a-new-kubernetes-cluster","text":"Situations this section can help resolve: The Kubernetes environment underlying a running Rook Ceph cluster failed catastrophically, requiring a new Kubernetes environment in which the user wishes to recover the previous Rook Ceph cluster. The user wishes to migrate their existing Rook Ceph cluster to a new Kubernetes environment, and downtime can be tolerated.","title":"Adopt an existing Rook Ceph cluster into a new Kubernetes cluster"},{"location":"Troubleshooting/disaster-recovery/#prerequisites","text":"A working Kubernetes cluster to which we will migrate the previous Rook Ceph cluster. At least one Ceph mon db is in quorum, and sufficient number of Ceph OSD is up and in before disaster. The previous Rook Ceph cluster is not running.","title":"Prerequisites"},{"location":"Troubleshooting/disaster-recovery/#overview-for-steps-below","text":"Start a new and clean Rook Ceph cluster, with old CephCluster CephBlockPool CephFilesystem CephNFS CephObjectStore . Shut the new cluster down when it has been created successfully. 
Replace ceph-mon data with that of the old cluster. Replace fsid in secrets/rook-ceph-mon with that of the old one. Fix monmap in ceph-mon db. Fix ceph mon auth key. Disable auth. Start the new cluster, watch it resurrect. Fix admin auth key, and enable auth. Restart cluster for the final time.","title":"Overview for Steps below"},{"location":"Troubleshooting/disaster-recovery/#steps","text":"Assuming dataHostPathData is /var/lib/rook , and the CephCluster trying to adopt is named rook-ceph . Make sure the old Kubernetes cluster is completely torn down and the new Kubernetes cluster is up and running without Rook Ceph. Backup /var/lib/rook in all the Rook Ceph nodes to a different directory. Backups will be used later. Pick a /var/lib/rook/rook-ceph/rook-ceph.config from any previous Rook Ceph node and save the old cluster fsid from its content. Remove /var/lib/rook from all the Rook Ceph nodes. Add identical CephCluster descriptor to the new Kubernetes cluster, especially identical spec.storage.config and spec.storage.nodes , except mon.count , which should be set to 1 . Add identical CephFilesystem CephBlockPool CephNFS CephObjectStore descriptors (if any) to the new Kubernetes cluster. Install Rook Ceph in the new Kubernetes cluster. Watch the operator logs with kubectl -n rook-ceph logs -f rook-ceph-operator-xxxxxxx , and wait until the orchestration has settled. STATE : Now the cluster will have rook-ceph-mon-a , rook-ceph-mgr-a , and all the auxiliary pods up and running, and zero (hopefully) rook-ceph-osd-ID-xxxxxx running. ceph -s output should report 1 mon, 1 mgr running, and all of the OSDs down, all PGs are in unknown state. Rook should not start any OSD daemon since all devices belongs to the old cluster (which have a different fsid ). Run kubectl -n rook-ceph exec -it rook-ceph-mon-a-xxxxxxxx bash to enter the rook-ceph-mon-a pod, 1 2 mon-a# cat /etc/ceph/keyring-store/keyring # save this keyring content for later use mon-a# exit Stop the Rook operator by running kubectl -n rook-ceph edit deploy/rook-ceph-operator and set replicas to 0 . Stop cluster daemons by running kubectl -n rook-ceph delete deploy/X where X is every deployment in namespace rook-ceph , except rook-ceph-operator and rook-ceph-tools . Save the rook-ceph-mon-a address with kubectl -n rook-ceph get cm/rook-ceph-mon-endpoints -o yaml in the new Kubernetes cluster for later use. SSH to the host where rook-ceph-mon-a in the new Kubernetes cluster resides. Remove /var/lib/rook/mon-a Pick a healthy rook-ceph-mon-ID directory ( /var/lib/rook/mon-ID ) in the previous backup, copy to /var/lib/rook/mon-a . ID is any healthy mon node ID of the old cluster. Replace /var/lib/rook/mon-a/keyring with the saved keyring, preserving only the [mon.] section, remove [client.admin] section. Run docker run -it --rm -v /var/lib/rook:/var/lib/rook ceph/ceph:v14.2.1-20190430 bash . The Docker image tag should match the Ceph version used in the Rook cluster. The /etc/ceph/ceph.conf file needs to exist for ceph-mon to work. 1 2 3 4 5 6 7 8 9 10 11 12 13 touch /etc/ceph/ceph.conf cd /var/lib/rook ceph-mon --extract-monmap monmap --mon-data ./mon-a/data # Extract monmap from old ceph-mon db and save as monmap monmaptool --print monmap # Print the monmap content, which reflects the old cluster ceph-mon configuration. monmaptool --rm a monmap # Delete `a` from monmap. monmaptool --rm b monmap # Repeat, and delete `b` from monmap. 
monmaptool --rm c monmap # Repeat this pattern until all the old ceph-mons are removed monmaptool --rm d monmap monmaptool --rm e monmap monmaptool --addv a [v2:10.77.2.216:3300,v1:10.77.2.216:6789] monmap # Replace it with the rook-ceph-mon-a address you got from the previous command. ceph-mon --inject-monmap monmap --mon-data ./mon-a/data # Replace monmap in ceph-mon db with our modified version. rm monmap exit Tell Rook to run as the old cluster by running kubectl -n rook-ceph edit secret/rook-ceph-mon and changing fsid to the original fsid . Note that the fsid is base64 encoded and must not contain a trailing carriage return. For example: 1 echo -n a811f99a-d865-46b7-8f2c-f94c064e4356 | base64 # Replace with the fsid from your old cluster. Disable authentication by running kubectl -n rook-ceph edit cm/rook-config-override and adding the content below: 1 2 3 4 5 6 7 data : config : | [global] auth cluster required = none auth service required = none auth client required = none auth supported = none Bring the Rook Ceph operator back online by running kubectl -n rook-ceph edit deploy/rook-ceph-operator and set replicas to 1 . Watch the operator logs with kubectl -n rook-ceph logs -f rook-ceph-operator-xxxxxxx , and wait until the orchestration has settled. STATE : Now the new cluster should be up and running with authentication disabled. ceph -s should report 1 mon & 1 mgr & all of the OSDs up and running, and all PGs in either active or degraded state. Run kubectl -n rook-ceph exec -it rook-ceph-tools-XXXXXXX bash to enter the tools pod: 1 2 3 4 vi key # [ paste keyring content saved before, preserving only the `[client.admin]` section ] ceph auth import -i key rm key Re-enable authentication by running kubectl -n rook-ceph edit cm/rook-config-override and removing the auth configuration added in previous steps. Stop the Rook operator by running kubectl -n rook-ceph edit deploy/rook-ceph-operator and set replicas to 0 . Shut down the entire new cluster by running kubectl -n rook-ceph delete deploy/X where X is every deployment in namespace rook-ceph , except rook-ceph-operator and rook-ceph-tools , again. This time OSD daemons are present and should be removed too. Bring the Rook Ceph operator back online by running kubectl -n rook-ceph edit deploy/rook-ceph-operator and set replicas to 1 . Watch the operator logs with kubectl -n rook-ceph logs -f rook-ceph-operator-xxxxxxx , and wait until the orchestration has settled. STATE : Now the new cluster should be up and running with authentication enabled. ceph -s output should not change much compared to the previous steps.","title":"Steps"},{"location":"Troubleshooting/disaster-recovery/#backing-up-and-restoring-a-cluster-based-on-pvcs-into-a-new-kubernetes-cluster","text":"It is possible to migrate/restore a Rook/Ceph cluster from an existing Kubernetes cluster to a new one without resorting to SSH access or ceph tooling. This allows doing the migration using standard Kubernetes resources only. This guide assumes the following: You have a CephCluster that uses PVCs to persist mon and osd data. Let's call it the \"old cluster\" You can restore the PVCs as-is in the new cluster. Usually this is done by taking regular snapshots of the PVC volumes and using a tool that can re-create PVCs from these snapshots in the underlying cloud provider. Velero is one such tool. You have regular backups of the secrets and configmaps in the rook-ceph namespace. Velero provides this functionality too. 
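For illustration, a minimal sketch of taking such a backup with Velero, assuming Velero and a volume-snapshot plugin are already installed in the old cluster (the backup name here is arbitrary):

```bash
# Sanity-check what the old cluster holds that must be captured:
# the mon/OSD PVCs plus the secrets and configmaps in the rook-ceph namespace.
kubectl -n rook-ceph get pvc,secrets,configmaps

# Back up the whole rook-ceph namespace, snapshotting the PVC volumes as well.
velero backup create rook-ceph-backup --include-namespaces rook-ceph --snapshot-volumes
velero backup describe rook-ceph-backup --details
```

The steps below describe which of the backed-up resources to restore in the new cluster, and in what order. 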
Do the following in the new cluster: Stop the rook operator by scaling the deployment rook-ceph-operator down to zero: kubectl -n rook-ceph scale deployment rook-ceph-operator --replicas 0 and deleting the other deployments. An example command to do this is k -n rook-ceph delete deployment -l operator!=rook Restore the rook PVCs to the new cluster. Copy the keyring and fsid secrets from the old cluster: rook-ceph-mgr-a-keyring , rook-ceph-mon , rook-ceph-mons-keyring , rook-ceph-osd-0-keyring , ... Delete mon services and copy them from the old cluster: rook-ceph-mon-a , rook-ceph-mon-b , ... Note that simply re-applying won't work because the goal here is to restore the clusterIP in each service and this field is immutable in Service resources. Copy the endpoints configmap from the old cluster: rook-ceph-mon-endpoints Scale the rook operator up again : kubectl -n rook-ceph scale deployment rook-ceph-operator --replicas 1 Wait until the reconciliation is over.","title":"Backing up and restoring a cluster based on PVCs into a new Kubernetes cluster"},{"location":"Troubleshooting/disaster-recovery/#restoring-the-rook-cluster-after-the-rook-namespace-is-deleted","text":"When the rook-ceph namespace is accidentally deleted, the good news is that the cluster can be restored. With the content in the directory dataDirHostPath and the original OSD disks, the ceph cluster could be restored with this guide. You need to manually create a ConfigMap and a Secret to make it work. The information required for the ConfigMap and Secret can be found in the dataDirHostPath directory. The first resource is the secret named rook-ceph-mon as seen in this example below: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 apiVersion : v1 data : ceph-secret : QVFCZ0h6VmorcVNhSGhBQXVtVktNcjcrczNOWW9Oa2psYkErS0E9PQ== ceph-username : Y2xpZW50LmFkbWlu fsid : M2YyNzE4NDEtNjE4OC00N2MxLWIzZmQtOTBmZDRmOTc4Yzc2 mon-secret : QVFCZ0h6VmorcVNhSGhBQXVtVktNcjcrczNOWW9Oa2psYkErS0E9PQ== kind : Secret metadata : finalizers : - ceph.rook.io/disaster-protection name : rook-ceph-mon namespace : rook-ceph ownerReferences : null type : kubernetes.io/rook The values for the secret can be found in $dataDirHostPath/rook-ceph/client.admin.keyring and $dataDirHostPath/rook-ceph/rook-ceph.config . - ceph-secret and mon-secret are to be filled with the client.admin 's keyring contents. - ceph-username : set to the string client.admin - fsid : set to the original ceph cluster id. All the fields in data section need to be encoded in base64. Coding could be done like this: 1 echo -n \"string to code\" | base64 -i - Now save the secret as rook-ceph-mon.yaml , to be created later in the restore. 
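As a minimal sketch, assuming the default dataDirHostPath of /var/lib/rook and the file layout shown above (adjust the paths for your environment), the three values could be gathered and encoded like this:

```bash
# Assumed default dataDirHostPath; change if the cluster used a different path.
DATA_DIR=/var/lib/rook/rook-ceph

# Pull the fsid and the client.admin key out of the old config and keyring files.
FSID=$(grep -m1 fsid "${DATA_DIR}/rook-ceph.config" | awk '{print $3}')
ADMIN_KEY=$(grep -m1 'key =' "${DATA_DIR}/client.admin.keyring" | awk '{print $3}')

# Base64-encode the values for the Secret's data section; -n avoids a trailing newline.
echo -n "${ADMIN_KEY}" | base64    # ceph-secret and mon-secret
echo -n "client.admin" | base64    # ceph-username
echo -n "${FSID}" | base64         # fsid
```
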
The second resource is the configmap named rook-ceph-mon-endpoints as seen in this example below: 1 2 3 4 5 6 7 8 9 10 11 12 13 apiVersion : v1 data : csi-cluster-config-json : '[{\"clusterID\":\"rook-ceph\",\"monitors\":[\"169.169.241.153:6789\",\"169.169.82.57:6789\",\"169.169.7.81:6789\"],\"namespace\":\"\"}]' data : k=169.169.241.153:6789,m=169.169.82.57:6789,o=169.169.7.81:6789 mapping : '{\"node\":{\"k\":{\"Name\":\"10.138.55.111\",\"Hostname\":\"10.138.55.111\",\"Address\":\"10.138.55.111\"},\"m\":{\"Name\":\"10.138.55.120\",\"Hostname\":\"10.138.55.120\",\"Address\":\"10.138.55.120\"},\"o\":{\"Name\":\"10.138.55.112\",\"Hostname\":\"10.138.55.112\",\"Address\":\"10.138.55.112\"}}}' maxMonId : \"15\" kind : ConfigMap metadata : finalizers : - ceph.rook.io/disaster-protection name : rook-ceph-mon-endpoints namespace : rook-ceph ownerReferences : null The Monitor's service IPs are kept in the monitor data store and you need to create them by original ones. After you create this configmap with the original service IPs, the rook operator will create the correct services for you with IPs matching in the monitor data store. Along with monitor ids, their service IPs and mapping relationship of them can be found in dataDirHostPath/rook-ceph/rook-ceph.config, for example: 1 2 3 4 [global] fsid = 3f271841-6188-47c1-b3fd-90fd4f978c76 mon initial members = m o k mon host = [v2:169.169.82.57:3300,v1:169.169.82.57:6789],[v2:169.169.7.81:3300,v1:169.169.7.81:6789],[v2:169.169.241.153:3300,v1:169.169.241.153:6789] mon initial members and mon host are holding sequences of monitors' id and IP respectively; the sequence are going in the same order among monitors as a result you can tell which monitors have which service IP addresses. Modify your rook-ceph-mon-endpoints.yaml on fields csi-cluster-config-json and data based on the understanding of rook-ceph.config above. The field mapping tells rook where to schedule monitor's pods. you could search in dataDirHostPath in all Ceph cluster hosts for mon-m,mon-o,mon-k . If you find mon-m in host 10.138.55.120 , you should fill 10.138.55.120 in field mapping for m . Others are the same. Update the maxMonId to be the max numeric ID of the highest monitor ID. For example, 15 is the 0-based ID for mon o . Now save this configmap in the file rook-ceph-mon-endpoints.yaml, to be created later in the restore. Now that you have the info for the secret and the configmap, you are ready to restore the running cluster. Deploy Rook Ceph using the YAML files or Helm, with the same settings you had previously. 1 kubectl create -f crds.yaml -f common.yaml -f operator.yaml After the operator is running, create the configmap and secret you have just crafted: 1 kubectl create -f rook-ceph-mon.yaml -f rook-ceph-mon-endpoints.yaml Create your Ceph cluster CR (if possible, with the same settings as existed previously): 1 kubectl create -f cluster.yaml Now your Rook Ceph cluster should be running again.","title":"Restoring the Rook cluster after the Rook namespace is deleted"},{"location":"Troubleshooting/krew-plugin/","text":"The Rook Krew plugin is a tool to help troubleshoot your Rook cluster. Here are a few of the operations that the plugin will assist with: - Health of the Rook pods - Health of the Ceph cluster - Create \"debug\" pods for mons and OSDs that are in need of special Ceph maintenance operations - Restart the operator - Purge an OSD - Run any ceph command See the kubectl-rook-ceph documentation for more details. 
Installation \u00b6 Install Krew Install Rook plugin 1 kubectl krew install rook-ceph Ceph Commands \u00b6 Run any ceph command with kubectl rook-ceph ceph  . For example, get the Ceph status: 1 kubectl rook-ceph ceph status Output: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 cluster: id: a1ac6554-4cc8-4c3b-a8a3-f17f5ec6f529 health: HEALTH_OK services: mon: 3 daemons, quorum a,b,c (age 11m) mgr: a(active, since 10m) mds: 1/1 daemons up, 1 hot standby osd: 3 osds: 3 up (since 10m), 3 in (since 8d) data: volumes: 1/1 healthy pools: 6 pools, 137 pgs objects: 34 objects, 4.1 KiB usage: 58 MiB used, 59 GiB / 59 GiB avail pgs: 137 active+clean io: client: 1.2 KiB/s rd, 2 op/s rd, 0 op/s wr Reference: Ceph Status Debug Mode \u00b6 Debug mode can be useful when a MON or OSD needs advanced maintenance operations that require the daemon to be stopped. Ceph tools such as ceph-objectstore-tool , ceph-bluestore-tool , or ceph-monstore-tool are commonly used in these scenarios. Debug mode will set up the MON or OSD so that these commands can be run. Start the debug pod for mon b 1 kubectl rook-ceph debug start rook-ceph-mon-b Stop the debug pod for mon b 1 kubectl rook-ceph debug stop rook-ceph-mon-b Reference: Debug Mode","title":"Krew Plugin"},{"location":"Troubleshooting/krew-plugin/#installation","text":"Install Krew Install Rook plugin 1 kubectl krew install rook-ceph","title":"Installation"},{"location":"Troubleshooting/krew-plugin/#ceph-commands","text":"Run any ceph command with kubectl rook-ceph ceph  . For example, get the Ceph status: 1 kubectl rook-ceph ceph status Output: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 cluster: id: a1ac6554-4cc8-4c3b-a8a3-f17f5ec6f529 health: HEALTH_OK services: mon: 3 daemons, quorum a,b,c (age 11m) mgr: a(active, since 10m) mds: 1/1 daemons up, 1 hot standby osd: 3 osds: 3 up (since 10m), 3 in (since 8d) data: volumes: 1/1 healthy pools: 6 pools, 137 pgs objects: 34 objects, 4.1 KiB usage: 58 MiB used, 59 GiB / 59 GiB avail pgs: 137 active+clean io: client: 1.2 KiB/s rd, 2 op/s rd, 0 op/s wr Reference: Ceph Status","title":"Ceph Commands"},{"location":"Troubleshooting/krew-plugin/#debug-mode","text":"Debug mode can be useful when a MON or OSD needs advanced maintenance operations that require the daemon to be stopped. Ceph tools such as ceph-objectstore-tool , ceph-bluestore-tool , or ceph-monstore-tool are commonly used in these scenarios. Debug mode will set up the MON or OSD so that these commands can be run. Start the debug pod for mon b 1 kubectl rook-ceph debug start rook-ceph-mon-b Stop the debug pod for mon b 1 kubectl rook-ceph debug stop rook-ceph-mon-b Reference: Debug Mode","title":"Debug Mode"},{"location":"Troubleshooting/openshift-common-issues/","text":"Enable Monitoring in the Storage Dashboard \u00b6 OpenShift Console uses OpenShift Prometheus for monitoring and populating data in Storage Dashboard. Additional configuration is required to monitor the Ceph Cluster from the storage dashboard. Change the monitoring namespace to openshift-monitoring Change the namespace of the RoleBinding rook-ceph-metrics from rook-ceph to openshift-monitoring for the prometheus-k8s ServiceAccount in rbac.yaml . 1 2 3 4 subjects : - kind : ServiceAccount name : prometheus-k8s namespace : openshift-monitoring Enable Ceph Cluster monitoring Follow ceph-monitoring/prometheus-alerts . 
Set the required label on the namespace 1 oc label namespace rook-ceph \"openshift.io/cluster-monitoring=true\" Troubleshoot Monitoring Issues \u00b6 Attention Switch to rook-ceph namespace using oc project rook-ceph . Ensure ceph-mgr pod is Running 1 2 3 $ oc get pods -l app = rook-ceph-mgr NAME READY STATUS RESTARTS AGE rook-ceph-mgr 1/1 Running 0 14h Ensure service monitor is present 1 2 3 $ oc get servicemonitor rook-ceph-mgr NAME AGE rook-ceph-mgr 14h Ensure the prometheus rules object has been created 1 2 3 $ oc get prometheusrules -l prometheus = rook-prometheus NAME AGE prometheus-ceph-rules 14h","title":"OpenShift Common Issues"},{"location":"Troubleshooting/openshift-common-issues/#enable-monitoring-in-the-storage-dashboard","text":"OpenShift Console uses OpenShift Prometheus for monitoring and populating data in Storage Dashboard. Additional configuration is required to monitor the Ceph Cluster from the storage dashboard. Change the monitoring namespace to openshift-monitoring Change the namespace of the RoleBinding rook-ceph-metrics from rook-ceph to openshift-monitoring for the prometheus-k8s ServiceAccount in rbac.yaml . 1 2 3 4 subjects : - kind : ServiceAccount name : prometheus-k8s namespace : openshift-monitoring Enable Ceph Cluster monitoring Follow ceph-monitoring/prometheus-alerts . Set the required label on the namespace 1 oc label namespace rook-ceph \"openshift.io/cluster-monitoring=true\"","title":"Enable Monitoring in the Storage Dashboard"},{"location":"Troubleshooting/openshift-common-issues/#troubleshoot-monitoring-issues","text":"Attention Switch to rook-ceph namespace using oc project rook-ceph . Ensure ceph-mgr pod is Running 1 2 3 $ oc get pods -l app = rook-ceph-mgr NAME READY STATUS RESTARTS AGE rook-ceph-mgr 1/1 Running 0 14h Ensure service monitor is present 1 2 3 $ oc get servicemonitor rook-ceph-mgr NAME AGE rook-ceph-mgr 14h Ensure the prometheus rules object has been created 1 2 3 $ oc get prometheusrules -l prometheus = rook-prometheus NAME AGE prometheus-ceph-rules 14h","title":"Troubleshoot Monitoring Issues"},{"location":"Troubleshooting/performance-profiling/","text":"Collect perf data of a ceph process at runtime \u00b6 Warn This is an advanced topic please be aware of the steps you're performing or reach out to the experts for further guidance. There are some cases where the debug logs are not sufficient to investigate issues like high CPU utilization of a Ceph process. In that situation, coredump and perf information of a Ceph process is useful to be collected which can be shared with the Ceph team in an issue. To collect this information, please follow these steps: Edit the rook-ceph-operator deployment and set ROOK_HOSTPATH_REQUIRES_PRIVILEGED to true . Wait for the pods to get reinitialized: 1 # watch kubectl -n rook-ceph get pods Enter the respective pod of the Ceph process which needs to be investigated. For example: 1 # kubectl -n rook-ceph exec -it deploy/rook-ceph-mon-a -- bash Install gdb , perf and git inside the pod. 
For example: 1 # dnf install gdb git perf -y Capture perf data of the respective Ceph process: 1 2 # perf record -e cycles --call-graph dwarf -p  # perf report > perf_report_ Grab the pid of the respective Ceph process to collect its backtrace at multiple time instances, attach gdb to it and share the output gdb.txt : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 # gdb -p  - set pag off - set log on - thr a a bt full # This captures the complete backtrace of the process - backtrace - Ctrl+C - backtrace - Ctrl+C - backtrace - Ctrl+C - backtrace - set log off - q (to exit out of gdb) Grab the live coredump of the respective process using gcore : 1 # gcore  Capture the Wallclock Profiler data for the respective Ceph process and share the output gdbpmp.data generated: 1 2 3 # git clone https://github.com/markhpc/gdbpmp # cd gdbpmp # ./gdbpmp.py -p  -n 100 -o gdbpmp.data Collect the perf.data , perf_report , backtrace of the process gdb.txt , core file and profiler data gdbpmp.data and upload it to the tracker issue for troubleshooting purposes.","title":"Performance Profiling"},{"location":"Troubleshooting/performance-profiling/#collect-perf-data-of-a-ceph-process-at-runtime","text":"Warn This is an advanced topic please be aware of the steps you're performing or reach out to the experts for further guidance. There are some cases where the debug logs are not sufficient to investigate issues like high CPU utilization of a Ceph process. In that situation, coredump and perf information of a Ceph process is useful to be collected which can be shared with the Ceph team in an issue. To collect this information, please follow these steps: Edit the rook-ceph-operator deployment and set ROOK_HOSTPATH_REQUIRES_PRIVILEGED to true . Wait for the pods to get reinitialized: 1 # watch kubectl -n rook-ceph get pods Enter the respective pod of the Ceph process which needs to be investigated. For example: 1 # kubectl -n rook-ceph exec -it deploy/rook-ceph-mon-a -- bash Install gdb , perf and git inside the pod. For example: 1 # dnf install gdb git perf -y Capture perf data of the respective Ceph process: 1 2 # perf record -e cycles --call-graph dwarf -p  # perf report > perf_report_ Grab the pid of the respective Ceph process to collect its backtrace at multiple time instances, attach gdb to it and share the output gdb.txt : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 # gdb -p  - set pag off - set log on - thr a a bt full # This captures the complete backtrace of the process - backtrace - Ctrl+C - backtrace - Ctrl+C - backtrace - Ctrl+C - backtrace - set log off - q (to exit out of gdb) Grab the live coredump of the respective process using gcore : 1 # gcore  Capture the Wallclock Profiler data for the respective Ceph process and share the output gdbpmp.data generated: 1 2 3 # git clone https://github.com/markhpc/gdbpmp # cd gdbpmp # ./gdbpmp.py -p  -n 100 -o gdbpmp.data Collect the perf.data , perf_report , backtrace of the process gdb.txt , core file and profiler data gdbpmp.data and upload it to the tracker issue for troubleshooting purposes.","title":"Collect perf data of a ceph process at runtime"},{"location":"Upgrade/ceph-upgrade/","text":"This guide will walk through the steps to upgrade the version of Ceph in a Rook cluster. Rook and Ceph upgrades are designed to ensure data remains available even while the upgrade is proceeding. Rook will perform the upgrades in a rolling fashion such that application pods are not disrupted. Rook is cautious when performing upgrades. 
When an upgrade is requested (the Ceph image has been updated in the CR), Rook will go through all the daemons one by one and will individually perform checks on them. It will make sure a particular daemon can be stopped before performing the upgrade. Once the deployment has been updated, it checks if this is ok to continue. After each daemon is updated we wait for things to settle (monitors to be in a quorum, PGs to be clean for OSDs, up for MDSes, etc.), then only when the condition is met we move to the next daemon. We repeat this process until all the daemons have been updated. Considerations \u00b6 WARNING : Upgrading a Rook cluster is not without risk. There may be unexpected issues or obstacles that damage the integrity and health of the storage cluster, including data loss. The Rook cluster's storage may be unavailable for short periods during the upgrade process. Read this document in full before undertaking a Rook cluster upgrade. Supported Versions \u00b6 Rook v1.12 supports the following Ceph versions: Ceph Reef v18.2.0 or newer Ceph Quincy v17.2.0 or newer Ceph Pacific v16.2.7 or newer Support for Ceph Pacific (16.2.x) will be removed in the next Rook release. It will be mandatory to upgrade to Quincy or Reef before upgrading to the Rook release after v1.12.x. Important When an update is requested, the operator will check Ceph's status, if it is in HEALTH_ERR the operator will refuse to proceed with the upgrade. Warning Ceph v17.2.2 has a blocking issue when running with Rook. Use v17.2.3 or newer when possible. Quincy Consideration \u00b6 In Ceph Quincy (v17), the device_health_metrics pool was renamed to .mgr . Ceph will perform this migration automatically. The pool rename will be automatically handled by Rook if the configuration of the device_health_metrics pool is not customized via CephBlockPool. If the configuration of the device_health_metrics pool is customized via CephBlockPool, two extra steps are required after the Ceph upgrade is complete. Once upgrade is complete: Create a new CephBlockPool to configure the .mgr built-in pool. For an example, see builtin mgr pool . Delete the old CephBlockPool that represents the device_health_metrics pool. CephNFS User Consideration \u00b6 Ceph Quincy v17.2.1 has a potentially breaking regression with CephNFS. See the NFS documentation's known issue for more detail. Ceph Images \u00b6 Official Ceph container images can be found on Quay . These images are tagged in a few ways: The most explicit form of tags are full-ceph-version-and-build tags (e.g., v17.2.6-20230410 ). These tags are recommended for production clusters, as there is no possibility for the cluster to be heterogeneous with respect to the version of Ceph running in containers. Ceph major version tags (e.g., v17 ) are useful for development and test clusters so that the latest version of Ceph is always available. Ceph containers other than the official images from the registry above will not be supported. Example Upgrade to Ceph Quincy \u00b6 1. Update the Ceph daemons \u00b6 The upgrade will be automated by the Rook operator after the desired Ceph image is changed in the CephCluster CRD ( spec.cephVersion.image ). 1 2 3 ROOK_CLUSTER_NAMESPACE=rook-ceph NEW_CEPH_IMAGE='quay.io/ceph/ceph:v17.2.6-20230410' kubectl -n $ROOK_CLUSTER_NAMESPACE patch CephCluster $ROOK_CLUSTER_NAMESPACE --type=merge -p \"{\\\"spec\\\": {\\\"cephVersion\\\": {\\\"image\\\": \\\"$NEW_CEPH_IMAGE\\\"}}}\" 2. 
Wait for the pod updates \u00b6 As with upgrading Rook, now wait for the upgrade to complete. Status can be determined in a similar way to the Rook upgrade as well. 1 watch --exec kubectl -n $ROOK_CLUSTER_NAMESPACE get deployments -l rook_cluster=$ROOK_CLUSTER_NAMESPACE -o jsonpath='{range .items[*]}{.metadata.name}{\" \\treq/upd/avl: \"}{.spec.replicas}{\"/\"}{.status.updatedReplicas}{\"/\"}{.status.readyReplicas}{\" \\tceph-version=\"}{.metadata.labels.ceph-version}{\"\\n\"}{end}' Confirm the upgrade is completed when the versions are all on the desired Ceph version. 1 2 3 4 5 6 kubectl -n $ROOK_CLUSTER_NAMESPACE get deployment -l rook_cluster=$ROOK_CLUSTER_NAMESPACE -o jsonpath='{range .items[*]}{\"ceph-version=\"}{.metadata.labels.ceph-version}{\"\\n\"}{end}' | sort | uniq This cluster is not yet finished: ceph-version=15.2.13-0 ceph-version=v17.2.6-0 This cluster is finished: ceph-version=v17.2.6-0 3. Verify cluster health \u00b6 Verify the Ceph cluster's health using the health verification .","title":"Ceph Upgrades"},{"location":"Upgrade/ceph-upgrade/#considerations","text":"WARNING : Upgrading a Rook cluster is not without risk. There may be unexpected issues or obstacles that damage the integrity and health of the storage cluster, including data loss. The Rook cluster's storage may be unavailable for short periods during the upgrade process. Read this document in full before undertaking a Rook cluster upgrade.","title":"Considerations"},{"location":"Upgrade/ceph-upgrade/#supported-versions","text":"Rook v1.12 supports the following Ceph versions: Ceph Reef v18.2.0 or newer Ceph Quincy v17.2.0 or newer Ceph Pacific v16.2.7 or newer Support for Ceph Pacific (16.2.x) will be removed in the next Rook release. It will be mandatory to upgrade to Quincy or Reef before upgrading to the Rook release after v1.12.x. Important When an update is requested, the operator will check Ceph's status, if it is in HEALTH_ERR the operator will refuse to proceed with the upgrade. Warning Ceph v17.2.2 has a blocking issue when running with Rook. Use v17.2.3 or newer when possible.","title":"Supported Versions"},{"location":"Upgrade/ceph-upgrade/#quincy-consideration","text":"In Ceph Quincy (v17), the device_health_metrics pool was renamed to .mgr . Ceph will perform this migration automatically. The pool rename will be automatically handled by Rook if the configuration of the device_health_metrics pool is not customized via CephBlockPool. If the configuration of the device_health_metrics pool is customized via CephBlockPool, two extra steps are required after the Ceph upgrade is complete. Once upgrade is complete: Create a new CephBlockPool to configure the .mgr built-in pool. For an example, see builtin mgr pool . Delete the old CephBlockPool that represents the device_health_metrics pool.","title":"Quincy Consideration"},{"location":"Upgrade/ceph-upgrade/#cephnfs-user-consideration","text":"Ceph Quincy v17.2.1 has a potentially breaking regression with CephNFS. See the NFS documentation's known issue for more detail.","title":"CephNFS User Consideration"},{"location":"Upgrade/ceph-upgrade/#ceph-images","text":"Official Ceph container images can be found on Quay . These images are tagged in a few ways: The most explicit form of tags are full-ceph-version-and-build tags (e.g., v17.2.6-20230410 ). These tags are recommended for production clusters, as there is no possibility for the cluster to be heterogeneous with respect to the version of Ceph running in containers. 
Ceph major version tags (e.g., v17 ) are useful for development and test clusters so that the latest version of Ceph is always available. Ceph containers other than the official images from the registry above will not be supported.","title":"Ceph Images"},{"location":"Upgrade/ceph-upgrade/#example-upgrade-to-ceph-quincy","text":"","title":"Example Upgrade to Ceph Quincy"},{"location":"Upgrade/ceph-upgrade/#1-update-the-ceph-daemons","text":"The upgrade will be automated by the Rook operator after the desired Ceph image is changed in the CephCluster CRD ( spec.cephVersion.image ). 1 2 3 ROOK_CLUSTER_NAMESPACE=rook-ceph NEW_CEPH_IMAGE='quay.io/ceph/ceph:v17.2.6-20230410' kubectl -n $ROOK_CLUSTER_NAMESPACE patch CephCluster $ROOK_CLUSTER_NAMESPACE --type=merge -p \"{\\\"spec\\\": {\\\"cephVersion\\\": {\\\"image\\\": \\\"$NEW_CEPH_IMAGE\\\"}}}\"","title":"1. Update the Ceph daemons"},{"location":"Upgrade/ceph-upgrade/#2-wait-for-the-pod-updates","text":"As with upgrading Rook, now wait for the upgrade to complete. Status can be determined in a similar way to the Rook upgrade as well. 1 watch --exec kubectl -n $ROOK_CLUSTER_NAMESPACE get deployments -l rook_cluster=$ROOK_CLUSTER_NAMESPACE -o jsonpath='{range .items[*]}{.metadata.name}{\" \\treq/upd/avl: \"}{.spec.replicas}{\"/\"}{.status.updatedReplicas}{\"/\"}{.status.readyReplicas}{\" \\tceph-version=\"}{.metadata.labels.ceph-version}{\"\\n\"}{end}' Confirm the upgrade is completed when the versions are all on the desired Ceph version. 1 2 3 4 5 6 kubectl -n $ROOK_CLUSTER_NAMESPACE get deployment -l rook_cluster=$ROOK_CLUSTER_NAMESPACE -o jsonpath='{range .items[*]}{\"ceph-version=\"}{.metadata.labels.ceph-version}{\"\\n\"}{end}' | sort | uniq This cluster is not yet finished: ceph-version=15.2.13-0 ceph-version=v17.2.6-0 This cluster is finished: ceph-version=v17.2.6-0","title":"2. Wait for the pod updates"},{"location":"Upgrade/ceph-upgrade/#3-verify-cluster-health","text":"Verify the Ceph cluster's health using the health verification .","title":"3. Verify cluster health"},{"location":"Upgrade/health-verification/","text":"Rook and Ceph upgrades are designed to ensure data remains available even while the upgrade is proceeding. Rook will perform the upgrades in a rolling fashion such that application pods are not disrupted. To ensure the upgrades are seamless, it is important to begin the upgrades with Ceph in a fully healthy state. This guide reviews ways of verifying the health of a CephCluster. See the troubleshooting documentation for any issues during upgrades: General K8s troubleshooting Ceph common issues CSI common issues Pods all Running \u00b6 In a healthy Rook cluster, all pods in the Rook namespace should be in the Running (or Completed ) state and have few, if any, pod restarts. 1 2 ROOK_CLUSTER_NAMESPACE=rook-ceph kubectl -n $ROOK_CLUSTER_NAMESPACE get pods Status Output \u00b6 The Rook toolbox contains the Ceph tools that gives status details of the cluster with the ceph status command. 
Below is an output sample: 1 2 TOOLS_POD=$(kubectl -n $ROOK_CLUSTER_NAMESPACE get pod -l \"app=rook-ceph-tools\" -o jsonpath='{.items[*].metadata.name}') kubectl -n $ROOK_CLUSTER_NAMESPACE exec -it $TOOLS_POD -- ceph status The output should look similar to the following: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 cluster: id: a3f4d647-9538-4aff-9fd1-b845873c3fe9 health: HEALTH_OK services: mon: 3 daemons, quorum b,c,a mgr: a(active) mds: myfs-1/1/1 up {0=myfs-a=up:active}, 1 up:standby-replay osd: 6 osds: 6 up, 6 in rgw: 1 daemon active data: pools: 9 pools, 900 pgs objects: 67 objects, 11 KiB usage: 6.1 GiB used, 54 GiB / 60 GiB avail pgs: 900 active+clean io: client: 7.4 KiB/s rd, 681 B/s wr, 11 op/s rd, 4 op/s wr recovery: 164 B/s, 1 objects/s In the output above, note the following indications that the cluster is in a healthy state: Cluster health: The overall cluster status is HEALTH_OK and there are no warning or error status messages displayed. Monitors (mon): All of the monitors are included in the quorum list. Manager (mgr): The Ceph manager is in the active state. OSDs (osd): All OSDs are up and in . Placement groups (pgs): All PGs are in the active+clean state. (If applicable) Ceph filesystem metadata server (mds): all MDSes are active for all filesystems (If applicable) Ceph object store RADOS gateways (rgw): all daemons are active If the ceph status output has deviations from the general good health described above, there may be an issue that needs to be investigated further. Other commands may show more relevant details on the health of the system, such as ceph osd status . See the Ceph troubleshooting docs for help. Upgrading an unhealthy cluster \u00b6 Rook will not upgrade Ceph daemons if the health is in a HEALTH_ERR state. Rook can be configured to proceed with the (potentially unsafe) upgrade by setting either skipUpgradeChecks: true or continueUpgradeAfterChecksEvenIfNotHealthy: true as described in the cluster CR settings . Container Versions \u00b6 The container version running in a specific pod in the Rook cluster can be verified in its pod spec output. For example, for the monitor pod mon-b , verify the container version it is running with the below commands: 1 2 POD_NAME=$(kubectl -n $ROOK_CLUSTER_NAMESPACE get pod -o custom-columns=name:.metadata.name --no-headers | grep rook-ceph-mon-b) kubectl -n $ROOK_CLUSTER_NAMESPACE get pod ${POD_NAME} -o jsonpath='{.spec.containers[0].image}' The status and container versions for all Rook pods can be collected all at once with the following commands: 1 2 kubectl -n $ROOK_OPERATOR_NAMESPACE get pod -o jsonpath='{range .items[*]}{.metadata.name}{\"\\n\\t\"}{.status.phase}{\"\\t\\t\"}{.spec.containers[0].image}{\"\\t\"}{.spec.initContainers[0]}{\"\\n\"}{end}' && \\ kubectl -n $ROOK_CLUSTER_NAMESPACE get pod -o jsonpath='{range .items[*]}{.metadata.name}{\"\\n\\t\"}{.status.phase}{\"\\t\\t\"}{.spec.containers[0].image}{\"\\t\"}{.spec.initContainers[0].image}{\"\\n\"}{end}' The rook-version label exists on Ceph resources. For various resource controllers, a summary of the resource controllers can be gained with the commands below. These will report the requested, updated, and currently available replicas for various Rook resources in addition to the version of Rook for resources managed by Rook. Note that the operator and toolbox deployments do not have a rook-version label set. 
1 2 3 kubectl -n $ROOK_CLUSTER_NAMESPACE get deployments -o jsonpath='{range .items[*]}{.metadata.name}{\" \\treq/upd/avl: \"}{.spec.replicas}{\"/\"}{.status.updatedReplicas}{\"/\"}{.status.readyReplicas}{\" \\trook-version=\"}{.metadata.labels.rook-version}{\"\\n\"}{end}' kubectl -n $ROOK_CLUSTER_NAMESPACE get jobs -o jsonpath='{range .items[*]}{.metadata.name}{\" \\tsucceeded: \"}{.status.succeeded}{\" \\trook-version=\"}{.metadata.labels.rook-version}{\"\\n\"}{end}' Rook Volume Health \u00b6 Any pod that is using a Rook volume should also remain healthy: The pod should be in the Running state with few, if any, restarts There should be no errors in its logs The pod should still be able to read and write to the attached Rook volume.","title":"Health Verification"},{"location":"Upgrade/health-verification/#pods-all-running","text":"In a healthy Rook cluster, all pods in the Rook namespace should be in the Running (or Completed ) state and have few, if any, pod restarts. 1 2 ROOK_CLUSTER_NAMESPACE=rook-ceph kubectl -n $ROOK_CLUSTER_NAMESPACE get pods","title":"Pods all Running"},{"location":"Upgrade/health-verification/#status-output","text":"The Rook toolbox contains the Ceph tools that gives status details of the cluster with the ceph status command. Below is an output sample: 1 2 TOOLS_POD=$(kubectl -n $ROOK_CLUSTER_NAMESPACE get pod -l \"app=rook-ceph-tools\" -o jsonpath='{.items[*].metadata.name}') kubectl -n $ROOK_CLUSTER_NAMESPACE exec -it $TOOLS_POD -- ceph status The output should look similar to the following: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 cluster: id: a3f4d647-9538-4aff-9fd1-b845873c3fe9 health: HEALTH_OK services: mon: 3 daemons, quorum b,c,a mgr: a(active) mds: myfs-1/1/1 up {0=myfs-a=up:active}, 1 up:standby-replay osd: 6 osds: 6 up, 6 in rgw: 1 daemon active data: pools: 9 pools, 900 pgs objects: 67 objects, 11 KiB usage: 6.1 GiB used, 54 GiB / 60 GiB avail pgs: 900 active+clean io: client: 7.4 KiB/s rd, 681 B/s wr, 11 op/s rd, 4 op/s wr recovery: 164 B/s, 1 objects/s In the output above, note the following indications that the cluster is in a healthy state: Cluster health: The overall cluster status is HEALTH_OK and there are no warning or error status messages displayed. Monitors (mon): All of the monitors are included in the quorum list. Manager (mgr): The Ceph manager is in the active state. OSDs (osd): All OSDs are up and in . Placement groups (pgs): All PGs are in the active+clean state. (If applicable) Ceph filesystem metadata server (mds): all MDSes are active for all filesystems (If applicable) Ceph object store RADOS gateways (rgw): all daemons are active If the ceph status output has deviations from the general good health described above, there may be an issue that needs to be investigated further. Other commands may show more relevant details on the health of the system, such as ceph osd status . See the Ceph troubleshooting docs for help.","title":"Status Output"},{"location":"Upgrade/health-verification/#upgrading-an-unhealthy-cluster","text":"Rook will not upgrade Ceph daemons if the health is in a HEALTH_ERR state. Rook can be configured to proceed with the (potentially unsafe) upgrade by setting either skipUpgradeChecks: true or continueUpgradeAfterChecksEvenIfNotHealthy: true as described in the cluster CR settings .","title":"Upgrading an unhealthy cluster"},{"location":"Upgrade/health-verification/#container-versions","text":"The container version running in a specific pod in the Rook cluster can be verified in its pod spec output. 
For example, for the monitor pod mon-b , verify the container version it is running with the below commands: 1 2 POD_NAME=$(kubectl -n $ROOK_CLUSTER_NAMESPACE get pod -o custom-columns=name:.metadata.name --no-headers | grep rook-ceph-mon-b) kubectl -n $ROOK_CLUSTER_NAMESPACE get pod ${POD_NAME} -o jsonpath='{.spec.containers[0].image}' The status and container versions for all Rook pods can be collected all at once with the following commands: 1 2 kubectl -n $ROOK_OPERATOR_NAMESPACE get pod -o jsonpath='{range .items[*]}{.metadata.name}{\"\\n\\t\"}{.status.phase}{\"\\t\\t\"}{.spec.containers[0].image}{\"\\t\"}{.spec.initContainers[0]}{\"\\n\"}{end}' && \\ kubectl -n $ROOK_CLUSTER_NAMESPACE get pod -o jsonpath='{range .items[*]}{.metadata.name}{\"\\n\\t\"}{.status.phase}{\"\\t\\t\"}{.spec.containers[0].image}{\"\\t\"}{.spec.initContainers[0].image}{\"\\n\"}{end}' The rook-version label exists on Ceph resources. For various resource controllers, a summary of the resource controllers can be gained with the commands below. These will report the requested, updated, and currently available replicas for various Rook resources in addition to the version of Rook for resources managed by Rook. Note that the operator and toolbox deployments do not have a rook-version label set. 1 2 3 kubectl -n $ROOK_CLUSTER_NAMESPACE get deployments -o jsonpath='{range .items[*]}{.metadata.name}{\" \\treq/upd/avl: \"}{.spec.replicas}{\"/\"}{.status.updatedReplicas}{\"/\"}{.status.readyReplicas}{\" \\trook-version=\"}{.metadata.labels.rook-version}{\"\\n\"}{end}' kubectl -n $ROOK_CLUSTER_NAMESPACE get jobs -o jsonpath='{range .items[*]}{.metadata.name}{\" \\tsucceeded: \"}{.status.succeeded}{\" \\trook-version=\"}{.metadata.labels.rook-version}{\"\\n\"}{end}'","title":"Container Versions"},{"location":"Upgrade/health-verification/#rook-volume-health","text":"Any pod that is using a Rook volume should also remain healthy: The pod should be in the Running state with few, if any, restarts There should be no errors in its logs The pod should still be able to read and write to the attached Rook volume.","title":"Rook Volume Health"},{"location":"Upgrade/rook-upgrade/","text":"This guide will walk through the steps to upgrade the software in a Rook cluster from one version to the next. This guide focuses on updating the Rook version for the management layer, while the Ceph upgrade guide focuses on updating the data layer. Upgrades for both the operator and for Ceph are entirely automated except where Rook's permissions need to be explicitly updated by an admin or when incompatibilities need to be addressed manually due to customizations. We welcome feedback and opening issues! Supported Versions \u00b6 This guide is for upgrading from Rook v1.11.x to Rook v1.12.x . Please refer to the upgrade guides from previous releases for supported upgrade paths. Rook upgrades are only supported between official releases. For a guide to upgrade previous versions of Rook, please refer to the version of documentation for those releases. Upgrade 1.10 to 1.11 Upgrade 1.9 to 1.10 Upgrade 1.8 to 1.9 Upgrade 1.7 to 1.8 Upgrade 1.6 to 1.7 Upgrade 1.5 to 1.6 Upgrade 1.4 to 1.5 Upgrade 1.3 to 1.4 Upgrade 1.2 to 1.3 Upgrade 1.1 to 1.2 Upgrade 1.0 to 1.1 Upgrade 0.9 to 1.0 Upgrade 0.8 to 0.9 Upgrade 0.7 to 0.8 Upgrade 0.6 to 0.7 Upgrade 0.5 to 0.6 Important Rook releases from master are expressly unsupported. It is strongly recommended to use official releases of Rook. 
Unreleased versions from the master branch are subject to changes and incompatibilities that will not be supported in the official releases. Builds from the master branch can have functionality changed or removed at any time without compatibility support and without prior notice. Breaking changes in v1.12 \u00b6 The minimum supported version of Kubernetes is v1.22. CephCSI CephFS driver introduced a breaking change in v3.9.0. If any existing CephFS storageclass in the cluster has MountOptions parameter set, follow the steps mentioned in the CephCSI upgrade guide to ensure a smooth upgrade. Considerations \u00b6 With this upgrade guide, there are a few notes to consider: WARNING : Upgrading a Rook cluster is not without risk. There may be unexpected issues or obstacles that damage the integrity and health the storage cluster, including data loss. The Rook cluster's storage may be unavailable for short periods during the upgrade process for both Rook operator updates and for Ceph version updates. Read this document in full before undertaking a Rook cluster upgrade. Patch Release Upgrades \u00b6 Unless otherwise noted due to extenuating requirements, upgrades from one patch release of Rook to another are as simple as updating the common resources and the image of the Rook operator. For example, when Rook v1.12.1 is released, the process of updating from v1.12.0 is as simple as running the following: 1 2 git clone --single-branch --depth=1 --branch v1.12.1 https://github.com/rook/rook.git cd rook/deploy/examples If the Rook Operator or CephCluster are deployed into a different namespace than rook-ceph , see the Update common resources and CRDs section for instructions on how to change the default namespaces in common.yaml . Then, apply the latest changes from v1.12, and update the Rook Operator image. 1 2 kubectl apply -f common.yaml -f crds.yaml kubectl -n rook-ceph set image deploy/rook-ceph-operator rook-ceph-operator=rook/ceph:v1.12.1 As exemplified above, it is a good practice to update Rook common resources from the example manifests before any update. The common resources and CRDs might not be updated with every release, but Kubernetes will only apply updates to the ones that changed. Also update optional resources like Prometheus monitoring noted more fully in the upgrade section below . Helm \u00b6 If Rook is installed via the Helm chart, Helm will handle some details of the upgrade itself. The upgrade steps in this guide will clarify what Helm handles automatically. The rook-ceph helm chart upgrade performs the Rook upgrade. The rook-ceph-cluster helm chart upgrade performs a Ceph upgrade if the Ceph image is updated. Note Be sure to update to a supported Helm version Cluster Health \u00b6 In order to successfully upgrade a Rook cluster, the following prerequisites must be met: The cluster should be in a healthy state with full functionality. Review the health verification guide in order to verify a CephCluster is in a good starting state. All pods consuming Rook storage should be created, running, and in a steady state. Rook Operator Upgrade \u00b6 The examples given in this guide upgrade a live Rook cluster running v1.11.7 to the version v1.12.0 . This upgrade should work from any official patch release of Rook v1.11 to any official patch release of v1.12. Let's get started! Environment \u00b6 These instructions will work for as long the environment is parameterized correctly. Set the following environment variables, which will be used throughout this document. 
1 2 3 # Parameterize the environment export ROOK_OPERATOR_NAMESPACE=rook-ceph export ROOK_CLUSTER_NAMESPACE=rook-ceph 1. Update common resources and CRDs \u00b6 Hint Common resources and CRDs are automatically updated when using Helm charts. First, apply updates to Rook common resources. This includes modified privileges (RBAC) needed by the Operator. Also update the Custom Resource Definitions (CRDs). Get the latest common resources manifests that contain the latest changes. 1 2 git clone --single-branch --depth=1 --branch master https://github.com/rook/rook.git cd rook/deploy/examples If the Rook Operator or CephCluster are deployed into a different namespace than rook-ceph , update the common resource manifests to use your ROOK_OPERATOR_NAMESPACE and ROOK_CLUSTER_NAMESPACE using sed . 1 2 3 4 sed -i.bak \\ -e \"s/\\(.*\\):.*# namespace:operator/\\1: $ROOK_OPERATOR_NAMESPACE # namespace:operator/g\" \\ -e \"s/\\(.*\\):.*# namespace:cluster/\\1: $ROOK_CLUSTER_NAMESPACE # namespace:cluster/g\" \\ common.yaml Apply the resources. 1 kubectl apply -f common.yaml -f crds.yaml Prometheus Updates \u00b6 If Prometheus monitoring is enabled, follow this step to upgrade the Prometheus RBAC resources as well. 1 kubectl apply -f deploy/examples/monitoring/rbac.yaml 2. Update the Rook Operator \u00b6 Hint The operator is automatically updated when using Helm charts. The largest portion of the upgrade is triggered when the operator's image is updated to v1.12.x . When the operator is updated, it will proceed to update all of the Ceph daemons. 1 kubectl -n $ROOK_OPERATOR_NAMESPACE set image deploy/rook-ceph-operator rook-ceph-operator=rook/ceph:master 3. Update Ceph CSI \u00b6 Hint This is automatically updated if custom CSI image versions are not set. Important The minimum supported version of Ceph-CSI is v3.8.0. Update to the latest Ceph-CSI drivers if custom CSI images are specified. See the CSI Custom Images documentation. Note If using snapshots, refer to the Upgrade Snapshot API guide . 4. Wait for the upgrade to complete \u00b6 Watch now in amazement as the Ceph mons, mgrs, OSDs, rbd-mirrors, MDSes and RGWs are terminated and replaced with updated versions in sequence. The cluster may be unresponsive very briefly as mons update, and the Ceph Filesystem may fall offline a few times while the MDSes are upgrading. This is normal. The versions of the components can be viewed as they are updated: 1 watch --exec kubectl -n $ROOK_CLUSTER_NAMESPACE get deployments -l rook_cluster=$ROOK_CLUSTER_NAMESPACE -o jsonpath='{range .items[*]}{.metadata.name}{\" \\treq/upd/avl: \"}{.spec.replicas}{\"/\"}{.status.updatedReplicas}{\"/\"}{.status.readyReplicas}{\" \\trook-version=\"}{.metadata.labels.rook-version}{\"\\n\"}{end}' As an example, this cluster is midway through updating the OSDs. When all deployments report 1/1/1 availability and rook-version=v1.12.0 , the Ceph cluster's core components are fully updated. 1 2 3 4 5 6 7 8 9 Every 2.0s: kubectl -n rook-ceph get deployment -o j... rook-ceph-mgr-a req/upd/avl: 1/1/1 rook-version=v1.12.0 rook-ceph-mon-a req/upd/avl: 1/1/1 rook-version=v1.12.0 rook-ceph-mon-b req/upd/avl: 1/1/1 rook-version=v1.12.0 rook-ceph-mon-c req/upd/avl: 1/1/1 rook-version=v1.12.0 rook-ceph-osd-0 req/upd/avl: 1// rook-version=v1.12.0 rook-ceph-osd-1 req/upd/avl: 1/1/1 rook-version=v1.11.7 rook-ceph-osd-2 req/upd/avl: 1/1/1 rook-version=v1.11.7 An easy check to see if the upgrade is totally finished is to check that there is only one rook-version reported across the cluster. 
1 2 3 4 5 6 # kubectl -n $ROOK_CLUSTER_NAMESPACE get deployment -l rook_cluster = $ROOK_CLUSTER_NAMESPACE -o jsonpath = '{range .items[*]}{\"rook-version=\"}{.metadata.labels.rook-version}{\"\\n\"}{end}' | sort | uniq This cluster is not yet finished: rook-version=v1.11.7 rook-version=v1.12.0 This cluster is finished: rook-version=v1.12.0 5. Verify the updated cluster \u00b6 At this point, the Rook operator should be running version rook/ceph:v1.12.0 . Verify the CephCluster health using the health verification doc .","title":"Rook Upgrades"},{"location":"Upgrade/rook-upgrade/#supported-versions","text":"This guide is for upgrading from Rook v1.11.x to Rook v1.12.x . Please refer to the upgrade guides from previous releases for supported upgrade paths. Rook upgrades are only supported between official releases. For a guide to upgrade previous versions of Rook, please refer to the version of documentation for those releases. Upgrade 1.10 to 1.11 Upgrade 1.9 to 1.10 Upgrade 1.8 to 1.9 Upgrade 1.7 to 1.8 Upgrade 1.6 to 1.7 Upgrade 1.5 to 1.6 Upgrade 1.4 to 1.5 Upgrade 1.3 to 1.4 Upgrade 1.2 to 1.3 Upgrade 1.1 to 1.2 Upgrade 1.0 to 1.1 Upgrade 0.9 to 1.0 Upgrade 0.8 to 0.9 Upgrade 0.7 to 0.8 Upgrade 0.6 to 0.7 Upgrade 0.5 to 0.6 Important Rook releases from master are expressly unsupported. It is strongly recommended to use official releases of Rook. Unreleased versions from the master branch are subject to changes and incompatibilities that will not be supported in the official releases. Builds from the master branch can have functionality changed or removed at any time without compatibility support and without prior notice.","title":"Supported Versions"},{"location":"Upgrade/rook-upgrade/#breaking-changes-in-v112","text":"The minimum supported version of Kubernetes is v1.22. CephCSI CephFS driver introduced a breaking change in v3.9.0. If any existing CephFS storageclass in the cluster has MountOptions parameter set, follow the steps mentioned in the CephCSI upgrade guide to ensure a smooth upgrade.","title":"Breaking changes in v1.12"},{"location":"Upgrade/rook-upgrade/#considerations","text":"With this upgrade guide, there are a few notes to consider: WARNING : Upgrading a Rook cluster is not without risk. There may be unexpected issues or obstacles that damage the integrity and health the storage cluster, including data loss. The Rook cluster's storage may be unavailable for short periods during the upgrade process for both Rook operator updates and for Ceph version updates. Read this document in full before undertaking a Rook cluster upgrade.","title":"Considerations"},{"location":"Upgrade/rook-upgrade/#patch-release-upgrades","text":"Unless otherwise noted due to extenuating requirements, upgrades from one patch release of Rook to another are as simple as updating the common resources and the image of the Rook operator. For example, when Rook v1.12.1 is released, the process of updating from v1.12.0 is as simple as running the following: 1 2 git clone --single-branch --depth=1 --branch v1.12.1 https://github.com/rook/rook.git cd rook/deploy/examples If the Rook Operator or CephCluster are deployed into a different namespace than rook-ceph , see the Update common resources and CRDs section for instructions on how to change the default namespaces in common.yaml . Then, apply the latest changes from v1.12, and update the Rook Operator image. 
1 2 kubectl apply -f common.yaml -f crds.yaml kubectl -n rook-ceph set image deploy/rook-ceph-operator rook-ceph-operator=rook/ceph:v1.12.1 As exemplified above, it is a good practice to update Rook common resources from the example manifests before any update. The common resources and CRDs might not be updated with every release, but Kubernetes will only apply updates to the ones that changed. Also update optional resources like Prometheus monitoring noted more fully in the upgrade section below .","title":"Patch Release Upgrades"},{"location":"Upgrade/rook-upgrade/#helm","text":"If Rook is installed via the Helm chart, Helm will handle some details of the upgrade itself. The upgrade steps in this guide will clarify what Helm handles automatically. The rook-ceph helm chart upgrade performs the Rook upgrade. The rook-ceph-cluster helm chart upgrade performs a Ceph upgrade if the Ceph image is updated. Note Be sure to update to a supported Helm version","title":"Helm"},{"location":"Upgrade/rook-upgrade/#cluster-health","text":"In order to successfully upgrade a Rook cluster, the following prerequisites must be met: The cluster should be in a healthy state with full functionality. Review the health verification guide in order to verify a CephCluster is in a good starting state. All pods consuming Rook storage should be created, running, and in a steady state.","title":"Cluster Health"},{"location":"Upgrade/rook-upgrade/#rook-operator-upgrade","text":"The examples given in this guide upgrade a live Rook cluster running v1.11.7 to the version v1.12.0 . This upgrade should work from any official patch release of Rook v1.11 to any official patch release of v1.12. Let's get started!","title":"Rook Operator Upgrade"},{"location":"Upgrade/rook-upgrade/#environment","text":"These instructions will work for as long the environment is parameterized correctly. Set the following environment variables, which will be used throughout this document. 1 2 3 # Parameterize the environment export ROOK_OPERATOR_NAMESPACE=rook-ceph export ROOK_CLUSTER_NAMESPACE=rook-ceph","title":"Environment"},{"location":"Upgrade/rook-upgrade/#1-update-common-resources-and-crds","text":"Hint Common resources and CRDs are automatically updated when using Helm charts. First, apply updates to Rook common resources. This includes modified privileges (RBAC) needed by the Operator. Also update the Custom Resource Definitions (CRDs). Get the latest common resources manifests that contain the latest changes. 1 2 git clone --single-branch --depth=1 --branch master https://github.com/rook/rook.git cd rook/deploy/examples If the Rook Operator or CephCluster are deployed into a different namespace than rook-ceph , update the common resource manifests to use your ROOK_OPERATOR_NAMESPACE and ROOK_CLUSTER_NAMESPACE using sed . 1 2 3 4 sed -i.bak \\ -e \"s/\\(.*\\):.*# namespace:operator/\\1: $ROOK_OPERATOR_NAMESPACE # namespace:operator/g\" \\ -e \"s/\\(.*\\):.*# namespace:cluster/\\1: $ROOK_CLUSTER_NAMESPACE # namespace:cluster/g\" \\ common.yaml Apply the resources. 1 kubectl apply -f common.yaml -f crds.yaml","title":"1. Update common resources and CRDs"},{"location":"Upgrade/rook-upgrade/#prometheus-updates","text":"If Prometheus monitoring is enabled, follow this step to upgrade the Prometheus RBAC resources as well. 1 kubectl apply -f deploy/examples/monitoring/rbac.yaml","title":"Prometheus Updates"},{"location":"Upgrade/rook-upgrade/#2-update-the-rook-operator","text":"Hint The operator is automatically updated when using Helm charts. 
The largest portion of the upgrade is triggered when the operator's image is updated to v1.12.x . When the operator is updated, it will proceed to update all of the Ceph daemons. 1 kubectl -n $ROOK_OPERATOR_NAMESPACE set image deploy/rook-ceph-operator rook-ceph-operator=rook/ceph:master","title":"2. Update the Rook Operator"},{"location":"Upgrade/rook-upgrade/#3-update-ceph-csi","text":"Hint This is automatically updated if custom CSI image versions are not set. Important The minimum supported version of Ceph-CSI is v3.8.0. Update to the latest Ceph-CSI drivers if custom CSI images are specified. See the CSI Custom Images documentation. Note If using snapshots, refer to the Upgrade Snapshot API guide .","title":"3. Update Ceph CSI"},{"location":"Upgrade/rook-upgrade/#4-wait-for-the-upgrade-to-complete","text":"Watch now in amazement as the Ceph mons, mgrs, OSDs, rbd-mirrors, MDSes and RGWs are terminated and replaced with updated versions in sequence. The cluster may be unresponsive very briefly as mons update, and the Ceph Filesystem may fall offline a few times while the MDSes are upgrading. This is normal. The versions of the components can be viewed as they are updated: 1 watch --exec kubectl -n $ROOK_CLUSTER_NAMESPACE get deployments -l rook_cluster=$ROOK_CLUSTER_NAMESPACE -o jsonpath='{range .items[*]}{.metadata.name}{\" \\treq/upd/avl: \"}{.spec.replicas}{\"/\"}{.status.updatedReplicas}{\"/\"}{.status.readyReplicas}{\" \\trook-version=\"}{.metadata.labels.rook-version}{\"\\n\"}{end}' As an example, this cluster is midway through updating the OSDs. When all deployments report 1/1/1 availability and rook-version=v1.12.0 , the Ceph cluster's core components are fully updated. 1 2 3 4 5 6 7 8 9 Every 2.0s: kubectl -n rook-ceph get deployment -o j... rook-ceph-mgr-a req/upd/avl: 1/1/1 rook-version=v1.12.0 rook-ceph-mon-a req/upd/avl: 1/1/1 rook-version=v1.12.0 rook-ceph-mon-b req/upd/avl: 1/1/1 rook-version=v1.12.0 rook-ceph-mon-c req/upd/avl: 1/1/1 rook-version=v1.12.0 rook-ceph-osd-0 req/upd/avl: 1// rook-version=v1.12.0 rook-ceph-osd-1 req/upd/avl: 1/1/1 rook-version=v1.11.7 rook-ceph-osd-2 req/upd/avl: 1/1/1 rook-version=v1.11.7 An easy check to see if the upgrade is totally finished is to check that there is only one rook-version reported across the cluster. 1 2 3 4 5 6 # kubectl -n $ROOK_CLUSTER_NAMESPACE get deployment -l rook_cluster = $ROOK_CLUSTER_NAMESPACE -o jsonpath = '{range .items[*]}{\"rook-version=\"}{.metadata.labels.rook-version}{\"\\n\"}{end}' | sort | uniq This cluster is not yet finished: rook-version=v1.11.7 rook-version=v1.12.0 This cluster is finished: rook-version=v1.12.0","title":"4. Wait for the upgrade to complete"},{"location":"Upgrade/rook-upgrade/#5-verify-the-updated-cluster","text":"At this point, the Rook operator should be running version rook/ceph:v1.12.0 . Verify the CephCluster health using the health verification doc .","title":"5. Verify the updated cluster"}]}
\ No newline at end of file
diff --git a/docs/rook/latest/sitemap.xml.gz b/docs/rook/latest/sitemap.xml.gz
index 51d5dced81735b1fb0537b3ae1b1bdf1993183ef..284838f7f850c196af25e9735a98ead78506d697 100644
GIT binary patch
delta 15
WcmaFG{)(MVzMF&N?$?cMPnZEO$OaPt

delta 15
WcmaFG{)(MVzMF%i=IutdC(HmXS_P^A