Commit e1a76bfb authored by Santosh Pillai's avatar Santosh Pillai
Browse files

ceph: Cluster CleanupPolicy: Delete correct mon directories under the dataDirHostPath


In case of multiple clusters, we don't want to delete all the mon directories under the dataDirHostPath during cluster cleanup.
This PR deletes the mon directory only if the monitor secret key matches.
Signed-off-by: default avatarSantosh Pillai <sapillai@redhat.com>
parent 843390a4
Showing with 141 additions and 29 deletions
+141 -29
......@@ -17,12 +17,14 @@ limitations under the License.
package ceph
import (
"io/ioutil"
"os"
"path"
"path/filepath"
"github.com/pkg/errors"
"github.com/rook/rook/cmd/rook/rook"
"github.com/rook/rook/pkg/operator/ceph/cluster/mon"
"github.com/rook/rook/pkg/util/flags"
"github.com/spf13/cobra"
)
......@@ -30,6 +32,7 @@ import (
// Command-line flag values for the cleanup command. They are registered in
// init() and may also be set through ROOK_-prefixed environment variables
// via flags.SetFlagsFromEnv.
var (
	dataDirHostPath string // host path holding the cluster's data (--data-dir-host-path)
	namespaceDir    string // namespace-specific directory under dataDirHostPath (--namespace-dir)
	monSecret       string // monitor secret from the keyring (--mon-secret)
)
var cleanUpCmd = &cobra.Command{
......@@ -40,6 +43,7 @@ var cleanUpCmd = &cobra.Command{
// init registers the cleanup command's flags, allows them to be overridden
// by ROOK_-prefixed environment variables, and wires the run entry point.
func init() {
	cleanUpCmd.Flags().StringVar(&dataDirHostPath, "data-dir-host-path", "", "dataDirHostPath on the node")
	// Fix copy-paste in the help text: this flag holds the namespace
	// directory, not the dataDirHostPath.
	cleanUpCmd.Flags().StringVar(&namespaceDir, "namespace-dir", "", "namespace directory under the dataDirHostPath")
	cleanUpCmd.Flags().StringVar(&monSecret, "mon-secret", "", "monitor secret from the keyring")
	flags.SetFlagsFromEnv(cleanUpCmd.Flags(), rook.RookEnvVarPrefix)
	cleanUpCmd.RunE = startCleanUp
}
......@@ -58,21 +62,58 @@ func startCleanUp(cmd *cobra.Command, args []string) error {
logger.Infof("successfully cleaned up %q directory", cleanupDirPath)
}
// Remove all the mon directories.
monDirs, err := filepath.Glob(path.Join(dataDirHostPath, "mon-*"))
cleanMonDirs()
}
return nil
}
func cleanMonDirs() {
monDirs, err := filepath.Glob(path.Join(dataDirHostPath, "mon-*"))
if err != nil {
logger.Errorf("failed to find the mon directories on the dataDirHostPath %q. %v", dataDirHostPath, err)
return
}
if len(monDirs) == 0 {
logger.Infof("no mon directories are available for clean up in the dataDirHostPath %q", dataDirHostPath)
return
}
for _, monDir := range monDirs {
// Clean up mon directory only if mon secret matches with that in the keyring file.
deleteMonDir, err := secretKeyMatch(monDir, monSecret)
if err != nil {
return errors.Wrapf(err, "failed to clean up mon directories on dataDirHostPath %q", dataDirHostPath)
logger.Errorf("failed to clean up the mon directory %q on the dataDirHostPath %q. %v", monDir, dataDirHostPath, err)
continue
}
if len(monDirs) > 0 {
for _, monDir := range monDirs {
if err := os.RemoveAll(monDir); err != nil {
logger.Errorf("failed to clean up mon directory %q on dataDirHostPath. %v", monDir, err)
} else {
logger.Infof("successfully cleaned up mon directory %q on dataDirHostPath", monDir)
}
if deleteMonDir {
if err := os.RemoveAll(monDir); err != nil {
logger.Errorf("failed to clean up the mon directory %q on the dataDirHostPath %q. %v", monDir, dataDirHostPath, err)
} else {
logger.Infof("successfully cleaned up the mon directory %q on the dataDirHostPath %q", monDir, dataDirHostPath)
}
logger.Info("completed clean up of the mon directories in the dataDirHostPath")
} else {
logger.Infof("skipped clean up of the mon directory %q as the secret key did not match", monDir)
}
}
return nil
return
}
func secretKeyMatch(monDir, monSecret string) (bool, error) {
keyringDirPath := path.Join(monDir, "/data/keyring")
if _, err := os.Stat(keyringDirPath); os.IsNotExist(err) {
return false, errors.Wrapf(err, "failed to read keyring %q for the mon directory %q", keyringDirPath, monDir)
}
contents, err := ioutil.ReadFile(filepath.Clean(keyringDirPath))
if err != nil {
return false, errors.Wrapf(err, "failed to read keyring %q for the mon directory %q", keyringDirPath, monDir)
}
extractedKey, err := mon.ExtractKey(string(contents))
if err != nil {
return false, errors.Wrapf(err, "failed to extract secret key from the keyring %q for the mon directory %q", keyringDirPath, monDir)
}
return monSecret == extractedKey, nil
}
......@@ -46,9 +46,10 @@ var (
volumeName = "cleanup-volume"
dataDirHostPath = "ROOK_DATA_DIR_HOST_PATH"
namespaceDir = "ROOK_NAMESPACE_DIR"
monitorSecret = "ROOK_MON_SECRET"
)
func (c *ClusterController) startClusterCleanUp(cluster *cephv1.CephCluster, cephHosts []string) {
func (c *ClusterController) startClusterCleanUp(cluster *cephv1.CephCluster, cephHosts []string, monSecret string) {
logger.Infof("starting clean up for cluster %q", cluster.Name)
err := c.waitForCephDaemonCleanUp(cluster, time.Duration(clusterCleanUpPolicyRetryInterval)*time.Second)
if err != nil {
......@@ -56,14 +57,14 @@ func (c *ClusterController) startClusterCleanUp(cluster *cephv1.CephCluster, cep
return
}
c.startCleanUpJobs(cluster, cephHosts)
c.startCleanUpJobs(cluster, cephHosts, monSecret)
}
func (c *ClusterController) startCleanUpJobs(cluster *cephv1.CephCluster, cephHosts []string) {
func (c *ClusterController) startCleanUpJobs(cluster *cephv1.CephCluster, cephHosts []string, monSecret string) {
for _, hostName := range cephHosts {
logger.Infof("starting clean up job on node %q", hostName)
jobName := k8sutil.TruncateNodeName("cluster-cleanup-job-%s", hostName)
podSpec := c.cleanUpJobTemplateSpec(cluster)
podSpec := c.cleanUpJobTemplateSpec(cluster, monSecret)
podSpec.Spec.NodeSelector = map[string]string{v1.LabelHostname: hostName}
labels := controller.AppLabels(CleanupAppName, cluster.Namespace)
labels[CleanupAppName] = "true"
......@@ -84,7 +85,7 @@ func (c *ClusterController) startCleanUpJobs(cluster *cephv1.CephCluster, cephHo
}
}
func (c *ClusterController) cleanUpJobContainer(cluster *cephv1.CephCluster) v1.Container {
func (c *ClusterController) cleanUpJobContainer(cluster *cephv1.CephCluster, monSecret string) v1.Container {
volumeMounts := []v1.VolumeMount{}
envVars := []v1.EnvVar{}
if cluster.Spec.CleanupPolicy.DeleteDataDirOnHosts != "" && cluster.Spec.DataDirHostPath != "" {
......@@ -93,6 +94,7 @@ func (c *ClusterController) cleanUpJobContainer(cluster *cephv1.CephCluster) v1.
envVars = append(envVars, []v1.EnvVar{
{Name: dataDirHostPath, Value: cluster.Spec.DataDirHostPath},
{Name: namespaceDir, Value: cluster.Namespace},
{Name: monitorSecret, Value: monSecret},
}...)
}
container := v1.Container{
......@@ -106,7 +108,7 @@ func (c *ClusterController) cleanUpJobContainer(cluster *cephv1.CephCluster) v1.
return container
}
func (c *ClusterController) cleanUpJobTemplateSpec(cluster *cephv1.CephCluster) v1.PodTemplateSpec {
func (c *ClusterController) cleanUpJobTemplateSpec(cluster *cephv1.CephCluster, monSecret string) v1.PodTemplateSpec {
volumes := []v1.Volume{}
hostPathVolume := v1.Volume{Name: volumeName, VolumeSource: v1.VolumeSource{HostPath: &v1.HostPathVolumeSource{Path: cluster.Spec.DataDirHostPath}}}
volumes = append(volumes, hostPathVolume)
......@@ -117,7 +119,7 @@ func (c *ClusterController) cleanUpJobTemplateSpec(cluster *cephv1.CephCluster)
},
Spec: v1.PodSpec{
Containers: []v1.Container{
c.cleanUpJobContainer(cluster),
c.cleanUpJobContainer(cluster, monSecret),
},
Volumes: volumes,
RestartPolicy: v1.RestartPolicyOnFailure,
......@@ -182,3 +184,12 @@ func (c *ClusterController) getCephHosts(namespace string) ([]string, error) {
return hostNameList, nil
}
// getMonSecret loads the cluster info for the given namespace and returns
// its monitor secret, wrapping any load failure with context.
func (c *ClusterController) getMonSecret(namespace string) (string, error) {
	info, _, _, err := mon.LoadClusterInfo(c.context, namespace)
	if err != nil {
		return "", errors.Wrap(err, "failed to get cluster info")
	}
	return info.MonitorSecret, nil
}
......@@ -61,7 +61,7 @@ func TestCleanUpJobSpec(t *testing.T) {
},
}
controller := NewClusterController(context, "", &attachment.MockAttachment{}, operatorConfigCallbacks, addCallbacks)
podTemplateSpec := controller.cleanUpJobTemplateSpec(cluster)
podTemplateSpec := controller.cleanUpJobTemplateSpec(cluster, "monSecret")
assert.Equal(t, expectedHostPath, podTemplateSpec.Spec.Containers[0].Env[0].Value)
assert.Equal(t, expectedNamespace, podTemplateSpec.Spec.Containers[0].Env[1].Value)
}
......@@ -575,20 +575,26 @@ func (c *ClusterController) onUpdate(oldObj, newObj interface{}) {
// K8s will only delete the crd and child resources when the finalizers have been removed from the crd.
if newClust.DeletionTimestamp != nil {
logger.Infof("cluster %q has a deletion timestamp", newClust.Namespace)
err := c.handleDelete(newClust, time.Duration(clusterDeleteRetryInterval)*time.Second)
if err != nil {
logger.Errorf("failed finalizer for cluster. %v", err)
return
}
// Start cluster clean up only if cleanupPolicy is applied to the ceph cluster
if hasCleanupPolicy(newClust) {
monSecret, err := c.getMonSecret(newClust.Namespace)
if err != nil {
logger.Errorf("failed to clean up cluster. %v", err)
return
}
cephHosts, err := c.getCephHosts(newClust.Namespace)
if err != nil {
logger.Errorf("failed to find valid ceph hosts in the cluster %q. %v", newClust.Namespace, err)
return
}
go c.startClusterCleanUp(newClust, cephHosts)
go c.startClusterCleanUp(newClust, cephHosts, monSecret)
}
err = c.handleDelete(newClust, time.Duration(clusterDeleteRetryInterval)*time.Second)
if err != nil {
logger.Errorf("failed finalizer for cluster. %v", err)
return
}
// remove the finalizer from the crd, which indicates to k8s that the resource can safely be deleted
......
......@@ -258,10 +258,11 @@ func genSecret(executor exec.Executor, configDir, name string, args []string) (s
if err != nil {
return "", errors.Wrapf(err, "failed to read secret file")
}
return extractKey(string(contents))
return ExtractKey(string(contents))
}
func extractKey(contents string) (string, error) {
// ExtractKey retrieves the mon secret key from the keyring file
func ExtractKey(contents string) (string, error) {
secret := ""
slice := strings.Fields(sys.Grep(string(contents), "key"))
if len(slice) >= 3 {
......
......@@ -572,8 +572,14 @@ func (h *CephInstaller) UninstallRookFromMultipleNS(systemNamespace string, name
nodes, err := h.GetNodeHostnames()
checkError(h.T(), err, "cannot get node names")
for _, node := range nodes {
err = h.cleanupDir(node, h.hostPathToDelete)
logger.Infof("removing %s from node %s. err=%v", h.hostPathToDelete, node, err)
if h.cleanupHost {
err = h.verifyDirCleanup(node, h.hostPathToDelete)
logger.Infof("verifying clean up of %s from node %s. err=%v", h.hostPathToDelete, node, err)
assert.NoError(h.T(), err)
} else {
err = h.cleanupDir(node, h.hostPathToDelete)
logger.Infof("removing %s from node %s. err=%v", h.hostPathToDelete, node, err)
}
}
}
if h.changeHostnames {
......@@ -650,6 +656,12 @@ func (h *CephInstaller) cleanupDir(node, dir string) error {
return err
}
// verifyDirCleanup applies the cleanup-verification manifest for the given
// node and directory, returning any error from kubectl.
func (h *CephInstaller) verifyDirCleanup(node, dir string) error {
	manifest := h.Manifests.GetCleanupVerificationPod(node, dir)
	_, err := h.k8shelper.KubectlWithStdin(manifest, createFromStdinArgs...)
	return err
}
func (h *CephInstaller) CollectOperatorLog(suiteName, testName, namespace string) {
if !h.T().Failed() && Env.Logs != "all" {
return
......
......@@ -39,6 +39,7 @@ type CephManifests interface {
GetRookExternalCluster(settings *ClusterExternalSettings) string
GetRookToolBox(namespace string) string
GetCleanupPod(node, removalDir string) string
GetCleanupVerificationPod(node, hostDirPath string) string
GetBlockPoolDef(poolName, namespace, replicaSize string) string
GetBlockStorageClassDef(csi bool, poolName, storageClassName, reclaimPolicy, namespace, systemNamespace string) string
GetFileStorageClassDef(fsName, storageClassName, namespace string) string
......@@ -2054,6 +2055,41 @@ spec:
path: ` + removalDir
}
// GetCleanupVerificationPod returns a batch/v1 Job manifest that runs a
// privileged "rook-cleaner" container on the given node with hostPathDir
// mounted at /scrub; the container exits 1 if /scrub still contains any
// entries, so the Job succeeds only when the dataDirHostPath was fully
// cleaned up. The Job name carries a random UUID suffix so repeated
// verifications do not collide.
// NOTE(review): the YAML indentation inside this raw string appears
// mangled in this view — confirm against the original literal.
func (m *CephManifestsMaster) GetCleanupVerificationPod(node, hostPathDir string) string {
return `apiVersion: batch/v1
kind: Job
metadata:
name: rook-verify-cleanup-` + uuid.Must(uuid.NewRandom()).String() + `
spec:
template:
spec:
restartPolicy: Never
containers:
- name: rook-cleaner
image: rook/ceph:` + m.imageTag + `
securityContext:
privileged: true
volumeMounts:
- name: cleaner
mountPath: /scrub
command:
- "sh"
- "-c"
- |
set -xEeuo pipefail
#Assert dataDirHostPath is empty
if [ "$(ls -A /scrub/)" ]; then
exit 1
fi
nodeSelector:
kubernetes.io/hostname: ` + node + `
volumes:
- name: cleaner
hostPath:
path: ` + hostPathDir
}
func (m *CephManifestsMaster) GetBlockPoolDef(poolName string, namespace string, replicaSize string) string {
return `apiVersion: ceph.rook.io/v1
kind: CephBlockPool
......
......@@ -1698,6 +1698,11 @@ spec:
path: ` + removalDir
}
// GetCleanupVerificationPod returns an empty manifest: cleanup
// verification is not implemented for the v1.1 test manifests.
func (m *CephManifestsV1_1) GetCleanupVerificationPod(node, hostPathDir string) string {
	return ""
}
func (m *CephManifestsV1_1) GetBlockPoolDef(poolName string, namespace string, replicaSize string) string {
return `apiVersion: ceph.rook.io/v1
kind: CephBlockPool
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment