Commit 65677d28 authored by Sébastien Han, committed by mergify-bot

ceph: add log collector


We can now collect logs via a side-car container.
A new CRD spec has been added:

spec:
  logCollector:
    enabled: true
    periodicity: 24h

Every 24h we will rotate log files for each Ceph daemon.
Signed-off-by: Sébastien Han <seb@redhat.com>
(cherry picked from commit c6a87203)

# Conflicts:
#	Documentation/ceph-cluster-crd.md
#	cluster/examples/kubernetes/ceph/cluster.yaml
#	pkg/operator/ceph/cluster/crash/crash.go
parent 60c6375e
Showing with 249 additions and 50 deletions
......@@ -198,6 +198,13 @@ For more details on the mons and when to choose a number other than `3`, see the
* `modules`: is the list of Ceph manager modules to enable
* `crashCollector`: The settings for crash collector daemon(s).
* `disable`: if set to `true`, the crash collector will not run on any node where a Ceph daemon runs
* `daysToRetain`: specifies the number of days to keep crash entries in the Ceph cluster. By default the entries are kept indefinitely.
* `logCollector`: The settings for the log collector daemon.
* `enabled`: if set to `true`, the log collector will run as a side-car next to each Ceph daemon. The Ceph configuration option `log_to_file` will be turned on, meaning Ceph daemons will log to files in addition to the container's stdout. These logs will be rotated. (default: false)
* `periodicity`: how often to rotate the daemon's logs (default: 24h). Specified with a time suffix, which may be 'h' for hours or 'd' for days. **Rotating too often will slightly impact the daemon's performance since the signal briefly interrupts the program.** A Go sketch of these settings follows this list.
* `annotations`: [annotations configuration settings](#annotations-and-labels)
* `labels`: [labels configuration settings](#annotations-and-labels)
* `placement`: [placement configuration settings](#placement-configuration-settings)
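For illustration, the same settings expressed against the Go API types introduced by this commit (a minimal sketch only; the `cephv1` import path is the one used elsewhere in this diff, and all unrelated `ClusterSpec` fields are omitted):

```go
package main

import (
	"fmt"

	cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1"
)

func main() {
	// Mirrors the YAML from the commit message: run the log collector
	// side-car and rotate each daemon's log file every 24 hours.
	spec := cephv1.ClusterSpec{
		LogCollector: cephv1.LogCollectorSpec{
			Enabled:     true,
			Periodicity: "24h",
		},
	}
	fmt.Printf("log collector enabled=%t periodicity=%s\n",
		spec.LogCollector.Enabled, spec.LogCollector.Periodicity)
}
```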
......@@ -543,6 +550,7 @@ You can set resource requests/limits for Rook components through the [Resource R
* `crashcollector`: Set resource requests/limits for crash. This pod runs wherever there is a Ceph pod running.
It scrapes for Ceph daemon core dumps and sends them to the Ceph manager crash module so that core dumps are centralized and can be easily listed/accessed.
You can read more about the [Ceph Crash module](https://docs.ceph.com/docs/master/mgr/crash/).
* `logcollector`: Set resource requests/limits for the log collector. When enabled, this container runs as a side-car to each Ceph daemon; a resource sketch is shown below this list.
* `cleanup`: Set resource requests/limits for the cleanup job, which is responsible for wiping the cluster's data after uninstall.
In order to provide the best possible experience running Ceph in containers, Rook internally recommends minimum memory limits if resource limits are passed.
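A minimal sketch of how a `logcollector` entry could be supplied and then read back through the accessor added in this commit (assumed: the `rook.io/v1` `ResourceSpec` map type used elsewhere in this diff; the quantities are arbitrary examples, not recommendations):

```go
package main

import (
	"fmt"

	cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1"
	rookv1 "github.com/rook/rook/pkg/apis/rook.io/v1"
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Give the log collector side-car a small, bounded footprint.
	resources := rookv1.ResourceSpec{
		"logcollector": v1.ResourceRequirements{
			Requests: v1.ResourceList{
				v1.ResourceCPU:    resource.MustParse("100m"),
				v1.ResourceMemory: resource.MustParse("100Mi"),
			},
			Limits: v1.ResourceList{
				v1.ResourceMemory: resource.MustParse("100Mi"),
			},
		},
	}
	req := cephv1.GetLogCollectorResources(resources)
	fmt.Println("log collector memory limit:", req.Limits.Memory().String())
}
```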
......
......@@ -724,7 +724,7 @@ Let's say you want to enable logging for `mon.a`, but only for this daemon.
Using the toolbox or from inside the operator run:
```console
ceph config daemon mon.a log_to_file true
ceph config set mon.a log_to_file true
```
This will activate logging to the filesystem; you will find the logs in `dataDirHostPath/$NAMESPACE/log`, so typically this means `/var/lib/rook/rook-ceph/log`.
......
......@@ -319,6 +319,13 @@ spec:
x-kubernetes-preserve-unknown-fields: true
tokenSecretName:
type: string
logCollector:
type: object
properties:
enabled:
type: boolean
periodicity:
type: string
annotations:
type: object
nullable: true
......
......@@ -88,8 +88,18 @@ spec:
# enable the crash collector for ceph daemon crash collection
crashCollector:
disable: false
# Uncomment daysToRetain to prune ceph crash entries older than the
# specified number of days.
#daysToRetain: 30
# enable the log collector; daemons will log to files that are rotated
# logCollector:
# enabled: true
# periodicity: 24h # SUFFIX may be 'h' for hours or 'd' for days.
# automate [data cleanup process](https://github.com/rook/rook/blob/master/Documentation/ceph-teardown.md#delete-the-data-on-hosts) in cluster destruction.
cleanupPolicy:
# cleanupPolicy:
# Since cluster cleanup is destructive to data, confirmation is required.
# To destroy all Rook data on hosts during uninstall, confirmation must be set to "yes-really-destroy-data".
# This value should only be set when the cluster is about to be deleted. After the confirmation is set,
......@@ -169,6 +179,7 @@ spec:
# osd:
# prepareosd:
# crashcollector:
# logcollector:
# cleanup:
# The option to automatically remove OSDs that are out and are safe to destroy.
removeOSDsIfOutAndSafeToRemove: false
......
......@@ -321,6 +321,13 @@ spec:
x-kubernetes-preserve-unknown-fields: true
tokenSecretName:
type: string
logCollector:
type: object
properties:
enabled:
type: boolean
periodicity:
type: string
annotations:
type: object
nullable: true
......
......@@ -206,6 +206,7 @@ spec:
type: integer
format: int32
security: {}
logCollector: {}
placement: {}
resources: {}
healthCheck: {}
......
......@@ -34,6 +34,8 @@ const (
ResourcesKeyMDS = "mds"
// ResourcesKeyCrashCollector represents the name of the resource in the CR for the crash collector
ResourcesKeyCrashCollector = "crashcollector"
// ResourcesKeyLogCollector represents the name of the resource in the CR for the log collector
ResourcesKeyLogCollector = "logcollector"
// ResourcesKeyRBDMirror represents the name of resource in the CR for the rbd mirror
ResourcesKeyRBDMirror = "rbdmirror"
// ResourcesKeyCleanup represents the name of resource in the CR for the cleanup
......@@ -65,6 +67,11 @@ func GetCrashCollectorResources(p rook.ResourceSpec) v1.ResourceRequirements {
return p[ResourcesKeyCrashCollector]
}
// GetLogCollectorResources returns the resources for the log collector daemon
func GetLogCollectorResources(p rook.ResourceSpec) v1.ResourceRequirements {
return p[ResourcesKeyLogCollector]
}
// GetCleanupResources returns the placement for the cleanup job
func GetCleanupResources(p rook.ResourceSpec) v1.ResourceRequirements {
return p[ResourcesKeyCleanup]
......
......@@ -132,6 +132,15 @@ type ClusterSpec struct {
// Security represents security settings
Security SecuritySpec `json:"security,omitempty"`
// LogCollector represents the settings for the log collector
LogCollector LogCollectorSpec `json:"logCollector,omitempty"`
}
// LogCollectorSpec is the logging spec
type LogCollectorSpec struct {
Enabled bool `json:"enabled,omitempty"`
Periodicity string `json:"periodicity,omitempty"`
}
// SecuritySpec is security spec to include various security items such as kms
......
......@@ -1051,6 +1051,7 @@ func (in *ClusterSpec) DeepCopyInto(out *ClusterSpec) {
out.CleanupPolicy = in.CleanupPolicy
in.HealthCheck.DeepCopyInto(&out.HealthCheck)
in.Security.DeepCopyInto(&out.Security)
out.LogCollector = in.LogCollector
return
}
......@@ -1451,6 +1452,22 @@ func (in *KeyManagementServiceSpec) DeepCopy() *KeyManagementServiceSpec {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *LogCollectorSpec) DeepCopyInto(out *LogCollectorSpec) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LogCollectorSpec.
func (in *LogCollectorSpec) DeepCopy() *LogCollectorSpec {
if in == nil {
return nil
}
out := new(LogCollectorSpec)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *MetadataServerSpec) DeepCopyInto(out *MetadataServerSpec) {
*out = *in
......
......@@ -24,7 +24,6 @@ import (
"github.com/pkg/errors"
cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1"
"github.com/rook/rook/pkg/operator/ceph/cluster/mon"
"github.com/rook/rook/pkg/operator/ceph/config"
"github.com/rook/rook/pkg/operator/ceph/config/keyring"
"github.com/rook/rook/pkg/operator/ceph/controller"
......@@ -140,7 +139,7 @@ func getCrashDirInitContainer(cephCluster cephv1.CephCluster) corev1.Container {
crashPostedDir,
},
Image: cephCluster.Spec.CephVersion.Image,
SecurityContext: mon.PodSecurityContext(),
SecurityContext: controller.PodSecurityContext(),
Resources: cephv1.GetCrashCollectorResources(cephCluster.Spec.Resources),
VolumeMounts: controller.DaemonVolumeMounts(dataPathMap, ""),
}
......@@ -155,7 +154,7 @@ func getCrashChownInitContainer(cephCluster cephv1.CephCluster) corev1.Container
cephCluster.Spec.CephVersion.Image,
controller.DaemonVolumeMounts(dataPathMap, ""),
cephv1.GetCrashCollectorResources(cephCluster.Spec.Resources),
mon.PodSecurityContext(),
controller.PodSecurityContext(),
)
}
......@@ -176,12 +175,44 @@ func getCrashDaemonContainer(cephCluster cephv1.CephCluster, cephVersion version
Env: envVars,
VolumeMounts: volumeMounts,
Resources: cephv1.GetCrashCollectorResources(cephCluster.Spec.Resources),
SecurityContext: mon.PodSecurityContext(),
SecurityContext: controller.PodSecurityContext(),
}
return container
}
func getCrashPruneContainer(cephCluster cephv1.CephCluster, cephVersion version.CephVersion) corev1.Container {
cephImage := cephCluster.Spec.CephVersion.Image
envVars := append(controller.DaemonEnvVars(cephImage), generateCrashEnvVar())
dataPathMap := config.NewDatalessDaemonDataPathMap(cephCluster.GetNamespace(), cephCluster.Spec.DataDirHostPath)
volumeMounts := controller.DaemonVolumeMounts(dataPathMap, "")
volumeMounts = append(volumeMounts, keyring.VolumeMount().CrashCollector())
container := corev1.Container{
Name: "ceph-crash-pruner",
Command: []string{
"ceph",
"-n",
crashClient,
"crash",
"prune",
},
Args: []string{
fmt.Sprintf("%d", cephCluster.Spec.CrashCollector.DaysToRetain),
},
Image: cephImage,
Env: envVars,
VolumeMounts: volumeMounts,
Resources: cephv1.GetCrashCollectorResources(cephCluster.Spec.Resources),
SecurityContext: controller.PodSecurityContext(),
}
return container
}
func clusterOwnerRef(clusterName, clusterID string) metav1.OwnerReference {
blockOwner := true
return metav1.OwnerReference{
......
......@@ -27,7 +27,6 @@ import (
rookcephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1"
rookv1 "github.com/rook/rook/pkg/apis/rook.io/v1"
"github.com/rook/rook/pkg/daemon/ceph/client"
"github.com/rook/rook/pkg/operator/ceph/cluster/mon"
"github.com/rook/rook/pkg/operator/ceph/config"
"github.com/rook/rook/pkg/operator/ceph/config/keyring"
"github.com/rook/rook/pkg/operator/ceph/controller"
......@@ -67,6 +66,13 @@ func (c *Cluster) makeDeployment(mgrConfig *mgrConfig) (*apps.Deployment, error)
},
}
// If the log collector is enabled we add the side-car container
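// Sharing the PID namespace lets logrotate in the side-car signal the Ceph daemon so it
// reopens its log file after rotation (the brief interruption mentioned in the docs above).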
if c.spec.LogCollector.Enabled {
shareProcessNamespace := true
podSpec.Spec.ShareProcessNamespace = &shareProcessNamespace
podSpec.Spec.Containers = append(podSpec.Spec.Containers, *controller.LogCollectorContainer(fmt.Sprintf("ceph-mgr.%s", mgrConfig.DaemonID), c.clusterInfo.Namespace, c.spec))
}
// Replace default unreachable node toleration
k8sutil.AddUnreachableNodeToleration(&podSpec.Spec)
......@@ -165,7 +171,7 @@ func (c *Cluster) makeChownInitContainer(mgrConfig *mgrConfig) v1.Container {
c.spec.CephVersion.Image,
controller.DaemonVolumeMounts(mgrConfig.DataPathMap, mgrConfig.ResourceName),
cephv1.GetMgrResources(c.spec.Resources),
mon.PodSecurityContext(),
controller.PodSecurityContext(),
)
}
......@@ -246,7 +252,7 @@ func (c *Cluster) makeMgrDaemonContainer(mgrConfig *mgrConfig) v1.Container {
c.cephMgrOrchestratorModuleEnvs()...,
),
Resources: cephv1.GetMgrResources(c.spec.Resources),
SecurityContext: mon.PodSecurityContext(),
SecurityContext: controller.PodSecurityContext(),
LivenessProbe: getDefaultMgrLivenessProbe(),
}
......
......@@ -236,7 +236,7 @@ func (c *Cluster) startMons(targetCount int) error {
// only once and do it as early as possible in the mon orchestration.
setConfigsNeedsRetry := false
if existingCount > 0 {
err := config.SetDefaultConfigs(c.context, c.ClusterInfo, c.spec.Network)
err := config.SetDefaultConfigs(c.context, c.ClusterInfo, c.spec)
if err != nil {
// If we fail here, it could be because the mons are not healthy, and this might be
// fixed by updating the mon deployments. Instead of returning error here, log a
......@@ -262,7 +262,7 @@ func (c *Cluster) startMons(targetCount int) error {
// values in the config database. Do this only when the existing count is zero so that
// this is only done once when the cluster is created.
if existingCount == 0 {
err := config.SetDefaultConfigs(c.context, c.ClusterInfo, c.spec.Network)
err := config.SetDefaultConfigs(c.context, c.ClusterInfo, c.spec)
if err != nil {
return errors.Wrap(err, "failed to set Rook and/or user-defined Ceph config options after creating the first mon")
}
......@@ -270,7 +270,7 @@ func (c *Cluster) startMons(targetCount int) error {
// Or if we need to retry, only do this when we are on the first iteration of the
// loop. This could be in the same if statement as above, but separate it to get a
// different error message.
err := config.SetDefaultConfigs(c.context, c.ClusterInfo, c.spec.Network)
err := config.SetDefaultConfigs(c.context, c.ClusterInfo, c.spec)
if err != nil {
return errors.Wrap(err, "failed to set Rook and/or user-defined Ceph config options after updating the existing mons")
}
......@@ -284,7 +284,7 @@ func (c *Cluster) startMons(targetCount int) error {
}
if setConfigsNeedsRetry {
err := config.SetDefaultConfigs(c.context, c.ClusterInfo, c.spec.Network)
err := config.SetDefaultConfigs(c.context, c.ClusterInfo, c.spec)
if err != nil {
return errors.Wrap(err, "failed to set Rook and/or user-defined Ceph config options after forcefully updating the existing mons")
}
......
......@@ -18,7 +18,6 @@ package mon
import (
"fmt"
"os"
"path"
"strings"
......@@ -179,6 +178,13 @@ func (c *Cluster) makeMonPod(monConfig *monConfig, canary bool) (*v1.Pod, error)
PriorityClassName: cephv1.GetMonPriorityClassName(c.spec.PriorityClassNames),
}
// If the log collector is enabled we add the side-car container
if c.spec.LogCollector.Enabled {
shareProcessNamespace := true
podSpec.ShareProcessNamespace = &shareProcessNamespace
podSpec.Containers = append(podSpec.Containers, *controller.LogCollectorContainer(fmt.Sprintf("%s.%s", cephMonCommand, monConfig.DaemonName), c.ClusterInfo.Namespace, c.spec))
}
// Replace default unreachable node toleration
if c.monVolumeClaimTemplate(monConfig) != nil {
k8sutil.AddUnreachableNodeToleration(&podSpec)
......@@ -220,25 +226,13 @@ func (c *Cluster) makeMonPod(monConfig *monConfig, canary bool) (*v1.Pod, error)
// Init and daemon containers require the same context, so we call it 'pod' context
// PodSecurityContext detects if the pod needs privileges to run
func PodSecurityContext() *v1.SecurityContext {
privileged := false
if os.Getenv("ROOK_HOSTPATH_REQUIRES_PRIVILEGED") == "true" {
privileged = true
}
return &v1.SecurityContext{
Privileged: &privileged,
}
}
func (c *Cluster) makeChownInitContainer(monConfig *monConfig) v1.Container {
return controller.ChownCephDataDirsInitContainer(
*monConfig.DataPathMap,
c.spec.CephVersion.Image,
controller.DaemonVolumeMounts(monConfig.DataPathMap, keyringStoreName),
cephv1.GetMonResources(c.spec.Resources),
PodSecurityContext(),
controller.PodSecurityContext(),
)
}
......@@ -257,7 +251,7 @@ func (c *Cluster) makeMonFSInitContainer(monConfig *monConfig) v1.Container {
),
Image: c.spec.CephVersion.Image,
VolumeMounts: controller.DaemonVolumeMounts(monConfig.DataPathMap, keyringStoreName),
SecurityContext: PodSecurityContext(),
SecurityContext: controller.PodSecurityContext(),
// filesystem creation does not require ports to be exposed
Env: controller.DaemonEnvVars(c.spec.CephVersion.Image),
Resources: cephv1.GetMonResources(c.spec.Resources),
......@@ -297,7 +291,7 @@ func (c *Cluster) makeMonDaemonContainer(monConfig *monConfig) v1.Container {
),
Image: c.spec.CephVersion.Image,
VolumeMounts: controller.DaemonVolumeMounts(monConfig.DataPathMap, keyringStoreName),
SecurityContext: PodSecurityContext(),
SecurityContext: controller.PodSecurityContext(),
Ports: []v1.ContainerPort{
{
Name: "tcp-msgr1",
......
......@@ -26,7 +26,6 @@ import (
"github.com/pkg/errors"
cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1"
kms "github.com/rook/rook/pkg/daemon/ceph/osd/kms"
opmon "github.com/rook/rook/pkg/operator/ceph/cluster/mon"
"github.com/rook/rook/pkg/operator/ceph/cluster/osd/config"
opconfig "github.com/rook/rook/pkg/operator/ceph/config"
"github.com/rook/rook/pkg/operator/ceph/controller"
......@@ -546,6 +545,13 @@ func (c *Cluster) makeDeployment(osdProps osdProperties, osd OSDInfo, provisionC
},
}
// If the log collector is enabled we add the side-car container
if c.spec.LogCollector.Enabled {
shareProcessNamespace := true
podTemplateSpec.Spec.ShareProcessNamespace = &shareProcessNamespace
podTemplateSpec.Spec.Containers = append(podTemplateSpec.Spec.Containers, *controller.LogCollectorContainer(fmt.Sprintf("ceph-osd.%s", osdID), c.clusterInfo.Namespace, c.spec))
}
// If the liveness probe is enabled
podTemplateSpec.Spec.Containers[0] = opconfig.ConfigureLivenessProbe(cephv1.KeyOSD, podTemplateSpec.Spec.Containers[0], c.spec.HealthCheck)
......@@ -713,7 +719,7 @@ func (c *Cluster) getPVCInitContainer(osdProps osdProperties) v1.Container {
},
},
VolumeMounts: []v1.VolumeMount{getPvcOSDBridgeMount(osdProps.pvc.ClaimName)},
SecurityContext: opmon.PodSecurityContext(),
SecurityContext: controller.PodSecurityContext(),
Resources: osdProps.resources,
}
}
......@@ -744,7 +750,7 @@ func (c *Cluster) getPVCInitContainerActivate(mountPath string, osdProps osdProp
},
},
VolumeMounts: []v1.VolumeMount{getPvcOSDBridgeMountActivate(mountPath, osdProps.pvc.ClaimName)},
SecurityContext: opmon.PodSecurityContext(),
SecurityContext: controller.PodSecurityContext(),
Resources: osdProps.resources,
}
}
......@@ -843,7 +849,7 @@ func (c *Cluster) generateEncryptionCopyBlockContainer(resources v1.ResourceRequ
// volumeMountPVCName is crucial, especially when the block we copy is the metadata block
// its value must be the name of the block PV so that all init containers use the same bridge (the emptyDir shared by all the init containers)
VolumeMounts: []v1.VolumeMount{getPvcOSDBridgeMountActivate(mountPath, volumeMountPVCName), getDeviceMapperMount()},
SecurityContext: opmon.PodSecurityContext(),
SecurityContext: controller.PodSecurityContext(),
Resources: resources,
}
}
......@@ -889,7 +895,7 @@ func (c *Cluster) getPVCMetadataInitContainer(mountPath string, osdProps osdProp
Name: fmt.Sprintf("%s-bridge", osdProps.metadataPVC.ClaimName),
},
},
SecurityContext: opmon.PodSecurityContext(),
SecurityContext: controller.PodSecurityContext(),
Resources: osdProps.resources,
}
}
......@@ -922,7 +928,7 @@ func (c *Cluster) getPVCMetadataInitContainerActivate(mountPath string, osdProps
// We need to call getPvcOSDBridgeMountActivate() so that we can copy the metadata block into the "main" empty dir
// This empty dir is passed along every init container
VolumeMounts: []v1.VolumeMount{getPvcOSDBridgeMountActivate(mountPath, osdProps.pvc.ClaimName)},
SecurityContext: opmon.PodSecurityContext(),
SecurityContext: controller.PodSecurityContext(),
Resources: osdProps.resources,
}
}
......@@ -947,7 +953,7 @@ func (c *Cluster) getPVCWalInitContainer(mountPath string, osdProps osdPropertie
Name: fmt.Sprintf("%s-bridge", osdProps.walPVC.ClaimName),
},
},
SecurityContext: opmon.PodSecurityContext(),
SecurityContext: controller.PodSecurityContext(),
Resources: osdProps.resources,
}
}
......@@ -980,7 +986,7 @@ func (c *Cluster) getPVCWalInitContainerActivate(mountPath string, osdProps osdP
// We need to call getPvcOSDBridgeMountActivate() so that we can copy the wal block into the "main" empty dir
// This empty dir is passed along every init container
VolumeMounts: []v1.VolumeMount{getPvcOSDBridgeMountActivate(mountPath, osdProps.pvc.ClaimName)},
SecurityContext: opmon.PodSecurityContext(),
SecurityContext: controller.PodSecurityContext(),
Resources: osdProps.resources,
}
}
......
......@@ -20,7 +20,6 @@ import (
"fmt"
cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1"
"github.com/rook/rook/pkg/operator/ceph/cluster/mon"
"github.com/rook/rook/pkg/operator/ceph/config"
"github.com/rook/rook/pkg/operator/ceph/controller"
"github.com/rook/rook/pkg/operator/k8sutil"
......@@ -48,6 +47,14 @@ func (r *ReconcileCephRBDMirror) makeDeployment(daemonConfig *daemonConfig, rbdM
PriorityClassName: rbdMirror.Spec.PriorityClassName,
},
}
// If the log collector is enabled we add the side-car container
if r.cephClusterSpec.LogCollector.Enabled {
shareProcessNamespace := true
podSpec.Spec.ShareProcessNamespace = &shareProcessNamespace
podSpec.Spec.Containers = append(podSpec.Spec.Containers, *controller.LogCollectorContainer(fmt.Sprintf("ceph-client.rbd-mirror.%s", daemonConfig.DaemonID), r.clusterInfo.Namespace, *r.cephClusterSpec))
}
// Replace default unreachable node toleration
k8sutil.AddUnreachableNodeToleration(&podSpec.Spec)
rbdMirror.Spec.Annotations.ApplyToObjectMeta(&podSpec.ObjectMeta)
......@@ -102,7 +109,7 @@ func (r *ReconcileCephRBDMirror) makeChownInitContainer(daemonConfig *daemonConf
r.cephClusterSpec.CephVersion.Image,
controller.DaemonVolumeMounts(daemonConfig.DataPathMap, daemonConfig.ResourceName),
rbdMirror.Spec.Resources,
mon.PodSecurityContext(),
controller.PodSecurityContext(),
)
}
......@@ -121,7 +128,7 @@ func (r *ReconcileCephRBDMirror) makeMirroringDaemonContainer(daemonConfig *daem
VolumeMounts: controller.DaemonVolumeMounts(daemonConfig.DataPathMap, daemonConfig.ResourceName),
Env: controller.DaemonEnvVars(r.cephClusterSpec.CephVersion.Image),
Resources: rbdMirror.Spec.Resources,
SecurityContext: mon.PodSecurityContext(),
SecurityContext: controller.PodSecurityContext(),
// TODO:
// Not implemented at this point since the socket name is '/run/ceph/ceph-client.rbd-mirror.a.1.94362516231272.asok'
// Also the command to run will be:
......
......@@ -103,7 +103,7 @@ func NewFlag(key, value string) string {
func SetDefaultConfigs(
context *clusterd.Context,
clusterInfo *cephclient.ClusterInfo,
networkSpec cephv1.NetworkSpec,
clusterSpec cephv1.ClusterSpec,
) error {
// ceph.conf is never used. All configurations are made in the centralized mon config database,
// or they are specified on the commandline when daemons are called.
......@@ -113,14 +113,37 @@ func SetDefaultConfigs(
return errors.Wrapf(err, "failed to apply default Ceph configurations")
}
// When enabled, the log collector rotates the Ceph daemons' log files
if clusterSpec.LogCollector.Enabled {
// Override "log file" for existing clusters, where it was previously set to an empty value
logOptions := []Option{
configOverride("global", "log file", "/var/log/ceph/$cluster-$name.log"),
configOverride("global", "log to file", "true"),
}
if err := monStore.SetAll(logOptions...); err != nil {
return errors.Wrapf(err, "failed to apply logging configuration for log collector")
}
// If the log collector is disabled, do not log to files since nothing will collect them
} else {
logOptions := []Option{
configOverride("global", "log file", ""),
configOverride("global", "log to file", "false"),
}
if err := monStore.SetAll(logOptions...); err != nil {
return errors.Wrapf(err, "failed to apply logging configuration")
}
}
if err := monStore.SetAll(DefaultLegacyConfigs()...); err != nil {
return errors.Wrapf(err, "failed to apply legacy config overrides")
}
// Apply Multus if needed
if networkSpec.IsMultus() {
if clusterSpec.Network.IsMultus() {
logger.Info("configuring ceph network(s) with multus")
cephNetworks, err := generateNetworkSettings(context, clusterInfo.Namespace, networkSpec.Selectors)
cephNetworks, err := generateNetworkSettings(context, clusterInfo.Namespace, clusterSpec.Network.Selectors)
if err != nil {
return errors.Wrap(err, "failed to generate network settings")
}
......
......@@ -60,7 +60,6 @@ func LoggingFlags() []string {
func DefaultCentralizedConfigs(cephVersion version.CephVersion) []Option {
overrides := []Option{
configOverride("global", "mon allow pool delete", "true"),
configOverride("global", "log file", ""),
configOverride("global", "mon cluster log file", ""),
}
......
......@@ -19,7 +19,9 @@ package controller
import (
"fmt"
"os"
"path"
"strings"
"github.com/coreos/pkg/capnslog"
"github.com/pkg/errors"
......@@ -42,6 +44,7 @@ const (
daemonSocketDir = "/run/ceph"
initialDelaySecondsNonOSDDaemon int32 = 10
initialDelaySecondsOSDDaemon int32 = 45
logCollector = "log-collector"
)
type daemonConfig struct {
......@@ -51,6 +54,32 @@ type daemonConfig struct {
var logger = capnslog.NewPackageLogger("github.com/rook/rook", "ceph-spec")
var (
cronLogRotate = `
set -xe
CEPH_CLIENT_ID=%s
PERIODICITY=%s
LOG_ROTATE_CEPH_FILE=/etc/logrotate.d/ceph
if [ -z "$PERIODICITY" ]; then
PERIODICITY=24h
fi
# Edit the logrotate file so that only this specific daemon's log is rotated.
# Otherwise we would rotate log files of other daemons without being able to reload them,
# which can happen when multiple daemons run on the same machine.
sed -i "s|*.log|$CEPH_CLIENT_ID.log|" "$LOG_ROTATE_CEPH_FILE"
while true; do
sleep "$PERIODICITY"
echo "starting log rotation"
logrotate --verbose --force "$LOG_ROTATE_CEPH_FILE"
echo "I am going to sleep now, see you in $PERIODICITY"
done
`
)
// return the volume and matching volume mount for mounting the config override ConfigMap into
// containers as "/etc/ceph/ceph.conf".
func configOverrideConfigMapVolumeAndMount() (v1.Volume, v1.VolumeMount) {
......@@ -538,3 +567,35 @@ func (c *daemonConfig) buildAdminSocketCommand() string {
return command
}
// PodSecurityContext detects if the pod needs privileges to run
func PodSecurityContext() *v1.SecurityContext {
privileged := false
if os.Getenv("ROOK_HOSTPATH_REQUIRES_PRIVILEGED") == "true" {
privileged = true
}
return &v1.SecurityContext{
Privileged: &privileged,
}
}
// LogCollectorContainer returns a side-car container that periodically rotates the daemon's logs
func LogCollectorContainer(daemonID, ns string, c cephv1.ClusterSpec) *v1.Container {
return &v1.Container{
Name: logCollectorContainerName(daemonID),
Command: []string{
"/bin/bash",
"-c",
fmt.Sprintf(cronLogRotate, daemonID, c.LogCollector.Periodicity),
},
Image: c.CephVersion.Image,
VolumeMounts: DaemonVolumeMounts(config.NewDatalessDaemonDataPathMap(ns, c.DataDirHostPath), ""),
SecurityContext: PodSecurityContext(),
Resources: cephv1.GetLogCollectorResources(c.Resources),
}
}
func logCollectorContainerName(daemon string) string {
return fmt.Sprintf("%s-%s", strings.Replace(daemon, ".", "-", -1), logCollector)
}
......@@ -22,7 +22,6 @@ import (
"github.com/pkg/errors"
"github.com/rook/rook/pkg/clusterd"
"github.com/rook/rook/pkg/operator/ceph/cluster/mon"
"github.com/rook/rook/pkg/operator/ceph/config"
"github.com/rook/rook/pkg/operator/ceph/controller"
"github.com/rook/rook/pkg/operator/k8sutil"
......@@ -60,6 +59,13 @@ func (c *Cluster) makeDeployment(mdsConfig *mdsConfig) (*apps.Deployment, error)
// Replace default unreachable node toleration
k8sutil.AddUnreachableNodeToleration(&podSpec.Spec)
// If the log collector is enabled we add the side-car container
if c.clusterSpec.LogCollector.Enabled {
shareProcessNamespace := true
podSpec.Spec.ShareProcessNamespace = &shareProcessNamespace
podSpec.Spec.Containers = append(podSpec.Spec.Containers, *controller.LogCollectorContainer(fmt.Sprintf("ceph-mds.%s", mdsConfig.DaemonID), c.clusterInfo.Namespace, *c.clusterSpec))
}
c.fs.Spec.MetadataServer.Annotations.ApplyToObjectMeta(&podSpec.ObjectMeta)
c.fs.Spec.MetadataServer.Labels.ApplyToObjectMeta(&podSpec.ObjectMeta)
c.fs.Spec.MetadataServer.Placement.ApplyToPodSpec(&podSpec.Spec)
......@@ -105,7 +111,7 @@ func (c *Cluster) makeChownInitContainer(mdsConfig *mdsConfig) v1.Container {
c.clusterSpec.CephVersion.Image,
controller.DaemonVolumeMounts(mdsConfig.DataPathMap, mdsConfig.ResourceName),
c.fs.Spec.MetadataServer.Resources,
mon.PodSecurityContext(),
controller.PodSecurityContext(),
)
}
......@@ -125,7 +131,7 @@ func (c *Cluster) makeMdsDaemonContainer(mdsConfig *mdsConfig) v1.Container {
VolumeMounts: controller.DaemonVolumeMounts(mdsConfig.DataPathMap, mdsConfig.ResourceName),
Env: controller.DaemonEnvVars(c.clusterSpec.CephVersion.Image),
Resources: c.fs.Spec.MetadataServer.Resources,
SecurityContext: mon.PodSecurityContext(),
SecurityContext: controller.PodSecurityContext(),
LivenessProbe: controller.GenerateLivenessProbeExecDaemon(config.MdsType, mdsConfig.DaemonID),
}
......
......@@ -22,7 +22,6 @@ import (
"github.com/pkg/errors"
cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1"
cephclient "github.com/rook/rook/pkg/daemon/ceph/client"
"github.com/rook/rook/pkg/operator/ceph/cluster/mon"
"github.com/rook/rook/pkg/operator/ceph/config/keyring"
"github.com/rook/rook/pkg/operator/ceph/controller"
"github.com/rook/rook/pkg/operator/k8sutil"
......@@ -185,7 +184,7 @@ func (r *ReconcileCephNFS) connectionConfigInitContainer(nfs *cephv1.CephNFS, na
keyring.VolumeMount().Resource(instanceName(nfs, name)),
},
nfs.Spec.Server.Resources,
mon.PodSecurityContext(),
controller.PodSecurityContext(),
)
}
......@@ -218,7 +217,7 @@ func (r *ReconcileCephNFS) daemonContainer(nfs *cephv1.CephNFS, cfg daemonConfig
},
Env: controller.DaemonEnvVars(r.cephClusterSpec.CephVersion.Image),
Resources: nfs.Spec.Server.Resources,
SecurityContext: mon.PodSecurityContext(),
SecurityContext: controller.PodSecurityContext(),
}
}
......