From a9df6d7c8f659709297c52bf50fcb0ea273660b6 Mon Sep 17 00:00:00 2001
From: Kris Hicks <khicks@hashicorp.com>
Date: Mon, 8 Mar 2021 12:40:20 -0800
Subject: [PATCH] WIP: docker: pin task containers to a dedicated cpuset cgroup

---
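Notes:

This WIP pins each task's container to reserved CPUs by creating a dedicated
cpuset cgroup parent ("nomad/<job>-<task>") and pointing the container's
CgroupParent at it. Below is a minimal, illustrative sketch of the cgroup-v1
writes that libcontainer's CpusetGroup.Set performs for us in the patch; the
helper name and the hardcoded /sys/fs/cgroup/cpuset mountpoint are assumptions
of the sketch only -- the patch discovers the real mountpoint with
cgroups.FindCgroupMountpoint. A second sketch after the patch trailer outlines
the "CPU manager" the inline comments allude to.

package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
)

// setupCpusetParent mirrors, in miniature, what the patch does via
// libcontainer: create the parent cgroup and populate cpuset.cpus and
// cpuset.mems. A v1 cpuset cgroup cannot run tasks until both are set.
func setupCpusetParent(parent, cpus, mems string) error {
	base := filepath.Join("/sys/fs/cgroup/cpuset", parent)
	if err := os.MkdirAll(base, 0755); err != nil {
		return fmt.Errorf("failed to create cgroup parent %s: %v", base, err)
	}
	for file, val := range map[string]string{
		"cpuset.cpus": cpus, // e.g. "0-1"
		"cpuset.mems": mems, // e.g. "0"
	} {
		if err := ioutil.WriteFile(filepath.Join(base, file), []byte(val), 0644); err != nil {
			return fmt.Errorf("failed to write %s: %v", file, err)
		}
	}
	return nil
}

func main() {
	if err := setupCpusetParent("nomad/example-job-example-task", "0-1", "0"); err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}
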
 drivers/docker/driver.go | 63 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/drivers/docker/driver.go b/drivers/docker/driver.go
index 2fa3cc24db..b823327e69 100644
--- a/drivers/docker/driver.go
+++ b/drivers/docker/driver.go
@@ -28,6 +28,9 @@ import (
 	"github.com/hashicorp/nomad/plugins/base"
 	"github.com/hashicorp/nomad/plugins/drivers"
 	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
+	"github.com/opencontainers/runc/libcontainer/cgroups"
+	cgroupsfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
+	libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/ryanuber/go-glob"
 )
 
@@ -244,6 +247,10 @@ func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
 	return nil
 }
 
+func writeCGroupFile(path string, val int) error {
+	return ioutil.WriteFile(path, []byte(strconv.Itoa(val)), 0700)
+}
+
 func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) {
 	if _, ok := d.tasks.Get(cfg.ID); ok {
 		return nil, nil, fmt.Errorf("task with ID %q already started", cfg.ID)
@@ -289,6 +296,58 @@ func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drive
 		return nil, nil, fmt.Errorf("Failed to create container configuration for image %q (%q): %v", driverConfig.Image, id, err)
 	}
 
+	// container cgroup settings
+	if containerCfg.HostConfig.CgroupParent != "" {
+		cpusetMount, err := cgroups.FindCgroupMountpoint("", "cpuset")
+		if err != nil {
+			return nil, nil, nstructs.WrapRecoverable(fmt.Sprintf("failed to find cpuset mountpoint: %v", err), err)
+		}
+
+		cgroupParent := filepath.Join(cpusetMount, containerCfg.HostConfig.CgroupParent)
+
+		err = os.MkdirAll(cgroupParent, 0755)
+		if err != nil {
+			return nil, nil, nstructs.WrapRecoverable(fmt.Sprintf("failed to create cgroup parent %s: %v", cgroupParent, err), err)
+		}
+
+		// get cpuset.mems from nomad to set on subgroup
+		// this requires bumping libcontainer; cgroups.Stats.CPUSetStats is missing in the current version
+		// nomadCPUSetStats := new(cgroups.Stats)
+		// err = cpusetGroup.GetStats(filepath.Dir(cgroupParent), nomadCPUSetStats)
+		// if err != nil {
+		// 	return nil, nil, nstructs.WrapRecoverable(fmt.Sprintf("failed to get nomad CPUSetStats: %v", err), err)
+		// }
+
+		cgroupConfig := &libcontainerconfigs.Cgroup{
+			Resources: &libcontainerconfigs.Resources{
+				// this should come from a CPU manager that hands out cores
+				// based on the task's requested core count
+				CpusetCpus: "0",
+
+				// this should come from nomad/cpuset.mems, available via
+				// nomadCPUSetStats.CPUSetStats.Mems above (note: Mems is a []int16)
+				CpusetMems: "0",
+
+				// this requires PRing libcontainer; see below
+				// CPUExclusive: "1",
+			},
+		}
+
+		cpusetGroup := new(cgroupsfs.CpusetGroup)
+		err = cpusetGroup.Set(cgroupParent, cgroupConfig)
+		if err != nil {
+			return nil, nil, nstructs.WrapRecoverable(fmt.Sprintf("failed to write cgroup config: %v", err), err)
+		}
+
+		// libcontainer's CpusetGroup.Set does _not_ set CPUExclusive; setting it there would require an upstream PR:
+		// https://github.com/opencontainers/runc/blob/6c85f6389e479764bf28269253331524b3787708/libcontainer/cgroups/fs/cpuset.go#L30
+		// also, where should this value come from? jobspec? docker plugin config?
+		// err = writeCGroupFile(filepath.Join(cgroupParent, "cpuset.cpu_exclusive"), 1)
+		// if err != nil {
+		// 	return nil, nil, nstructs.WrapRecoverable(fmt.Sprintf("failed to write cpuset.cpu_exclusive: %v", err), err)
+		// }
+	}
+
 	startAttempts := 0
 CREATE:
 	container, err := d.createContainer(client, containerCfg, driverConfig.Image)
@@ -836,6 +895,10 @@ func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *T
 		hostConfig.CPUSetCPUs = driverConfig.CPUSetCPUs
 	}
 
+	// TODO: gate this on a driverConfig.CPUCores option once that option
+	// exists, e.g. `if driverConfig.CPUCores != "" { ... }`:
+	hostConfig.CgroupParent = fmt.Sprintf("nomad/%s-%s", task.JobName, task.Name)
+
 	// Calculate CPU Quota
 	// cfs_quota_us is the time per core, so we must
 	// multiply the time by the number of cores available
-- 
GitLab
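
Reviewer note: the inline comments defer core selection to a future "CPU
manager". Purely as a hypothetical sketch (none of these names exist in
Nomad), such an allocator could hand out distinct core IDs and render them in
the list format that cpuset.cpus and hostConfig.CPUSetCPUs accept:

package main

import (
	"fmt"
	"sort"
	"strings"
	"sync"
)

// CoreAllocator is a hypothetical stand-in for the CPU manager mentioned in
// the patch comments; it tracks which host cores are free.
type CoreAllocator struct {
	mu   sync.Mutex
	free map[int]bool // core ID -> available
}

func NewCoreAllocator(total int) *CoreAllocator {
	free := make(map[int]bool, total)
	for i := 0; i < total; i++ {
		free[i] = true
	}
	return &CoreAllocator{free: free}
}

// Reserve claims n cores and returns them as a cpuset list such as "0,3".
func (a *CoreAllocator) Reserve(n int) (string, error) {
	a.mu.Lock()
	defer a.mu.Unlock()
	var got []int
	for id, ok := range a.free {
		if ok && len(got) < n {
			got = append(got, id)
			a.free[id] = false
		}
	}
	if len(got) < n {
		for _, id := range got { // roll back the partial claim
			a.free[id] = true
		}
		return "", fmt.Errorf("requested %d cores, only %d free", n, len(got))
	}
	sort.Ints(got)
	parts := make([]string, len(got))
	for i, id := range got {
		parts[i] = fmt.Sprint(id)
	}
	return strings.Join(parts, ","), nil
}

func main() {
	alloc := NewCoreAllocator(8)
	cpus, err := alloc.Reserve(2)
	if err != nil {
		panic(err)
	}
	fmt.Println(cpus) // e.g. "0,1"
}

With something like this in place, StartTask could set CpusetCpus from
Reserve(n) instead of the hardcoded "0", and release the cores when the task
stops.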