Commit 57f40c7e authored by Alex Dadgar's avatar Alex Dadgar

Device manager

Introduce a device manager that manages the lifecycle of device plugins
on the client. It fingerprints, collects stats, and forwards Reserve
requests to the correct plugin. The manager also handles failing device
plugins and validates their output.
parent 4351c499
Showing with 1238 additions and 112 deletions
......@@ -191,10 +191,18 @@ checkscripts: ## Lint shell scripts
@echo "==> Linting scripts..."
@shellcheck ./scripts/*
.PHONY: generate
generate: LOCAL_PACKAGES = $(shell go list ./... | grep -v '/vendor/')
generate: ## Update generated code
generate: proto ## Update generated code
@go generate $(LOCAL_PACKAGES)
.PHONY: proto
proto:
@for file in $$(git ls-files "*.proto" | grep -v "vendor\/.*.proto"); do \
protoc -I . -I ../../.. --go_out=plugins=grpc:. $$file; \
done
vendorfmt:
@echo "--> Formatting vendor/vendor.json"
test -x $(GOPATH)/bin/vendorfmt || go get -u github.com/magiconair/vendorfmt/cmd/vendorfmt
......
package client
import (
"context"
"errors"
"fmt"
"io/ioutil"
......@@ -16,31 +17,32 @@ import (
metrics "github.com/armon/go-metrics"
consulapi "github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/lib"
hclog "github.com/hashicorp/go-hclog"
multierror "github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
arstate "github.com/hashicorp/nomad/client/allocrunner/state"
consulApi "github.com/hashicorp/nomad/client/consul"
cstructs "github.com/hashicorp/nomad/client/structs"
hstats "github.com/hashicorp/nomad/helper/stats"
nconfig "github.com/hashicorp/nomad/nomad/structs/config"
vaultapi "github.com/hashicorp/vault/api"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/allocrunner"
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
arstate "github.com/hashicorp/nomad/client/allocrunner/state"
"github.com/hashicorp/nomad/client/allocwatcher"
"github.com/hashicorp/nomad/client/config"
consulApi "github.com/hashicorp/nomad/client/consul"
"github.com/hashicorp/nomad/client/devicemanager"
"github.com/hashicorp/nomad/client/servers"
"github.com/hashicorp/nomad/client/state"
"github.com/hashicorp/nomad/client/stats"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/client/vaultclient"
"github.com/hashicorp/nomad/command/agent/consul"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/pool"
hstats "github.com/hashicorp/nomad/helper/stats"
"github.com/hashicorp/nomad/helper/tlsutil"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/structs"
nconfig "github.com/hashicorp/nomad/nomad/structs/config"
vaultapi "github.com/hashicorp/vault/api"
"github.com/kr/pretty"
"github.com/shirou/gopsutil/host"
)
......@@ -190,6 +192,10 @@ type Client struct {
shutdownCh chan struct{}
shutdownLock sync.Mutex
// ctx is cancelled at the same time as the shutdownCh
ctx context.Context
ctxCancel context.CancelFunc
// vaultClient is used to interact with Vault for token and secret renewals
vaultClient vaultclient.VaultClient
......@@ -205,6 +211,9 @@ type Client struct {
endpoints rpcEndpoints
streamingRpcs *structs.StreamingRpcRegistry
// devicemanager is responsible for managing device plugins.
devicemanager devicemanager.Manager
// baseLabels are used when emitting tagged metrics. All client metrics will
// have these tags, and optionally more.
baseLabels []metrics.Label
......@@ -235,6 +244,9 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic
// Create the logger
logger := cfg.Logger.ResetNamed("client")
// Create a context
ctx, cancel := context.WithCancel(context.Background())
// Create the client
c := &Client{
config: cfg,
......@@ -249,6 +261,8 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic
allocs: make(map[string]AllocRunner),
allocUpdates: make(chan *structs.Allocation, 64),
shutdownCh: make(chan struct{}),
ctx: ctx,
ctxCancel: cancel,
triggerDiscoveryCh: make(chan struct{}),
triggerNodeUpdate: make(chan struct{}, 8),
triggerEmitNodeEvent: make(chan *structs.NodeEvent, 8),
......@@ -306,6 +320,20 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic
return nil, fmt.Errorf("fingerprinting failed: %v", err)
}
// Setup the device manager
devConfig := &devicemanager.Config{
Logger: c.logger,
Loader: c.configCopy.PluginSingletonLoader,
PluginConfig: c.configCopy.NomadPluginConfig(),
Updater: func(devs []*structs.NodeDeviceResource) {
c.logger.Debug("Device Updater called", "count", len(devs), "devices", hclog.Fmt("% #v", pretty.Formatter(devs)))
},
StatsInterval: c.configCopy.StatsCollectionInterval,
State: c.stateDB,
}
c.devicemanager = devicemanager.New(devConfig)
go c.devicemanager.Run()
// Set the preconfigured list of static servers
c.configLock.RLock()
if len(c.configCopy.Servers) > 0 {
......@@ -533,6 +561,9 @@ func (c *Client) Shutdown() error {
}
}()
// Shutdown the device manager
c.devicemanager.Shutdown()
// Stop renewing tokens and secrets
if c.vaultClient != nil {
c.vaultClient.Stop()
......@@ -553,6 +584,7 @@ func (c *Client) Shutdown() error {
c.shutdown = true
close(c.shutdownCh)
c.ctxCancel()
c.connPool.Shutdown()
return nil
}
......
package devicemanager
import (
"context"
"fmt"
"sync"
"time"
log "github.com/hashicorp/go-hclog"
multierror "github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/base"
"github.com/hashicorp/nomad/plugins/device"
"github.com/hashicorp/nomad/plugins/shared/loader"
"github.com/hashicorp/nomad/plugins/shared/singleton"
)
const (
// statsBackoffBaseline is the baseline time for exponential backoff while
// collecting device stats.
statsBackoffBaseline = 5 * time.Second
// statsBackoffLimit is the limit of the exponential backoff for collecting
// device statistics.
statsBackoffLimit = 30 * time.Minute
)
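For a concrete sense of the retry cadence these constants produce, here is a minimal, self-contained sketch of the capped exponential backoff that collectStats applies below (the attempt counts are illustrative):

```go
package main

import (
	"fmt"
	"time"
)

const (
	statsBackoffBaseline = 5 * time.Second
	statsBackoffLimit    = 30 * time.Minute
)

func main() {
	// Same arithmetic as collectStats: the delay grows 4x per attempt and is
	// capped at the limit: 5s, 20s, 1m20s, 5m20s, 21m20s, then 30m.
	for attempt := 0; attempt < 6; attempt++ {
		backoff := (1 << (2 * uint(attempt))) * statsBackoffBaseline
		if backoff > statsBackoffLimit {
			backoff = statsBackoffLimit
		}
		fmt.Printf("attempt %d: %v\n", attempt, backoff)
	}
}
```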
// instanceManagerConfig configures a device instance manager
type instanceManagerConfig struct {
// Logger is the logger used by the device instance manager
Logger log.Logger
// Ctx is used to shut down the device instance manager
Ctx context.Context
// Loader is the plugin loader
Loader loader.PluginCatalog
// StoreReattach is used to store a plugin's reattach config
StoreReattach StorePluginReattachFn
// PluginConfig is the config passed to the launched plugins
PluginConfig *base.ClientAgentConfig
// Id is the ID of the plugin being managed
Id *loader.PluginID
// FingerprintOutCh is used to emit new fingerprinted devices
FingerprintOutCh chan<- struct{}
// StatsInterval is the interval at which we collect statistics.
StatsInterval time.Duration
}
// instanceManager is used to manage a single device plugin
type instanceManager struct {
// logger is the logger used by the device instance manager
logger log.Logger
// ctx is used to shut down the device manager
ctx context.Context
// cancel is used to shut down management of this device plugin
cancel context.CancelFunc
// loader is the plugin loader
loader loader.PluginCatalog
// storeReattach is used to store a plugin's reattach config
storeReattach StorePluginReattachFn
// pluginConfig is the config passed to the launched plugins
pluginConfig *base.ClientAgentConfig
// id is the ID of the plugin being managed
id *loader.PluginID
// fingerprintOutCh is used to emit new fingerprinted devices
fingerprintOutCh chan<- struct{}
// plugin is the plugin instance being managed
plugin loader.PluginInstance
// device is the device plugin being managed
device device.DevicePlugin
// pluginLock locks access to the device and plugin
pluginLock sync.Mutex
// shutdownLock is used to serialize attempts to shutdown
shutdownLock sync.Mutex
// devices is the set of fingerprinted devices
devices []*device.DeviceGroup
deviceLock sync.RWMutex
// statsInterval is the interval at which we collect statistics.
statsInterval time.Duration
// deviceStats is the set of statistics objects per device
deviceStats []*device.DeviceGroupStats
deviceStatsLock sync.RWMutex
// firstFingerprintCh is used to trigger that we have successfully
// fingerprinted once. It is used to gate launching the stats collection.
firstFingerprintCh chan struct{}
hasFingerprinted bool
// wg is used to track the launched goroutines
wg sync.WaitGroup
}
// newInstanceManager returns a new device instance manager. It is expected that
// the context passed in the configuration is cancelled in order to shut down
// launched goroutines.
func newInstanceManager(c *instanceManagerConfig) *instanceManager {
ctx, cancel := context.WithCancel(c.Ctx)
i := &instanceManager{
logger: c.Logger.With("plugin", c.Id.Name),
ctx: ctx,
cancel: cancel,
loader: c.Loader,
storeReattach: c.StoreReattach,
pluginConfig: c.PluginConfig,
id: c.Id,
fingerprintOutCh: c.FingerprintOutCh,
statsInterval: c.StatsInterval,
firstFingerprintCh: make(chan struct{}),
}
go i.run()
return i
}
// HasDevices returns whether the instance is managing the passed devices
func (i *instanceManager) HasDevices(d *structs.AllocatedDeviceResource) bool {
i.deviceLock.Lock()
defer i.deviceLock.Unlock()
OUTER:
for _, dev := range i.devices {
if dev.Name != d.Name || dev.Type != d.Type || dev.Vendor != d.Vendor {
continue
}
// Check that we have all the requested devices
ids := make(map[string]struct{}, len(dev.Devices))
for _, inst := range dev.Devices {
ids[inst.ID] = struct{}{}
}
for _, reqID := range d.DeviceIDs {
if _, ok := ids[reqID]; !ok {
continue OUTER
}
}
return true
}
return false
}
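To make the matching rule above concrete: a group matches only when vendor, type, and name are all equal and every requested instance ID is among the fingerprinted IDs. A self-contained sketch with simplified stand-in types and hypothetical device values:

```go
package main

import "fmt"

type group struct {
	Vendor, Type, Name string
	IDs                []string
}

// hasDevices mirrors the logic of HasDevices above using plain types.
func hasDevices(g group, vendor, typ, name string, reqIDs []string) bool {
	if g.Vendor != vendor || g.Type != typ || g.Name != name {
		return false
	}
	ids := make(map[string]struct{}, len(g.IDs))
	for _, id := range g.IDs {
		ids[id] = struct{}{}
	}
	for _, id := range reqIDs {
		if _, ok := ids[id]; !ok {
			return false
		}
	}
	return true
}

func main() {
	g := group{Vendor: "nvidia", Type: "gpu", Name: "1080ti", IDs: []string{"uuid1", "uuid2"}}
	fmt.Println(hasDevices(g, "nvidia", "gpu", "1080ti", []string{"uuid1"})) // true
	fmt.Println(hasDevices(g, "nvidia", "gpu", "1080ti", []string{"uuid3"})) // false: unknown instance ID
}
```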
// AllStats returns all the device statistics returned by the device plugin.
func (i *instanceManager) AllStats() []*device.DeviceGroupStats {
i.deviceStatsLock.Lock()
defer i.deviceStatsLock.Unlock()
return i.deviceStats
}
// DeviceStats returns the device statistics for the requested devices.
func (i *instanceManager) DeviceStats(d *structs.AllocatedDeviceResource) *device.DeviceGroupStats {
i.deviceStatsLock.Lock()
defer i.deviceStatsLock.Unlock()
// Find the device in question and then gather the instance statistics we
// are interested in
for _, group := range i.deviceStats {
if group.Vendor != d.Vendor || group.Type != d.Type || group.Name != d.Name {
continue
}
// We found the group we want so now grab the instance stats
out := &device.DeviceGroupStats{
Vendor: d.Vendor,
Type: d.Type,
Name: d.Name,
InstanceStats: make(map[string]*device.DeviceStats, len(d.DeviceIDs)),
}
for _, id := range d.DeviceIDs {
out.InstanceStats[id] = group.InstanceStats[id]
}
return out
}
return nil
}
// Reserve reserves the given devices
func (i *instanceManager) Reserve(d *structs.AllocatedDeviceResource) (*device.ContainerReservation, error) {
// Get a device plugin
devicePlugin, err := i.dispense()
if err != nil {
i.logger.Error("dispensing plugin failed", "error", err)
return nil, err
}
// Send the reserve request
return devicePlugin.Reserve(d.DeviceIDs)
}
// Devices returns the detected devices.
func (i *instanceManager) Devices() []*device.DeviceGroup {
i.deviceLock.Lock()
defer i.deviceLock.Unlock()
return i.devices
}
// WaitForFirstFingerprint waits until either the plugin fingerprints, the
// passed context is done, or the plugin instance manager is shut down.
func (i *instanceManager) WaitForFirstFingerprint(ctx context.Context) {
select {
case <-i.ctx.Done():
case <-ctx.Done():
case <-i.firstFingerprintCh:
}
}
// run is a long lived goroutine that starts the fingerprinting and stats
// collection goroutine and then shuts down the plugin on exit.
func (i *instanceManager) run() {
// Dispense once to ensure we are given a valid plugin
if _, err := i.dispense(); err != nil {
i.logger.Error("dispensing initial plugin failed", "error", err)
return
}
// Start the fingerprinter
i.wg.Add(1)
go i.fingerprint()
// Wait for a valid result before starting stats collection
select {
case <-i.ctx.Done():
goto DONE
case <-i.firstFingerprintCh:
}
// Start stats
i.wg.Add(1)
go i.collectStats()
// Do a final cleanup
DONE:
i.wg.Wait()
i.cleanup()
}
// dispense is used to dispense a plugin.
func (i *instanceManager) dispense() (plugin device.DevicePlugin, err error) {
i.pluginLock.Lock()
defer i.pluginLock.Unlock()
// See if we already have a running instance
if i.plugin != nil && !i.plugin.Exited() {
return i.device, nil
}
// Get an instance of the plugin
pluginInstance, err := i.loader.Dispense(i.id.Name, i.id.PluginType, i.pluginConfig, i.logger)
if err != nil {
// Retry as the error just indicates the singleton has exited
if err == singleton.SingletonPluginExited {
pluginInstance, err = i.loader.Dispense(i.id.Name, i.id.PluginType, i.pluginConfig, i.logger)
}
// If we still have an error there is a real problem
if err != nil {
return nil, fmt.Errorf("failed to start plugin: %v", err)
}
}
// Convert to a device plugin
device, ok := pluginInstance.Plugin().(device.DevicePlugin)
if !ok {
pluginInstance.Kill()
return nil, fmt.Errorf("plugin loaded does not implement the driver interface")
}
// Store the plugin and device
i.plugin = pluginInstance
i.device = device
// Store the reattach config
if c, ok := pluginInstance.ReattachConfig(); ok {
i.storeReattach(c)
}
return device, nil
}
// cleanup shuts down the plugin
func (i *instanceManager) cleanup() {
i.shutdownLock.Lock()
defer i.shutdownLock.Unlock()
if i.plugin != nil && !i.plugin.Exited() {
i.plugin.Kill()
i.storeReattach(nil)
}
}
// fingerprint is a long lived routine used to fingerprint the device
func (i *instanceManager) fingerprint() {
defer i.wg.Done()
START:
// Get a device plugin
devicePlugin, err := i.dispense()
if err != nil {
i.logger.Error("dispensing plugin failed", "error", err)
i.cancel()
return
}
// Start fingerprinting
fingerprintCh, err := devicePlugin.Fingerprint(i.ctx)
if err != nil {
i.logger.Error("fingerprinting failed", "error", err)
i.handleFingerprintError()
return
}
var fresp *device.FingerprintResponse
var ok bool
for {
select {
case <-i.ctx.Done():
return
case fresp, ok = <-fingerprintCh:
}
if !ok {
i.logger.Debug("exiting since fingerprinting gracefully shutdown")
i.handleFingerprintError()
return
}
// Guard against error by the plugin
if fresp == nil {
continue
}
// Handle any errors
if fresp.Error != nil {
if fresp.Error == base.ErrPluginShutdown {
i.logger.Error("plugin exited unexpectedly")
goto START
}
i.logger.Error("fingerprinting returned an error", "error", err)
i.handleFingerprintError()
return
}
if err := i.handleFingerprint(fresp); err != nil {
// Cancel the context so we cleanup all goroutines
i.logger.Error("returned devices failed fingerprinting", "error", err)
i.handleFingerprintError()
}
}
}
// handleFingerprintError exits the manager and shuts down the plugin.
func (i *instanceManager) handleFingerprintError() {
// Clear out the devices and trigger a node update
i.deviceLock.Lock()
defer i.deviceLock.Unlock()
// If we have fingerprinted before clear it out
if i.hasFingerprinted {
// Store the new devices
i.devices = nil
// Trigger that we have new devices
select {
case i.fingerprintOutCh <- struct{}{}:
default:
}
}
// Cancel the context so we cleanup all goroutines
i.cancel()
}
// handleFingerprint stores the new devices and triggers the fingerprint output
// channel. An error is returned if the passed devices don't pass validation.
func (i *instanceManager) handleFingerprint(f *device.FingerprintResponse) error {
// Safety check
if f.Devices == nil {
return nil
}
// Validate the received devices
var validationErr multierror.Error
for idx, d := range f.Devices {
if err := d.Validate(); err != nil {
multierror.Append(&validationErr, multierror.Prefix(err, fmt.Sprintf("device group %d: ", idx)))
}
}
if err := validationErr.ErrorOrNil(); err != nil {
return err
}
i.deviceLock.Lock()
defer i.deviceLock.Unlock()
// Store the new devices
i.devices = f.Devices
// Mark that we have received data
if !i.hasFingerprinted {
close(i.firstFingerprintCh)
i.hasFingerprinted = true
}
// Trigger that we have data to pull
select {
case i.fingerprintOutCh <- struct{}{}:
default:
}
return nil
}
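The select with a default case above never blocks the fingerprint loop: because fingerprintOutCh is created with a buffer of one (see New below), bursts of updates coalesce into a single pending wakeup for the consumer. A minimal, runnable sketch of the pattern:

```go
package main

import "fmt"

func main() {
	// notify mirrors fingerprintOutCh: buffered with capacity one.
	notify := make(chan struct{}, 1)

	signal := func() {
		select {
		case notify <- struct{}{}: // a wakeup is now pending
		default: // a wakeup is already pending; coalesce
		}
	}

	signal()
	signal()
	signal()

	<-notify
	fmt.Println("one wakeup for three signals; pending:", len(notify))
}
```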
// collectStats is a long lived goroutine for collecting device statistics. It
// handles errors by backing off exponentially and retrying.
func (i *instanceManager) collectStats() {
defer i.wg.Done()
attempt := 0
START:
// Get a device plugin
devicePlugin, err := i.dispense()
if err != nil {
i.logger.Error("dispensing plugin failed", "error", err)
i.cancel()
return
}
// Start stats collection
statsCh, err := devicePlugin.Stats(i.ctx, i.statsInterval)
if err != nil {
i.logger.Error("stats collection failed", "error", err)
return
}
var sresp *device.StatsResponse
var ok bool
for {
select {
case <-i.ctx.Done():
return
case sresp, ok = <-statsCh:
}
if !ok {
i.logger.Debug("exiting since stats gracefully shutdown")
return
}
// Guard against error by the plugin
if sresp == nil {
continue
}
// Handle any errors
if sresp.Error != nil {
if sresp.Error == base.ErrPluginShutdown {
i.logger.Error("plugin exited unexpectedly")
goto START
}
// Retry with an exponential backoff
backoff := (1 << (2 * uint64(attempt))) * statsBackoffBaseline
if backoff > statsBackoffLimit {
backoff = statsBackoffLimit
}
attempt++
i.logger.Error("stats returned an error", "error", err, "retry", backoff)
select {
case <-i.ctx.Done():
return
case <-time.After(backoff):
goto START
}
}
// Reset the attempt since we got statistics
attempt = 0
// Store the new stats
if sresp.Groups != nil {
i.deviceStatsLock.Lock()
i.deviceStats = sresp.Groups
i.deviceStatsLock.Unlock()
}
}
}
// Package devicemanager is used to manage device plugins
package devicemanager
import (
"context"
"fmt"
"sync"
"time"
log "github.com/hashicorp/go-hclog"
multierror "github.com/hashicorp/go-multierror"
plugin "github.com/hashicorp/go-plugin"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/base"
"github.com/hashicorp/nomad/plugins/device"
"github.com/hashicorp/nomad/plugins/shared"
"github.com/hashicorp/nomad/plugins/shared/loader"
)
// Manager is the interface used to manage device plugins
type Manager interface {
// Run starts the device manager
Run()
// Shutdown shuts down the manager and all launched plugins
Shutdown()
// Reserve is used to reserve a set of devices
Reserve(d *structs.AllocatedDeviceResource) (*device.ContainerReservation, error)
// AllStats is used to retrieve all the latest statistics for all devices.
AllStats() []*device.DeviceGroupStats
// DeviceStats returns the device statistics for the given device.
DeviceStats(d *structs.AllocatedDeviceResource) (*device.DeviceGroupStats, error)
}
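For orientation, a sketch of how an agent might wire the manager up, mirroring the NewClient changes earlier in this commit. The function name and its parameters are hypothetical scaffolding; only the devicemanager API itself comes from this change:

```go
package example

import (
	"time"

	log "github.com/hashicorp/go-hclog"
	"github.com/hashicorp/nomad/client/devicemanager"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/plugins/base"
	"github.com/hashicorp/nomad/plugins/shared/loader"
)

// setupDeviceManager is a hypothetical helper showing the lifecycle: build a
// Config, construct the manager, start its run loop, and let the Updater
// callback receive fingerprinted devices as they change.
func setupDeviceManager(logger log.Logger, catalog loader.PluginCatalog,
	pluginConfig *base.ClientAgentConfig, state devicemanager.StateStorage) devicemanager.Manager {

	m := devicemanager.New(&devicemanager.Config{
		Logger:       logger,
		Loader:       catalog,
		PluginConfig: pluginConfig,
		Updater: func(devs []*structs.NodeDeviceResource) {
			logger.Debug("devices updated", "count", len(devs))
		},
		StatsInterval: 30 * time.Second,
		State:         state,
	})
	go m.Run()
	// The caller is expected to call m.Shutdown() when the agent exits.
	return m
}
```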
// StateStorage is used to persist the device manager's state across
// agent restarts.
type StateStorage interface {
// GetDevicePluginState is used to retrieve the device manager's plugin
// state.
GetDevicePluginState() (*PluginState, error)
// PutDevicePluginState is used to store the device manager's plugin
// state.
PutDevicePluginState(state *PluginState) error
}
// UpdateNodeDevices is a callback for updating the set of devices on a node.
type UpdateNodeDevices func(devices []*structs.NodeDeviceResource)
// StorePluginReattachFn is used to store plugin reattachment configurations.
type StorePluginReattachFn func(*plugin.ReattachConfig) error
// Config is used to configure a device manager
type Config struct {
// Logger is the logger used by the device manager
Logger log.Logger
// Loader is the plugin loader
Loader loader.PluginCatalog
// PluginConfig is the config passed to the launched plugins
PluginConfig *base.ClientAgentConfig
// Updater is used to update the node when device information changes
Updater UpdateNodeDevices
// StatsInterval is the interval at which to collect statistics
StatsInterval time.Duration
// State is used to manage the device manager's state
State StateStorage
}
// manager is used to manage a set of device plugins
type manager struct {
// logger is the logger used by the device manager
logger log.Logger
// state is used to manage the device manager's state
state StateStorage
// ctx is used to shut down the device manager
ctx context.Context
cancel context.CancelFunc
// loader is the plugin loader
loader loader.PluginCatalog
// pluginConfig is the config passed to the launched plugins
pluginConfig *base.ClientAgentConfig
// updater is used to update the node when device information changes
updater UpdateNodeDevices
// statsInterval is the duration at which to collect statistics
statsInterval time.Duration
// fingerprintResCh is used to signal that there are newly fingerprinted devices
fingerprintResCh chan struct{}
// instances is the list of managed devices
instances map[loader.PluginID]*instanceManager
// reattachConfigs stores the plugin reattach configs
reattachConfigs map[loader.PluginID]*shared.ReattachConfig
reattachConfigLock sync.Mutex
}
// New returns a new device manager
func New(c *Config) *manager {
ctx, cancel := context.WithCancel(context.Background())
return &manager{
logger: c.Logger.Named("device_mgr"),
state: c.State,
ctx: ctx,
cancel: cancel,
loader: c.Loader,
pluginConfig: c.PluginConfig,
updater: c.Updater,
instances: make(map[loader.PluginID]*instanceManager),
reattachConfigs: make(map[loader.PluginID]*shared.ReattachConfig),
fingerprintResCh: make(chan struct{}, 1),
}
}
// Run starts the device manager. The manager will shut down any previously
// launched plugin and then begin fingerprinting and stats collection on all new
// device plugins.
func (m *manager) Run() {
// Check if there are any plugins that didn't get cleanly shutdown before
// and if there are shut them down.
m.cleanupStalePlugins()
// Get device plugins
devices := m.loader.Catalog()[base.PluginTypeDevice]
for _, d := range devices {
id := loader.PluginInfoID(d)
storeFn := func(c *plugin.ReattachConfig) error {
return m.storePluginReattachConfig(id, c)
}
m.instances[id] = newInstanceManager(&instanceManagerConfig{
Logger: m.logger,
Ctx: m.ctx,
Loader: m.loader,
StoreReattach: storeFn,
PluginConfig: m.pluginConfig,
Id: &id,
FingerprintOutCh: m.fingerprintResCh,
StatsInterval: m.statsInterval,
})
}
// XXX we should eventually remove this and have it be done in the client
// Give all the fingerprinters a chance to run at least once before we
// update the node. This prevents initial fingerprinting from causing too
// many server side updates.
ctx, cancel := context.WithTimeout(m.ctx, 5*time.Second)
for _, i := range m.instances {
i.WaitForFirstFingerprint(ctx)
}
cancel()
// Now start the fingerprint handler
for {
select {
case <-m.ctx.Done():
return
case <-m.fingerprintResCh:
}
// Collect the data
var fingerprinted []*device.DeviceGroup
for _, i := range m.instances {
fingerprinted = append(fingerprinted, i.Devices()...)
}
// Convert and update
out := make([]*structs.NodeDeviceResource, len(fingerprinted))
for i, f := range fingerprinted {
out[i] = convertDeviceGroup(f)
}
// Call the updater
m.updater(out)
}
}
// Shutdown cleans up all the plugins
func (m *manager) Shutdown() {
// Cancel the context to stop any requests
m.cancel()
// Go through and shut everything down
for _, i := range m.instances {
i.cleanup()
}
}
// Reserve reserves the given allocated device. If the device is unknown, an
// UnknownDeviceErr is returned.
func (m *manager) Reserve(d *structs.AllocatedDeviceResource) (*device.ContainerReservation, error) {
// Go through each plugin and see if it can reserve the resources
for _, i := range m.instances {
if !i.HasDevices(d) {
continue
}
// We found a match so reserve
return i.Reserve(d)
}
return nil, UnknownDeviceErrFromAllocated("failed to reserve devices", d)
}
// AllStats returns statistics for all the devices
func (m *manager) AllStats() []*device.DeviceGroupStats {
// Go through each plugin and collect stats
var stats []*device.DeviceGroupStats
for _, i := range m.instances {
stats = append(stats, i.AllStats()...)
}
return stats
}
// DeviceStats returns the statistics for the passed devices. If the device is unknown, an
// UnknownDeviceErr is returned.
func (m *manager) DeviceStats(d *structs.AllocatedDeviceResource) (*device.DeviceGroupStats, error) {
// Go through each plugin and see if it has the requested devices
for _, i := range m.instances {
if !i.HasDevices(d) {
continue
}
// We found a match so collect the stats
return i.DeviceStats(d), nil
}
return nil, UnknownDeviceErrFromAllocated("failed to collect statistics", d)
}
// cleanupStalePlugins reads the device manager's state and shuts down any
// previously launched plugin.
func (m *manager) cleanupStalePlugins() error {
// Read the old plugin state
s, err := m.state.GetDevicePluginState()
if err != nil {
return fmt.Errorf("failed to read plugin state: %v", err)
}
// No state was stored so there is nothing to do.
if s == nil {
return nil
}
// For each plugin go through and try to shut it down
var mErr multierror.Error
for name, c := range s.ReattachConfigs {
rc, err := shared.ReattachConfigToGoPlugin(c)
if err != nil {
multierror.Append(&mErr, fmt.Errorf("failed to convert reattach config: %v", err))
continue
}
instance, err := m.loader.Reattach(name, base.PluginTypeDevice, rc)
if err != nil {
multierror.Append(&mErr, fmt.Errorf("failed to reattach to plugin %q: %v", name, err))
continue
}
// Kill the instance
instance.Kill()
}
return mErr.ErrorOrNil()
}
// storePluginReattachConfig is used as a callback to the instance managers and
// persists the plugin reattach configurations.
func (m *manager) storePluginReattachConfig(id loader.PluginID, c *plugin.ReattachConfig) error {
m.reattachConfigLock.Lock()
defer m.reattachConfigLock.Unlock()
// Store the new reattach config
m.reattachConfigs[id] = shared.ReattachConfigFromGoPlugin(c)
// Persist the state
s := &PluginState{
ReattachConfigs: make(map[string]*shared.ReattachConfig, len(m.reattachConfigs)),
}
for id, c := range m.reattachConfigs {
s.ReattachConfigs[id.Name] = c
}
return m.state.PutDevicePluginState(s)
}
package devicemanager
import (
"errors"
"fmt"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/device"
"github.com/hashicorp/nomad/plugins/shared"
psstructs "github.com/hashicorp/nomad/plugins/shared/structs"
)
// PluginState is used to store the device manager's state across restarts of the
// agent
type PluginState struct {
// ReattachConfigs are the set of reattach configs for plugins launched by
// the device manager
ReattachConfigs map[string]*shared.ReattachConfig
}
// UnknownDeviceError is returned when an operation is attempted on an unknown
// device.
type UnknownDeviceError struct {
Err error
Name string
Vendor string
Type string
IDs []string
}
// NewUnknownDeviceError returns a new UnknownDeviceError for the given device.
func NewUnknownDeviceError(err error, name, vendor, devType string, ids []string) *UnknownDeviceError {
return &UnknownDeviceError{
Err: err,
Name: name,
Vendor: vendor,
Type: devType,
IDs: ids,
}
}
// Error returns an error message that reveals which unknown devices were
// requested.
func (u *UnknownDeviceError) Error() string {
return fmt.Sprintf("operation on unknown device(s) \"%s/%s/%s\" (%v): %v",
u.Vendor, u.Type, u.Name, u.IDs, u.Err)
}
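For illustration, the message this format produces for a hypothetical unknown GPU request, with the error type inlined so the snippet is self-contained:

```go
package main

import (
	"errors"
	"fmt"
)

// UnknownDeviceError is inlined from the package above; the device values in
// main are hypothetical.
type UnknownDeviceError struct {
	Err                error
	Name, Vendor, Type string
	IDs                []string
}

func (u *UnknownDeviceError) Error() string {
	return fmt.Sprintf("operation on unknown device(s) \"%s/%s/%s\" (%v): %v",
		u.Vendor, u.Type, u.Name, u.IDs, u.Err)
}

func main() {
	err := &UnknownDeviceError{
		Err:    errors.New("failed to reserve devices"),
		Name:   "1080ti",
		Vendor: "nvidia",
		Type:   "gpu",
		IDs:    []string{"uuid1"},
	}
	fmt.Println(err.Error())
	// operation on unknown device(s) "nvidia/gpu/1080ti" ([uuid1]): failed to reserve devices
}
```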
// UnknownDeviceErrFromAllocated is a helper that returns an UnknownDeviceError
// populated from the AllocatedDeviceResource struct.
func UnknownDeviceErrFromAllocated(err string, d *structs.AllocatedDeviceResource) *UnknownDeviceError {
return NewUnknownDeviceError(errors.New(err), d.Name, d.Vendor, d.Type, d.DeviceIDs)
}
// convertDeviceGroup converts a device group to a structs NodeDeviceResource
func convertDeviceGroup(d *device.DeviceGroup) *structs.NodeDeviceResource {
if d == nil {
return nil
}
return &structs.NodeDeviceResource{
Vendor: d.Vendor,
Type: d.Type,
Name: d.Name,
Instances: convertDevices(d.Devices),
Attributes: psstructs.CopyMapStringAttribute(d.Attributes),
}
}
func convertDevices(devs []*device.Device) []*structs.NodeDevice {
if devs == nil {
return nil
}
out := make([]*structs.NodeDevice, len(devs))
for i, dev := range devs {
out[i] = convertDevice(dev)
}
return out
}
func convertDevice(dev *device.Device) *structs.NodeDevice {
if dev == nil {
return nil
}
return &structs.NodeDevice{
ID: dev.ID,
Healthy: dev.Healthy,
HealthDescription: dev.HealthDesc,
Locality: convertHwLocality(dev.HwLocality),
}
}
func convertHwLocality(l *device.DeviceLocality) *structs.NodeDeviceLocality {
if l == nil {
return nil
}
return &structs.NodeDeviceLocality{
PciBusID: l.PciBusID,
}
}
// Code generated by protoc-gen-go. DO NOT EDIT.
// source: logmon.proto
// source: client/logmon/proto/logmon.proto
package proto
......@@ -40,7 +40,7 @@ func (m *StartRequest) Reset() { *m = StartRequest{} }
func (m *StartRequest) String() string { return proto.CompactTextString(m) }
func (*StartRequest) ProtoMessage() {}
func (*StartRequest) Descriptor() ([]byte, []int) {
return fileDescriptor_logmon_0690d412b596ec9e, []int{0}
return fileDescriptor_logmon_6dbff459851a9ae9, []int{0}
}
func (m *StartRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StartRequest.Unmarshal(m, b)
......@@ -119,7 +119,7 @@ func (m *StartResponse) Reset() { *m = StartResponse{} }
func (m *StartResponse) String() string { return proto.CompactTextString(m) }
func (*StartResponse) ProtoMessage() {}
func (*StartResponse) Descriptor() ([]byte, []int) {
return fileDescriptor_logmon_0690d412b596ec9e, []int{1}
return fileDescriptor_logmon_6dbff459851a9ae9, []int{1}
}
func (m *StartResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StartResponse.Unmarshal(m, b)
......@@ -149,7 +149,7 @@ func (m *StopRequest) Reset() { *m = StopRequest{} }
func (m *StopRequest) String() string { return proto.CompactTextString(m) }
func (*StopRequest) ProtoMessage() {}
func (*StopRequest) Descriptor() ([]byte, []int) {
return fileDescriptor_logmon_0690d412b596ec9e, []int{2}
return fileDescriptor_logmon_6dbff459851a9ae9, []int{2}
}
func (m *StopRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StopRequest.Unmarshal(m, b)
......@@ -179,7 +179,7 @@ func (m *StopResponse) Reset() { *m = StopResponse{} }
func (m *StopResponse) String() string { return proto.CompactTextString(m) }
func (*StopResponse) ProtoMessage() {}
func (*StopResponse) Descriptor() ([]byte, []int) {
return fileDescriptor_logmon_0690d412b596ec9e, []int{3}
return fileDescriptor_logmon_6dbff459851a9ae9, []int{3}
}
func (m *StopResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StopResponse.Unmarshal(m, b)
......@@ -308,31 +308,33 @@ var _LogMon_serviceDesc = grpc.ServiceDesc{
},
},
Streams: []grpc.StreamDesc{},
Metadata: "logmon.proto",
}
func init() { proto.RegisterFile("logmon.proto", fileDescriptor_logmon_0690d412b596ec9e) }
var fileDescriptor_logmon_0690d412b596ec9e = []byte{
// 314 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x94, 0x90, 0x31, 0x6f, 0xf2, 0x30,
0x10, 0x86, 0xbf, 0xf0, 0x41, 0x28, 0x07, 0xa1, 0xc8, 0x4b, 0x23, 0x3a, 0x14, 0xa5, 0x43, 0x99,
0xa2, 0x42, 0xff, 0x41, 0x55, 0x75, 0x2a, 0x1d, 0x60, 0xeb, 0x12, 0x19, 0xb8, 0x04, 0x4b, 0x71,
0x2e, 0xb5, 0x8d, 0x84, 0x58, 0xfb, 0x6b, 0xfb, 0x2f, 0xaa, 0x18, 0x13, 0x65, 0x84, 0x29, 0xca,
0xdd, 0xf3, 0xea, 0x1e, 0xbf, 0x30, 0xc8, 0x29, 0x93, 0x54, 0xc4, 0xa5, 0x22, 0x43, 0xec, 0x71,
0xc7, 0xf5, 0x4e, 0x6c, 0x48, 0x95, 0x71, 0x41, 0x92, 0x6f, 0xe3, 0x4d, 0x2e, 0xb0, 0x30, 0x71,
0x13, 0x8a, 0x7e, 0x5a, 0x30, 0x58, 0x19, 0xae, 0xcc, 0x12, 0xbf, 0xf7, 0xa8, 0x0d, 0xbb, 0x83,
0x6e, 0x4e, 0x59, 0xb2, 0x15, 0x2a, 0xf4, 0x26, 0xde, 0xb4, 0xb7, 0xf4, 0x73, 0xca, 0xde, 0x84,
0x62, 0x53, 0x18, 0x69, 0xb3, 0xa5, 0xbd, 0x49, 0x52, 0x91, 0x63, 0x52, 0x70, 0x89, 0x61, 0xcb,
0x12, 0xc3, 0xd3, 0xfc, 0x5d, 0xe4, 0xf8, 0xc9, 0x25, 0x3a, 0x12, 0x95, 0x6a, 0x90, 0xff, 0x6b,
0x12, 0x95, 0xaa, 0xc9, 0x7b, 0xe8, 0x49, 0x7e, 0xb0, 0x98, 0x0e, 0xdb, 0x13, 0x6f, 0x1a, 0x2c,
0x6f, 0x24, 0x3f, 0x54, 0x7b, 0xcd, 0x9e, 0x60, 0x74, 0x5e, 0x26, 0x5a, 0x1c, 0x31, 0x91, 0xeb,
0xb0, 0x63, 0x99, 0xc0, 0x31, 0x2b, 0x71, 0xc4, 0xc5, 0x9a, 0x3d, 0x40, 0xbf, 0x36, 0x4b, 0x29,
0xf4, 0xed, 0x29, 0x38, 0x4b, 0xa5, 0xe4, 0x80, 0x93, 0x50, 0x4a, 0x61, 0xb7, 0x06, 0xac, 0x4b,
0x4a, 0xd1, 0x2d, 0x04, 0xae, 0x04, 0x5d, 0x52, 0xa1, 0x31, 0x0a, 0xa0, 0xbf, 0x32, 0x54, 0xba,
0x52, 0xa2, 0x61, 0x55, 0x52, 0xf5, 0x7b, 0x5a, 0xcf, 0x7f, 0x3d, 0xf0, 0x3f, 0x28, 0x5b, 0x50,
0xc1, 0x4a, 0xe8, 0xd8, 0x28, 0x9b, 0xc5, 0x17, 0xf4, 0x1d, 0x37, 0xbb, 0x1e, 0xcf, 0xaf, 0x89,
0x38, 0xb3, 0x7f, 0x4c, 0x42, 0xbb, 0x92, 0x61, 0xcf, 0x17, 0xa6, 0xeb, 0x67, 0x8c, 0x67, 0x57,
0x24, 0xce, 0xe7, 0x5e, 0xbb, 0x5f, 0x1d, 0x3b, 0x5f, 0xfb, 0xf6, 0xf3, 0xf2, 0x17, 0x00, 0x00,
0xff, 0xff, 0x1b, 0x8f, 0x8b, 0x51, 0x66, 0x02, 0x00, 0x00,
Metadata: "client/logmon/proto/logmon.proto",
}
func init() {
proto.RegisterFile("client/logmon/proto/logmon.proto", fileDescriptor_logmon_6dbff459851a9ae9)
}
var fileDescriptor_logmon_6dbff459851a9ae9 = []byte{
// 320 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x94, 0x90, 0x31, 0x6f, 0xc2, 0x30,
0x10, 0x85, 0x1b, 0x0a, 0xa1, 0x1c, 0x0d, 0x45, 0x5e, 0x1a, 0xd1, 0xa1, 0x28, 0x1d, 0xca, 0x14,
0x0a, 0xfd, 0x07, 0x55, 0xd5, 0xa9, 0x74, 0x80, 0xad, 0x4b, 0x64, 0xc0, 0x09, 0x96, 0x62, 0x5f,
0x6a, 0x1b, 0x09, 0xb1, 0xf6, 0xd7, 0xf6, 0x5f, 0x54, 0x71, 0x4c, 0x94, 0x11, 0x26, 0xeb, 0xee,
0x7d, 0x4f, 0xf7, 0xfc, 0x60, 0xbc, 0xc9, 0x39, 0x93, 0x66, 0x9a, 0x63, 0x26, 0x50, 0x4e, 0x0b,
0x85, 0x06, 0xdd, 0x10, 0xdb, 0x81, 0x3c, 0xed, 0xa8, 0xde, 0xf1, 0x0d, 0xaa, 0x22, 0x96, 0x28,
0xe8, 0x36, 0xae, 0x1c, 0x71, 0x13, 0x8a, 0x7e, 0x5b, 0x70, 0xbb, 0x32, 0x54, 0x99, 0x25, 0xfb,
0xd9, 0x33, 0x6d, 0xc8, 0x3d, 0x74, 0x73, 0xcc, 0x92, 0x2d, 0x57, 0xa1, 0x37, 0xf6, 0x26, 0xbd,
0xa5, 0x9f, 0x63, 0xf6, 0xce, 0x15, 0x99, 0xc0, 0x50, 0x9b, 0x2d, 0xee, 0x4d, 0x92, 0xf2, 0x9c,
0x25, 0x92, 0x0a, 0x16, 0xb6, 0x2c, 0x31, 0xa8, 0xf6, 0x1f, 0x3c, 0x67, 0x5f, 0x54, 0x30, 0x47,
0x32, 0xa5, 0x1a, 0xe4, 0x75, 0x4d, 0x32, 0xa5, 0x6a, 0xf2, 0x01, 0x7a, 0x82, 0x1e, 0x2c, 0xa6,
0xc3, 0xf6, 0xd8, 0x9b, 0x04, 0xcb, 0x1b, 0x41, 0x0f, 0xa5, 0xae, 0xc9, 0x33, 0x0c, 0x4f, 0x62,
0xa2, 0xf9, 0x91, 0x25, 0x62, 0x1d, 0x76, 0x2c, 0x13, 0x38, 0x66, 0xc5, 0x8f, 0x6c, 0xb1, 0x26,
0x8f, 0xd0, 0xaf, 0x93, 0xa5, 0x18, 0xfa, 0xf6, 0x14, 0x9c, 0x42, 0xa5, 0xe8, 0x80, 0x2a, 0x50,
0x8a, 0x61, 0xb7, 0x06, 0x6c, 0x96, 0x14, 0xa3, 0x3b, 0x08, 0x5c, 0x09, 0xba, 0x40, 0xa9, 0x59,
0x14, 0x40, 0x7f, 0x65, 0xb0, 0x70, 0xa5, 0x44, 0x83, 0xb2, 0xa4, 0x72, 0xac, 0xe4, 0xf9, 0x9f,
0x07, 0xfe, 0x27, 0x66, 0x0b, 0x94, 0xa4, 0x80, 0x8e, 0xb5, 0x92, 0x59, 0x7c, 0x46, 0xdf, 0x71,
0xb3, 0xeb, 0xd1, 0xfc, 0x12, 0x8b, 0x4b, 0x76, 0x45, 0x04, 0xb4, 0xcb, 0x30, 0xe4, 0xe5, 0x4c,
0x77, 0xfd, 0x8d, 0xd1, 0xec, 0x02, 0xc7, 0xe9, 0xdc, 0x5b, 0xf7, 0xbb, 0x63, 0xf7, 0x6b, 0xdf,
0x3e, 0xaf, 0xff, 0x01, 0x00, 0x00, 0xff, 0xff, 0xd8, 0xb4, 0x87, 0xfc, 0x7a, 0x02, 0x00, 0x00,
}
......@@ -7,6 +7,7 @@ import (
"testing"
trstate "github.com/hashicorp/nomad/client/allocrunner/taskrunner/state"
"github.com/hashicorp/nomad/client/devicemanager"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/kr/pretty"
......@@ -190,3 +191,28 @@ func TestStateDB_TaskState(t *testing.T) {
require.Nil(ts)
})
}
// TestStateDB_DeviceManager asserts the behavior of the device manager
// state-related StateDB methods.
func TestStateDB_DeviceManager(t *testing.T) {
t.Parallel()
testDB(t, func(t *testing.T, db StateDB) {
require := require.New(t)
// Getting nonexistent state should return nils
ps, err := db.GetDevicePluginState()
require.NoError(err)
require.Nil(ps)
// Putting PluginState should work
state := &devicemanager.PluginState{}
require.NoError(db.PutDevicePluginState(state))
// Getting should return the available state
ps, err = db.GetDevicePluginState()
require.NoError(err)
require.NotNil(ps)
require.Equal(state, ps)
})
}
......@@ -2,6 +2,7 @@ package state
import (
"github.com/hashicorp/nomad/client/allocrunner/taskrunner/state"
"github.com/hashicorp/nomad/client/devicemanager"
"github.com/hashicorp/nomad/nomad/structs"
)
......@@ -41,6 +42,14 @@ type StateDB interface {
// exists. No error is returned if it does not exist.
DeleteAllocationBucket(allocID string) error
// GetDevicePluginState is used to retrieve the device manager's plugin
// state.
GetDevicePluginState() (*devicemanager.PluginState, error)
// PutDevicePluginState is used to store the device manager's plugin
// state.
PutDevicePluginState(state *devicemanager.PluginState) error
// Close the database. Unsafe for further use after calling regardless
// of return value.
Close() error
......
......@@ -4,6 +4,7 @@ import (
"sync"
"github.com/hashicorp/nomad/client/allocrunner/taskrunner/state"
"github.com/hashicorp/nomad/client/devicemanager"
"github.com/hashicorp/nomad/nomad/structs"
)
......@@ -17,6 +18,9 @@ type MemDB struct {
localTaskState map[string]map[string]*state.LocalState
taskState map[string]map[string]*structs.TaskState
// devicemanager -> plugin-state
devManagerPs *devicemanager.PluginState
mu sync.RWMutex
}
......@@ -131,6 +135,21 @@ func (m *MemDB) DeleteAllocationBucket(allocID string) error {
return nil
}
func (m *MemDB) PutDevicePluginState(ps *devicemanager.PluginState) error {
m.mu.Lock()
defer m.mu.Unlock()
m.devManagerPs = ps
return nil
}
// GetDevicePluginState returns the previously stored device manager's plugin
// state.
func (m *MemDB) GetDevicePluginState() (*devicemanager.PluginState, error) {
m.mu.Lock()
defer m.mu.Unlock()
return m.devManagerPs, nil
}
func (m *MemDB) Close() error {
m.mu.Lock()
defer m.mu.Unlock()
......
......@@ -2,6 +2,7 @@ package state
import (
"github.com/hashicorp/nomad/client/allocrunner/taskrunner/state"
"github.com/hashicorp/nomad/client/devicemanager"
"github.com/hashicorp/nomad/nomad/structs"
)
......@@ -40,6 +41,14 @@ func (n NoopDB) DeleteAllocationBucket(allocID string) error {
return nil
}
func (n NoopDB) PutDevicePluginState(ps *devicemanager.PluginState) error {
return nil
}
func (n NoopDB) GetDevicePluginState() (*devicemanager.PluginState, error) {
return nil, nil
}
func (n NoopDB) Close() error {
return nil
}
......@@ -5,12 +5,13 @@ import (
"path/filepath"
trstate "github.com/hashicorp/nomad/client/allocrunner/taskrunner/state"
"github.com/hashicorp/nomad/client/devicemanager"
"github.com/hashicorp/nomad/helper/boltdd"
"github.com/hashicorp/nomad/nomad/structs"
)
/*
The client has a boltDB backed state store. The schema as of 0.6 looks as follows:
The client has a boltDB backed state store. The schema as of 0.9 looks as follows:
allocations/ (bucket)
|--> <alloc-id>/ (bucket)
......@@ -18,6 +19,9 @@ allocations/ (bucket)
|--> alloc_runner persisted objects (k/v)
|--> <task-name>/ (bucket)
|--> task_runner persisted objects (k/v)
devicemanager/
|--> plugin-state -> *devicemanager.PluginState
*/
var (
......@@ -36,6 +40,14 @@ var (
// allocations -> $allocid -> $taskname -> the keys below
taskLocalStateKey = []byte("local_state")
taskStateKey = []byte("task_state")
// devManagerBucket is the bucket name containing all device manager related
// data
devManagerBucket = []byte("devicemanager")
// devManagerPluginStateKey is the key at which the serialized device manager
// plugin state is stored
devManagerPluginStateKey = []byte("plugin-state")
)
// NewStateDBFunc creates a StateDB given a state directory.
......@@ -354,3 +366,50 @@ func getTaskBucket(tx *boltdd.Tx, allocID, taskName string) (*boltdd.Bucket, err
return task, nil
}
// PutDevicePluginState stores the device manager's plugin state or returns an
// error.
func (s *BoltStateDB) PutDevicePluginState(ps *devicemanager.PluginState) error {
return s.db.Update(func(tx *boltdd.Tx) error {
// Retrieve the root device manager bucket
devBkt, err := tx.CreateBucketIfNotExists(devManagerBucket)
if err != nil {
return err
}
return devBkt.Put(devManagerPluginStateKey, ps)
})
}
// GetDevicePluginState retrieves the device manager's plugin state or returns
// an error.
func (s *BoltStateDB) GetDevicePluginState() (*devicemanager.PluginState, error) {
var ps *devicemanager.PluginState
err := s.db.View(func(tx *boltdd.Tx) error {
devBkt := tx.Bucket(devManagerBucket)
if devBkt == nil {
// No state, return
return nil
}
// Restore Plugin State if it exists
ps = &devicemanager.PluginState{}
if err := devBkt.Get(devManagerPluginStateKey, ps); err != nil {
if !boltdd.IsErrNotFound(err) {
return fmt.Errorf("failed to read device manager plugin state: %v", err)
}
// Key not found, reset ps to nil
ps = nil
}
return nil
})
if err != nil {
return nil, err
}
return ps, nil
}
......@@ -118,6 +118,7 @@ func (c *Command) readConfig() *Config {
}), "consul-auto-advertise", "")
flags.StringVar(&cmdConfig.Consul.CAFile, "consul-ca-file", "", "")
flags.StringVar(&cmdConfig.Consul.CertFile, "consul-cert-file", "", "")
flags.StringVar(&cmdConfig.Consul.KeyFile, "consul-key-file", "", "")
flags.Var((flaghelper.FuncBoolVar)(func(b bool) error {
cmdConfig.Consul.ChecksUseAdvertise = &b
return nil
......@@ -128,7 +129,6 @@ func (c *Command) readConfig() *Config {
}), "consul-client-auto-join", "")
flags.StringVar(&cmdConfig.Consul.ClientServiceName, "consul-client-service-name", "", "")
flags.StringVar(&cmdConfig.Consul.ClientHTTPCheckName, "consul-client-http-check-name", "", "")
flags.StringVar(&cmdConfig.Consul.KeyFile, "consul-key-file", "", "")
flags.StringVar(&cmdConfig.Consul.ServerServiceName, "consul-server-service-name", "", "")
flags.StringVar(&cmdConfig.Consul.ServerHTTPCheckName, "consul-server-http-check-name", "", "")
flags.StringVar(&cmdConfig.Consul.ServerSerfCheckName, "consul-server-serf-check-name", "", "")
......@@ -454,7 +454,59 @@ func (c *Command) AutocompleteFlags() complete.Flags {
complete.PredictFiles("*.hcl"))
return map[string]complete.Predictor{
"-config": configFilePredictor,
"-dev": complete.PredictNothing,
"-server": complete.PredictNothing,
"-client": complete.PredictNothing,
"-bootstrap-expect": complete.PredictAnything,
"-encrypt": complete.PredictAnything,
"-raft-protocol": complete.PredictAnything,
"-rejoin": complete.PredictNothing,
"-join": complete.PredictAnything,
"-retry-join": complete.PredictAnything,
"-retry-max": complete.PredictAnything,
"-state-dir": complete.PredictDirs("*"),
"-alloc-dir": complete.PredictDirs("*"),
"-node-class": complete.PredictAnything,
"-servers": complete.PredictAnything,
"-meta": complete.PredictAnything,
"-config": configFilePredictor,
"-bind": complete.PredictAnything,
"-region": complete.PredictAnything,
"-data-dir": complete.PredictDirs("*"),
"-plugin-dir": complete.PredictDirs("*"),
"-dc": complete.PredictAnything,
"-log-level": complete.PredictAnything,
"-node": complete.PredictAnything,
"-consul-auth": complete.PredictAnything,
"-consul-auto-advertise": complete.PredictNothing,
"-consul-ca-file": complete.PredictAnything,
"-consul-cert-file": complete.PredictAnything,
"-consul-key-file": complete.PredictAnything,
"-consul-checks-use-advertise": complete.PredictNothing,
"-consul-client-auto-join": complete.PredictNothing,
"-consul-client-service-name": complete.PredictAnything,
"-consul-client-http-check-name": complete.PredictAnything,
"-consul-server-service-name": complete.PredictAnything,
"-consul-server-http-check-name": complete.PredictAnything,
"-consul-server-serf-check-name": complete.PredictAnything,
"-consul-server-rpc-check-name": complete.PredictAnything,
"-consul-server-auto-join": complete.PredictNothing,
"-consul-ssl": complete.PredictNothing,
"-consul-verify-ssl": complete.PredictNothing,
"-consul-address": complete.PredictAnything,
"-vault-enabled": complete.PredictNothing,
"-vault-allow-unauthenticated": complete.PredictNothing,
"-vault-token": complete.PredictAnything,
"-vault-address": complete.PredictAnything,
"-vault-create-from-role": complete.PredictAnything,
"-vault-ca-file": complete.PredictAnything,
"-vault-ca-path": complete.PredictAnything,
"-vault-cert-file": complete.PredictAnything,
"-vault-key-file": complete.PredictAnything,
"-vault-tls-skip-verify": complete.PredictNothing,
"-vault-tls-server-name": complete.PredictAnything,
"-acl-enabled": complete.PredictNothing,
"-acl-replication-token": complete.PredictAnything,
}
}
......
......@@ -12,7 +12,6 @@ The configuration should be passed via an HCL file that begins with a top level
config {
ignored_gpu_ids = ["uuid1", "uuid2"]
fingerprint_period = "5s"
stats_period = "5s"
}
```
......@@ -20,4 +19,3 @@ The valid configuration options are:
* `ignored_gpu_ids` (`list(string)`: `[]`): list of GPU UUID strings that should not be exposed to Nomad
* `fingerprint_period` (`string`: `"5s"`): The interval at which to repeat the fingerprint process to identify device changes.
* `stats_period` (`string`: `"5s"`): The interval at which to emit statistics about the devices.
......@@ -8,7 +8,6 @@ import (
"time"
log "github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/devices/gpu/nvidia/nvml"
"github.com/hashicorp/nomad/plugins/base"
"github.com/hashicorp/nomad/plugins/device"
......@@ -52,11 +51,7 @@ var (
),
"fingerprint_period": hclspec.NewDefault(
hclspec.NewAttr("fingerprint_period", "string", false),
hclspec.NewLiteral("\"5s\""),
),
"stats_period": hclspec.NewDefault(
hclspec.NewAttr("stats_period", "string", false),
hclspec.NewLiteral("\"5s\""),
hclspec.NewLiteral("\"1m\""),
),
})
)
......@@ -65,7 +60,6 @@ var (
type Config struct {
IgnoredGPUIDs []string `codec:"ignored_gpu_ids"`
FingerprintPeriod string `codec:"fingerprint_period"`
StatsPeriod string `codec:"stats_period"`
}
// NvidiaDevice contains all plugin specific data
......@@ -137,13 +131,6 @@ func (d *NvidiaDevice) SetConfig(data []byte, cfg *base.ClientAgentConfig) error
}
d.fingerprintPeriod = period
// Convert the stats period
speriod, err := time.ParseDuration(config.StatsPeriod)
if err != nil {
return fmt.Errorf("failed to parse stats period %q: %v", config.StatsPeriod, err)
}
d.statsPeriod = speriod
return nil
}
......@@ -202,8 +189,8 @@ func (d *NvidiaDevice) Reserve(deviceIDs []string) (*device.ContainerReservation
}
// Stats streams statistics for the detected devices.
func (d *NvidiaDevice) Stats(ctx context.Context) (<-chan *device.StatsResponse, error) {
func (d *NvidiaDevice) Stats(ctx context.Context, interval time.Duration) (<-chan *device.StatsResponse, error) {
outCh := make(chan *device.StatsResponse)
go d.stats(ctx, outCh)
go d.stats(ctx, outCh, interval)
return outCh, nil
}
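This signature change threads the client's configured collection interval through to the plugin, replacing the per-plugin stats_period option removed above. A minimal, self-contained sketch of a plugin-side stats loop honoring the passed context and interval (the StatsResponse here is a local stand-in, not the real plugins/device type):

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// StatsResponse stands in for the real device.StatsResponse.
type StatsResponse struct{ Collected time.Time }

// stats emits a response per tick until the context is cancelled, then closes
// the channel so consumers can detect a graceful shutdown.
func stats(ctx context.Context, interval time.Duration) (<-chan *StatsResponse, error) {
	out := make(chan *StatsResponse)
	go func() {
		defer close(out)
		ticker := time.NewTicker(interval)
		defer ticker.Stop()
		for {
			select {
			case <-ctx.Done():
				return
			case t := <-ticker.C:
				out <- &StatsResponse{Collected: t}
			}
		}
	}()
	return out, nil
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 350*time.Millisecond)
	defer cancel()
	ch, _ := stats(ctx, 100*time.Millisecond)
	for resp := range ch {
		fmt.Println("stats at", resp.Collected.Format(time.StampMilli))
	}
}
```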
......@@ -31,12 +31,10 @@ func (d *NvidiaDevice) fingerprint(ctx context.Context, devices chan<- *device.F
if d.initErr.Error() != nvml.UnavailableLib.Error() {
d.logger.Error("exiting fingerprinting due to problems with NVML loading", "error", d.initErr)
devices <- device.NewFingerprintError(d.initErr)
} else {
// write empty fingerprint response to let server know that there are
// no working Nvidia GPU units
devices <- device.NewFingerprint()
}
// Just close the channel to let the server know that there are no working
// Nvidia GPU units
return
}
......
......@@ -50,7 +50,7 @@ const (
)
// stats is the long running goroutine that streams device statistics
func (d *NvidiaDevice) stats(ctx context.Context, stats chan<- *device.StatsResponse) {
func (d *NvidiaDevice) stats(ctx context.Context, stats chan<- *device.StatsResponse, interval time.Duration) {
defer close(stats)
if d.initErr != nil {
......@@ -70,7 +70,7 @@ func (d *NvidiaDevice) stats(ctx context.Context, stats chan<- *device.StatsResp
case <-ctx.Done():
return
case <-ticker.C:
ticker.Reset(d.statsPeriod)
ticker.Reset(interval)
}
d.writeStatsToChannel(stats, time.Now())
......
// Code generated by protoc-gen-go. DO NOT EDIT.
// source: docker_logger.proto
// source: drivers/docker/docklog/proto/docker_logger.proto
package proto
......@@ -40,7 +40,7 @@ func (m *StartRequest) Reset() { *m = StartRequest{} }
func (m *StartRequest) String() string { return proto.CompactTextString(m) }
func (*StartRequest) ProtoMessage() {}
func (*StartRequest) Descriptor() ([]byte, []int) {
return fileDescriptor_docker_logger_54dce3369d4ecf54, []int{0}
return fileDescriptor_docker_logger_0aa5a411831bd10e, []int{0}
}
func (m *StartRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StartRequest.Unmarshal(m, b)
......@@ -119,7 +119,7 @@ func (m *StartResponse) Reset() { *m = StartResponse{} }
func (m *StartResponse) String() string { return proto.CompactTextString(m) }
func (*StartResponse) ProtoMessage() {}
func (*StartResponse) Descriptor() ([]byte, []int) {
return fileDescriptor_docker_logger_54dce3369d4ecf54, []int{1}
return fileDescriptor_docker_logger_0aa5a411831bd10e, []int{1}
}
func (m *StartResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StartResponse.Unmarshal(m, b)
......@@ -149,7 +149,7 @@ func (m *StopRequest) Reset() { *m = StopRequest{} }
func (m *StopRequest) String() string { return proto.CompactTextString(m) }
func (*StopRequest) ProtoMessage() {}
func (*StopRequest) Descriptor() ([]byte, []int) {
return fileDescriptor_docker_logger_54dce3369d4ecf54, []int{2}
return fileDescriptor_docker_logger_0aa5a411831bd10e, []int{2}
}
func (m *StopRequest) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StopRequest.Unmarshal(m, b)
......@@ -179,7 +179,7 @@ func (m *StopResponse) Reset() { *m = StopResponse{} }
func (m *StopResponse) String() string { return proto.CompactTextString(m) }
func (*StopResponse) ProtoMessage() {}
func (*StopResponse) Descriptor() ([]byte, []int) {
return fileDescriptor_docker_logger_54dce3369d4ecf54, []int{3}
return fileDescriptor_docker_logger_0aa5a411831bd10e, []int{3}
}
func (m *StopResponse) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_StopResponse.Unmarshal(m, b)
......@@ -308,31 +308,34 @@ var _DockerLogger_serviceDesc = grpc.ServiceDesc{
},
},
Streams: []grpc.StreamDesc{},
Metadata: "docker_logger.proto",
}
func init() { proto.RegisterFile("docker_logger.proto", fileDescriptor_docker_logger_54dce3369d4ecf54) }
var fileDescriptor_docker_logger_54dce3369d4ecf54 = []byte{
// 319 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x9c, 0x50, 0xbd, 0x4e, 0xe3, 0x40,
0x10, 0x3e, 0xe7, 0x62, 0x27, 0x37, 0x49, 0xee, 0xa4, 0x3d, 0x21, 0x4c, 0x1a, 0xc0, 0x15, 0x05,
0x72, 0x01, 0x15, 0xa1, 0x03, 0x84, 0x84, 0xa0, 0x4a, 0x3a, 0x1a, 0xcb, 0x78, 0x27, 0xc9, 0x2a,
0xc6, 0x63, 0x66, 0x27, 0x48, 0xa9, 0x90, 0x78, 0x06, 0x9e, 0x8e, 0xa7, 0x41, 0x5e, 0x1b, 0x8b,
0x36, 0xa9, 0x76, 0xe7, 0xfb, 0x19, 0xcd, 0xf7, 0xc1, 0x7f, 0x4d, 0xd9, 0x0a, 0x39, 0xc9, 0x69,
0xb1, 0x40, 0x8e, 0x4b, 0x26, 0x21, 0x75, 0xba, 0x4c, 0xed, 0xd2, 0x64, 0xc4, 0x65, 0x5c, 0xd0,
0x73, 0xaa, 0x63, 0xcd, 0xe6, 0x15, 0xd9, 0xc6, 0xb5, 0xd8, 0x3d, 0x39, 0x2d, 0x6a, 0x75, 0xf4,
0xe9, 0xc1, 0x70, 0x26, 0x29, 0xcb, 0x14, 0x5f, 0xd6, 0x68, 0x45, 0x8d, 0xa1, 0x8f, 0x85, 0x2e,
0xc9, 0x14, 0x12, 0x7a, 0x47, 0xde, 0xc9, 0x9f, 0x69, 0x3b, 0xab, 0x63, 0x18, 0x66, 0x54, 0x48,
0x6a, 0x0a, 0xe4, 0xc4, 0xe8, 0xb0, 0xe3, 0xf8, 0x41, 0x8b, 0xdd, 0x69, 0x75, 0x08, 0x03, 0x2b,
0x9a, 0xd6, 0x92, 0xcc, 0xcd, 0x9c, 0xc2, 0xdf, 0x4e, 0x01, 0x35, 0x74, 0x6b, 0xe6, 0xd4, 0x08,
0x90, 0xb9, 0x16, 0x74, 0x5b, 0x01, 0x32, 0x3b, 0xc1, 0x01, 0xf4, 0x25, 0xb7, 0x49, 0x86, 0x2c,
0xa1, 0xef, 0xd8, 0x9e, 0xe4, 0xf6, 0x1a, 0x59, 0xd4, 0x3e, 0x54, 0xdf, 0x64, 0x85, 0x9b, 0x30,
0x70, 0x4c, 0x20, 0xb9, 0xbd, 0xc7, 0x8d, 0xda, 0x83, 0xc0, 0x79, 0xd2, 0xb0, 0xe7, 0x70, 0xbf,
0x72, 0xa4, 0xd1, 0x3f, 0x18, 0x35, 0xd9, 0x6c, 0x49, 0x85, 0xc5, 0x68, 0x04, 0x83, 0x99, 0x50,
0xd9, 0x64, 0x8d, 0xfe, 0x56, 0xd9, 0xab, 0xb1, 0xa6, 0xcf, 0x3e, 0x3a, 0x30, 0xbc, 0x71, 0x2d,
0x3d, 0xb8, 0x46, 0xd5, 0xbb, 0x07, 0xbe, 0xdb, 0xa0, 0x26, 0xf1, 0x36, 0xb5, 0xc6, 0x3f, 0x2b,
0x1d, 0x5f, 0xee, 0xe4, 0x6d, 0x4e, 0xfe, 0xa5, 0xde, 0xa0, 0x5b, 0x5d, 0xa9, 0x2e, 0xb6, 0x5d,
0xd3, 0x06, 0x1d, 0x4f, 0x76, 0xb1, 0x7e, 0x1f, 0x70, 0xd5, 0x7b, 0xf4, 0x1d, 0xfe, 0x14, 0xb8,
0xe7, 0xfc, 0x2b, 0x00, 0x00, 0xff, 0xff, 0x2e, 0xe1, 0xdf, 0xc9, 0x78, 0x02, 0x00, 0x00,
Metadata: "drivers/docker/docklog/proto/docker_logger.proto",
}
func init() {
proto.RegisterFile("drivers/docker/docklog/proto/docker_logger.proto", fileDescriptor_docker_logger_0aa5a411831bd10e)
}
var fileDescriptor_docker_logger_0aa5a411831bd10e = []byte{
// 328 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x9c, 0x50, 0xb1, 0x4e, 0xeb, 0x40,
0x10, 0x7c, 0xce, 0x8b, 0x9d, 0x70, 0x49, 0x40, 0x3a, 0x09, 0x61, 0xd2, 0x00, 0xae, 0x28, 0x90,
0x83, 0xa0, 0x22, 0x74, 0x80, 0x90, 0x10, 0x54, 0x49, 0x47, 0x63, 0x19, 0x7b, 0x93, 0x58, 0x31,
0x5e, 0xb3, 0xb7, 0x41, 0x4a, 0x85, 0xc4, 0x37, 0xf0, 0x75, 0x7c, 0x0d, 0xf2, 0xfa, 0xb0, 0x68,
0x93, 0xea, 0x6e, 0x67, 0x67, 0x46, 0x3b, 0xa3, 0xce, 0x53, 0xca, 0xde, 0x81, 0xcc, 0x28, 0xc5,
0x64, 0x09, 0x24, 0x4f, 0x8e, 0xf3, 0x51, 0x49, 0xc8, 0x68, 0xc1, 0x28, 0xc7, 0xf9, 0x1c, 0x28,
0x14, 0x4c, 0x9f, 0x2d, 0x62, 0xb3, 0xc8, 0x12, 0xa4, 0x32, 0x2c, 0xf0, 0x35, 0x4e, 0x43, 0xeb,
0x10, 0xd6, 0xe4, 0xd0, 0x3a, 0xd4, 0xec, 0xe0, 0xdb, 0x51, 0xfd, 0x29, 0xc7, 0xc4, 0x13, 0x78,
0x5b, 0x81, 0x61, 0x3d, 0x54, 0x5d, 0x28, 0xd2, 0x12, 0xb3, 0x82, 0x7d, 0xe7, 0xd8, 0x39, 0xdd,
0x99, 0x34, 0xb3, 0x3e, 0x51, 0xfd, 0x04, 0x0b, 0x8e, 0xb3, 0x02, 0x28, 0xca, 0x52, 0xbf, 0x25,
0xfb, 0x5e, 0x83, 0x3d, 0xa4, 0xfa, 0x48, 0xf5, 0x0c, 0xa7, 0xb8, 0xe2, 0x68, 0x96, 0xcd, 0xd0,
0xff, 0x2f, 0x0c, 0x55, 0x43, 0xf7, 0xd9, 0x0c, 0x2d, 0x01, 0x88, 0x6a, 0x42, 0xbb, 0x21, 0x00,
0x91, 0x10, 0x0e, 0x55, 0x97, 0x73, 0x13, 0x25, 0x40, 0xec, 0xbb, 0xb2, 0xed, 0x70, 0x6e, 0x6e,
0x81, 0x58, 0x1f, 0xa8, 0xea, 0x1b, 0x2d, 0x61, 0xed, 0x7b, 0xb2, 0xf1, 0x38, 0x37, 0x8f, 0xb0,
0xd6, 0xfb, 0xca, 0x13, 0x4d, 0xec, 0x77, 0x04, 0x77, 0x2b, 0x45, 0x1c, 0xec, 0xa9, 0x81, 0xcd,
0x66, 0x4a, 0x2c, 0x0c, 0x04, 0x03, 0xd5, 0x9b, 0x32, 0x96, 0x36, 0x6b, 0xb0, 0x5b, 0x65, 0xaf,
0xc6, 0x7a, 0x7d, 0xf1, 0xd5, 0x52, 0xfd, 0x3b, 0x69, 0xe9, 0x49, 0x1a, 0xd5, 0x9f, 0x8e, 0x72,
0xc5, 0x41, 0x8f, 0xc3, 0x4d, 0x6a, 0x0d, 0xff, 0x56, 0x3a, 0xbc, 0xde, 0x4a, 0x6b, 0x4f, 0xfe,
0xa7, 0x3f, 0x54, 0xbb, 0xba, 0x52, 0x5f, 0x6d, 0x6a, 0xd3, 0x04, 0x1d, 0x8e, 0xb7, 0x91, 0xfe,
0x1e, 0x70, 0xd3, 0x79, 0x76, 0x05, 0x7f, 0xf1, 0xe4, 0xb9, 0xfc, 0x09, 0x00, 0x00, 0xff, 0xff,
0x43, 0xfe, 0x38, 0xd2, 0x95, 0x02, 0x00, 0x00,
}
......@@ -17,6 +17,7 @@ import (
"github.com/hashicorp/nomad/plugins/base"
"github.com/hashicorp/nomad/plugins/drivers"
"github.com/hashicorp/nomad/plugins/drivers/utils"
"github.com/hashicorp/nomad/plugins/shared"
"github.com/hashicorp/nomad/plugins/shared/hclspec"
"github.com/hashicorp/nomad/plugins/shared/loader"
"golang.org/x/net/context"
......@@ -108,7 +109,7 @@ type TaskConfig struct {
// StartTask. This information is needed to rebuild the task state and handler
// during recovery.
type TaskState struct {
ReattachConfig *utils.ReattachConfig
ReattachConfig *shared.ReattachConfig
TaskConfig *drivers.TaskConfig
Pid int
StartedAt time.Time
......@@ -187,7 +188,7 @@ func (d *ExecDriver) RecoverTask(handle *drivers.TaskHandle) error {
return fmt.Errorf("failed to decode state: %v", err)
}
plugRC, err := utils.ReattachConfigToGoPlugin(taskState.ReattachConfig)
plugRC, err := shared.ReattachConfigToGoPlugin(taskState.ReattachConfig)
if err != nil {
logger.Error("failed to build reattach config during task recovery", "error", err)
return fmt.Errorf("failed to build reattach config: %v", err)
......@@ -279,7 +280,7 @@ func (d *ExecDriver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *c
}
driverState := &TaskState{
ReattachConfig: utils.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()),
ReattachConfig: shared.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()),
Pid: ps.Pid,
TaskConfig: cfg,
StartedAt: h.startedAt,
......
......@@ -3,6 +3,7 @@ package qemu
import (
"errors"
"fmt"
"net"
"os"
"os/exec"
"path/filepath"
......@@ -11,8 +12,6 @@ import (
"strings"
"time"
"net"
"github.com/coreos/go-semver/semver"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-plugin"
......@@ -23,6 +22,7 @@ import (
"github.com/hashicorp/nomad/plugins/base"
"github.com/hashicorp/nomad/plugins/drivers"
"github.com/hashicorp/nomad/plugins/drivers/utils"
"github.com/hashicorp/nomad/plugins/shared"
"github.com/hashicorp/nomad/plugins/shared/hclspec"
"github.com/hashicorp/nomad/plugins/shared/loader"
"golang.org/x/net/context"
......@@ -118,7 +118,7 @@ type TaskConfig struct {
// This information is needed to rebuild the taskConfig state and handler
// during recovery.
type TaskState struct {
ReattachConfig *utils.ReattachConfig
ReattachConfig *shared.ReattachConfig
TaskConfig *drivers.TaskConfig
Pid int
StartedAt time.Time
......@@ -259,7 +259,7 @@ func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
return fmt.Errorf("failed to decode taskConfig state from handle: %v", err)
}
plugRC, err := utils.ReattachConfigToGoPlugin(taskState.ReattachConfig)
plugRC, err := shared.ReattachConfigToGoPlugin(taskState.ReattachConfig)
if err != nil {
d.logger.Error("failed to build ReattachConfig from taskConfig state", "error", err, "task_id", handle.Config.ID)
return fmt.Errorf("failed to build ReattachConfig from taskConfig state: %v", err)
......@@ -451,7 +451,7 @@ func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *cstru
}
qemuDriverState := TaskState{
ReattachConfig: utils.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()),
ReattachConfig: shared.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()),
Pid: ps.Pid,
TaskConfig: cfg,
StartedAt: h.startedAt,
......
......@@ -17,6 +17,7 @@ import (
"github.com/hashicorp/nomad/plugins/base"
"github.com/hashicorp/nomad/plugins/drivers"
"github.com/hashicorp/nomad/plugins/drivers/utils"
"github.com/hashicorp/nomad/plugins/shared"
"github.com/hashicorp/nomad/plugins/shared/hclspec"
"github.com/hashicorp/nomad/plugins/shared/loader"
"golang.org/x/net/context"
......@@ -144,7 +145,7 @@ type TaskConfig struct {
// StartTask. This information is needed to rebuild the task state and handler
// during recovery.
type TaskState struct {
ReattachConfig *utils.ReattachConfig
ReattachConfig *shared.ReattachConfig
TaskConfig *drivers.TaskConfig
Pid int
StartedAt time.Time
......@@ -261,7 +262,7 @@ func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
return fmt.Errorf("failed to decode task state from handle: %v", err)
}
plugRC, err := utils.ReattachConfigToGoPlugin(taskState.ReattachConfig)
plugRC, err := shared.ReattachConfigToGoPlugin(taskState.ReattachConfig)
if err != nil {
d.logger.Error("failed to build ReattachConfig from task state", "error", err, "task_id", handle.Config.ID)
return fmt.Errorf("failed to build ReattachConfig from task state: %v", err)
......@@ -347,7 +348,7 @@ func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *cstru
}
driverState := TaskState{
ReattachConfig: utils.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()),
ReattachConfig: shared.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()),
Pid: ps.Pid,
TaskConfig: cfg,
StartedAt: h.startedAt,
......