Commit 5694999c authored by Seth Hoenig
Browse files

cli: display nomad service check status output in CLI commands

This PR adds some NSD check status output to the CLI.

1. The 'nomad alloc status' command produces nsd check summary output (if present)
2. The 'nomad alloc checks' sub-command is added to produce complete nsd check output (if present)
parent f9f6bc40
Showing with 432 additions and 23 deletions
+432 -23
......@@ -72,13 +72,13 @@ func (a *Allocations) Info(allocID string, q *QueryOptions) (*Allocation, *Query
// the task environment.
//
// The parameters are:
// * ctx: context to set deadlines or timeout
// * allocation: the allocation to execute command inside
// * task: the task's name to execute command in
// * tty: indicates whether to start a pseudo-tty for the command
// * stdin, stdout, stderr: the std io to pass to command.
// If tty is true, then streams need to point to a tty that's alive for the whole process
// * terminalSizeCh: A channel to send new tty terminal sizes
// - ctx: context to set deadlines or timeout
// - allocation: the allocation to execute command inside
// - task: the task's name to execute command in
// - tty: indicates whether to start a pseudo-tty for the command
// - stdin, stdout, stderr: the std io to pass to command.
// If tty is true, then streams need to point to a tty that's alive for the whole process
// - terminalSizeCh: A channel to send new tty terminal sizes
//
// The call blocks until command terminates (or an error occurs), and returns the exit code.
//
......@@ -119,6 +119,17 @@ func (a *Allocations) Stats(alloc *Allocation, q *QueryOptions) (*AllocResourceU
return &resp, err
}
// Checks returns the status of every nomad service check present in the
// allocation identified by allocID.
//
// Note: for cluster topologies where API consumers don't have network access to
// Nomad clients, set api.ClientConnTimeout to a small value (ex 1ms) to avoid
// long pauses on this API call.
func (a *Allocations) Checks(allocID string, q *QueryOptions) (AllocCheckStatuses, error) {
	endpoint := "/v1/client/allocation/" + allocID + "/checks"

	var statuses AllocCheckStatuses
	if _, err := a.client.query(endpoint, &statuses, q); err != nil {
		// Hand back whatever was decoded alongside the error, as callers
		// receive both values unmodified from the query.
		return statuses, err
	}
	return statuses, nil
}
// GC forces a garbage collection of client state for an allocation.
//
// Note: for cluster topologies where API consumers don't have network access to
......@@ -506,12 +517,12 @@ type ExecStreamingInput struct {
TTYSize *TerminalSize `json:"tty_size,omitempty"`
}
// ExecStreamingExitResults captures the exit code of just completed nomad exec command
// ExecStreamingExitResult captures the exit code of a just-completed nomad
// exec command.
type ExecStreamingExitResult struct {
// ExitCode is serialized under the JSON key "exit_code".
ExitCode int `json:"exit_code"`
}
// ExecStreamingInput represents an output streaming entity, e.g. stdout/stderr update or termination
// ExecStreamingOutput represents an output streaming entity, e.g. stdout/stderr update or termination
//
// At most one of these fields should be set: `Stdout`, `Stderr`, or `Result`.
// If `Exited` is true, then `Result` is non-nil, and other fields are nil.
......
......@@ -64,6 +64,24 @@ type AllocResourceUsage struct {
Timestamp int64
}
// AllocCheckStatus contains the current status of a nomad service discovery check.
type AllocCheckStatus struct {
// ID identifies the check.
ID string
// Check is the name of the check; the CLI displays it in the "Name" column.
Check string
// Group is the task group the check belongs to.
Group string
// Mode is the check mode (the CLI tests expect e.g. "healthiness").
Mode string
// Output is the output produced by the most recent check execution.
Output string
// Service is the name of the service the check is attached to.
Service string
// Task is the task the check belongs to; the CLI renders an empty value as
// "(group)", i.e. a group level check.
Task string
// Status is the textual check result (the CLI tests expect e.g. "pending"
// or "failure").
Status string
// StatusCode is only displayed by the CLI when greater than zero.
// NOTE(review): presumably the HTTP response code of http checks - confirm.
StatusCode int
// Timestamp is interpreted by the CLI as seconds since the Unix epoch.
Timestamp int64
}
// AllocCheckStatuses holds the set of nomad service discovery checks within
// the allocation (including group and task level service checks).
//
// NOTE(review): the map key is presumably the check ID - confirm against the
// client endpoint that produces this value.
type AllocCheckStatuses map[string]AllocCheckStatus
// RestartPolicy defines how the Nomad client restarts
// tasks in a taskgroup when they fail
type RestartPolicy struct {
......
package command
import (
	"fmt"
	"sort"
	"strings"
	"time"

	"github.com/hashicorp/nomad/api"
	"github.com/hashicorp/nomad/api/contexts"
	"github.com/posener/complete"
)
// AllocChecksCommand is the CLI command registered as "alloc checks"; it
// prints the status of the nomad service checks of a single allocation.
type AllocChecksCommand struct {
// Meta is embedded to provide the shared CLI plumbing used by the methods of
// this command (Ui, FlagSet, Client, AutocompleteFlags).
Meta
}
// Help returns the long-form usage text of the command, made up of the usage
// line, the general options produced by generalOptionsUsage, and the options
// specific to this command. Surrounding whitespace is trimmed.
func (c *AllocChecksCommand) Help() string {
helpText := `
Usage: nomad alloc checks [options] <allocation>
Alias: nomad checks
Outputs the latest health check status information for services in the allocation
using the Nomad service discovery provider.
General Options:
` + generalOptionsUsage(usageOptsDefault) + `
Checks Specific Options:
-verbose
Show full information.
`
return strings.TrimSpace(helpText)
}
// Synopsis returns the one-line description of the command.
func (c *AllocChecksCommand) Synopsis() string {
return "Outputs service health check status information."
}
// AutocompleteFlags returns the flags offered for shell completion: the
// client flag set from Meta merged with the command specific -verbose flag.
func (c *AllocChecksCommand) AutocompleteFlags() complete.Flags {
	clientFlags := c.Meta.AutocompleteFlags(FlagSetClient)

	// -verbose is a boolean switch, so there is nothing to predict after it.
	commandFlags := complete.Flags{"-verbose": complete.PredictNothing}

	return mergeAutocompleteFlags(clientFlags, commandFlags)
}
// AutocompleteArgs returns a predictor that completes the allocation argument
// by running a prefix search over allocations. Any failure to build the API
// client or to run the search yields no suggestions.
func (c *AllocChecksCommand) AutocompleteArgs() complete.Predictor {
	predictAllocs := func(a complete.Args) []string {
		apiClient, clientErr := c.Meta.Client()
		if clientErr != nil {
			return nil
		}

		found, _, searchErr := apiClient.Search().PrefixSearch(a.Last, contexts.Allocs, nil)
		if searchErr != nil {
			return nil
		}

		return found.Matches[contexts.Allocs]
	}
	return complete.PredictFunc(predictAllocs)
}
// Name returns the name of the command; Run uses it to name its flag set.
func (c *AllocChecksCommand) Name() string {
return "alloc checks"
}
// Run executes the command with the given CLI arguments.
//
// It expects exactly one positional argument, an allocation ID or ID prefix,
// resolves it to a single allocation, queries the nomad service checks of
// that allocation and prints one key/value listing per check. Checks are
// printed in the sorted order of their keys in the response so the output is
// stable between invocations.
//
// It returns 0 on success and 1 on any usage, lookup or query failure.
func (c *AllocChecksCommand) Run(args []string) int {
	var verbose bool

	flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
	flags.Usage = func() { c.Ui.Output(c.Help()) }
	flags.BoolVar(&verbose, "verbose", false, "")

	if err := flags.Parse(args); err != nil {
		return 1
	}
	args = flags.Args()

	numArgs := len(args)
	if numArgs < 1 {
		c.Ui.Error("An allocation ID is required")
		c.Ui.Error(commandErrorText(c))
		return 1
	}
	if numArgs > 1 {
		c.Ui.Error("This command takes one argument (allocation ID)")
		c.Ui.Error(commandErrorText(c))
		return 1
	}

	client, err := c.Meta.Client()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing client: %v", err))
		return 1
	}

	allocID := args[0]

	// Truncate the id unless full length is requested
	length := shortId
	if verbose {
		length = fullId
	}

	// A single character is rejected as a prefix before any query is made.
	if len(allocID) == 1 {
		c.Ui.Error("Alloc ID must contain at least two characters.")
		return 1
	}

	// Query the allocation info
	allocID = sanitizeUUIDPrefix(allocID)
	allocations, _, err := client.Allocations().PrefixList(allocID)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error querying allocation: %v", err))
		return 1
	}
	if len(allocations) == 0 {
		c.Ui.Error(fmt.Sprintf("No allocation(s) with prefix or id %q found", allocID))
		return 1
	}
	if len(allocations) > 1 {
		out := formatAllocListStubs(allocations, verbose, length)
		c.Ui.Error(fmt.Sprintf("Prefix matched multiple allocations\n\n%s", out))
		return 1
	}

	// prefix lookup matched single allocation (happy path), lookup the checks
	// using the namespace of the allocation that was found
	q := &api.QueryOptions{Namespace: allocations[0].Namespace}
	checks, err := client.Allocations().Checks(allocations[0].ID, q)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error querying allocation checks: %s", err))
		return 1
	}

	c.Ui.Output(fmt.Sprintf("Status of %d Nomad Service Checks", len(checks)))
	c.Ui.Output("")

	pair := func(key, value string) string { return fmt.Sprintf("%s|=|%s", key, value) }

	// A check without a task is displayed as belonging to the group.
	taskFmt := func(s string) string {
		if s == "" {
			return "(group)"
		}
		return s
	}

	// Go randomizes map iteration order; walk the checks by sorted key so
	// repeated invocations print them in the same order.
	keys := make([]string, 0, len(checks))
	for key := range checks {
		keys = append(keys, key)
	}
	sort.Strings(keys)

	for _, key := range keys {
		check := checks[key]

		list := []string{
			pair("ID", check.ID),
			pair("Name", check.Check),
			pair("Group", check.Group),
			pair("Task", taskFmt(check.Task)),
			pair("Service", check.Service),
			pair("Status", check.Status),
		}

		// The status code row is only shown when a positive code was recorded.
		if check.StatusCode > 0 {
			list = append(list, pair("StatusCode", fmt.Sprintf("%d", check.StatusCode)))
		}

		list = append(list,
			pair("Mode", check.Mode),
			pair("Timestamp", formatTaskTimes(time.Unix(check.Timestamp, 0))),
			pair("Output", check.Output),
		)

		c.Ui.Output(formatList(list))
		c.Ui.Output("")
	}

	return 0
}
package command
import (
"testing"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/mitchellh/cli"
"github.com/posener/complete"
"github.com/shoenig/test/must"
)
// TestAllocChecksCommand_Implements is a compile-time assertion that
// *AllocChecksCommand satisfies the cli.Command interface.
func TestAllocChecksCommand_Implements(t *testing.T) {
	ci.Parallel(t)

	var _ cli.Command = &AllocChecksCommand{}
}
// TestAllocChecksCommand_Fails exercises the failure paths of the command:
// misuse, an unreachable address, an unknown allocation and a prefix that is
// too short. Every case must exit with code 1 and report the expected error.
func TestAllocChecksCommand_Fails(t *testing.T) {
	ci.Parallel(t)

	srv, _, url := testServer(t, false, nil)
	t.Cleanup(func() { _ = srv.Shutdown() })

	ui := cli.NewMockUi()
	cmd := &AllocChecksCommand{Meta: Meta{Ui: ui}}

	// runFailing runs the command, requires exit code 1, and returns the
	// error output after clearing the buffer for the next case.
	runFailing := func(args ...string) string {
		t.Helper()
		must.One(t, cmd.Run(args))
		errOut := ui.ErrorWriter.String()
		ui.ErrorWriter.Reset()
		return errOut
	}

	// fails on misuse
	must.StrContains(t, runFailing("some", "bad", "args"), commandErrorText(cmd))

	// fails on connection failure
	must.StrContains(t, runFailing("-address=nope", "foobar"), "Error querying allocation")

	// fails on missing allocation
	must.StrContains(t,
		runFailing("-address="+url, "26470238-5CF2-438F-8772-DC67CFB0705C"),
		"No allocation(s) with prefix or id")

	// fails on prefix with too few characters
	must.StrContains(t, runFailing("-address="+url, "2"), "must contain at least two characters.")
}
// TestAllocChecksCommand_AutocompleteArgs verifies that completing on a five
// character prefix of a stored allocation ID predicts exactly that allocation.
func TestAllocChecksCommand_AutocompleteArgs(t *testing.T) {
	ci.Parallel(t)

	srv, _, url := testServer(t, true, nil)
	defer stopTestAgent(srv)

	cmd := &AllocChecksCommand{Meta: Meta{Ui: cli.NewMockUi(), flagAddress: url}}

	// Insert a fake alloc directly into the server state
	alloc := mock.Alloc()
	store := srv.Agent.Server().State()
	must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, 1000, []*structs.Allocation{alloc}))

	// Predict using only the leading characters of the ID
	predictions := cmd.AutocompleteArgs().Predict(complete.Args{Last: alloc.ID[:5]})
	must.Len(t, 1, predictions)
	must.Eq(t, alloc.ID, predictions[0])
}
// TestAllocChecksCommand_Run registers a job with a nomad service check and
// verifies that the command prints the details of that check for the
// resulting allocation.
func TestAllocChecksCommand_Run(t *testing.T) {
	ci.Parallel(t)

	srv, client, url := testServer(t, true, nil)
	defer stopTestAgent(srv)

	// wait for nodes
	waitForNodes(t, client)

	jobID := "job1_checks"
	resp, _, err := client.Jobs().Register(testNomadServiceJob(jobID), nil)
	must.NoError(t, err)

	// wait for registration success
	ui := cli.NewMockUi()
	must.Zero(t, waitForSuccess(ui, client, fullId, t, resp.EvalID))

	// Get an alloc id
	allocID := getAllocFromJob(t, client, jobID)

	// do not wait for alloc running - it will stay pending because the
	// health-check will never pass

	// Run command
	cmd := &AllocChecksCommand{Meta: Meta{Ui: ui, flagAddress: url}}
	must.Zero(t, cmd.Run([]string{"-address=" + url, allocID}))

	// check output
	out := ui.OutputWriter.String()
	expected := []string{
		`Name = check1`,
		`Group = job1_checks.group1[0]`,
		`Task = (group)`,
		`Service = service1`,
		`Mode = healthiness`,
	}
	for _, want := range expected {
		must.StrContains(t, out, want)
	}
}
......@@ -114,20 +114,12 @@ func TestAllocSignalCommand_Run(t *testing.T) {
code := waitForSuccess(ui, client, fullId, t, resp.EvalID)
must.Zero(t, code)
// get an alloc id
allocID := ""
if allocs, _, err := client.Jobs().Allocations(jobID, false, nil); err == nil {
if len(allocs) > 0 {
allocID = allocs[0].ID
}
}
must.NotEq(t, "", allocID)
// Get an alloc id
allocID := getAllocFromJob(t, client, jobID)
// Wait for alloc to be running
waitForAllocRunning(t, client, allocID)
code = cmd.Run([]string{"-address=" + url, allocID})
must.Zero(t, code)
ui.OutputWriter.Reset()
}
......@@ -199,10 +199,17 @@ func (c *AllocStatusCommand) Run(args []string) int {
}
c.Ui.Output(output)
// add allocation network addresses
if alloc.AllocatedResources != nil && len(alloc.AllocatedResources.Shared.Networks) > 0 && alloc.AllocatedResources.Shared.Networks[0].HasPorts() {
c.Ui.Output("")
c.Ui.Output(formatAllocNetworkInfo(alloc))
}
// add allocation nomad service discovery checks
if checkOutput := formatAllocNomadServiceChecks(alloc.ID, client); checkOutput != "" {
c.Ui.Output("")
c.Ui.Output(checkOutput)
}
}
if short {
......@@ -355,7 +362,28 @@ func formatAllocNetworkInfo(alloc *api.Allocation) string {
mode = fmt.Sprintf(" (mode = %q)", nw.Mode)
}
return fmt.Sprintf("Allocation Addresses%s\n%s", mode, formatList(addrs))
return fmt.Sprintf("Allocation Addresses%s:\n%s", mode, formatList(addrs))
}
// formatAllocNomadServiceChecks queries the nomad service checks of the
// allocation and renders them as a table with one row per check, sorted
// lexically below the header row. It returns the empty string if the query
// fails or the allocation has no checks, in which case the caller omits the
// section.
func formatAllocNomadServiceChecks(allocID string, client *api.Client) string {
	statuses, err := client.Allocations().Checks(allocID, nil)
	if err != nil || len(statuses) == 0 {
		return ""
	}

	rows := make([]string, 0, len(statuses)+1)
	rows = append(rows, "Service|Task|Name|Mode|Status")

	for _, st := range statuses {
		// a check without a task is a group level check
		taskName := st.Task
		if taskName == "" {
			taskName = "(group)"
		}

		// service | task | name | mode | status
		rows = append(rows, fmt.Sprintf("%s|%s|%s|%s|%s", st.Service, taskName, st.Check, st.Mode, st.Status))
	}

	// keep the header in place, order only the data rows
	sort.Strings(rows[1:])

	return fmt.Sprintf("Nomad Service Checks:\n%s", formatList(rows))
}
// futureEvalTimePretty returns when the eval is eligible to reschedule
......@@ -553,7 +581,7 @@ func (c *AllocStatusCommand) outputTaskResources(alloc *api.Allocation, task str
return
}
c.Ui.Output("Task Resources")
c.Ui.Output("Task Resources:")
var addr []string
for _, nw := range resource.Networks {
ports := append(nw.DynamicPorts, nw.ReservedPorts...) //nolint:gocritic
......
......@@ -194,6 +194,9 @@ func TestAllocStatusCommand_Run(t *testing.T) {
out = ui.OutputWriter.String()
must.StrContains(t, out, allocID)
// make sure nsd checks status output is elided if none exist
must.StrNotContains(t, out, `Nomad Service Checks:`)
}
func TestAllocStatusCommand_RescheduleInfo(t *testing.T) {
......@@ -441,3 +444,40 @@ func TestAllocStatusCommand_CSIVolumes(t *testing.T) {
must.StrContains(t, out, fmt.Sprintf("%s minnie", vol0))
must.StrNotContains(t, out, "Host Volumes")
}
// TestAllocStatusCommand_NSD_Checks registers a job with a nomad service
// check and verifies that the alloc status output contains the check summary
// table, including its header and the row for the check.
func TestAllocStatusCommand_NSD_Checks(t *testing.T) {
	ci.Parallel(t)

	srv, client, url := testServer(t, true, nil)
	defer stopTestAgent(srv)

	// wait for nodes
	waitForNodes(t, client)

	jobID := "job1_checks"
	resp, _, err := client.Jobs().Register(testNomadServiceJob(jobID), nil)
	must.NoError(t, err)

	// wait for registration success
	ui := cli.NewMockUi()
	must.Zero(t, waitForSuccess(ui, client, fullId, t, resp.EvalID))

	// Get an alloc id
	allocID := getAllocFromJob(t, client, jobID)

	// do not wait for alloc running - it will stay pending because the
	// health-check will never pass

	// Run command
	cmd := &AllocStatusCommand{Meta: Meta{Ui: ui, flagAddress: url}}
	must.Zero(t, cmd.Run([]string{"-address=" + url, allocID}))

	// check output
	out := ui.OutputWriter.String()
	must.StrContains(t, out, `Nomad Service Checks:`)

	headerRe := regexp.MustCompile(`Service\s+Task\s+Name\s+Mode\s+Status`)
	must.RegexMatch(t, headerRe, out)

	rowRe := regexp.MustCompile(`service1\s+\(group\)\s+check1\s+healthiness\s+(pending|failure)`)
	must.RegexMatch(t, rowRe, out)
}
......@@ -177,6 +177,11 @@ func Commands(metaPtr *Meta, agentUi cli.Ui) map[string]cli.CommandFactory {
Meta: meta,
}, nil
},
"alloc checks": func() (cli.Command, error) {
return &AllocChecksCommand{
Meta: meta,
}, nil
},
"alloc status": func() (cli.Command, error) {
return &AllocStatusCommand{
Meta: meta,
......
......@@ -4,6 +4,7 @@ import (
"fmt"
"os"
"testing"
"time"
"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/command/agent"
......@@ -22,7 +23,7 @@ func testServer(t *testing.T, runClient bool, cb func(*agent.Config)) (*agent.Te
cb(config)
}
})
t.Cleanup(func() { a.Shutdown() })
t.Cleanup(func() { _ = a.Shutdown() })
c := a.Client()
return a, c, a.HTTPAddr()
......@@ -37,7 +38,7 @@ func testClient(t *testing.T, name string, cb func(*agent.Config)) (*agent.TestA
cb(config)
}
})
t.Cleanup(func() { a.Shutdown() })
t.Cleanup(func() { _ = a.Shutdown() })
c := a.Client()
t.Logf("Waiting for client %s to join server(s) %s", name, a.GetConfig().Client.Servers)
......@@ -73,6 +74,25 @@ func testJob(jobID string) *api.Job {
return job
}
// testNomadServiceJob returns the job produced by testJob with a single group
// level service attached to its first task group. The service uses the nomad
// provider and carries one http check.
func testNomadServiceJob(jobID string) *api.Job {
	job := testJob(jobID)

	httpCheck := api.ServiceCheck{
		Name:     "check1",
		Type:     "http",
		Path:     "/",
		Interval: 1 * time.Second,
		Timeout:  1 * time.Second,
	}

	service := &api.Service{
		Name:        "service1",
		PortLabel:   "1000",
		AddressMode: "",
		Address:     "127.0.0.1",
		Checks:      []api.ServiceCheck{httpCheck},
		Provider:    "nomad",
	}

	job.TaskGroups[0].Services = []*api.Service{service}
	return job
}
func testMultiRegionJob(jobID, region, datacenter string) *api.Job {
task := api.NewTask("task1", "mock_driver").
SetConfig("kill_after", "10s").
......@@ -144,6 +164,17 @@ func waitForAllocRunning(t *testing.T, client *api.Client, allocID string) {
})
}
// getAllocFromJob returns the ID of the first allocation listed for the job
// identified by jobID.
//
// The test fails immediately if the allocations cannot be listed or if the
// job has no allocation.
func getAllocFromJob(t *testing.T, client *api.Client, jobID string) string {
	t.Helper()

	// Report a failing API call as such, rather than letting it surface only
	// as a missing allocation.
	allocations, _, err := client.Jobs().Allocations(jobID, false, nil)
	must.NoError(t, err, must.Sprint("failed to list allocations of job", jobID))

	var allocID string
	if len(allocations) > 0 {
		allocID = allocations[0].ID
	}
	must.NotEq(t, "", allocID, must.Sprint("expected to find an allocation after running job", jobID))
	return allocID
}
// stopTestAgent shuts down the test agent and deliberately discards the
// error returned by Shutdown; its single-argument form makes it convenient
// to use with defer.
func stopTestAgent(a *agent.TestAgent) {
_ = a.Shutdown()
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment