Unverified Commit d4642a58 authored by hc-github-team-nomad-core's avatar hc-github-team-nomad-core Committed by GitHub
Browse files

fix merge conflict (#13321)

Co-authored-by: default avatarDerek Strickland <1111455+DerekStrickland@users.noreply.github.com>
parent f52d5706
Branches unavailable v1.1.18 v1.1.17 v1.1.16 v1.1.15
No related merge requests found
Showing with 175 additions and 16 deletions
+175 -16
```release-note:bug
lifecycle: fixed a bug where sidecar tasks were not being stopped last
```
...@@ -511,21 +511,21 @@ func (ar *allocRunner) handleTaskStateUpdates() { ...@@ -511,21 +511,21 @@ func (ar *allocRunner) handleTaskStateUpdates() {
states := make(map[string]*structs.TaskState, trNum) states := make(map[string]*structs.TaskState, trNum)
for name, tr := range ar.tasks { for name, tr := range ar.tasks {
state := tr.TaskState() taskState := tr.TaskState()
states[name] = state states[name] = taskState
if tr.IsPoststopTask() { if tr.IsPoststopTask() {
continue continue
} }
// Capture live task runners in case we need to kill them // Capture live task runners in case we need to kill them
if state.State != structs.TaskStateDead { if taskState.State != structs.TaskStateDead {
liveRunners = append(liveRunners, tr) liveRunners = append(liveRunners, tr)
continue continue
} }
// Task is dead, determine if other tasks should be killed // Task is dead, determine if other tasks should be killed
if state.Failed { if taskState.Failed {
// Only set failed event if no event has been // Only set failed event if no event has been
// set yet to give dead leaders priority. // set yet to give dead leaders priority.
if killEvent == nil { if killEvent == nil {
...@@ -612,16 +612,16 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState { ...@@ -612,16 +612,16 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState {
ar.logger.Warn("error stopping leader task", "error", err, "task_name", name) ar.logger.Warn("error stopping leader task", "error", err, "task_name", name)
} }
state := tr.TaskState() taskState := tr.TaskState()
states[name] = state states[name] = taskState
break break
} }
// Kill the rest concurrently // Kill the rest non-sidecar or poststop tasks concurrently
wg := sync.WaitGroup{} wg := sync.WaitGroup{}
for name, tr := range ar.tasks { for name, tr := range ar.tasks {
// Filter out poststop tasks so they run after all the other tasks are killed // Filter out poststop and sidecar tasks so that they stop after all the other tasks are killed
if tr.IsLeader() || tr.IsPoststopTask() { if tr.IsLeader() || tr.IsPoststopTask() || tr.IsSidecarTask() {
continue continue
} }
...@@ -635,9 +635,33 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState { ...@@ -635,9 +635,33 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState {
ar.logger.Warn("error stopping task", "error", err, "task_name", name) ar.logger.Warn("error stopping task", "error", err, "task_name", name)
} }
state := tr.TaskState() taskState := tr.TaskState()
mu.Lock()
states[name] = taskState
mu.Unlock()
}(name, tr)
}
wg.Wait()
// Kill the sidecar tasks last.
for name, tr := range ar.tasks {
if !tr.IsSidecarTask() || tr.IsLeader() || tr.IsPoststopTask() {
continue
}
wg.Add(1)
go func(name string, tr *taskrunner.TaskRunner) {
defer wg.Done()
taskEvent := structs.NewTaskEvent(structs.TaskKilling)
taskEvent.SetKillTimeout(tr.Task().KillTimeout)
err := tr.Kill(context.TODO(), taskEvent)
if err != nil && err != taskrunner.ErrTaskNotRunning {
ar.logger.Warn("error stopping sidecar task", "error", err, "task_name", name)
}
taskState := tr.TaskState()
mu.Lock() mu.Lock()
states[name] = state states[name] = taskState
mu.Unlock() mu.Unlock()
}(name, tr) }(name, tr)
} }
......
...@@ -1573,3 +1573,132 @@ func TestAllocRunner_PersistState_Destroyed(t *testing.T) { ...@@ -1573,3 +1573,132 @@ func TestAllocRunner_PersistState_Destroyed(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
require.Nil(t, ts) require.Nil(t, ts)
} }
// TestAllocRunner_Lifecycle_Shutdown_Order asserts that a service job with 3
// lifecycle hooks (1 sidecar, 1 ephemeral, 1 poststop) starts all 4 tasks, and shuts down
// the sidecar after main, but before poststop.
func TestAllocRunner_Lifecycle_Shutdown_Order(t *testing.T) {
alloc := mock.LifecycleAllocWithPoststopDeploy()
alloc.Job.Type = structs.JobTypeService
mainTask := alloc.Job.TaskGroups[0].Tasks[0]
mainTask.Config["run_for"] = "100s"
sidecarTask := alloc.Job.TaskGroups[0].Tasks[1]
sidecarTask.Lifecycle.Hook = structs.TaskLifecycleHookPoststart
sidecarTask.Config["run_for"] = "100s"
poststopTask := alloc.Job.TaskGroups[0].Tasks[2]
ephemeralTask := alloc.Job.TaskGroups[0].Tasks[3]
alloc.Job.TaskGroups[0].Tasks = []*structs.Task{mainTask, ephemeralTask, sidecarTask, poststopTask}
conf, cleanup := testAllocRunnerConfig(t, alloc)
defer cleanup()
ar, err := NewAllocRunner(conf)
require.NoError(t, err)
defer destroy(ar)
go ar.Run()
upd := conf.StateUpdater.(*MockStateUpdater)
// Wait for main and sidecar tasks to be running, and that the
// ephemeral task ran and exited.
testutil.WaitForResult(func() (bool, error) {
last := upd.Last()
if last == nil {
return false, fmt.Errorf("No updates")
}
if last.ClientStatus != structs.AllocClientStatusRunning {
return false, fmt.Errorf("expected alloc to be running not %s", last.ClientStatus)
}
if s := last.TaskStates[mainTask.Name].State; s != structs.TaskStateRunning {
return false, fmt.Errorf("expected main task to be running not %s", s)
}
if s := last.TaskStates[sidecarTask.Name].State; s != structs.TaskStateRunning {
return false, fmt.Errorf("expected sidecar task to be running not %s", s)
}
if s := last.TaskStates[ephemeralTask.Name].State; s != structs.TaskStateDead {
return false, fmt.Errorf("expected ephemeral task to be dead not %s", s)
}
if last.TaskStates[ephemeralTask.Name].Failed {
return false, fmt.Errorf("expected ephemeral task to be successful not failed")
}
return true, nil
}, func(err error) {
t.Fatalf("error waiting for initial state:\n%v", err)
})
// Tell the alloc to stop
stopAlloc := alloc.Copy()
stopAlloc.DesiredStatus = structs.AllocDesiredStatusStop
ar.Update(stopAlloc)
// Wait for tasks to stop.
testutil.WaitForResult(func() (bool, error) {
last := upd.Last()
if s := last.TaskStates[ephemeralTask.Name].State; s != structs.TaskStateDead {
return false, fmt.Errorf("expected ephemeral task to be dead not %s", s)
}
if last.TaskStates[ephemeralTask.Name].Failed {
return false, fmt.Errorf("expected ephemeral task to be successful not failed")
}
if s := last.TaskStates[mainTask.Name].State; s != structs.TaskStateDead {
return false, fmt.Errorf("expected main task to be dead not %s", s)
}
if last.TaskStates[mainTask.Name].Failed {
return false, fmt.Errorf("expected main task to be successful not failed")
}
if s := last.TaskStates[sidecarTask.Name].State; s != structs.TaskStateDead {
return false, fmt.Errorf("expected sidecar task to be dead not %s", s)
}
if last.TaskStates[sidecarTask.Name].Failed {
return false, fmt.Errorf("expected sidecar task to be successful not failed")
}
if s := last.TaskStates[poststopTask.Name].State; s != structs.TaskStateRunning {
return false, fmt.Errorf("expected poststop task to be running not %s", s)
}
return true, nil
}, func(err error) {
t.Fatalf("error waiting for kill state:\n%v", err)
})
last := upd.Last()
require.True(t, last.TaskStates[ephemeralTask.Name].FinishedAt.Before(last.TaskStates[mainTask.Name].FinishedAt))
require.True(t, last.TaskStates[mainTask.Name].FinishedAt.Before(last.TaskStates[sidecarTask.Name].FinishedAt))
// Wait for poststop task to stop.
testutil.WaitForResult(func() (bool, error) {
last := upd.Last()
if s := last.TaskStates[poststopTask.Name].State; s != structs.TaskStateDead {
return false, fmt.Errorf("expected poststop task to be dead not %s", s)
}
if last.TaskStates[poststopTask.Name].Failed {
return false, fmt.Errorf("expected poststop task to be successful not failed")
}
return true, nil
}, func(err error) {
t.Fatalf("error waiting for poststop state:\n%v", err)
})
last = upd.Last()
require.True(t, last.TaskStates[sidecarTask.Name].FinishedAt.Before(last.TaskStates[poststopTask.Name].FinishedAt))
}
...@@ -179,8 +179,7 @@ func (c *taskHookCoordinator) StartPoststopTasks() { ...@@ -179,8 +179,7 @@ func (c *taskHookCoordinator) StartPoststopTasks() {
// hasNonSidecarTasks returns false if all the passed tasks are sidecar tasks // hasNonSidecarTasks returns false if all the passed tasks are sidecar tasks
func hasNonSidecarTasks(tasks []*taskrunner.TaskRunner) bool { func hasNonSidecarTasks(tasks []*taskrunner.TaskRunner) bool {
for _, tr := range tasks { for _, tr := range tasks {
lc := tr.Task().Lifecycle if !tr.IsSidecarTask() {
if lc == nil || !lc.Sidecar {
return true return true
} }
} }
...@@ -188,11 +187,10 @@ func hasNonSidecarTasks(tasks []*taskrunner.TaskRunner) bool { ...@@ -188,11 +187,10 @@ func hasNonSidecarTasks(tasks []*taskrunner.TaskRunner) bool {
return false return false
} }
// hasSidecarTasks returns true if all the passed tasks are sidecar tasks // hasSidecarTasks returns true if any of the passed tasks are sidecar tasks
func hasSidecarTasks(tasks map[string]*taskrunner.TaskRunner) bool { func hasSidecarTasks(tasks map[string]*taskrunner.TaskRunner) bool {
for _, tr := range tasks { for _, tr := range tasks {
lc := tr.Task().Lifecycle if tr.IsSidecarTask() {
if lc != nil && lc.Sidecar {
return true return true
} }
} }
......
...@@ -33,6 +33,11 @@ func (tr *TaskRunner) IsPoststopTask() bool { ...@@ -33,6 +33,11 @@ func (tr *TaskRunner) IsPoststopTask() bool {
return tr.Task().Lifecycle != nil && tr.Task().Lifecycle.Hook == structs.TaskLifecycleHookPoststop return tr.Task().Lifecycle != nil && tr.Task().Lifecycle.Hook == structs.TaskLifecycleHookPoststop
} }
// IsSidecarTask returns true if this task is a sidecar task in its task group.
func (tr *TaskRunner) IsSidecarTask() bool {
return tr.Task().Lifecycle != nil && tr.Task().Lifecycle.Sidecar
}
func (tr *TaskRunner) Task() *structs.Task { func (tr *TaskRunner) Task() *structs.Task {
tr.taskLock.RLock() tr.taskLock.RLock()
defer tr.taskLock.RUnlock() defer tr.taskLock.RUnlock()
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment