Commit 3ca45e04 authored by Jake Hyde, committed by rancherbot
Browse files

Retry querying the job status while the addon job is being removed during apply

parent 3b73870d
Showing with 19 additions and 2 deletions
+19 -2
......@@ -16,6 +16,7 @@ import (
// JobStatus summarizes the state of a Kubernetes job as reported by
// GetK8sJobStatus. Field order is significant for unkeyed literals and
// must not change.
type JobStatus struct {
	// Completed is true when the job was found to have finished.
	// NOTE(review): the exact completion condition is evaluated in
	// GetK8sJobStatus — confirm there before relying on it.
	Completed bool
	// Created is true when an existing job object was found.
	Created bool
	// Removing is true when the existing job carries a deletion timestamp,
	// i.e. it has been marked for deletion but is still present.
	Removing bool
}
func ApplyK8sSystemJob(jobYaml, kubeConfigPath string, k8sWrapTransport transport.WrapperFunc, timeout int, addonUpdated bool) error {
......@@ -30,10 +31,24 @@ func ApplyK8sSystemJob(jobYaml, kubeConfigPath string, k8sWrapTransport transpor
if err != nil {
return err
}
jobStatus, err := GetK8sJobStatus(k8sClient, job.Name, job.Namespace)
if err != nil {
var jobStatus JobStatus
// If the job is still removing, attempt to wait until it has been deleted
// If the job is "stuck", apply will never succeed and requires outside intervention
if err := retryToWithTimeout(func(clientset *kubernetes.Clientset, i interface{}) error {
if jobStatus, err = GetK8sJobStatus(k8sClient, job.Name, job.Namespace); err != nil {
return err
}
if !jobStatus.Removing {
return nil
}
logrus.Debugf("[k8s] waiting for job %s to delete..", job.Name)
return fmt.Errorf("[k8s] Job [%s] deletion timed out. Consider increasing addon_job_timeout value", job.Name)
}, k8sClient, job, timeout); err != nil {
return err
}
// if the addon configMap is updated, or the previous job is not completed,
// I will remove the existing job first, if any
if addonUpdated || (jobStatus.Created && !jobStatus.Completed) {
......@@ -132,11 +147,13 @@ func GetK8sJobStatus(k8sClient *kubernetes.Clientset, name, namespace string) (J
return JobStatus{
Created: true,
Completed: true,
Removing: existingJob.DeletionTimestamp != nil,
}, err
}
}
return JobStatus{
Created: true,
Completed: false,
Removing: existingJob.DeletionTimestamp != nil,
}, nil
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment