Unverified Commit b9b29be0 authored by Rajashree Mandaogane's avatar Rajashree Mandaogane Committed by GitHub
Browse files

Merge pull request #1943 from mrajashree/rename_label

Rename ignore label and return error on controlplane failure
parents 7549e499 c61d531a
Showing with 23 additions and 12 deletions
+23 -12
......@@ -89,8 +89,9 @@ func (c *Cluster) FindHostsLabeledToIgnoreUpgrade(ctx context.Context) {
return
}
for _, node := range nodes.Items {
if val, ok := node.Labels[k8s.IgnoreHostDuringUpgradeLabel]; ok && val == "true" {
if val, ok := node.Labels[k8s.IgnoreHostDuringUpgradeLabel]; ok && val == k8s.IgnoreLabelValue {
host := hosts.Host{RKEConfigNode: v3.RKEConfigNode{Address: node.Annotations[k8s.ExternalAddressAnnotation]}}
logrus.Infof("Host %v is labeled to ignore upgrade", host.Address)
c.HostsLabeledToIgnoreUpgrade[host.Address] = true
}
}
......@@ -174,7 +175,7 @@ func (c *Cluster) CalculateMaxUnavailable() (int, int, error) {
}
// maxUnavailable should be calculated against all hosts provided in cluster.yml except the ones labelled to be ignored for upgrade
workerHosts += len(inactiveWorkerHosts)
maxUnavailableWorker, err := services.CalculateMaxUnavailable(c.UpgradeStrategy.MaxUnavailableWorker, workerHosts)
maxUnavailableWorker, err := services.CalculateMaxUnavailable(c.UpgradeStrategy.MaxUnavailableWorker, workerHosts, services.WorkerRole)
if err != nil {
return maxUnavailableWorker, maxUnavailableControl, err
}
......@@ -185,7 +186,7 @@ func (c *Cluster) CalculateMaxUnavailable() (int, int, error) {
controlHosts++
}
controlHosts += len(inactiveControlPlaneHosts)
maxUnavailableControl, err = services.CalculateMaxUnavailable(c.UpgradeStrategy.MaxUnavailableControlplane, controlHosts)
maxUnavailableControl, err = services.CalculateMaxUnavailable(c.UpgradeStrategy.MaxUnavailableControlplane, controlHosts, services.ControlRole)
if err != nil {
return maxUnavailableWorker, maxUnavailableControl, err
}
......
......@@ -17,7 +17,8 @@ const (
HostnameLabel = "kubernetes.io/hostname"
InternalAddressAnnotation = "rke.cattle.io/internal-ip"
ExternalAddressAnnotation = "rke.cattle.io/external-ip"
IgnoreHostDuringUpgradeLabel = "rke.cattle.io/ignore-during-upgrade"
IgnoreHostDuringUpgradeLabel = "user.cattle.io/upgrade-policy"
IgnoreLabelValue = "prevent"
AWSCloudProvider = "aws"
MaxRetries = 5
RetryInterval = 5
......
......@@ -80,10 +80,8 @@ func UpgradeControlPlaneNodes(ctx context.Context, kubeClient *kubernetes.Client
upgradeStrategy, newHosts, inactiveHosts, maxUnavailable, drainHelper)
if err != nil {
logrus.Errorf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
if len(hostsFailedToUpgrade) >= maxUnavailable {
return errMsgMaxUnavailableNotFailed, err
}
errMsgMaxUnavailableNotFailed = fmt.Sprintf("Failed to upgrade hosts: %v with error %v", strings.Join(hostsFailedToUpgrade, ","), err)
return errMsgMaxUnavailableNotFailed, err
}
log.Infof(ctx, "[%s] Successfully upgraded Controller Plane..", ControlRole)
return errMsgMaxUnavailableNotFailed, nil
......
......@@ -3,6 +3,7 @@ package services
import (
"bytes"
"fmt"
"strings"
"sync"
"time"
......@@ -79,7 +80,7 @@ func getNodeListForUpgrade(kubeClient *kubernetes.Clientset, hostsFailed *sync.M
if inactiveHosts[node.Labels[k8s.HostnameLabel]] {
continue
}
if val, ok := node.Labels[k8s.IgnoreHostDuringUpgradeLabel]; ok && val == "true" {
if val, ok := node.Labels[k8s.IgnoreHostDuringUpgradeLabel]; ok && val == k8s.IgnoreLabelValue {
continue
}
nodeList = append(nodeList, node)
......@@ -87,10 +88,19 @@ func getNodeListForUpgrade(kubeClient *kubernetes.Clientset, hostsFailed *sync.M
return nodeList, nil
}
func CalculateMaxUnavailable(maxUnavailableVal string, numHosts int) (int, error) {
func CalculateMaxUnavailable(maxUnavailableVal string, numHosts int, role string) (int, error) {
// if maxUnavailable is given in percent, round down
maxUnavailableParsed := k8sutil.Parse(maxUnavailableVal)
logrus.Debugf("Provided value for maxUnavailable: %v", maxUnavailableParsed)
if maxUnavailableParsed.Type == k8sutil.Int {
if maxUnavailableParsed.IntVal <= 0 {
return 0, fmt.Errorf("invalid input for max_unavailable_%s: %v, value must be > 0", role, maxUnavailableParsed.IntVal)
}
} else {
if strings.HasPrefix(maxUnavailableParsed.StrVal, "-") || maxUnavailableParsed.StrVal == "0%" {
return 0, fmt.Errorf("invalid input for max_unavailable_%s: %v, value must be > 0", role, maxUnavailableParsed.StrVal)
}
}
maxUnavailable, err := k8sutil.GetValueFromIntOrPercent(&maxUnavailableParsed, numHosts, false)
if err != nil {
logrus.Errorf("Unable to parse max_unavailable, should be a number or percentage of nodes, error: %v", err)
......@@ -98,6 +108,7 @@ func CalculateMaxUnavailable(maxUnavailableVal string, numHosts int) (int, error
}
if maxUnavailable == 0 {
// In case there is only one node and rounding down maxUnvailable percentage led to 0
logrus.Infof("max_unavailable_%s got rounded down to 0, resetting to 1", role)
maxUnavailable = 1
}
logrus.Debugf("Parsed value of maxUnavailable: %v", maxUnavailable)
......@@ -114,11 +125,11 @@ func resetMaxUnavailable(maxUnavailable, lenInactiveHosts int, component string)
}
if lenInactiveHosts > 0 {
if maxUnavailable == lenInactiveHosts {
return 0, fmt.Errorf("cannot proceed with upgrade of %s since %v host(s) are found to be inactive prior to upgrade", component, lenInactiveHosts)
if lenInactiveHosts >= maxUnavailable {
return 0, fmt.Errorf("cannot proceed with upgrade of %s since %v host(s) cannot be reached prior to upgrade", component, lenInactiveHosts)
}
maxUnavailable -= lenInactiveHosts
logrus.Infof("Resetting %s to %v since %v host(s) are found to be inactive prior to upgrade", "max_unavailable_"+component, maxUnavailable, lenInactiveHosts)
logrus.Infof("Resetting %s to %v since %v host(s) cannot be reached prior to upgrade", "max_unavailable_"+component, maxUnavailable, lenInactiveHosts)
}
return maxUnavailable, nil
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment