Commit bdf91846, authored by Mengxin Liu and committed by oilbeater
Browse files

feat: ko support cluster operations status/kick/backup

parent 9cc07385
master acl acl-doc add_unknown_to_lsp allow-subnet bandwidth base/enable-dbg centralized-policy-route centralized_nat chore/show-gw-error chore/size ci/base-update ci/retry ci/trivy ci/update-kind cni crd-print db-monitor debug delete-qos delete-qos-queue delete_ip doc/optimization doc/vip docs/corigine docs/optimize docs/optimize-cilium ecmp ecmp_static_route encap-ip env-check fdb feat/ko feat/log feat/mcast feat/multicast feat/update-ovn feat/vpc-lb fix-base fix-dnat fix-resubmit-limit fix/avx512 fix/ecmp-hash fix/gw-del fix/iptables fix/ipv6-svc fix/metrics-name fix/np-log fix/ovn-northd-flipflop fix/pod-del fix/src-priority fix/subnet-without-protocol fix/uninstall fix_make_kind_reload gc-vm-lsp internal-port ip join klog/v2 log/rotate ls-dnat-mod-dl-dst lsp-address lsp-ipam mahz-master monitor/metrics multus multus-cni-update namespace nat-gw nbctl networkpolicy nodeport np_master ns-subnet ovn-controller ovn-db-recover ovs-nonstop ovs-win64-ci perf/4.18 perf/alias perf/libovsdb perf/optimization perf/route-port-address perf/stt perf/tuning-guide poc policy-route policy-route-1.8 port-group push-img qos qos-e2e qos-query refactor/other_config reflactor_note release-1.10 release-1.8 release-1.8-kubevirt release-1.8-lint release-1.8-monitor release-1.9 release-1.9-monitor release/prepare-1.9 remove_no_need_parms_svcAsName restore revert-1094-vpc-lb revert-1264-yd-master revert-1309-fixcni security/ubuntu-update security/update-ubuntu sg-acl stspod subnet subnet_ips svc sync-ovn-db test/fix-flaky testing update-ovs update/1.7-1.8 update/1.8.2 upgrade-ovs vm-migrate vm-static-ip vpc-nat-gw webhook v1.10.7 v1.10.6 v1.10.5 v1.10.4 v1.10.3 v1.10.2 v1.10.1 v1.10.0 v1.9.14 v1.9.13 v1.9.12 v1.9.10 v1.9.9 v1.9.8 v1.9.7 v1.9.6 v1.9.5 v1.9.4 v1.9.3 v1.9.2 v1.9.1 v1.9.0 v1.8.14 v1.8.12 v1.8.11 v1.8.9 v1.8.8 v1.8.7 v1.8.6 v1.8.5 v1.8.4 v1.8.3 v1.8.2 v1.8.1 v1.8.0
No related merge requests found
Showing with 235 additions and 8 deletions
+235 -8
......@@ -2069,6 +2069,7 @@ OVN_SB_POD=
showHelp(){
echo "kubectl ko {subcommand} [option...]"
echo "Available Subcommands:"
echo " [nb|sb] [status|kick|backup] ovn-db operations show cluster status, kick stale server or backup database"
echo " nbctl [ovn-nbctl options ...] invoke ovn-nbctl"
echo " sbctl [ovn-sbctl options ...] invoke ovn-sbctl"
echo " vsctl {nodeName} [ovs-vsctl options ...] invoke ovs-vsctl on selected node"
......@@ -2111,7 +2112,6 @@ tcpdump(){
echo "nic doesn't exist on node $nodeName"
exit 1
fi
podNicType=$(kubectl get pod "$podName" -n "$namespace" -o jsonpath={.metadata.annotations.ovn\\.kubernetes\\.io/pod_nic_type})
podNetNs=$(kubectl exec "$ovnCni" -n $KUBE_OVN_NS -- ovs-vsctl --data=bare --no-heading get interface "$nicName" external-ids:pod_netns | tr -d '\r')
set -x
......@@ -2220,6 +2220,22 @@ xxctl(){
kubectl exec "$ovsPod" -n $KUBE_OVN_NS -- ovs-$subcommand "$@"
}
# checkLeader asserts that exactly one address backs the ovn-<component> endpoints.
#   $1  component suffix of the endpoints object (diagnose passes nb, sb and northd)
# Prints a diagnostic and exits 1 when zero or more than one address is listed.
checkLeader(){
  component="$1"; shift
  # Count only the "ip:" keys of the address entries. A bare `grep ip` also matches
  # unrelated lines that merely contain "ip" (e.g. `nodeName: ip-10-0-0-1...`), which
  # falsely reports several leaders.
  # `grep -c` still prints 0 on no match but exits 1; `|| true` keeps that (or a failed
  # kubectl call) from aborting the script before the message below is printed, in case
  # the script runs with errexit/pipefail.
  count=$(kubectl get ep "ovn-$component" -n "$KUBE_OVN_NS" -o yaml | grep -cE '^[[:space:]]*(-[[:space:]]+)?ip:[[:space:]]' || true)
  count="${count:-0}"
  if [ "$count" -eq 0 ]; then
    echo "no ovn-$component exists !!"
    exit 1
  fi
  if [ "$count" -gt 1 ]; then
    echo "ovn-$component has more than one leader !!"
    exit 1
  fi
  echo "ovn-$component leader check ok"
}
diagnose(){
kubectl get crd vpcs.kubeovn.io
kubectl get crd vpc-nat-gateways.kubeovn.io
......@@ -2245,6 +2261,11 @@ diagnose(){
checkDaemonSet kube-ovn-cni
checkDaemonSet ovs-ovn
checkDeployment coredns
checkLeader nb
checkLeader sb
checkLeader northd
type="$1"
case $type in
all)
......@@ -2341,19 +2362,65 @@ checkDeployment(){
checkKubeProxy(){
dsMode=`kubectl get ds -n kube-system | grep kube-proxy || true`
if [ -z "$dsMode" ]; then
nodeIps=`kubectl get node -o wide --no-headers | awk '{print $6}'`
nodeIps=`kubectl get node -o wide | grep -v "INTERNAL-IP" | awk '{print $6}'`
for node in $nodeIps
do
healthResult=`curl -g -6 -sL --connect-timeout 5 -w %{http_code} http://[$node]:10256/healthz -o /dev/null | grep -v 200 || true`
healthResult=`curl -g -6 -sL -w %{http_code} http://[$node]:10256/healthz -o /dev/null | grep -v 200 || true`
if [ -n "$healthResult" ]; then
echo "$node kube-proxy's health check failed"
exit 1
fi
done
echo "kube-proxy ready"
else
checkDaemonSet kube-proxy
fi
echo "kube-proxy ready"
}
# dbtool runs a maintenance action against the OVN northbound or southbound database.
#   $1  component: nb | sb
#   $2  action:    status | kick | backup
#   $3  (kick only) id of the server to remove from the cluster
# status  prints the output of cluster/status for the database.
# kick    asks the cluster to drop the given server via cluster/kick.
# backup  converts the clustered db to a standalone file inside the pod, copies it to the
#         current directory and removes the temporary file from the pod.
# Exits 1 on an unknown component/action or when kick is called without a server id.
dbtool(){
  # %S (seconds 00-59); the previous %s expanded to seconds since the epoch.
  suffix=$(date +%m%d%H%M%S)
  component="$1"; shift
  action="${1:-}"
  if [ $# -gt 0 ]; then
    shift
  fi

  # Everything that differs between nb and sb is derived here once.
  case $component in
    nb)
      dbPod="$OVN_NB_POD"
      dbName="OVN_Northbound"
      ;;
    sb)
      dbPod="$OVN_SB_POD"
      dbName="OVN_Southbound"
      ;;
    *)
      echo "unknown subcommand $component"
      exit 1
  esac
  dbCtl="/var/run/ovn/ovn${component}_db.ctl"
  dbFile="/etc/ovn/ovn${component}_db.db"
  backupFile="ovn${component}_db.$suffix.backup"

  case $action in
    status)
      kubectl exec "$dbPod" -n "$KUBE_OVN_NS" -c ovn-central -- ovs-appctl -t "$dbCtl" cluster/status "$dbName"
      ;;
    kick)
      if [ $# -lt 1 ]; then
        echo "kick requires a server id, see 'kubectl ko $component status'"
        exit 1
      fi
      kubectl exec "$dbPod" -n "$KUBE_OVN_NS" -c ovn-central -- ovs-appctl -t "$dbCtl" cluster/kick "$dbName" "$1"
      ;;
    backup)
      # Stage the standalone copy in /tmp rather than next to the live database files.
      kubectl exec "$dbPod" -n "$KUBE_OVN_NS" -c ovn-central -- ovsdb-tool cluster-to-standalone "/tmp/$backupFile" "$dbFile"
      # Copy from the same container the exec calls target.
      kubectl cp -c ovn-central "$KUBE_OVN_NS/$dbPod:/tmp/$backupFile" "$(pwd)/$backupFile"
      kubectl exec "$dbPod" -n "$KUBE_OVN_NS" -c ovn-central -- rm -f "/tmp/$backupFile"
      echo "backup ovn-$component db to $(pwd)/$backupFile"
      ;;
    *)
      echo "unknown action $action"
      exit 1
  esac
}
if [ $# -lt 1 ]; then
......@@ -2375,6 +2442,9 @@ case $subcommand in
vsctl|ofctl|dpctl|appctl)
xxctl "$subcommand" "$@"
;;
nb|sb)
dbtool "$subcommand" "$@"
;;
tcpdump)
tcpdump "$@"
;;
......@@ -2388,6 +2458,7 @@ case $subcommand in
showHelp
;;
esac
EOF
chmod +x /usr/local/bin/kubectl-ko
......
......@@ -2107,6 +2107,7 @@ OVN_SB_POD=
showHelp(){
echo "kubectl ko {subcommand} [option...]"
echo "Available Subcommands:"
echo " [nb|sb] [status|kick|backup] ovn-db operations show cluster status, kick stale server or backup database"
echo " nbctl [ovn-nbctl options ...] invoke ovn-nbctl"
echo " sbctl [ovn-sbctl options ...] invoke ovn-sbctl"
echo " vsctl {nodeName} [ovs-vsctl options ...] invoke ovs-vsctl on selected node"
......@@ -2149,7 +2150,6 @@ tcpdump(){
echo "nic doesn't exist on node $nodeName"
exit 1
fi
podNicType=$(kubectl get pod "$podName" -n "$namespace" -o jsonpath={.metadata.annotations.ovn\\.kubernetes\\.io/pod_nic_type})
podNetNs=$(kubectl exec "$ovnCni" -n $KUBE_OVN_NS -- ovs-vsctl --data=bare --no-heading get interface "$nicName" external-ids:pod_netns | tr -d '\r')
set -x
......@@ -2258,6 +2258,22 @@ xxctl(){
kubectl exec "$ovsPod" -n $KUBE_OVN_NS -- ovs-$subcommand "$@"
}
# checkLeader asserts that exactly one address backs the ovn-<component> endpoints.
#   $1  component suffix of the endpoints object (diagnose passes nb, sb and northd)
# Prints a diagnostic and exits 1 when zero or more than one address is listed.
checkLeader(){
  component="$1"; shift
  # Count only the "ip:" keys of the address entries. A bare `grep ip` also matches
  # unrelated lines that merely contain "ip" (e.g. `nodeName: ip-10-0-0-1...`), which
  # falsely reports several leaders.
  # `grep -c` still prints 0 on no match but exits 1; `|| true` keeps that (or a failed
  # kubectl call) from aborting the script before the message below is printed, in case
  # the script runs with errexit/pipefail.
  count=$(kubectl get ep "ovn-$component" -n "$KUBE_OVN_NS" -o yaml | grep -cE '^[[:space:]]*(-[[:space:]]+)?ip:[[:space:]]' || true)
  count="${count:-0}"
  if [ "$count" -eq 0 ]; then
    echo "no ovn-$component exists !!"
    exit 1
  fi
  if [ "$count" -gt 1 ]; then
    echo "ovn-$component has more than one leader !!"
    exit 1
  fi
  echo "ovn-$component leader check ok"
}
diagnose(){
kubectl get crd vpcs.kubeovn.io
kubectl get crd vpc-nat-gateways.kubeovn.io
......@@ -2283,6 +2299,11 @@ diagnose(){
checkDaemonSet kube-ovn-cni
checkDaemonSet ovs-ovn
checkDeployment coredns
checkLeader nb
checkLeader sb
checkLeader northd
type="$1"
case $type in
all)
......@@ -2379,19 +2400,65 @@ checkDeployment(){
checkKubeProxy(){
dsMode=`kubectl get ds -n kube-system | grep kube-proxy || true`
if [ -z "$dsMode" ]; then
nodeIps=`kubectl get node -o wide --no-headers | awk '{print $6}'`
nodeIps=`kubectl get node -o wide | grep -v "INTERNAL-IP" | awk '{print $6}'`
for node in $nodeIps
do
healthResult=`curl -g -6 -sL --connect-timeout 5 -w %{http_code} http://[$node]:10256/healthz -o /dev/null | grep -v 200 || true`
healthResult=`curl -g -6 -sL -w %{http_code} http://[$node]:10256/healthz -o /dev/null | grep -v 200 || true`
if [ -n "$healthResult" ]; then
echo "$node kube-proxy's health check failed"
exit 1
fi
done
echo "kube-proxy ready"
else
checkDaemonSet kube-proxy
fi
echo "kube-proxy ready"
}
# dbtool runs a maintenance action against the OVN northbound or southbound database.
#   $1  component: nb | sb
#   $2  action:    status | kick | backup
#   $3  (kick only) id of the server to remove from the cluster
# status  prints the output of cluster/status for the database.
# kick    asks the cluster to drop the given server via cluster/kick.
# backup  converts the clustered db to a standalone file inside the pod, copies it to the
#         current directory and removes the temporary file from the pod.
# Exits 1 on an unknown component/action or when kick is called without a server id.
dbtool(){
  # %S (seconds 00-59); the previous %s expanded to seconds since the epoch.
  suffix=$(date +%m%d%H%M%S)
  component="$1"; shift
  action="${1:-}"
  if [ $# -gt 0 ]; then
    shift
  fi

  # Everything that differs between nb and sb is derived here once.
  case $component in
    nb)
      dbPod="$OVN_NB_POD"
      dbName="OVN_Northbound"
      ;;
    sb)
      dbPod="$OVN_SB_POD"
      dbName="OVN_Southbound"
      ;;
    *)
      echo "unknown subcommand $component"
      exit 1
  esac
  dbCtl="/var/run/ovn/ovn${component}_db.ctl"
  dbFile="/etc/ovn/ovn${component}_db.db"
  backupFile="ovn${component}_db.$suffix.backup"

  case $action in
    status)
      kubectl exec "$dbPod" -n "$KUBE_OVN_NS" -c ovn-central -- ovs-appctl -t "$dbCtl" cluster/status "$dbName"
      ;;
    kick)
      if [ $# -lt 1 ]; then
        echo "kick requires a server id, see 'kubectl ko $component status'"
        exit 1
      fi
      kubectl exec "$dbPod" -n "$KUBE_OVN_NS" -c ovn-central -- ovs-appctl -t "$dbCtl" cluster/kick "$dbName" "$1"
      ;;
    backup)
      # Stage the standalone copy in /tmp rather than next to the live database files.
      kubectl exec "$dbPod" -n "$KUBE_OVN_NS" -c ovn-central -- ovsdb-tool cluster-to-standalone "/tmp/$backupFile" "$dbFile"
      # Copy from the same container the exec calls target.
      kubectl cp -c ovn-central "$KUBE_OVN_NS/$dbPod:/tmp/$backupFile" "$(pwd)/$backupFile"
      kubectl exec "$dbPod" -n "$KUBE_OVN_NS" -c ovn-central -- rm -f "/tmp/$backupFile"
      echo "backup ovn-$component db to $(pwd)/$backupFile"
      ;;
    *)
      echo "unknown action $action"
      exit 1
  esac
}
if [ $# -lt 1 ]; then
......@@ -2413,6 +2480,9 @@ case $subcommand in
vsctl|ofctl|dpctl|appctl)
xxctl "$subcommand" "$@"
;;
nb|sb)
dbtool "$subcommand" "$@"
;;
tcpdump)
tcpdump "$@"
;;
......@@ -2426,6 +2496,7 @@ case $subcommand in
showHelp
;;
esac
EOF
chmod +x /usr/local/bin/kubectl-ko
......
......@@ -8,6 +8,7 @@ OVN_SB_POD=
showHelp(){
echo "kubectl ko {subcommand} [option...]"
echo "Available Subcommands:"
echo " [nb|sb] [status|kick|backup] ovn-db operations show cluster status, kick stale server or backup database"
echo " nbctl [ovn-nbctl options ...] invoke ovn-nbctl"
echo " sbctl [ovn-sbctl options ...] invoke ovn-sbctl"
echo " vsctl {nodeName} [ovs-vsctl options ...] invoke ovs-vsctl on selected node"
......@@ -158,6 +159,22 @@ xxctl(){
kubectl exec "$ovsPod" -n $KUBE_OVN_NS -- ovs-$subcommand "$@"
}
# checkLeader asserts that exactly one address backs the ovn-<component> endpoints.
#   $1  component suffix of the endpoints object (diagnose passes nb, sb and northd)
# Prints a diagnostic and exits 1 when zero or more than one address is listed.
checkLeader(){
  component="$1"; shift
  # Count only the "ip:" keys of the address entries. A bare `grep ip` also matches
  # unrelated lines that merely contain "ip" (e.g. `nodeName: ip-10-0-0-1...`), which
  # falsely reports several leaders.
  # `grep -c` still prints 0 on no match but exits 1; `|| true` keeps that (or a failed
  # kubectl call) from aborting the script before the message below is printed, in case
  # the script runs with errexit/pipefail.
  count=$(kubectl get ep "ovn-$component" -n "$KUBE_OVN_NS" -o yaml | grep -cE '^[[:space:]]*(-[[:space:]]+)?ip:[[:space:]]' || true)
  count="${count:-0}"
  if [ "$count" -eq 0 ]; then
    echo "no ovn-$component exists !!"
    exit 1
  fi
  if [ "$count" -gt 1 ]; then
    echo "ovn-$component has more than one leader !!"
    exit 1
  fi
  echo "ovn-$component leader check ok"
}
diagnose(){
kubectl get crd vpcs.kubeovn.io
kubectl get crd vpc-nat-gateways.kubeovn.io
......@@ -183,6 +200,11 @@ diagnose(){
checkDaemonSet kube-ovn-cni
checkDaemonSet ovs-ovn
checkDeployment coredns
checkLeader nb
checkLeader sb
checkLeader northd
type="$1"
case $type in
all)
......@@ -294,6 +316,52 @@ checkKubeProxy(){
echo "kube-proxy ready"
}
# dbtool runs a maintenance action against the OVN northbound or southbound database.
#   $1  component: nb | sb
#   $2  action:    status | kick | backup
#   $3  (kick only) id of the server to remove from the cluster
# status  prints the output of cluster/status for the database.
# kick    asks the cluster to drop the given server via cluster/kick.
# backup  converts the clustered db to a standalone file inside the pod, copies it to the
#         current directory and removes the temporary file from the pod.
# Exits 1 on an unknown component/action or when kick is called without a server id.
dbtool(){
  # %S (seconds 00-59); the previous %s expanded to seconds since the epoch.
  suffix=$(date +%m%d%H%M%S)
  component="$1"; shift
  action="${1:-}"
  if [ $# -gt 0 ]; then
    shift
  fi

  # Everything that differs between nb and sb is derived here once.
  case $component in
    nb)
      dbPod="$OVN_NB_POD"
      dbName="OVN_Northbound"
      ;;
    sb)
      dbPod="$OVN_SB_POD"
      dbName="OVN_Southbound"
      ;;
    *)
      echo "unknown subcommand $component"
      exit 1
  esac
  dbCtl="/var/run/ovn/ovn${component}_db.ctl"
  dbFile="/etc/ovn/ovn${component}_db.db"
  backupFile="ovn${component}_db.$suffix.backup"

  case $action in
    status)
      kubectl exec "$dbPod" -n "$KUBE_OVN_NS" -c ovn-central -- ovs-appctl -t "$dbCtl" cluster/status "$dbName"
      ;;
    kick)
      if [ $# -lt 1 ]; then
        echo "kick requires a server id, see 'kubectl ko $component status'"
        exit 1
      fi
      kubectl exec "$dbPod" -n "$KUBE_OVN_NS" -c ovn-central -- ovs-appctl -t "$dbCtl" cluster/kick "$dbName" "$1"
      ;;
    backup)
      # The standalone copy is staged in /tmp inside the pod and removed after the download.
      kubectl exec "$dbPod" -n "$KUBE_OVN_NS" -c ovn-central -- ovsdb-tool cluster-to-standalone "/tmp/$backupFile" "$dbFile"
      # Copy from the same container the exec calls target.
      kubectl cp -c ovn-central "$KUBE_OVN_NS/$dbPod:/tmp/$backupFile" "$(pwd)/$backupFile"
      kubectl exec "$dbPod" -n "$KUBE_OVN_NS" -c ovn-central -- rm -f "/tmp/$backupFile"
      echo "backup ovn-$component db to $(pwd)/$backupFile"
      ;;
    *)
      echo "unknown action $action"
      exit 1
  esac
}
if [ $# -lt 1 ]; then
showHelp
exit 0
......@@ -313,6 +381,9 @@ case $subcommand in
vsctl|ofctl|dpctl|appctl)
xxctl "$subcommand" "$@"
;;
nb|sb)
dbtool "$subcommand" "$@"
;;
tcpdump)
tcpdump "$@"
;;
......
......@@ -82,4 +82,18 @@ var _ = Describe("[kubectl-ko]", func() {
output, err = exec.Command("kubectl", "ko", "trace", fmt.Sprintf("kube-system/%s", pod.Name), "114.114.114.114", "udp", "53").CombinedOutput()
Expect(err).NotTo(HaveOccurred(), string(output))
})
It("nb/sb operation", func() {
	// Each entry is one `kubectl ko` invocation; they run in order and the spec
	// fails on the first command that returns an error, reporting its combined output.
	invocations := [][]string{
		{"ko", "nb", "status"},
		{"ko", "sb", "status"},
		{"ko", "nb", "backup"},
		{"ko", "sb", "backup"},
	}
	for _, args := range invocations {
		output, err := exec.Command("kubectl", args...).CombinedOutput()
		Expect(err).NotTo(HaveOccurred(), string(output))
	}
})
})
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment