Unverified Commit 05c50f41 authored by Snow Pettersen's avatar Snow Pettersen Committed by GitHub
Browse files

xds: add helper that ensures Envoy is connected to the control plane (#1181)

Adds a test helper that blocks until Envoy is able to re-establish its xDS stream.
This should help de-flake individual test runs and avoid having to account for the
exponential backoff in other test assertions.
parent a73641a8
Showing with 97 additions and 39 deletions
+97 -39
......@@ -3,8 +3,10 @@ package envoytest
import (
"bytes"
"errors"
"io/ioutil"
"net"
"net/http"
"strings"
"time"
bootstrap "github.com/envoyproxy/go-control-plane/envoy/config/bootstrap/v3"
......@@ -17,6 +19,9 @@ import (
apimock "github.com/lyft/clutch/backend/mock/api"
)
// Stat prefixes that tests pass to EnsureControlPlaneConnectivity to select which
// control plane subscription's "control_plane.connected_state" stat is inspected.
const (
	// EcdsStatPrefix is the stat prefix used when waiting on the extension config
	// discovery subscription of the ingress HTTP listener.
	EcdsStatPrefix = "http.ingress_http.extension_config_discovery.envoy.extension_config"

	// RuntimeStatPrefix is the stat prefix used when waiting on the runtime subscription.
	RuntimeStatPrefix = "runtime"
)
const baseConfig = `
node:
id: test
......@@ -59,7 +64,7 @@ static_resources:
- endpoint:
address:
socket_address:
address: 127.0.0.1
address: 0.0.0.0
port_value: 1234
`
......@@ -106,6 +111,54 @@ func (e *EnvoyHandle) MakeSimpleCall() (int, error) {
return resp.StatusCode, nil
}
// EnsureControlPlaneConnectivity polls the Envoy stats endpoint until the
// "<prefix>.control_plane.connected_state" stat reports a value of 1, i.e. until Envoy
// has an active stream against the control plane identified by the provided stat prefix.
// This is useful in ensuring that Envoy has been able to reconnect to the control plane,
// even after the exponential backoff that happens while Envoy is unable to connect.
//
// The endpoint is polled every 500ms. It returns nil as soon as the stat reports 1, and
// an error if the stats request cannot be constructed or if connectivity is not observed
// within 20 seconds.
func (e *EnvoyHandle) EnsureControlPlaneConnectivity(prefix string) error {
	// Bound every individual request: without a client timeout a hung admin endpoint would
	// block inside the select case and the overall deadline below could never fire.
	client := &http.Client{Timeout: 2 * time.Second}
	r, err := http.NewRequest("GET", "http://envoy:9901/stats", nil)
	if err != nil {
		return err
	}

	timeout := time.NewTimer(20 * time.Second)
	defer timeout.Stop()
	ticker := time.NewTicker(500 * time.Millisecond)
	defer ticker.Stop()

	statName := prefix + ".control_plane.connected_state"
	for {
		select {
		case <-timeout.C:
			return errors.New("timed out waiting for control plane connectivity")
		case <-ticker.C:
			// TODO(snowp): Have this parse out a generic map of stats values to make it easier to query
			// arbitrary stats.
			// We intentionally ignore errors here, as the proxy might be periodically unavailable but we
			// don't care as long as it recovers within the timeout.
			resp, err := client.Do(r)
			if err != nil {
				continue
			}
			allStats, err := ioutil.ReadAll(resp.Body)
			resp.Body.Close()
			if err != nil {
				// A truncated stats dump is treated like an unavailable proxy: retry on the next tick.
				continue
			}
			if controlPlaneConnected(string(allStats), statName) {
				return nil
			}
		}
	}
}

// controlPlaneConnected reports whether the newline-separated stats dump contains a line
// starting with statName whose value (the text after the first ':') is "1".
func controlPlaneConnected(allStats, statName string) bool {
	for _, statLine := range strings.Split(allStats, "\n") {
		if !strings.HasPrefix(statLine, statName) {
			continue
		}
		nameAndValue := strings.Split(statLine, ":")
		// Guard against lines without a ':' separator instead of indexing out of range.
		if len(nameAndValue) < 2 {
			continue
		}
		if strings.TrimSpace(nameAndValue[1]) == "1" {
			return true
		}
	}
	return false
}
// EnvoyConfig provides a configuration builder that mirrors the upstream Envoy ConfigHelper:
// a base configuration is used which can be modified by a series of modifiers to create the
// final configuration.
......
......@@ -36,6 +36,9 @@ func TestEnvoyFaults(t *testing.T) {
e, err := envoytest.NewEnvoyHandle()
assert.NoError(t, err)
e.EnsureControlPlaneConnectivity(envoytest.RuntimeStatPrefix)
assert.NoError(t, err)
code, err := e.MakeSimpleCall()
assert.NoError(t, err)
assert.Equal(t, 503, code)
......@@ -59,6 +62,46 @@ func TestEnvoyFaults(t *testing.T) {
assert.NoError(t, err, "did not see faults reverted")
}
// TestEnvoyECDSFaults checks that an ECDS-delivered fault is applied to and then removed
// from a running Envoy: a simple call returns 503 before any experiment exists, 404 once
// a test experiment with that status is created, and 503 again after it is canceled.
func TestEnvoyECDSFaults(t *testing.T) {
	xdsConfig := &xdsconfigv1.Config{
		RtdsLayerName:             "rtds",
		CacheRefreshInterval:      ptypes.DurationProto(time.Second),
		IngressFaultRuntimePrefix: "fault.http",
		EgressFaultRuntimePrefix:  "egress",
		EcdsAllowList:             &xdsconfigv1.Config_ECDSAllowList{EnabledClusters: []string{"test-cluster"}},
	}

	ts := xdstest.NewTestModuleServer(New, true, xdsConfig)
	defer ts.Stop()

	e, err := envoytest.NewEnvoyHandle()
	assert.NoError(t, err)

	// Wait for Envoy to (re)connect to the control plane before asserting on responses.
	// The returned error must be captured so that a connectivity timeout fails the test.
	err = e.EnsureControlPlaneConnectivity(envoytest.EcdsStatPrefix)
	assert.NoError(t, err)

	code, err := e.MakeSimpleCall()
	assert.NoError(t, err)
	assert.Equal(t, 503, code)

	experiment := createTestExperiment(t, 404, ts.Storer)

	err = awaitExpectedReturnValueForSimpleCall(t, e, awaitReturnValueParams{
		timeout:        2 * time.Second,
		expectedStatus: 404,
	})
	assert.NoError(t, err, "did not see faults enabled")

	// TODO(kathan24): Test TTL by stopping the server instead of canceling the experiment. Currently, TTL is not supported for ECDS in the upstream Envoy
	ts.Storer.CancelExperimentRun(context.Background(), experiment.Id)

	err = awaitExpectedReturnValueForSimpleCall(t, e, awaitReturnValueParams{
		timeout:        10 * time.Second,
		expectedStatus: 503,
	})
	assert.NoError(t, err, "did not see faults reverted")
}
func createTestExperiment(t *testing.T, faultHttpStatus int, storer *experimentstoremock.SimpleStorer) *experimentationv1.Experiment {
now := time.Now()
config := serverexperimentation.HTTPFaultConfig{
......@@ -122,41 +165,3 @@ func awaitExpectedReturnValueForSimpleCall(t *testing.T, e *envoytest.EnvoyHandl
return nil
}
func TestEnvoyECDSFaults(t *testing.T) {
xdsConfig := &xdsconfigv1.Config{
RtdsLayerName: "rtds",
CacheRefreshInterval: ptypes.DurationProto(time.Second),
IngressFaultRuntimePrefix: "fault.http",
EgressFaultRuntimePrefix: "egress",
EcdsAllowList: &xdsconfigv1.Config_ECDSAllowList{EnabledClusters: []string{"test-cluster"}},
}
ts := xdstest.NewTestModuleServer(New, true, xdsConfig)
defer ts.Stop()
e, err := envoytest.NewEnvoyHandle()
assert.NoError(t, err)
code, err := e.MakeSimpleCall()
assert.NoError(t, err)
assert.Equal(t, 503, code)
experiment := createTestExperiment(t, 404, ts.Storer)
err = awaitExpectedReturnValueForSimpleCall(t, e, awaitReturnValueParams{
// Timeout needs to be higher since envoy has exponential back-off request timeout
timeout: 4 * time.Second,
expectedStatus: 404,
})
assert.NoError(t, err, "did not see faults enabled")
// TODO(kathan24): Test TTL by stopping the server instead of canceling the experiment. Currently, TTL is not not supported for ECDS in the upstream Envoy
ts.Storer.CancelExperimentRun(context.Background(), experiment.Id)
err = awaitExpectedReturnValueForSimpleCall(t, e, awaitReturnValueParams{
timeout: 10 * time.Second,
expectedStatus: 503,
})
assert.NoError(t, err, "did not see faults reverted")
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment