Commit 2dde49e2 authored by Alex Dadgar's avatar Alex Dadgar
Browse files

Improve validation/defaulting, handle start-join

This commit:
* Improves how we combine the old retry-* fields and the new stanza and
how it is validated
* Handles the new stanza setting start_join
* Fixes integration test to not bind to the standard port and instead be
randomized.
* Simplifies parsing of the old retry_interval
* Fixes the errors from retry join being masked
* Flags get parsed into new server_join stanza
parent 13f8e91e
Showing with 128 additions and 135 deletions
+128 -135
......@@ -63,9 +63,11 @@ func (c *Command) readConfig() *Config {
Client: &ClientConfig{},
Consul: &config.ConsulConfig{},
Ports: &Ports{},
Server: &ServerConfig{},
Vault: &config.VaultConfig{},
ACL: &ACLConfig{},
Server: &ServerConfig{
ServerJoin: &ServerJoin{},
},
Vault: &config.VaultConfig{},
ACL: &ACLConfig{},
}
flags := flag.NewFlagSet("agent", flag.ContinueOnError)
......@@ -78,13 +80,16 @@ func (c *Command) readConfig() *Config {
// Server-only options
flags.IntVar(&cmdConfig.Server.BootstrapExpect, "bootstrap-expect", 0, "")
flags.BoolVar(&cmdConfig.Server.RejoinAfterLeave, "rejoin", false, "")
flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.StartJoin), "join", "")
flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.RetryJoin), "retry-join", "")
flags.IntVar(&cmdConfig.Server.RetryMaxAttempts, "retry-max", 0, "")
flags.StringVar(&cmdConfig.Server.RetryInterval, "retry-interval", "", "")
flags.StringVar(&cmdConfig.Server.EncryptKey, "encrypt", "", "gossip encryption key")
flags.IntVar(&cmdConfig.Server.RaftProtocol, "raft-protocol", 0, "")
flags.BoolVar(&cmdConfig.Server.RejoinAfterLeave, "rejoin", false, "")
flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.ServerJoin.StartJoin), "join", "")
flags.Var((*flaghelper.StringFlag)(&cmdConfig.Server.ServerJoin.RetryJoin), "retry-join", "")
flags.IntVar(&cmdConfig.Server.ServerJoin.RetryMaxAttempts, "retry-max", 0, "")
flags.Var((flaghelper.FuncDurationVar)(func(d time.Duration) error {
cmdConfig.Server.ServerJoin.RetryInterval = d
return nil
}), "retry-interval", "")
// Client-only options
flags.StringVar(&cmdConfig.Client.StateDir, "state-dir", "", "")
......@@ -267,14 +272,6 @@ func (c *Command) readConfig() *Config {
}
}
// COMPAT: Remove in 0.10. Parse the RetryInterval
dur, err := time.ParseDuration(config.Server.RetryInterval)
if err != nil {
c.Ui.Error(fmt.Sprintf("Error parsing retry interval: %s", err))
return nil
}
config.Server.retryInterval = dur
// Check that the server is running in at least one mode.
if !(config.Server.Enabled || config.Client.Enabled) {
c.Ui.Error("Must specify either server, client or dev mode for the agent.")
......@@ -547,6 +544,17 @@ func (c *Command) Run(args []string) int {
logGate.Flush()
// Start retry join process
if err := c.handleRetryJoin(config); err != nil {
c.Ui.Error(err.Error())
return 1
}
// Wait for exit
return c.handleSignals()
}
// handleRetryJoin is used to start retry joining if it is configured.
func (c *Command) handleRetryJoin(config *Config) error {
c.retryJoinErrCh = make(chan struct{})
if config.Server.Enabled && len(config.Server.RetryJoin) != 0 {
......@@ -559,21 +567,30 @@ func (c *Command) Run(args []string) int {
}
if err := joiner.Validate(config); err != nil {
c.Ui.Error(err.Error())
return 1
return err
}
// COMPAT: Remove in 0.10 and only use ServerJoin
serverJoinInfo := &ServerJoin{
RetryJoin: config.Server.RetryJoin,
StartJoin: config.Server.StartJoin,
RetryMaxAttempts: config.Server.RetryMaxAttempts,
RetryInterval: config.Server.retryInterval,
// Remove the duplicate fields
if len(config.Server.RetryJoin) != 0 {
config.Server.ServerJoin.RetryJoin = config.Server.RetryJoin
config.Server.RetryJoin = nil
}
if config.Server.RetryMaxAttempts != 0 {
config.Server.ServerJoin.RetryMaxAttempts = config.Server.RetryMaxAttempts
config.Server.RetryMaxAttempts = 0
}
go joiner.RetryJoin(serverJoinInfo)
if config.Server.RetryInterval != 0 {
config.Server.ServerJoin.RetryInterval = config.Server.RetryInterval
config.Server.RetryInterval = 0
}
c.agent.logger.Printf("[WARN] agent: Using deprecated retry_join fields. Upgrade configuration to use server_join")
}
if config.Server.Enabled && config.Server.ServerJoin != nil {
if config.Server.Enabled &&
config.Server.ServerJoin != nil &&
len(config.Server.ServerJoin.RetryJoin) != 0 {
joiner := retryJoiner{
discover: &discover.Discover{},
errCh: c.retryJoinErrCh,
......@@ -583,18 +600,15 @@ func (c *Command) Run(args []string) int {
}
if err := joiner.Validate(config); err != nil {
c.Ui.Error(err.Error())
return 1
return err
}
go joiner.RetryJoin(config.Server.ServerJoin)
}
if config.Client.Enabled && config.Client.ServerJoin != nil {
// COMPAT: Remove in 0.10 set the default RetryInterval value, as the
// ServerJoin stanza is not part of a default config for an agent.
config.Client.ServerJoin.RetryInterval = time.Duration(30) * time.Second
if config.Client.Enabled &&
config.Client.ServerJoin != nil &&
len(config.Client.ServerJoin.RetryJoin) != 0 {
joiner := retryJoiner{
discover: &discover.Discover{},
errCh: c.retryJoinErrCh,
......@@ -604,15 +618,13 @@ func (c *Command) Run(args []string) int {
}
if err := joiner.Validate(config); err != nil {
c.Ui.Error(err.Error())
return 1
return err
}
go joiner.RetryJoin(config.Client.ServerJoin)
}
// Wait for exit
return c.handleSignals()
return nil
}
// handleSignals blocks until we get an exit-causing signal
......@@ -885,12 +897,34 @@ func (c *Command) setupTelemetry(config *Config) (*metrics.InmemSink, error) {
}
func (c *Command) startupJoin(config *Config) error {
if len(config.Server.StartJoin) == 0 || !config.Server.Enabled {
// Nothing to do
if !config.Server.Enabled {
return nil
}
// Validate both old and new aren't being set
old := len(config.Server.StartJoin)
var new int
if config.Server.ServerJoin != nil {
new = len(config.Server.ServerJoin.StartJoin)
}
if old != 0 && new != 0 {
return fmt.Errorf("server_join and start_join cannot both be defined; prefer setting the server_join stanza")
}
// Nothing to do
if old+new == 0 {
return nil
}
// Combine the lists and join
joining := config.Server.StartJoin
if new != 0 {
joining = append(joining, config.Server.ServerJoin.StartJoin...)
}
c.Ui.Output("Joining cluster...")
n, err := c.agent.server.Join(config.Server.StartJoin)
n, err := c.agent.server.Join(joining)
if err != nil {
return err
}
......
......@@ -331,8 +331,7 @@ type ServerConfig struct {
// attempts on agent start. The minimum allowed value is 1 second and
// the default is 30s.
// Deprecated in Nomad 0.10
RetryInterval string `mapstructure:"retry_interval"`
retryInterval time.Duration `mapstructure:"-"`
RetryInterval time.Duration `mapstructure:"retry_interval"`
// RejoinAfterLeave controls our interaction with the cluster after leave.
// When set to false (default), a leave causes Consul to not rejoin
......@@ -661,13 +660,20 @@ func DefaultConfig() *Config {
GCInodeUsageThreshold: 70,
GCMaxAllocs: 50,
NoHostUUID: helper.BoolToPtr(true),
ServerJoin: &ServerJoin{
RetryJoin: []string{},
RetryInterval: 30 * time.Second,
RetryMaxAttempts: 0,
},
},
Server: &ServerConfig{
Enabled: false,
StartJoin: []string{},
RetryJoin: []string{},
RetryInterval: "30s",
RetryMaxAttempts: 0,
Enabled: false,
StartJoin: []string{},
ServerJoin: &ServerJoin{
RetryJoin: []string{},
RetryInterval: 30 * time.Second,
RetryMaxAttempts: 0,
},
},
ACL: &ACLConfig{
Enabled: false,
......@@ -1096,9 +1102,8 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig {
if b.RetryMaxAttempts != 0 {
result.RetryMaxAttempts = b.RetryMaxAttempts
}
if b.RetryInterval != "" {
if b.RetryInterval != 0 {
result.RetryInterval = b.RetryInterval
result.retryInterval = b.retryInterval
}
if b.RejoinAfterLeave {
result.RejoinAfterLeave = true
......@@ -1116,9 +1121,6 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig {
result.EncryptKey = b.EncryptKey
}
if b.ServerJoin != nil {
// // COMPAT: Remove in 0.10 - ServerJoin is not defined by default on an
// agent config, this should be eventually moved to DefaultConfig
result.ServerJoin = getDefaultServerJoin()
result.ServerJoin = result.ServerJoin.Merge(b.ServerJoin)
}
......@@ -1138,12 +1140,6 @@ func (a *ServerConfig) Merge(b *ServerConfig) *ServerConfig {
return &result
}
func getDefaultServerJoin() *ServerJoin {
return &ServerJoin{
RetryInterval: time.Duration(30) * time.Second,
}
}
// Merge is used to merge two client configs together
func (a *ClientConfig) Merge(b *ClientConfig) *ClientConfig {
result := *a
......@@ -1235,9 +1231,6 @@ func (a *ClientConfig) Merge(b *ClientConfig) *ClientConfig {
}
if b.ServerJoin != nil {
// // COMPAT: Remove in 0.10 - ServerJoin is not defined by default on an
// agent config, this should be eventually moved to DefaultConfig
result.ServerJoin = getDefaultServerJoin()
result.ServerJoin = result.ServerJoin.Merge(b.ServerJoin)
}
......
......@@ -104,7 +104,7 @@ func TestConfig_Parse(t *testing.T) {
MaxHeartbeatsPerSecond: 11.0,
RetryJoin: []string{"1.1.1.1", "2.2.2.2"},
StartJoin: []string{"1.1.1.1", "2.2.2.2"},
RetryInterval: "15s",
RetryInterval: 15 * time.Second,
RejoinAfterLeave: true,
RetryMaxAttempts: 3,
NonVotingServer: true,
......
......@@ -265,8 +265,7 @@ func TestConfig_Merge(t *testing.T) {
RejoinAfterLeave: true,
StartJoin: []string{"1.1.1.1"},
RetryJoin: []string{"1.1.1.1"},
RetryInterval: "10s",
retryInterval: time.Second * 10,
RetryInterval: time.Second * 10,
NonVotingServer: true,
RedundancyZone: "bar",
UpgradeVersion: "bar",
......
......@@ -59,7 +59,7 @@ func (r *retryJoiner) Validate(config *Config) error {
// If retry_join is defined for the server, ensure that deprecated
// fields and the server_join stanza are not both set
if config.Server != nil && config.Server.ServerJoin != nil {
if config.Server != nil && config.Server.ServerJoin != nil && len(config.Server.ServerJoin.RetryJoin) != 0 {
if len(config.Server.RetryJoin) != 0 {
return fmt.Errorf("server_join and retry_join cannot both be defined; prefer setting the server_join stanza")
}
......@@ -69,11 +69,12 @@ func (r *retryJoiner) Validate(config *Config) error {
if config.Server.RetryMaxAttempts != 0 {
return fmt.Errorf("server_join and retry_max cannot both be defined; prefer setting the server_join stanza")
}
if config.Server.RetryInterval != "30s" {
if config.Server.RetryInterval != 0 {
return fmt.Errorf("server_join and retry_interval cannot both be defined; prefer setting the server_join stanza")
}
if len(config.Server.ServerJoin.RetryJoin) != 0 && len(config.Server.ServerJoin.StartJoin) != 0 {
if len(config.Server.ServerJoin.StartJoin) != 0 {
return fmt.Errorf("retry_join and start_join cannot both be defined")
}
}
......@@ -103,6 +104,7 @@ func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) {
for {
var addrs []string
var n int
var err error
for _, addr := range serverJoin.RetryJoin {
......@@ -121,14 +123,14 @@ func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) {
if len(addrs) > 0 {
if r.serverEnabled && r.serverJoin != nil {
n, err := r.serverJoin(addrs)
n, err = r.serverJoin(addrs)
if err == nil {
r.logger.Printf("[INFO] agent: Join completed. Server synced with %d initial servers", n)
return
}
}
if r.clientEnabled && r.clientJoin != nil {
n, err := r.clientJoin(addrs)
n, err = r.clientJoin(addrs)
if err == nil {
r.logger.Printf("[INFO] agent: Join completed. Client synced with %d initial servers", n)
return
......@@ -144,7 +146,7 @@ func (r *retryJoiner) RetryJoin(serverJoin *ServerJoin) {
}
if err != nil {
r.logger.Printf("[WARN] agent: Join failed: %v, retrying in %v", err,
r.logger.Printf("[WARN] agent: Join failed: %q, retrying in %v", err,
serverJoin.RetryInterval)
}
time.Sleep(serverJoin.RetryInterval)
......
......@@ -9,7 +9,6 @@ import (
"time"
"github.com/hashicorp/nomad/testutil"
"github.com/hashicorp/nomad/version"
"github.com/mitchellh/cli"
"github.com/stretchr/testify/require"
)
......@@ -31,43 +30,37 @@ func (m *MockDiscover) Names() []string {
func TestRetryJoin_Integration(t *testing.T) {
t.Parallel()
// Create two agents and have one retry join the other
agent := NewTestAgent(t, t.Name(), nil)
defer agent.Shutdown()
doneCh := make(chan struct{})
shutdownCh := make(chan struct{})
defer func() {
close(shutdownCh)
<-doneCh
}()
agent2 := NewTestAgent(t, t.Name(), func(c *Config) {
c.NodeName = "foo"
if c.Server.ServerJoin == nil {
c.Server.ServerJoin = &ServerJoin{}
}
c.Server.ServerJoin.RetryJoin = []string{agent.Config.normalizedAddrs.Serf}
c.Server.ServerJoin.RetryInterval = 1 * time.Second
})
defer agent2.Shutdown()
// Create a fake command and have it wrap the second agent and run the retry
// join handler
cmd := &Command{
Version: version.GetVersion(),
ShutdownCh: shutdownCh,
Ui: &cli.BasicUi{
Reader: os.Stdin,
Writer: os.Stdout,
ErrorWriter: os.Stderr,
},
agent: agent2.Agent,
}
serfAddr := agent.Config.normalizedAddrs.Serf
args := []string{
"-dev",
"-node", "foo",
"-retry-join", serfAddr,
"-retry-interval", "1s",
if err := cmd.handleRetryJoin(agent2.Config); err != nil {
t.Fatalf("handleRetryJoin failed: %v", err)
}
go func() {
if code := cmd.Run(args); code != 0 {
t.Logf("bad: %d", code)
}
close(doneCh)
}()
// Ensure the retry join occured.
testutil.WaitForResult(func() (bool, error) {
mem := agent.server.Members()
if len(mem) != 2 {
......@@ -205,8 +198,6 @@ func TestRetryJoin_Client(t *testing.T) {
func TestRetryJoin_Validate(t *testing.T) {
t.Parallel()
require := require.New(t)
type validateExpect struct {
config *Config
isValid bool
......@@ -225,7 +216,7 @@ func TestRetryJoin_Validate(t *testing.T) {
},
RetryJoin: []string{"127.0.0.1"},
RetryMaxAttempts: 0,
RetryInterval: "0",
RetryInterval: 0,
StartJoin: []string{},
},
},
......@@ -243,7 +234,7 @@ func TestRetryJoin_Validate(t *testing.T) {
},
StartJoin: []string{"127.0.0.1"},
RetryMaxAttempts: 0,
RetryInterval: "0",
RetryInterval: 0,
RetryJoin: []string{},
},
},
......@@ -261,7 +252,7 @@ func TestRetryJoin_Validate(t *testing.T) {
},
StartJoin: []string{},
RetryMaxAttempts: 1,
RetryInterval: "0",
RetryInterval: 0,
RetryJoin: []string{},
},
},
......@@ -279,8 +270,7 @@ func TestRetryJoin_Validate(t *testing.T) {
},
StartJoin: []string{},
RetryMaxAttempts: 0,
RetryInterval: "3s",
retryInterval: time.Duration(3),
RetryInterval: 3 * time.Second,
RetryJoin: []string{},
},
},
......@@ -333,51 +323,26 @@ func TestRetryJoin_Validate(t *testing.T) {
Server: &ServerConfig{
ServerJoin: &ServerJoin{
RetryJoin: []string{"127.0.0.1"},
RetryMaxAttempts: 0,
RetryInterval: 0,
RetryMaxAttempts: 1,
RetryInterval: 1,
StartJoin: []string{},
},
StartJoin: []string{},
RetryMaxAttempts: 0,
RetryInterval: "30s",
RetryJoin: []string{},
},
},
isValid: true,
reason: "server server_join should be valid",
},
{
config: &Config{
Server: &ServerConfig{
StartJoin: []string{"127.0.0.1"},
RetryMaxAttempts: 1,
RetryInterval: "0",
RetryJoin: []string{},
},
},
isValid: true,
reason: "server deprecated retry_join configuration should be valid",
},
{
config: &Config{
Server: &ServerConfig{
RetryInterval: "30s",
ServerJoin: &ServerJoin{
RetryJoin: []string{"127.0.0.1"},
RetryMaxAttempts: 0,
RetryInterval: time.Duration(20) * time.Second,
StartJoin: []string{},
},
},
},
isValid: true,
reason: "ignore default value for retry interval",
},
}
joiner := retryJoiner{}
for _, scenario := range scenarios {
err := joiner.Validate(scenario.config)
require.Equal(err == nil, scenario.isValid, scenario.reason)
t.Run(scenario.reason, func(t *testing.T) {
err := joiner.Validate(scenario.config)
if scenario.isValid {
require.NoError(t, err)
} else {
require.Error(t, err)
}
})
}
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment