From 80b75da5582cb61ae035ae6645d60965a00e88a6 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 25 Oct 2018 11:47:56 -0700 Subject: [PATCH] integration-cli/Test*Swarm*: use same args on restart When starting docker daemons for swarm testing, we disable iptables and use lo for communication (in order to avoid network conflicts). The problem is, these options are lost on restart, which can lead to all sorts of network conflicts and thus connectivity issues between swarm nodes. Fix this. This does not fix issues with swarm test failures, but they seem to appear less often after this one. Signed-off-by: Kir Kolyshkin (cherry picked from commit 2ed512c7faea938b0b07e69187b8a132e2ecb66a) Signed-off-by: Sebastiaan van Stijn Upstream-commit: 553b09684cb68962403ebdc5495fb548364f778f Component: engine --- .../engine/integration-cli/check_test.go | 2 +- .../docker_api_swarm_node_test.go | 2 +- .../integration-cli/docker_api_swarm_test.go | 22 ++++++------- .../integration-cli/docker_cli_swarm_test.go | 32 +++++++++---------- .../engine/internal/test/daemon/swarm.go | 32 +++++++++++++------ 5 files changed, 49 insertions(+), 41 deletions(-) diff --git a/components/engine/integration-cli/check_test.go b/components/engine/integration-cli/check_test.go index 2282967ee5..10fe4e7646 100644 --- a/components/engine/integration-cli/check_test.go +++ b/components/engine/integration-cli/check_test.go @@ -333,7 +333,7 @@ func (s *DockerSwarmSuite) AddDaemon(c *check.C, joinSwarm, manager bool) *daemo d.StartAndSwarmInit(c) } } else { - d.StartWithBusybox(c, "--iptables=false", "--swarm-default-advertise-addr=lo") + d.StartNode(c) } s.portIndex++ diff --git a/components/engine/integration-cli/docker_api_swarm_node_test.go b/components/engine/integration-cli/docker_api_swarm_node_test.go index 191391620d..30c2285463 100644 --- a/components/engine/integration-cli/docker_api_swarm_node_test.go +++ b/components/engine/integration-cli/docker_api_swarm_node_test.go @@ -62,7 +62,7 @@ 
func (s *DockerSwarmSuite) TestAPISwarmNodeRemove(c *check.C) { c.Assert(len(nodes), checker.Equals, 2, check.Commentf("nodes: %#v", nodes)) // Restart the node that was removed - d2.Restart(c) + d2.RestartNode(c) // Give some time for the node to rejoin time.Sleep(1 * time.Second) diff --git a/components/engine/integration-cli/docker_api_swarm_test.go b/components/engine/integration-cli/docker_api_swarm_test.go index 83cf25e08b..fecbf1e860 100644 --- a/components/engine/integration-cli/docker_api_swarm_test.go +++ b/components/engine/integration-cli/docker_api_swarm_test.go @@ -67,8 +67,8 @@ func (s *DockerSwarmSuite) TestAPISwarmInit(c *check.C) { d1.Stop(c) d2.Stop(c) - d1.Start(c) - d2.Start(c) + d1.StartNode(c) + d2.StartNode(c) info = d1.SwarmInfo(c) c.Assert(info.ControlAvailable, checker.True) @@ -350,7 +350,7 @@ func (s *DockerSwarmSuite) TestAPISwarmLeaderElection(c *check.C) { stableleader := leader // add the d1, the initial leader, back - d1.Start(c) + d1.StartNode(c) // wait for possible election c.Logf("Waiting for possible election...") @@ -401,7 +401,7 @@ func (s *DockerSwarmSuite) TestAPISwarmRaftQuorum(c *check.C) { return err.Error(), nil }, checker.Contains, "Make sure more than half of the managers are online.") - d2.Start(c) + d2.StartNode(c) // make sure there is a leader waitAndAssert(c, defaultReconciliationTimeout, d1.CheckLeader, checker.IsNil) @@ -477,8 +477,7 @@ func (s *DockerSwarmSuite) TestAPISwarmRestoreOnPendingJoin(c *check.C) { waitAndAssert(c, defaultReconciliationTimeout, d.CheckLocalNodeState, checker.Equals, swarm.LocalNodeStatePending) - d.Stop(c) - d.Start(c) + d.RestartNode(c) info := d.SwarmInfo(c) c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive) @@ -491,26 +490,23 @@ func (s *DockerSwarmSuite) TestAPISwarmManagerRestore(c *check.C) { id := d1.CreateService(c, simpleTestService, setInstances(instances)) d1.GetService(c, id) - d1.Stop(c) - d1.Start(c) + d1.RestartNode(c) d1.GetService(c, id) d2 
:= s.AddDaemon(c, true, true) d2.GetService(c, id) - d2.Stop(c) - d2.Start(c) + d2.RestartNode(c) d2.GetService(c, id) d3 := s.AddDaemon(c, true, true) d3.GetService(c, id) - d3.Stop(c) - d3.Start(c) + d3.RestartNode(c) d3.GetService(c, id) err := d3.Kill() assert.NilError(c, err) time.Sleep(1 * time.Second) // time to handle signal - d3.Start(c) + d3.StartNode(c) d3.GetService(c, id) } diff --git a/components/engine/integration-cli/docker_cli_swarm_test.go b/components/engine/integration-cli/docker_cli_swarm_test.go index 30a769b68d..3f307d7b9e 100644 --- a/components/engine/integration-cli/docker_cli_swarm_test.go +++ b/components/engine/integration-cli/docker_cli_swarm_test.go @@ -164,7 +164,7 @@ func (s *DockerSwarmSuite) TestSwarmIncompatibleDaemon(c *check.C) { c.Assert(err, checker.IsNil) c.Assert(string(content), checker.Contains, "--live-restore daemon configuration is incompatible with swarm mode") // restart for teardown - d.Start(c) + d.StartNode(c) } func (s *DockerSwarmSuite) TestSwarmServiceTemplatingHostname(c *check.C) { @@ -331,7 +331,7 @@ func (s *DockerSwarmSuite) TestSwarmContainerAutoStart(c *check.C) { c.Assert(err, checker.IsNil, check.Commentf("%s", out)) c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "") - d.Restart(c) + d.RestartNode(c) out, err = d.Cmd("ps", "-q") c.Assert(err, checker.IsNil, check.Commentf("%s", out)) @@ -1013,7 +1013,7 @@ func checkSwarmLockedToUnlocked(c *check.C, d *daemon.Daemon) { // Wait for the PEM file to become unencrypted waitAndAssert(c, defaultReconciliationTimeout, checkKeyIsEncrypted(d), checker.Equals, false) - d.Restart(c) + d.RestartNode(c) c.Assert(getNodeStatus(c, d), checker.Equals, swarm.LocalNodeStateActive) } @@ -1021,7 +1021,7 @@ func checkSwarmUnlockedToLocked(c *check.C, d *daemon.Daemon) { // Wait for the PEM file to become encrypted waitAndAssert(c, defaultReconciliationTimeout, checkKeyIsEncrypted(d), checker.Equals, true) - d.Restart(c) + d.RestartNode(c) 
c.Assert(getNodeStatus(c, d), checker.Equals, swarm.LocalNodeStateLocked) } @@ -1060,7 +1060,7 @@ func (s *DockerSwarmSuite) TestSwarmInitLocked(c *check.C) { c.Assert(getNodeStatus(c, d), checker.Equals, swarm.LocalNodeStateActive) // It starts off locked - d.Restart(c) + d.RestartNode(c) c.Assert(getNodeStatus(c, d), checker.Equals, swarm.LocalNodeStateLocked) cmd := d.Command("swarm", "unlock") @@ -1099,7 +1099,7 @@ func (s *DockerSwarmSuite) TestSwarmLeaveLocked(c *check.C) { c.Assert(err, checker.IsNil, check.Commentf("%s", outs)) // It starts off locked - d.Restart(c, "--swarm-default-advertise-addr=lo") + d.RestartNode(c) info := d.SwarmInfo(c) c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateLocked) @@ -1131,7 +1131,7 @@ func (s *DockerSwarmSuite) TestSwarmLockUnlockCluster(c *check.C) { d3 := s.AddDaemon(c, true, true) // they start off unlocked - d2.Restart(c) + d2.RestartNode(c) c.Assert(getNodeStatus(c, d2), checker.Equals, swarm.LocalNodeStateActive) // stop this one so it does not get autolock info @@ -1153,7 +1153,7 @@ func (s *DockerSwarmSuite) TestSwarmLockUnlockCluster(c *check.C) { } // d2 never got the cluster update, so it is still set to unlocked - d2.Start(c) + d2.StartNode(c) c.Assert(getNodeStatus(c, d2), checker.Equals, swarm.LocalNodeStateActive) // d2 is now set to lock @@ -1182,7 +1182,7 @@ func (s *DockerSwarmSuite) TestSwarmLockUnlockCluster(c *check.C) { // managers who join now are never set to locked in the first place d4 := s.AddDaemon(c, true, true) - d4.Restart(c) + d4.RestartNode(c) c.Assert(getNodeStatus(c, d4), checker.Equals, swarm.LocalNodeStateActive) } @@ -1196,7 +1196,7 @@ func (s *DockerSwarmSuite) TestSwarmJoinPromoteLocked(c *check.C) { // joined workers start off unlocked d2 := s.AddDaemon(c, true, false) - d2.Restart(c) + d2.RestartNode(c) c.Assert(getNodeStatus(c, d2), checker.Equals, swarm.LocalNodeStateActive) // promote worker @@ -1241,7 +1241,7 @@ func (s *DockerSwarmSuite) 
TestSwarmJoinPromoteLocked(c *check.C) { }, checker.Equals, "swarm-worker") // by now, it should *never* be locked on restart - d3.Restart(c) + d3.RestartNode(c) c.Assert(getNodeStatus(c, d3), checker.Equals, swarm.LocalNodeStateActive) } @@ -1261,7 +1261,7 @@ func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *check.C) { c.Assert(newUnlockKey, checker.Not(checker.Equals), "") c.Assert(newUnlockKey, checker.Not(checker.Equals), unlockKey) - d.Restart(c) + d.RestartNode(c) c.Assert(getNodeStatus(c, d), checker.Equals, swarm.LocalNodeStateLocked) outs, _ = d.Cmd("node", "ls") @@ -1282,7 +1282,7 @@ func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *check.C) { time.Sleep(3 * time.Second) - d.Restart(c) + d.RestartNode(c) cmd = d.Command("swarm", "unlock") cmd.Stdin = bytes.NewBufferString(unlockKey) @@ -1338,8 +1338,8 @@ func (s *DockerSwarmSuite) TestSwarmClusterRotateUnlockKey(c *check.C) { c.Assert(newUnlockKey, checker.Not(checker.Equals), "") c.Assert(newUnlockKey, checker.Not(checker.Equals), unlockKey) - d2.Restart(c) - d3.Restart(c) + d2.RestartNode(c) + d3.RestartNode(c) for _, d := range []*daemon.Daemon{d2, d3} { c.Assert(getNodeStatus(c, d), checker.Equals, swarm.LocalNodeStateLocked) @@ -1362,7 +1362,7 @@ func (s *DockerSwarmSuite) TestSwarmClusterRotateUnlockKey(c *check.C) { time.Sleep(3 * time.Second) - d.Restart(c) + d.RestartNode(c) cmd = d.Command("swarm", "unlock") cmd.Stdin = bytes.NewBufferString(unlockKey) diff --git a/components/engine/internal/test/daemon/swarm.go b/components/engine/internal/test/daemon/swarm.go index 4631222fcd..c526d3eca9 100644 --- a/components/engine/internal/test/daemon/swarm.go +++ b/components/engine/internal/test/daemon/swarm.go @@ -16,26 +16,38 @@ const ( defaultSwarmListenAddr = "0.0.0.0" ) -// StartAndSwarmInit starts the daemon (with busybox) and init the swarm -func (d *Daemon) StartAndSwarmInit(t testingT) { +var ( + startArgs = []string{"--iptables=false", "--swarm-default-advertise-addr=lo"} +) + +// 
StartNode starts daemon to be used as a swarm node +func (d *Daemon) StartNode(t testingT) { if ht, ok := t.(test.HelperT); ok { ht.Helper() } // avoid networking conflicts - args := []string{"--iptables=false", "--swarm-default-advertise-addr=lo"} - d.StartWithBusybox(t, args...) + d.StartWithBusybox(t, startArgs...) +} +// RestartNode restarts a daemon to be used as a swarm node +func (d *Daemon) RestartNode(t testingT) { + if ht, ok := t.(test.HelperT); ok { + ht.Helper() + } + // avoid networking conflicts + d.Stop(t) + d.StartWithBusybox(t, startArgs...) +} + +// StartAndSwarmInit starts the daemon (with busybox) and init the swarm +func (d *Daemon) StartAndSwarmInit(t testingT) { + d.StartNode(t) d.SwarmInit(t, swarm.InitRequest{}) } // StartAndSwarmJoin starts the daemon (with busybox) and join the specified swarm as worker or manager func (d *Daemon) StartAndSwarmJoin(t testingT, leader *Daemon, manager bool) { - if ht, ok := t.(test.HelperT); ok { - ht.Helper() - } - // avoid networking conflicts - args := []string{"--iptables=false", "--swarm-default-advertise-addr=lo"} - d.StartWithBusybox(t, args...) + d.StartNode(t) tokens := leader.JoinTokens(t) token := tokens.Worker