From f36cffe0bdc6f2ad6ce47a243b9f62d33407bbcc Mon Sep 17 00:00:00 2001
From: Aaron Lehmann
Date: Tue, 23 May 2017 14:27:31 -0700
Subject: [PATCH] cluster: Only pass a join address when in the process of
 joining a cluster

This code currently passes a random manager address when creating a new
Node. This doesn't really make sense - we should only pass a join
address on the initial join, or when retrying that join.

An upcoming change to swarmkit will treat JoinAddr as significant when a
node is already part of a cluster, so passing in the random value needs
to be avoided.

Signed-off-by: Aaron Lehmann
Upstream-commit: 24477e70040019ca421ec1031dc553dc780c02f1
Component: engine
---
 .../engine/daemon/cluster/noderunner.go | 29 ++++++++++++-------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/components/engine/daemon/cluster/noderunner.go b/components/engine/daemon/cluster/noderunner.go
index 2ec13b4639..c0c7529ed9 100644
--- a/components/engine/daemon/cluster/noderunner.go
+++ b/components/engine/daemon/cluster/noderunner.go
@@ -50,6 +50,9 @@ type nodeStartConfig struct {
 	AdvertiseAddr string
 	// DataPathAddr is the address that has to be used for the data path
 	DataPathAddr string
+	// JoinInProgress is set to true if a join operation has started, but
+	// not completed yet.
+	JoinInProgress bool
 
 	joinAddr        string
 	forceNewCluster bool
@@ -98,6 +101,13 @@ func (n *nodeRunner) start(conf nodeStartConfig) error {
 		control = filepath.Join(n.cluster.runtimeRoot, controlSocket)
 	}
 
+	joinAddr := conf.joinAddr
+	if joinAddr == "" && conf.JoinInProgress {
+		// We must have been restarted while trying to join a cluster.
+		// Continue trying to join instead of forming our own cluster.
+		joinAddr = conf.RemoteAddr
+	}
+
 	// Hostname is not set here. Instead, it is obtained from
 	// the node description that is reported periodically
 	swarmnodeConfig := swarmnode.Config{
@@ -105,7 +115,7 @@ func (n *nodeRunner) start(conf nodeStartConfig) error {
 		ListenControlAPI:   control,
 		ListenRemoteAPI:    conf.ListenAddr,
 		AdvertiseRemoteAPI: conf.AdvertiseAddr,
-		JoinAddr:           conf.joinAddr,
+		JoinAddr:           joinAddr,
 		StateDir:           n.cluster.root,
 		JoinToken:          conf.joinToken,
 		Executor:           container.NewExecutor(n.cluster.config.Backend),
@@ -133,6 +143,9 @@ func (n *nodeRunner) start(conf nodeStartConfig) error {
 	n.done = make(chan struct{})
 	n.ready = make(chan struct{})
 	n.swarmNode = node
+	if conf.joinAddr != "" {
+		conf.JoinInProgress = true
+	}
 	n.config = conf
 	savePersistentState(n.cluster.root, conf)
 
@@ -216,6 +229,10 @@ func (n *nodeRunner) handleReadyEvent(ctx context.Context, node *swarmnode.Node,
 	case <-node.Ready():
 		n.mu.Lock()
 		n.err = nil
+		if n.config.JoinInProgress {
+			n.config.JoinInProgress = false
+			savePersistentState(n.cluster.root, n.config)
+		}
 		n.mu.Unlock()
 		close(ready)
 	case <-ctx.Done():
@@ -306,7 +323,6 @@ func (n *nodeRunner) enableReconnectWatcher() {
 	delayCtx, cancel := context.WithTimeout(context.Background(), n.reconnectDelay)
 	n.cancelReconnect = cancel
 
-	config := n.config
 	go func() {
 		<-delayCtx.Done()
 		if delayCtx.Err() != context.DeadlineExceeded {
@@ -317,15 +333,8 @@ func (n *nodeRunner) enableReconnectWatcher() {
 		if n.stopping {
 			return
 		}
-		remotes := n.cluster.getRemoteAddressList()
-		if len(remotes) > 0 {
-			config.RemoteAddr = remotes[0]
-		} else {
-			config.RemoteAddr = ""
-		}
-		config.joinAddr = config.RemoteAddr
 
-		if err := n.start(config); err != nil {
+		if err := n.start(n.config); err != nil {
 			n.err = err
 		}
 	}()
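
A minimal, self-contained Go sketch of the join-address selection this patch introduces. It is illustrative only: startConfig and effectiveJoinAddr are made-up names for this sketch, not the daemon's actual nodeStartConfig or nodeRunner API.

// join_addr_sketch.go - illustrative sketch, not part of the patch.
package main

import "fmt"

// startConfig mirrors just the fields this patch cares about; the real
// nodeStartConfig in daemon/cluster/noderunner.go has many more.
type startConfig struct {
	RemoteAddr     string
	joinAddr       string
	JoinInProgress bool
}

// effectiveJoinAddr reproduces the decision added to (*nodeRunner).start:
// use an explicit join address if one was given, fall back to the recorded
// remote address only while a join is still in progress, and otherwise
// pass no join address at all.
func effectiveJoinAddr(conf startConfig) string {
	joinAddr := conf.joinAddr
	if joinAddr == "" && conf.JoinInProgress {
		// Restarted while trying to join: keep trying to join instead
		// of forming a new single-node cluster.
		joinAddr = conf.RemoteAddr
	}
	return joinAddr
}

func main() {
	// Initial join: the explicit address wins.
	fmt.Println(effectiveJoinAddr(startConfig{joinAddr: "10.0.0.2:2377"}))
	// Restarted mid-join: fall back to the persisted remote address.
	fmt.Println(effectiveJoinAddr(startConfig{RemoteAddr: "10.0.0.2:2377", JoinInProgress: true}))
	// Already part of a cluster: empty string, i.e. no JoinAddr is passed.
	fmt.Printf("%q\n", effectiveJoinAddr(startConfig{RemoteAddr: "10.0.0.2:2377"}))
}

The point of persisting JoinInProgress is that a daemon restarted partway through a join keeps retrying the recorded remote address, while a node whose join has already completed passes no join address at all instead of a random manager address.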