From f36cffe0bdc6f2ad6ce47a243b9f62d33407bbcc Mon Sep 17 00:00:00 2001
From: Aaron Lehmann
Date: Tue, 23 May 2017 14:27:31 -0700
Subject: [PATCH] cluster: Only pass a join address when in the process of
 joining a cluster

This code currently passes a random manager address when creating a new
Node. This doesn't really make sense - we should only pass a join
address on the initial join, or when retrying that join.

An upcoming change to swarmkit will treat JoinAddr as significant when a
node is already part of a cluster, so passing in the random value needs
to be avoided.

Signed-off-by: Aaron Lehmann
Upstream-commit: 24477e70040019ca421ec1031dc553dc780c02f1
Component: engine
---
 .../engine/daemon/cluster/noderunner.go | 29 ++++++++++++-------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/components/engine/daemon/cluster/noderunner.go b/components/engine/daemon/cluster/noderunner.go
index 2ec13b4639..c0c7529ed9 100644
--- a/components/engine/daemon/cluster/noderunner.go
+++ b/components/engine/daemon/cluster/noderunner.go
@@ -50,6 +50,9 @@ type nodeStartConfig struct {
 	AdvertiseAddr string
 	// DataPathAddr is the address that has to be used for the data path
 	DataPathAddr string
+	// JoinInProgress is set to true if a join operation has started, but
+	// not completed yet.
+	JoinInProgress bool
 
 	joinAddr        string
 	forceNewCluster bool
@@ -98,6 +101,13 @@ func (n *nodeRunner) start(conf nodeStartConfig) error {
 		control = filepath.Join(n.cluster.runtimeRoot, controlSocket)
 	}
 
+	joinAddr := conf.joinAddr
+	if joinAddr == "" && conf.JoinInProgress {
+		// We must have been restarted while trying to join a cluster.
+		// Continue trying to join instead of forming our own cluster.
+		joinAddr = conf.RemoteAddr
+	}
+
 	// Hostname is not set here. Instead, it is obtained from
 	// the node description that is reported periodically
 	swarmnodeConfig := swarmnode.Config{
@@ -105,7 +115,7 @@ func (n *nodeRunner) start(conf nodeStartConfig) error {
 		ListenControlAPI:   control,
 		ListenRemoteAPI:    conf.ListenAddr,
 		AdvertiseRemoteAPI: conf.AdvertiseAddr,
-		JoinAddr:           conf.joinAddr,
+		JoinAddr:           joinAddr,
 		StateDir:           n.cluster.root,
 		JoinToken:          conf.joinToken,
 		Executor:           container.NewExecutor(n.cluster.config.Backend),
@@ -133,6 +143,9 @@ func (n *nodeRunner) start(conf nodeStartConfig) error {
 	n.done = make(chan struct{})
 	n.ready = make(chan struct{})
 	n.swarmNode = node
+	if conf.joinAddr != "" {
+		conf.JoinInProgress = true
+	}
 	n.config = conf
 	savePersistentState(n.cluster.root, conf)
 
@@ -216,6 +229,10 @@ func (n *nodeRunner) handleReadyEvent(ctx context.Context, node *swarmnode.Node,
 	case <-node.Ready():
 		n.mu.Lock()
 		n.err = nil
+		if n.config.JoinInProgress {
+			n.config.JoinInProgress = false
+			savePersistentState(n.cluster.root, n.config)
+		}
 		n.mu.Unlock()
 		close(ready)
 	case <-ctx.Done():
@@ -306,7 +323,6 @@ func (n *nodeRunner) enableReconnectWatcher() {
 	delayCtx, cancel := context.WithTimeout(context.Background(), n.reconnectDelay)
 	n.cancelReconnect = cancel
 
-	config := n.config
 	go func() {
 		<-delayCtx.Done()
 		if delayCtx.Err() != context.DeadlineExceeded {
@@ -317,15 +333,8 @@ func (n *nodeRunner) enableReconnectWatcher() {
 		if n.stopping {
 			return
 		}
-		remotes := n.cluster.getRemoteAddressList()
-		if len(remotes) > 0 {
-			config.RemoteAddr = remotes[0]
-		} else {
-			config.RemoteAddr = ""
-		}
-		config.joinAddr = config.RemoteAddr
 
-		if err := n.start(config); err != nil {
+		if err := n.start(n.config); err != nil {
 			n.err = err
 		}
 	}()
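
A minimal, self-contained Go sketch of the join-address selection this patch introduces. It is illustrative only: startConfig and effectiveJoinAddr are made-up names for this sketch, not the daemon's actual nodeStartConfig or nodeRunner API.

// join_addr_sketch.go - illustrative sketch, not part of the patch.
package main

import "fmt"

// startConfig mirrors just the fields this patch cares about; the real
// nodeStartConfig in daemon/cluster/noderunner.go has many more.
type startConfig struct {
	RemoteAddr     string
	joinAddr       string
	JoinInProgress bool
}

// effectiveJoinAddr reproduces the decision added to (*nodeRunner).start:
// use an explicit join address if one was given, fall back to the recorded
// remote address only while a join is still in progress, and otherwise
// pass no join address at all.
func effectiveJoinAddr(conf startConfig) string {
	joinAddr := conf.joinAddr
	if joinAddr == "" && conf.JoinInProgress {
		// Restarted while trying to join: keep trying to join instead
		// of forming a new single-node cluster.
		joinAddr = conf.RemoteAddr
	}
	return joinAddr
}

func main() {
	// Initial join: the explicit address wins.
	fmt.Println(effectiveJoinAddr(startConfig{joinAddr: "10.0.0.2:2377"}))
	// Restarted mid-join: fall back to the persisted remote address.
	fmt.Println(effectiveJoinAddr(startConfig{RemoteAddr: "10.0.0.2:2377", JoinInProgress: true}))
	// Already part of a cluster: empty string, i.e. no JoinAddr is passed.
	fmt.Printf("%q\n", effectiveJoinAddr(startConfig{RemoteAddr: "10.0.0.2:2377"}))
}

The point of persisting JoinInProgress is that a daemon restarted partway through a join keeps retrying the recorded remote address, while a node whose join has already completed passes no join address at all instead of a random manager address.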