From fe42bb3c566cde85a4ce6752fcd301c7e614e83f Mon Sep 17 00:00:00 2001 From: allencloud Date: Sun, 12 Feb 2017 02:53:03 +0800 Subject: [PATCH] create a new file swarm.go and move swarm part code from cluster.go into swarm.go Signed-off-by: allencloud Upstream-commit: 75a315d43e2577768ba5e0ef071ed9631e43ce8b Component: engine --- components/engine/daemon/cluster/cluster.go | 547 ------------------- components/engine/daemon/cluster/swarm.go | 562 ++++++++++++++++++++ 2 files changed, 562 insertions(+), 547 deletions(-) create mode 100644 components/engine/daemon/cluster/swarm.go diff --git a/components/engine/daemon/cluster/cluster.go b/components/engine/daemon/cluster/cluster.go index 21d639f78e..8d8aa0ac31 100644 --- a/components/engine/daemon/cluster/cluster.go +++ b/components/engine/daemon/cluster/cluster.go @@ -44,22 +44,15 @@ import ( "net" "os" "path/filepath" - "strings" "sync" "time" "github.com/Sirupsen/logrus" - apierrors "github.com/docker/docker/api/errors" - apitypes "github.com/docker/docker/api/types" - "github.com/docker/docker/api/types/filters" "github.com/docker/docker/api/types/network" types "github.com/docker/docker/api/types/swarm" - "github.com/docker/docker/daemon/cluster/convert" executorpkg "github.com/docker/docker/daemon/cluster/executor" - "github.com/docker/docker/opts" "github.com/docker/docker/pkg/signal" swarmapi "github.com/docker/swarmkit/api" - "github.com/docker/swarmkit/manager/encryption" swarmnode "github.com/docker/swarmkit/node" "github.com/pkg/errors" "golang.org/x/net/context" @@ -237,408 +230,10 @@ func (c *Cluster) newNodeRunner(conf nodeStartConfig) (*nodeRunner, error) { return nr, nil } -// Init initializes new cluster from user provided request. -func (c *Cluster) Init(req types.InitRequest) (string, error) { - c.controlMutex.Lock() - defer c.controlMutex.Unlock() - c.mu.Lock() - if c.nr != nil { - if req.ForceNewCluster { - if err := c.nr.Stop(); err != nil { - c.mu.Unlock() - return "", err - } - } else { - c.mu.Unlock() - return "", errSwarmExists - } - } - c.mu.Unlock() - - if err := validateAndSanitizeInitRequest(&req); err != nil { - return "", apierrors.NewBadRequestError(err) - } - - listenHost, listenPort, err := resolveListenAddr(req.ListenAddr) - if err != nil { - return "", err - } - - advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort) - if err != nil { - return "", err - } - - localAddr := listenHost - - // If the local address is undetermined, the advertise address - // will be used as local address, if it belongs to this system. - // If the advertise address is not local, then we try to find - // a system address to use as local address. If this fails, - // we give up and ask the user to pass the listen address. 
- if net.ParseIP(localAddr).IsUnspecified() { - advertiseIP := net.ParseIP(advertiseHost) - - found := false - for _, systemIP := range listSystemIPs() { - if systemIP.Equal(advertiseIP) { - localAddr = advertiseIP.String() - found = true - break - } - } - - if !found { - ip, err := c.resolveSystemAddr() - if err != nil { - logrus.Warnf("Could not find a local address: %v", err) - return "", errMustSpecifyListenAddr - } - localAddr = ip.String() - } - } - - if !req.ForceNewCluster { - clearPersistentState(c.root) - } - - nr, err := c.newNodeRunner(nodeStartConfig{ - forceNewCluster: req.ForceNewCluster, - autolock: req.AutoLockManagers, - LocalAddr: localAddr, - ListenAddr: net.JoinHostPort(listenHost, listenPort), - AdvertiseAddr: net.JoinHostPort(advertiseHost, advertisePort), - availability: req.Availability, - }) - if err != nil { - return "", err - } - c.mu.Lock() - c.nr = nr - c.mu.Unlock() - - if err := <-nr.Ready(); err != nil { - if !req.ForceNewCluster { // if failure on first attempt don't keep state - if err := clearPersistentState(c.root); err != nil { - return "", err - } - } - if err != nil { - c.mu.Lock() - c.nr = nil - c.mu.Unlock() - } - return "", err - } - state := nr.State() - if state.swarmNode == nil { // should never happen but protect from panic - return "", errors.New("invalid cluster state for spec initialization") - } - if err := initClusterSpec(state.swarmNode, req.Spec); err != nil { - return "", err - } - return state.NodeID(), nil -} - -// Join makes current Cluster part of an existing swarm cluster. -func (c *Cluster) Join(req types.JoinRequest) error { - c.controlMutex.Lock() - defer c.controlMutex.Unlock() - c.mu.Lock() - if c.nr != nil { - c.mu.Unlock() - return errSwarmExists - } - c.mu.Unlock() - - if err := validateAndSanitizeJoinRequest(&req); err != nil { - return apierrors.NewBadRequestError(err) - } - - listenHost, listenPort, err := resolveListenAddr(req.ListenAddr) - if err != nil { - return err - } - - var advertiseAddr string - if req.AdvertiseAddr != "" { - advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort) - // For joining, we don't need to provide an advertise address, - // since the remote side can detect it. - if err == nil { - advertiseAddr = net.JoinHostPort(advertiseHost, advertisePort) - } - } - - clearPersistentState(c.root) - - nr, err := c.newNodeRunner(nodeStartConfig{ - RemoteAddr: req.RemoteAddrs[0], - ListenAddr: net.JoinHostPort(listenHost, listenPort), - AdvertiseAddr: advertiseAddr, - joinAddr: req.RemoteAddrs[0], - joinToken: req.JoinToken, - availability: req.Availability, - }) - if err != nil { - return err - } - - c.mu.Lock() - c.nr = nr - c.mu.Unlock() - - select { - case <-time.After(swarmConnectTimeout): - return errSwarmJoinTimeoutReached - case err := <-nr.Ready(): - if err != nil { - c.mu.Lock() - c.nr = nil - c.mu.Unlock() - } - return err - } -} - -// GetUnlockKey returns the unlock key for the swarm. 
-func (c *Cluster) GetUnlockKey() (string, error) { - c.mu.RLock() - defer c.mu.RUnlock() - - state := c.currentNodeState() - if !state.IsActiveManager() { - return "", c.errNoManager(state) - } - - ctx, cancel := c.getRequestContext() - defer cancel() - - client := swarmapi.NewCAClient(state.grpcConn) - - r, err := client.GetUnlockKey(ctx, &swarmapi.GetUnlockKeyRequest{}) - if err != nil { - return "", err - } - - if len(r.UnlockKey) == 0 { - // no key - return "", nil - } - - return encryption.HumanReadableKey(r.UnlockKey), nil -} - -// UnlockSwarm provides a key to decrypt data that is encrypted at rest. -func (c *Cluster) UnlockSwarm(req types.UnlockRequest) error { - c.controlMutex.Lock() - defer c.controlMutex.Unlock() - - c.mu.RLock() - state := c.currentNodeState() - - if !state.IsActiveManager() { - // when manager is not active, - // unless it is locked, otherwise return error. - if err := c.errNoManager(state); err != errSwarmLocked { - c.mu.RUnlock() - return err - } - } else { - // when manager is active, return an error of "not locked" - c.mu.RUnlock() - return errors.New("swarm is not locked") - } - - // only when swarm is locked, code running reaches here - nr := c.nr - c.mu.RUnlock() - - key, err := encryption.ParseHumanReadableKey(req.UnlockKey) - if err != nil { - return err - } - - config := nr.config - config.lockKey = key - if err := nr.Stop(); err != nil { - return err - } - nr, err = c.newNodeRunner(config) - if err != nil { - return err - } - - c.mu.Lock() - c.nr = nr - c.mu.Unlock() - - if err := <-nr.Ready(); err != nil { - if errors.Cause(err) == errSwarmLocked { - return errors.New("swarm could not be unlocked: invalid key provided") - } - return fmt.Errorf("swarm component could not be started: %v", err) - } - return nil -} - -// Leave shuts down Cluster and removes current state. -func (c *Cluster) Leave(force bool) error { - c.controlMutex.Lock() - defer c.controlMutex.Unlock() - - c.mu.Lock() - nr := c.nr - if nr == nil { - c.mu.Unlock() - return errNoSwarm - } - - state := c.currentNodeState() - - if errors.Cause(state.err) == errSwarmLocked && !force { - // leave a locked swarm without --force is not allowed - c.mu.Unlock() - return errors.New("Swarm is encrypted and locked. Please unlock it first or use `--force` to ignore this message.") - } - - if state.IsManager() && !force { - msg := "You are attempting to leave the swarm on a node that is participating as a manager. " - if state.IsActiveManager() { - active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID()) - if err == nil { - if active && removingManagerCausesLossOfQuorum(reachable, unreachable) { - if isLastManager(reachable, unreachable) { - msg += "Removing the last manager erases all current state of the swarm. Use `--force` to ignore this message. " - c.mu.Unlock() - return errors.New(msg) - } - msg += fmt.Sprintf("Removing this node leaves %v managers out of %v. Without a Raft quorum your swarm will be inaccessible. ", reachable-1, reachable+unreachable) - } - } - } else { - msg += "Doing so may lose the consensus of your cluster. " - } - - msg += "The only way to restore a swarm that has lost consensus is to reinitialize it with `--force-new-cluster`. Use `--force` to suppress this message." 
- c.mu.Unlock() - return errors.New(msg) - } - // release readers in here - if err := nr.Stop(); err != nil { - logrus.Errorf("failed to shut down cluster node: %v", err) - signal.DumpStacks("") - c.mu.Unlock() - return err - } - c.nr = nil - c.mu.Unlock() - if nodeID := state.NodeID(); nodeID != "" { - nodeContainers, err := c.listContainerForNode(nodeID) - if err != nil { - return err - } - for _, id := range nodeContainers { - if err := c.config.Backend.ContainerRm(id, &apitypes.ContainerRmConfig{ForceRemove: true}); err != nil { - logrus.Errorf("error removing %v: %v", id, err) - } - } - } - - c.configEvent <- struct{}{} - // todo: cleanup optional? - if err := clearPersistentState(c.root); err != nil { - return err - } - c.config.Backend.DaemonLeavesCluster() - return nil -} - -func (c *Cluster) listContainerForNode(nodeID string) ([]string, error) { - var ids []string - filters := filters.NewArgs() - filters.Add("label", fmt.Sprintf("com.docker.swarm.node.id=%s", nodeID)) - containers, err := c.config.Backend.Containers(&apitypes.ContainerListOptions{ - Filters: filters, - }) - if err != nil { - return []string{}, err - } - for _, c := range containers { - ids = append(ids, c.ID) - } - return ids, nil -} - func (c *Cluster) getRequestContext() (context.Context, func()) { // TODO: not needed when requests don't block on qourum lost return context.WithTimeout(context.Background(), swarmRequestTimeout) } -// Inspect retrieves the configuration properties of a managed swarm cluster. -func (c *Cluster) Inspect() (types.Swarm, error) { - c.mu.RLock() - defer c.mu.RUnlock() - - state := c.currentNodeState() - if !state.IsActiveManager() { - return types.Swarm{}, c.errNoManager(state) - } - - ctx, cancel := c.getRequestContext() - defer cancel() - - swarm, err := getSwarm(ctx, state.controlClient) - if err != nil { - return types.Swarm{}, err - } - - return convert.SwarmFromGRPC(*swarm), nil -} - -// Update updates configuration of a managed swarm cluster. -func (c *Cluster) Update(version uint64, spec types.Spec, flags types.UpdateFlags) error { - c.mu.RLock() - defer c.mu.RUnlock() - - state := c.currentNodeState() - if !state.IsActiveManager() { - return c.errNoManager(state) - } - - ctx, cancel := c.getRequestContext() - defer cancel() - - swarm, err := getSwarm(ctx, state.controlClient) - if err != nil { - return err - } - - // In update, client should provide the complete spec of the swarm, including - // Name and Labels. If a field is specified with 0 or nil, then the default value - // will be used to swarmkit. - clusterSpec, err := convert.SwarmSpecToGRPC(spec) - if err != nil { - return apierrors.NewBadRequestError(err) - } - - _, err = state.controlClient.UpdateCluster( - ctx, - &swarmapi.UpdateClusterRequest{ - ClusterID: swarm.ID, - Spec: &clusterSpec, - ClusterVersion: &swarmapi.Version{ - Index: version, - }, - Rotation: swarmapi.KeyRotation{ - WorkerJoinToken: flags.RotateWorkerToken, - ManagerJoinToken: flags.RotateManagerToken, - ManagerUnlockKey: flags.RotateManagerUnlockKey, - }, - }, - ) - return err -} - // IsManager returns true if Cluster is participating as a manager. func (c *Cluster) IsManager() bool { c.mu.RLock() @@ -711,55 +306,6 @@ func (c *Cluster) ListenClusterEvents() <-chan struct{} { return c.configEvent } -// Info returns information about the current cluster state. 
-func (c *Cluster) Info() types.Info { - info := types.Info{ - NodeAddr: c.GetAdvertiseAddress(), - } - c.mu.RLock() - defer c.mu.RUnlock() - - state := c.currentNodeState() - info.LocalNodeState = state.status - if state.err != nil { - info.Error = state.err.Error() - } - - ctx, cancel := c.getRequestContext() - defer cancel() - - if state.IsActiveManager() { - info.ControlAvailable = true - swarm, err := c.Inspect() - if err != nil { - info.Error = err.Error() - } - - // Strip JoinTokens - info.Cluster = swarm.ClusterInfo - - if r, err := state.controlClient.ListNodes(ctx, &swarmapi.ListNodesRequest{}); err != nil { - info.Error = err.Error() - } else { - info.Nodes = len(r.Nodes) - for _, n := range r.Nodes { - if n.ManagerStatus != nil { - info.Managers = info.Managers + 1 - } - } - } - } - - if state.swarmNode != nil { - for _, r := range state.swarmNode.Remotes() { - info.RemoteManagers = append(info.RemoteManagers, types.Peer{NodeID: r.NodeID, Addr: r.Addr}) - } - info.NodeID = state.swarmNode.NodeID() - } - - return info -} - // currentNodeState should not be called without a read lock func (c *Cluster) currentNodeState() nodeState { return c.nr.State() @@ -835,99 +381,6 @@ func managerStats(client swarmapi.ControlClient, currentNodeID string) (current return } -func validateAndSanitizeInitRequest(req *types.InitRequest) error { - var err error - req.ListenAddr, err = validateAddr(req.ListenAddr) - if err != nil { - return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err) - } - - if req.Spec.Annotations.Name == "" { - req.Spec.Annotations.Name = "default" - } else if req.Spec.Annotations.Name != "default" { - return errors.New(`swarm spec must be named "default"`) - } - - return nil -} - -func validateAndSanitizeJoinRequest(req *types.JoinRequest) error { - var err error - req.ListenAddr, err = validateAddr(req.ListenAddr) - if err != nil { - return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err) - } - if len(req.RemoteAddrs) == 0 { - return errors.New("at least 1 RemoteAddr is required to join") - } - for i := range req.RemoteAddrs { - req.RemoteAddrs[i], err = validateAddr(req.RemoteAddrs[i]) - if err != nil { - return fmt.Errorf("invalid remoteAddr %q: %v", req.RemoteAddrs[i], err) - } - } - return nil -} - -func validateAddr(addr string) (string, error) { - if addr == "" { - return addr, errors.New("invalid empty address") - } - newaddr, err := opts.ParseTCPAddr(addr, defaultAddr) - if err != nil { - return addr, nil - } - return strings.TrimPrefix(newaddr, "tcp://"), nil -} - -func initClusterSpec(node *swarmnode.Node, spec types.Spec) error { - ctx, _ := context.WithTimeout(context.Background(), 5*time.Second) - for conn := range node.ListenControlSocket(ctx) { - if ctx.Err() != nil { - return ctx.Err() - } - if conn != nil { - client := swarmapi.NewControlClient(conn) - var cluster *swarmapi.Cluster - for i := 0; ; i++ { - lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{}) - if err != nil { - return fmt.Errorf("error on listing clusters: %v", err) - } - if len(lcr.Clusters) == 0 { - if i < 10 { - time.Sleep(200 * time.Millisecond) - continue - } - return errors.New("empty list of clusters was returned") - } - cluster = lcr.Clusters[0] - break - } - // In init, we take the initial default values from swarmkit, and merge - // any non nil or 0 value from spec to GRPC spec. This will leave the - // default value alone. 
- // Note that this is different from Update(), as in Update() we expect - // user to specify the complete spec of the cluster (as they already know - // the existing one and knows which field to update) - clusterSpec, err := convert.MergeSwarmSpecToGRPC(spec, cluster.Spec) - if err != nil { - return fmt.Errorf("error updating cluster settings: %v", err) - } - _, err = client.UpdateCluster(ctx, &swarmapi.UpdateClusterRequest{ - ClusterID: cluster.ID, - ClusterVersion: &cluster.Meta.Version, - Spec: &clusterSpec, - }) - if err != nil { - return fmt.Errorf("error updating cluster settings: %v", err) - } - return nil - } - } - return ctx.Err() -} - func detectLockedError(err error) error { if err == swarmnode.ErrInvalidUnlockKey { return errors.WithStack(errSwarmLocked) diff --git a/components/engine/daemon/cluster/swarm.go b/components/engine/daemon/cluster/swarm.go new file mode 100644 index 0000000000..027e190b4d --- /dev/null +++ b/components/engine/daemon/cluster/swarm.go @@ -0,0 +1,562 @@ +package cluster + +import ( + "fmt" + "net" + "strings" + "time" + + "github.com/Sirupsen/logrus" + apierrors "github.com/docker/docker/api/errors" + apitypes "github.com/docker/docker/api/types" + "github.com/docker/docker/api/types/filters" + types "github.com/docker/docker/api/types/swarm" + "github.com/docker/docker/daemon/cluster/convert" + "github.com/docker/docker/opts" + "github.com/docker/docker/pkg/signal" + swarmapi "github.com/docker/swarmkit/api" + "github.com/docker/swarmkit/manager/encryption" + swarmnode "github.com/docker/swarmkit/node" + "github.com/pkg/errors" + "golang.org/x/net/context" +) + +// Init initializes new cluster from user provided request. +func (c *Cluster) Init(req types.InitRequest) (string, error) { + c.controlMutex.Lock() + defer c.controlMutex.Unlock() + c.mu.Lock() + if c.nr != nil { + if req.ForceNewCluster { + if err := c.nr.Stop(); err != nil { + c.mu.Unlock() + return "", err + } + } else { + c.mu.Unlock() + return "", errSwarmExists + } + } + c.mu.Unlock() + + if err := validateAndSanitizeInitRequest(&req); err != nil { + return "", apierrors.NewBadRequestError(err) + } + + listenHost, listenPort, err := resolveListenAddr(req.ListenAddr) + if err != nil { + return "", err + } + + advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort) + if err != nil { + return "", err + } + + localAddr := listenHost + + // If the local address is undetermined, the advertise address + // will be used as local address, if it belongs to this system. + // If the advertise address is not local, then we try to find + // a system address to use as local address. If this fails, + // we give up and ask the user to pass the listen address. 
+ if net.ParseIP(localAddr).IsUnspecified() { + advertiseIP := net.ParseIP(advertiseHost) + + found := false + for _, systemIP := range listSystemIPs() { + if systemIP.Equal(advertiseIP) { + localAddr = advertiseIP.String() + found = true + break + } + } + + if !found { + ip, err := c.resolveSystemAddr() + if err != nil { + logrus.Warnf("Could not find a local address: %v", err) + return "", errMustSpecifyListenAddr + } + localAddr = ip.String() + } + } + + if !req.ForceNewCluster { + clearPersistentState(c.root) + } + + nr, err := c.newNodeRunner(nodeStartConfig{ + forceNewCluster: req.ForceNewCluster, + autolock: req.AutoLockManagers, + LocalAddr: localAddr, + ListenAddr: net.JoinHostPort(listenHost, listenPort), + AdvertiseAddr: net.JoinHostPort(advertiseHost, advertisePort), + availability: req.Availability, + }) + if err != nil { + return "", err + } + c.mu.Lock() + c.nr = nr + c.mu.Unlock() + + if err := <-nr.Ready(); err != nil { + if !req.ForceNewCluster { // if failure on first attempt don't keep state + if err := clearPersistentState(c.root); err != nil { + return "", err + } + } + if err != nil { + c.mu.Lock() + c.nr = nil + c.mu.Unlock() + } + return "", err + } + state := nr.State() + if state.swarmNode == nil { // should never happen but protect from panic + return "", errors.New("invalid cluster state for spec initialization") + } + if err := initClusterSpec(state.swarmNode, req.Spec); err != nil { + return "", err + } + return state.NodeID(), nil +} + +// Join makes current Cluster part of an existing swarm cluster. +func (c *Cluster) Join(req types.JoinRequest) error { + c.controlMutex.Lock() + defer c.controlMutex.Unlock() + c.mu.Lock() + if c.nr != nil { + c.mu.Unlock() + return errSwarmExists + } + c.mu.Unlock() + + if err := validateAndSanitizeJoinRequest(&req); err != nil { + return apierrors.NewBadRequestError(err) + } + + listenHost, listenPort, err := resolveListenAddr(req.ListenAddr) + if err != nil { + return err + } + + var advertiseAddr string + if req.AdvertiseAddr != "" { + advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort) + // For joining, we don't need to provide an advertise address, + // since the remote side can detect it. + if err == nil { + advertiseAddr = net.JoinHostPort(advertiseHost, advertisePort) + } + } + + clearPersistentState(c.root) + + nr, err := c.newNodeRunner(nodeStartConfig{ + RemoteAddr: req.RemoteAddrs[0], + ListenAddr: net.JoinHostPort(listenHost, listenPort), + AdvertiseAddr: advertiseAddr, + joinAddr: req.RemoteAddrs[0], + joinToken: req.JoinToken, + availability: req.Availability, + }) + if err != nil { + return err + } + + c.mu.Lock() + c.nr = nr + c.mu.Unlock() + + select { + case <-time.After(swarmConnectTimeout): + return errSwarmJoinTimeoutReached + case err := <-nr.Ready(): + if err != nil { + c.mu.Lock() + c.nr = nil + c.mu.Unlock() + } + return err + } +} + +// Inspect retrieves the configuration properties of a managed swarm cluster. +func (c *Cluster) Inspect() (types.Swarm, error) { + c.mu.RLock() + defer c.mu.RUnlock() + + state := c.currentNodeState() + if !state.IsActiveManager() { + return types.Swarm{}, c.errNoManager(state) + } + + ctx, cancel := c.getRequestContext() + defer cancel() + + swarm, err := getSwarm(ctx, state.controlClient) + if err != nil { + return types.Swarm{}, err + } + + return convert.SwarmFromGRPC(*swarm), nil +} + +// Update updates configuration of a managed swarm cluster. 
+func (c *Cluster) Update(version uint64, spec types.Spec, flags types.UpdateFlags) error {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	state := c.currentNodeState()
+	if !state.IsActiveManager() {
+		return c.errNoManager(state)
+	}
+
+	ctx, cancel := c.getRequestContext()
+	defer cancel()
+
+	swarm, err := getSwarm(ctx, state.controlClient)
+	if err != nil {
+		return err
+	}
+
+	// In update, client should provide the complete spec of the swarm, including
+	// Name and Labels. If a field is specified with 0 or nil, then the default value
+	// will be used to swarmkit.
+	clusterSpec, err := convert.SwarmSpecToGRPC(spec)
+	if err != nil {
+		return apierrors.NewBadRequestError(err)
+	}
+
+	_, err = state.controlClient.UpdateCluster(
+		ctx,
+		&swarmapi.UpdateClusterRequest{
+			ClusterID: swarm.ID,
+			Spec:      &clusterSpec,
+			ClusterVersion: &swarmapi.Version{
+				Index: version,
+			},
+			Rotation: swarmapi.KeyRotation{
+				WorkerJoinToken:  flags.RotateWorkerToken,
+				ManagerJoinToken: flags.RotateManagerToken,
+				ManagerUnlockKey: flags.RotateManagerUnlockKey,
+			},
+		},
+	)
+	return err
+}
+
+// GetUnlockKey returns the unlock key for the swarm.
+func (c *Cluster) GetUnlockKey() (string, error) {
+	c.mu.RLock()
+	defer c.mu.RUnlock()
+
+	state := c.currentNodeState()
+	if !state.IsActiveManager() {
+		return "", c.errNoManager(state)
+	}
+
+	ctx, cancel := c.getRequestContext()
+	defer cancel()
+
+	client := swarmapi.NewCAClient(state.grpcConn)
+
+	r, err := client.GetUnlockKey(ctx, &swarmapi.GetUnlockKeyRequest{})
+	if err != nil {
+		return "", err
+	}
+
+	if len(r.UnlockKey) == 0 {
+		// no key
+		return "", nil
+	}
+
+	return encryption.HumanReadableKey(r.UnlockKey), nil
+}
+
+// UnlockSwarm provides a key to decrypt data that is encrypted at rest.
+func (c *Cluster) UnlockSwarm(req types.UnlockRequest) error {
+	c.controlMutex.Lock()
+	defer c.controlMutex.Unlock()
+
+	c.mu.RLock()
+	state := c.currentNodeState()
+
+	if !state.IsActiveManager() {
+		// when manager is not active,
+		// unless it is locked, otherwise return error.
+		if err := c.errNoManager(state); err != errSwarmLocked {
+			c.mu.RUnlock()
+			return err
+		}
+	} else {
+		// when manager is active, return an error of "not locked"
+		c.mu.RUnlock()
+		return errors.New("swarm is not locked")
+	}
+
+	// only when swarm is locked, code running reaches here
+	nr := c.nr
+	c.mu.RUnlock()
+
+	key, err := encryption.ParseHumanReadableKey(req.UnlockKey)
+	if err != nil {
+		return err
+	}
+
+	config := nr.config
+	config.lockKey = key
+	if err := nr.Stop(); err != nil {
+		return err
+	}
+	nr, err = c.newNodeRunner(config)
+	if err != nil {
+		return err
+	}
+
+	c.mu.Lock()
+	c.nr = nr
+	c.mu.Unlock()
+
+	if err := <-nr.Ready(); err != nil {
+		if errors.Cause(err) == errSwarmLocked {
+			return errors.New("swarm could not be unlocked: invalid key provided")
+		}
+		return fmt.Errorf("swarm component could not be started: %v", err)
+	}
+	return nil
+}
+
+// Leave shuts down Cluster and removes current state.
+func (c *Cluster) Leave(force bool) error {
+	c.controlMutex.Lock()
+	defer c.controlMutex.Unlock()
+
+	c.mu.Lock()
+	nr := c.nr
+	if nr == nil {
+		c.mu.Unlock()
+		return errNoSwarm
+	}
+
+	state := c.currentNodeState()
+
+	if errors.Cause(state.err) == errSwarmLocked && !force {
+		// leave a locked swarm without --force is not allowed
+		c.mu.Unlock()
+		return errors.New("Swarm is encrypted and locked. Please unlock it first or use `--force` to ignore this message.")
+	}
+
+	if state.IsManager() && !force {
+		msg := "You are attempting to leave the swarm on a node that is participating as a manager. "
+		if state.IsActiveManager() {
+			active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
+			if err == nil {
+				if active && removingManagerCausesLossOfQuorum(reachable, unreachable) {
+					if isLastManager(reachable, unreachable) {
+						msg += "Removing the last manager erases all current state of the swarm. Use `--force` to ignore this message. "
+						c.mu.Unlock()
+						return errors.New(msg)
+					}
+					msg += fmt.Sprintf("Removing this node leaves %v managers out of %v. Without a Raft quorum your swarm will be inaccessible. ", reachable-1, reachable+unreachable)
+				}
+			}
+		} else {
+			msg += "Doing so may lose the consensus of your cluster. "
+		}
+
+		msg += "The only way to restore a swarm that has lost consensus is to reinitialize it with `--force-new-cluster`. Use `--force` to suppress this message."
+		c.mu.Unlock()
+		return errors.New(msg)
+	}
+	// release readers in here
+	if err := nr.Stop(); err != nil {
+		logrus.Errorf("failed to shut down cluster node: %v", err)
+		signal.DumpStacks("")
+		c.mu.Unlock()
+		return err
+	}
+	c.nr = nil
+	c.mu.Unlock()
+	if nodeID := state.NodeID(); nodeID != "" {
+		nodeContainers, err := c.listContainerForNode(nodeID)
+		if err != nil {
+			return err
+		}
+		for _, id := range nodeContainers {
+			if err := c.config.Backend.ContainerRm(id, &apitypes.ContainerRmConfig{ForceRemove: true}); err != nil {
+				logrus.Errorf("error removing %v: %v", id, err)
+			}
+		}
+	}
+
+	c.configEvent <- struct{}{}
+	// todo: cleanup optional?
+	if err := clearPersistentState(c.root); err != nil {
+		return err
+	}
+	c.config.Backend.DaemonLeavesCluster()
+	return nil
+}
+
+// Info returns information about the current cluster state.
+func (c *Cluster) Info() types.Info { + info := types.Info{ + NodeAddr: c.GetAdvertiseAddress(), + } + c.mu.RLock() + defer c.mu.RUnlock() + + state := c.currentNodeState() + info.LocalNodeState = state.status + if state.err != nil { + info.Error = state.err.Error() + } + + ctx, cancel := c.getRequestContext() + defer cancel() + + if state.IsActiveManager() { + info.ControlAvailable = true + swarm, err := c.Inspect() + if err != nil { + info.Error = err.Error() + } + + // Strip JoinTokens + info.Cluster = swarm.ClusterInfo + + if r, err := state.controlClient.ListNodes(ctx, &swarmapi.ListNodesRequest{}); err != nil { + info.Error = err.Error() + } else { + info.Nodes = len(r.Nodes) + for _, n := range r.Nodes { + if n.ManagerStatus != nil { + info.Managers = info.Managers + 1 + } + } + } + } + + if state.swarmNode != nil { + for _, r := range state.swarmNode.Remotes() { + info.RemoteManagers = append(info.RemoteManagers, types.Peer{NodeID: r.NodeID, Addr: r.Addr}) + } + info.NodeID = state.swarmNode.NodeID() + } + + return info +} + +func validateAndSanitizeInitRequest(req *types.InitRequest) error { + var err error + req.ListenAddr, err = validateAddr(req.ListenAddr) + if err != nil { + return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err) + } + + if req.Spec.Annotations.Name == "" { + req.Spec.Annotations.Name = "default" + } else if req.Spec.Annotations.Name != "default" { + return errors.New(`swarm spec must be named "default"`) + } + + return nil +} + +func validateAndSanitizeJoinRequest(req *types.JoinRequest) error { + var err error + req.ListenAddr, err = validateAddr(req.ListenAddr) + if err != nil { + return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err) + } + if len(req.RemoteAddrs) == 0 { + return errors.New("at least 1 RemoteAddr is required to join") + } + for i := range req.RemoteAddrs { + req.RemoteAddrs[i], err = validateAddr(req.RemoteAddrs[i]) + if err != nil { + return fmt.Errorf("invalid remoteAddr %q: %v", req.RemoteAddrs[i], err) + } + } + return nil +} + +func validateAddr(addr string) (string, error) { + if addr == "" { + return addr, errors.New("invalid empty address") + } + newaddr, err := opts.ParseTCPAddr(addr, defaultAddr) + if err != nil { + return addr, nil + } + return strings.TrimPrefix(newaddr, "tcp://"), nil +} + +func initClusterSpec(node *swarmnode.Node, spec types.Spec) error { + ctx, _ := context.WithTimeout(context.Background(), 5*time.Second) + for conn := range node.ListenControlSocket(ctx) { + if ctx.Err() != nil { + return ctx.Err() + } + if conn != nil { + client := swarmapi.NewControlClient(conn) + var cluster *swarmapi.Cluster + for i := 0; ; i++ { + lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{}) + if err != nil { + return fmt.Errorf("error on listing clusters: %v", err) + } + if len(lcr.Clusters) == 0 { + if i < 10 { + time.Sleep(200 * time.Millisecond) + continue + } + return errors.New("empty list of clusters was returned") + } + cluster = lcr.Clusters[0] + break + } + // In init, we take the initial default values from swarmkit, and merge + // any non nil or 0 value from spec to GRPC spec. This will leave the + // default value alone. 
+ // Note that this is different from Update(), as in Update() we expect + // user to specify the complete spec of the cluster (as they already know + // the existing one and knows which field to update) + clusterSpec, err := convert.MergeSwarmSpecToGRPC(spec, cluster.Spec) + if err != nil { + return fmt.Errorf("error updating cluster settings: %v", err) + } + _, err = client.UpdateCluster(ctx, &swarmapi.UpdateClusterRequest{ + ClusterID: cluster.ID, + ClusterVersion: &cluster.Meta.Version, + Spec: &clusterSpec, + }) + if err != nil { + return fmt.Errorf("error updating cluster settings: %v", err) + } + return nil + } + } + return ctx.Err() +} + +func (c *Cluster) listContainerForNode(nodeID string) ([]string, error) { + var ids []string + filters := filters.NewArgs() + filters.Add("label", fmt.Sprintf("com.docker.swarm.node.id=%s", nodeID)) + containers, err := c.config.Backend.Containers(&apitypes.ContainerListOptions{ + Filters: filters, + }) + if err != nil { + return []string{}, err + } + for _, c := range containers { + ids = append(ids, c.ID) + } + return ids, nil +}
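Note (illustrative, not part of the patch): the methods gathered into swarm.go above make up the daemon-side swarm lifecycle API. A minimal sketch of how they chain together — Init to create a cluster, Inspect to read back the spec and its version, Update to rotate a join token, and Leave to tear down — follows. It assumes it sits inside package cluster; newDemoCluster is a hypothetical stand-in for however the daemon actually constructs and wires up a *Cluster, and everything else uses only the signatures and types visible in this patch.

package cluster

import (
	"log"

	types "github.com/docker/docker/api/types/swarm"
)

// demoSwarmLifecycle chains the lifecycle methods moved into swarm.go.
func demoSwarmLifecycle() {
	c := newDemoCluster() // hypothetical helper returning a ready *Cluster

	// Init creates a single-node swarm and returns the new manager's node
	// ID. An empty Spec is fine: validateAndSanitizeInitRequest names the
	// cluster "default" when no name is given.
	nodeID, err := c.Init(types.InitRequest{
		ListenAddr:    "0.0.0.0:2377",
		AdvertiseAddr: "192.0.2.10:2377", // assumed reachable local address
	})
	if err != nil {
		log.Fatalf("init: %v", err)
	}
	log.Printf("initialized swarm as node %s", nodeID)

	// Inspect only succeeds on an active manager; the returned Swarm
	// carries the version index that Update requires.
	sw, err := c.Inspect()
	if err != nil {
		log.Fatalf("inspect: %v", err)
	}

	// Update expects the complete current spec plus the version index;
	// the Rotate* flags map onto swarmapi.KeyRotation.
	if err := c.Update(sw.Version.Index, sw.Spec, types.UpdateFlags{
		RotateWorkerToken: true,
	}); err != nil {
		log.Fatalf("update: %v", err)
	}

	// Leave(force=true) skips the quorum-loss warnings built up in Leave.
	if err := c.Leave(true); err != nil {
		log.Fatalf("leave: %v", err)
	}
}

Note how Update must carry the full spec and current version, the opposite convention from initClusterSpec, whose comment explains that init merges non-zero user fields into swarmkit's defaults.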