From feccd72a493c858b58305720c3bb84ab92bfe64b Mon Sep 17 00:00:00 2001
From: Phil Estes
Date: Wed, 13 Dec 2017 23:38:22 -0500
Subject: [PATCH 1/4] Fix overlay2 storage driver inside a user namespace

The overlay2 driver was not setting up the archive.TarOptions field
properly like other storage backend routes to "applyTarLayer"
functionality. The InUserNS field is populated now for overlay2 using
the same query function used by the other storage drivers.

Signed-off-by: Phil Estes
Upstream-commit: 05b8d59015f8a5ce26c8bbaa8053b5bc7cb1a77b
Component: engine
---
 components/engine/daemon/graphdriver/overlay2/overlay.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/components/engine/daemon/graphdriver/overlay2/overlay.go b/components/engine/daemon/graphdriver/overlay2/overlay.go
index f1731ea935..4b596ae33e 100644
--- a/components/engine/daemon/graphdriver/overlay2/overlay.go
+++ b/components/engine/daemon/graphdriver/overlay2/overlay.go
@@ -31,6 +31,7 @@ import (
 	"github.com/docker/docker/pkg/parsers/kernel"
 	"github.com/docker/docker/pkg/system"
 	"github.com/docker/go-units"
+	rsystem "github.com/opencontainers/runc/libcontainer/system"
 	"github.com/opencontainers/selinux/go-selinux/label"
 	"github.com/sirupsen/logrus"
 	"golang.org/x/sys/unix"
@@ -704,6 +705,7 @@ func (d *Driver) ApplyDiff(id string, parent string, diff io.Reader) (size int64
 		UIDMaps:        d.uidMaps,
 		GIDMaps:        d.gidMaps,
 		WhiteoutFormat: archive.OverlayWhiteoutFormat,
+		InUserNS:       rsystem.RunningInUserNS(),
 	}); err != nil {
 		return 0, err
 	}
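
The fix above relies on rsystem.RunningInUserNS(), the same helper the other
graph drivers already use to populate archive.TarOptions, so that layer
extraction can adjust operations that are not permitted inside a user
namespace. As a rough, self-contained sketch of what such a check involves
(an illustrative re-implementation, not the runc helper itself), the usual
heuristic is to read /proc/self/uid_map and treat anything other than the
full-range identity mapping as evidence of running inside a user namespace:

    // Illustrative re-implementation of a user-namespace check; the real
    // helper is RunningInUserNS from
    // github.com/opencontainers/runc/libcontainer/system.
    package main

    import (
        "fmt"
        "os"
        "strings"
    )

    // runningInUserNS reports whether this process appears to run inside a
    // user namespace. Outside one, /proc/self/uid_map holds the single
    // identity mapping "0 0 4294967295"; any other content implies a userns.
    func runningInUserNS() bool {
        data, err := os.ReadFile("/proc/self/uid_map")
        if err != nil {
            // No uid_map available: assume the host namespace.
            return false
        }
        line := strings.SplitN(string(data), "\n", 2)[0]
        fields := strings.Fields(line)
        if len(fields) == 3 && fields[0] == "0" && fields[1] == "0" && fields[2] == "4294967295" {
            return false
        }
        return true
    }

    func main() {
        fmt.Println("in user namespace:", runningInUserNS())
    }
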
From 72249a1f2eec21786d2610fd22c13529f9bb74a0 Mon Sep 17 00:00:00 2001
From: Brian Goff
Date: Fri, 15 Dec 2017 10:00:15 -0500
Subject: [PATCH 2/4] Fix error handling for kill/process not found

With the containerd 1.0 migration we now have strongly typed errors that
we can check for "process not found". We also had some bad error checks
looking for `ESRCH`, which would only be returned from `unix.Kill` and
never from containerd, even though we were checking containerd responses
for it.

Fixes some race conditions around process handling and our error checks
that could let errors propagate up to the user when they should not.

Signed-off-by: Brian Goff
Upstream-commit: e55bead518e4c72cdecf7de2e49db6c477cb58eb
Component: engine
---
 components/engine/daemon/kill.go              | 14 +++++-------
 .../engine/libcontainerd/client_daemon.go     | 22 +++++++++++++------
 2 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/components/engine/daemon/kill.go b/components/engine/daemon/kill.go
index 1292f86b0c..5cde0d776d 100644
--- a/components/engine/daemon/kill.go
+++ b/components/engine/daemon/kill.go
@@ -4,10 +4,10 @@ import (
 	"context"
 	"fmt"
 	"runtime"
-	"strings"
 	"syscall"
 	"time"
 
+	"github.com/docker/docker/api/errdefs"
 	containerpkg "github.com/docker/docker/container"
 	"github.com/docker/docker/libcontainerd"
 	"github.com/docker/docker/pkg/signal"
@@ -97,15 +97,11 @@ func (daemon *Daemon) killWithSignal(container *containerpkg.Container, sig int)
 	}
 
 	if err := daemon.kill(container, sig); err != nil {
-		err = errors.Wrapf(err, "Cannot kill container %s", container.ID)
-		// if container or process not exists, ignore the error
-		// TODO: we shouldn't have to parse error strings from containerd
-		if strings.Contains(err.Error(), "container not found") ||
-			strings.Contains(err.Error(), "no such process") {
-			logrus.Warnf("container kill failed because of 'container not found' or 'no such process': %s", err.Error())
+		if errdefs.IsNotFound(err) {
 			unpause = false
+			logrus.WithError(err).WithField("container", container.ID).WithField("action", "kill").Debug("container kill failed because of 'container not found' or 'no such process'")
 		} else {
-			return err
+			return errors.Wrapf(err, "Cannot kill container %s", container.ID)
 		}
 	}
 
@@ -171,7 +167,7 @@ func (daemon *Daemon) Kill(container *containerpkg.Container) error {
 // killPossibleDeadProcess is a wrapper around killSig() suppressing "no such process" error.
 func (daemon *Daemon) killPossiblyDeadProcess(container *containerpkg.Container, sig int) error {
 	err := daemon.killWithSignal(container, sig)
-	if err == syscall.ESRCH {
+	if errdefs.IsNotFound(err) {
 		e := errNoSuchProcess{container.GetPID(), sig}
 		logrus.Debug(e)
 		return e
diff --git a/components/engine/libcontainerd/client_daemon.go b/components/engine/libcontainerd/client_daemon.go
index 0a3502c347..78b1412068 100644
--- a/components/engine/libcontainerd/client_daemon.go
+++ b/components/engine/libcontainerd/client_daemon.go
@@ -27,6 +27,7 @@ import (
 	"github.com/containerd/containerd/archive"
 	"github.com/containerd/containerd/cio"
 	"github.com/containerd/containerd/content"
+	"github.com/containerd/containerd/errdefs"
 	"github.com/containerd/containerd/images"
 	"github.com/containerd/containerd/linux/runctypes"
 	"github.com/containerd/typeurl"
@@ -317,7 +318,7 @@ func (c *client) SignalProcess(ctx context.Context, containerID, processID strin
 	if err != nil {
 		return err
 	}
-	return p.Kill(ctx, syscall.Signal(signal))
+	return wrapError(p.Kill(ctx, syscall.Signal(signal)))
 }
 
 func (c *client) ResizeTerminal(ctx context.Context, containerID, processID string, width, height int) error {
@@ -816,12 +817,19 @@ func (c *client) writeContent(ctx context.Context, mediaType, ref string, r io.R
 }
 
 func wrapError(err error) error {
-	if err != nil {
-		msg := err.Error()
-		for _, s := range []string{"container does not exist", "not found", "no such container"} {
-			if strings.Contains(msg, s) {
-				return wrapNotFoundError(err)
-			}
+	if err == nil {
+		return nil
+	}
+
+	switch {
+	case errdefs.IsNotFound(err):
+		return wrapNotFoundError(err)
+	}
+
+	msg := err.Error()
+	for _, s := range []string{"container does not exist", "not found", "no such container"} {
+		if strings.Contains(msg, s) {
+			return wrapNotFoundError(err)
 		}
 	}
 	return err
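
The core of the patch above is the switch from substring matching on error
messages to the typed check errdefs.IsNotFound(), which containerd 1.0's
strongly typed errors make possible. A minimal, self-contained sketch of that
pattern follows; the names notFound, IsNotFound and killProcess are
hypothetical stand-ins, not Docker's or containerd's actual errdefs API:

    // Sketch of classifying errors by behaviour instead of by message text.
    package main

    import "fmt"

    // notFound is the behavioural interface callers test for.
    type notFound interface {
        error
        NotFound() bool
    }

    // notFoundError tags an underlying cause as "not found".
    type notFoundError struct{ cause error }

    func (e notFoundError) Error() string  { return e.cause.Error() }
    func (e notFoundError) NotFound() bool { return true }

    // IsNotFound avoids fragile checks such as
    // strings.Contains(err.Error(), "no such process").
    func IsNotFound(err error) bool {
        _, ok := err.(notFound)
        return ok
    }

    // killProcess stands in for a runtime call whose target already exited.
    func killProcess(pid int) error {
        return notFoundError{cause: fmt.Errorf("process %d: no such process", pid)}
    }

    func main() {
        if err := killProcess(42); IsNotFound(err) {
            // Same decision killWithSignal now makes: the process is already
            // gone, so the kill is treated as a no-op rather than an error.
            fmt.Println("ignoring:", err)
            return
        }
        fmt.Println("kill failed for another reason")
    }
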
From fbc087df98a813d464e75d249e874c70316aa3c4 Mon Sep 17 00:00:00 2001
From: Brian Goff
Date: Fri, 15 Dec 2017 11:32:08 -0500
Subject: [PATCH 3/4] Fix some missing synchronization in libcontainerd

Signed-off-by: Brian Goff
Upstream-commit: 647cec4324186faa3183bd6a7bc72a032a86c8c9
Component: engine
---
 .../engine/libcontainerd/client_daemon.go | 142 +++++++++++-------
 1 file changed, 87 insertions(+), 55 deletions(-)

diff --git a/components/engine/libcontainerd/client_daemon.go b/components/engine/libcontainerd/client_daemon.go
index 78b1412068..a9f7c11dd1 100644
--- a/components/engine/libcontainerd/client_daemon.go
+++ b/components/engine/libcontainerd/client_daemon.go
@@ -43,7 +43,7 @@ import (
 const InitProcessName = "init"
 
 type container struct {
-	sync.Mutex
+	mu sync.Mutex
 
 	bundleDir string
 	ctr       containerd.Container
@@ -52,6 +52,54 @@ type container struct {
 	oomKilled bool
 }
 
+func (c *container) setTask(t containerd.Task) {
+	c.mu.Lock()
+	c.task = t
+	c.mu.Unlock()
+}
+
+func (c *container) getTask() containerd.Task {
+	c.mu.Lock()
+	t := c.task
+	c.mu.Unlock()
+	return t
+}
+
+func (c *container) addProcess(id string, p containerd.Process) {
+	c.mu.Lock()
+	if c.execs == nil {
+		c.execs = make(map[string]containerd.Process)
+	}
+	c.execs[id] = p
+	c.mu.Unlock()
+}
+
+func (c *container) deleteProcess(id string) {
+	c.mu.Lock()
+	delete(c.execs, id)
+	c.mu.Unlock()
+}
+
+func (c *container) getProcess(id string) containerd.Process {
+	c.mu.Lock()
+	p := c.execs[id]
+	c.mu.Unlock()
+	return p
+}
+
+func (c *container) setOOMKilled(killed bool) {
+	c.mu.Lock()
+	c.oomKilled = killed
+	c.mu.Unlock()
+}
+
+func (c *container) getOOMKilled() bool {
+	c.mu.Lock()
+	killed := c.oomKilled
+	c.mu.Unlock()
+	return killed
+}
+
 type client struct {
 	sync.RWMutex // protects containers map
@@ -161,10 +209,10 @@
 // Start create and start a task for the specified containerd id
 func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin bool, attachStdio StdioCallback) (int, error) {
 	ctr := c.getContainer(id)
-	switch {
-	case ctr == nil:
+	if ctr == nil {
 		return -1, errors.WithStack(newNotFoundError("no such container"))
-	case ctr.task != nil:
+	}
+	if t := ctr.getTask(); t != nil {
 		return -1, errors.WithStack(newConflictError("container already started"))
 	}
@@ -228,9 +276,7 @@ func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin
 		return -1, err
 	}
 
-	c.Lock()
-	c.containers[id].task = t
-	c.Unlock()
+	ctr.setTask(t)
 
 	// Signal c.createIO that it can call CloseIO
 	close(stdinCloseSync)
@@ -240,9 +286,7 @@ func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin
 			c.logger.WithError(err).WithField("container", id).
 				Error("failed to delete task after fail start")
 		}
-		c.Lock()
-		c.containers[id].task = nil
-		c.Unlock()
+		ctr.setTask(nil)
 		return -1, err
 	}
@@ -251,12 +295,15 @@
 func (c *client) Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio StdioCallback) (int, error) {
 	ctr := c.getContainer(containerID)
-	switch {
-	case ctr == nil:
+	if ctr == nil {
 		return -1, errors.WithStack(newNotFoundError("no such container"))
-	case ctr.task == nil:
+	}
+	t := ctr.getTask()
+	if t == nil {
 		return -1, errors.WithStack(newInvalidParameterError("container is not running"))
-	case ctr.execs != nil && ctr.execs[processID] != nil:
+	}
+
+	if p := ctr.getProcess(processID); p != nil {
 		return -1, errors.WithStack(newConflictError("id already in use"))
 	}
@@ -279,7 +326,7 @@ func (c *client) Exec(ctx context.Context, containerID, processID string, spec *
 		}
 	}()
 
-	p, err = ctr.task.Exec(ctx, processID, spec, func(id string) (cio.IO, error) {
+	p, err = t.Exec(ctx, processID, spec, func(id string) (cio.IO, error) {
 		rio, err = c.createIO(fifos, containerID, processID, stdinCloseSync, attachStdio)
 		return rio, err
 	})
@@ -292,21 +339,14 @@ func (c *client) Exec(ctx context.Context, containerID, processID string, spec *
 		return -1, err
 	}
 
-	ctr.Lock()
-	if ctr.execs == nil {
-		ctr.execs = make(map[string]containerd.Process)
-	}
-	ctr.execs[processID] = p
-	ctr.Unlock()
+	ctr.addProcess(processID, p)
 
 	// Signal c.createIO that it can call CloseIO
 	close(stdinCloseSync)
 
 	if err = p.Start(ctx); err != nil {
 		p.Delete(context.Background())
-		ctr.Lock()
-		delete(ctr.execs, processID)
-		ctr.Unlock()
+		ctr.deleteProcess(processID)
 		return -1, err
 	}
@@ -432,12 +472,9 @@ func (c *client) DeleteTask(ctx context.Context, containerID string) (uint32, ti
 		return 255, time.Now(), nil
 	}
 
-	c.Lock()
-	if ctr, ok := c.containers[containerID]; ok {
-		ctr.task = nil
+	if ctr := c.getContainer(containerID); ctr != nil {
+		ctr.setTask(nil)
 	}
-	c.Unlock()
-
 	return status.ExitCode(), status.ExitTime(), nil
 }
@@ -471,7 +508,12 @@ func (c *client) Status(ctx context.Context, containerID string) (Status, error)
 		return StatusUnknown, errors.WithStack(newNotFoundError("no such container"))
 	}
 
-	s, err := ctr.task.Status(ctx)
+	t := ctr.getTask()
+	if t == nil {
+		return StatusUnknown, errors.WithStack(newNotFoundError("no such task"))
+	}
+
+	s, err := t.Status(ctx)
 	if err != nil {
 		return StatusUnknown, err
 	}
@@ -547,26 +589,22 @@ func (c *client) removeContainer(id string) {
 
 func (c *client) getProcess(containerID, processID string) (containerd.Process, error) {
 	ctr := c.getContainer(containerID)
-	switch {
-	case ctr == nil:
+	if ctr == nil {
 		return nil, errors.WithStack(newNotFoundError("no such container"))
-	case ctr.task == nil:
-		return nil, errors.WithStack(newNotFoundError("container is not running"))
-	case processID == InitProcessName:
-		return ctr.task, nil
-	default:
-		ctr.Lock()
-		defer ctr.Unlock()
-		if ctr.execs == nil {
-			return nil, errors.WithStack(newNotFoundError("no execs"))
-		}
 	}
-	p := ctr.execs[processID]
+	t := ctr.getTask()
+	if t == nil {
+		return nil, errors.WithStack(newNotFoundError("container is not running"))
+	}
+	if processID == InitProcessName {
+		return t, nil
+	}
+
+	p := ctr.getProcess(processID)
 	if p == nil {
 		return nil, errors.WithStack(newNotFoundError("no such exec"))
 	}
-
 	return p, nil
 }
@@ -624,12 +662,7 @@ func (c *client) processEvent(ctr *container, et EventType, ei EventInfo) {
 		}
 
 		if et == EventExit && ei.ProcessID != ei.ContainerID {
-			var p containerd.Process
-			ctr.Lock()
-			if ctr.execs != nil {
-				p = ctr.execs[ei.ProcessID]
-			}
-			ctr.Unlock()
+			p := ctr.getProcess(ei.ProcessID)
 			if p == nil {
 				c.logger.WithError(errors.New("no such process")).
 					WithFields(logrus.Fields{
@@ -645,9 +678,8 @@ func (c *client) processEvent(ctr *container, et EventType, ei EventInfo) {
 					"process":   ei.ProcessID,
 				}).Warn("failed to delete process")
 			}
-			c.Lock()
-			delete(ctr.execs, ei.ProcessID)
-			c.Unlock()
+			ctr.deleteProcess(ei.ProcessID)
+
 			ctr := c.getContainer(ei.ContainerID)
 			if ctr == nil {
 				c.logger.WithFields(logrus.Fields{
@@ -784,10 +816,10 @@ func (c *client) processEventStream(ctx context.Context) {
 			}
 
 			if oomKilled {
-				ctr.oomKilled = true
+				ctr.setOOMKilled(true)
 				oomKilled = false
 			}
-			ei.OOMKilled = ctr.oomKilled
+			ei.OOMKilled = ctr.getOOMKilled()
 
 			c.processEvent(ctr, et, ei)
 		}
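
The synchronization fix above funnels every access to the container's mutable
state (task, execs, oomKilled) through accessor methods that hold a private
mutex, instead of letting callers read and write the fields directly. A
stripped-down sketch of the same pattern, with an illustrative record type
standing in for libcontainerd's container struct:

    // Sketch of the lock-guarded accessor pattern from patch 3.
    package main

    import (
        "fmt"
        "sync"
    )

    type record struct {
        mu    sync.Mutex
        task  string
        execs map[string]string
    }

    func (r *record) setTask(t string) {
        r.mu.Lock()
        r.task = t
        r.mu.Unlock()
    }

    func (r *record) getTask() string {
        r.mu.Lock()
        t := r.task // copy out under the lock, return the copy
        r.mu.Unlock()
        return t
    }

    func (r *record) addProcess(id, p string) {
        r.mu.Lock()
        if r.execs == nil { // lazily allocated, as addProcess does in the patch
            r.execs = make(map[string]string)
        }
        r.execs[id] = p
        r.mu.Unlock()
    }

    func main() {
        r := &record{}
        var wg sync.WaitGroup
        for i := 0; i < 4; i++ {
            wg.Add(1)
            go func(i int) { // concurrent writers no longer race on the fields
                defer wg.Done()
                r.setTask(fmt.Sprintf("task-%d", i))
                r.addProcess(fmt.Sprintf("exec-%d", i), "running")
            }(i)
        }
        wg.Wait()
        fmt.Println("task:", r.getTask())
    }

Copying the value out under the lock and returning the copy, as getTask and
getOOMKilled do, keeps the critical section short while still giving callers a
consistent snapshot.
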
From e1fb11f9ea102d66e10cb56395b6de8651e7ce35 Mon Sep 17 00:00:00 2001
From: abhi
Date: Fri, 15 Dec 2017 11:37:17 -0800
Subject: [PATCH 4/4] Vendoring swarmkit a6519e28ff2a558f5d32b2dab9fcb0882879b398

Signed-off-by: abhi
Upstream-commit: efae8db785189dc1fe9ce880d190beb2e26cb0fd
Component: engine
---
 components/engine/vendor.conf                 |  2 +-
 .../cnmallocator/networkallocator.go          |  5 +++
 .../allocator/cnmallocator/portallocator.go   | 43 +++++++++++++------
 .../manager/orchestrator/replicated/slot.go   | 16 ++++++-
 4 files changed, 52 insertions(+), 14 deletions(-)

diff --git a/components/engine/vendor.conf b/components/engine/vendor.conf
index 93a503896f..78330a284a 100644
--- a/components/engine/vendor.conf
+++ b/components/engine/vendor.conf
@@ -114,7 +114,7 @@ github.com/dmcgowan/go-tar go1.10
 github.com/stevvooe/ttrpc 76e68349ad9ab4d03d764c713826d31216715e4f
 
 # cluster
-github.com/docker/swarmkit 4429c763170d9ca96929249353c3270c19e7d39e
+github.com/docker/swarmkit a6519e28ff2a558f5d32b2dab9fcb0882879b398
 github.com/gogo/protobuf v0.4
 github.com/cloudflare/cfssl 7fb22c8cba7ecaf98e4082d22d65800cf45e042a
 github.com/google/certificate-transparency d90e65c3a07988180c5b1ece71791c0b6506826e
diff --git a/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/cnmallocator/networkallocator.go b/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/cnmallocator/networkallocator.go
index 53f9ffbeee..b89e72ed6e 100644
--- a/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/cnmallocator/networkallocator.go
+++ b/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/cnmallocator/networkallocator.go
@@ -404,6 +404,11 @@ func (na *cnmNetworkAllocator) IsServiceAllocated(s *api.Service, flags ...func(
 vipLoop:
 	for _, vip := range s.Endpoint.VirtualIPs {
 		if na.IsVIPOnIngressNetwork(vip) && networkallocator.IsIngressNetworkNeeded(s) {
+			// This checks the condition when ingress network is needed
+			// but allocation has not been done.
+			if _, ok := na.services[s.ID]; !ok {
+				return false
+			}
 			continue vipLoop
 		}
 		for _, net := range specNetworks {
diff --git a/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/cnmallocator/portallocator.go b/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/cnmallocator/portallocator.go
index 19dcbec772..7f3f1c13ae 100644
--- a/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/cnmallocator/portallocator.go
+++ b/components/engine/vendor/github.com/docker/swarmkit/manager/allocator/cnmallocator/portallocator.go
@@ -324,9 +324,18 @@ func (pa *portAllocator) isPortsAllocatedOnInit(s *api.Service, onInit bool) boo
 	}
 
 	portStates := allocatedPorts{}
+	hostTargetPorts := map[uint32]struct{}{}
 	for _, portState := range s.Endpoint.Ports {
-		if portState.PublishMode == api.PublishModeIngress {
+		switch portState.PublishMode {
+		case api.PublishModeIngress:
 			portStates.addState(portState)
+		case api.PublishModeHost:
+			// build a map of host mode ports we've seen. if in the spec we get
+			// a host port that's not in the service, then we need to do
+			// allocation. if we get the same target port but something else
+			// has changed, then HostPublishPortsNeedUpdate will cover that
+			// case. see docker/swarmkit#2376
+			hostTargetPorts[portState.TargetPort] = struct{}{}
 		}
 	}
 
@@ -344,18 +353,28 @@ func (pa *portAllocator) isPortsAllocatedOnInit(s *api.Service, onInit bool) boo
 	// Iterate portConfigs with PublishedPort == 0 (low priority)
 	for _, portConfig := range s.Spec.Endpoint.Ports {
 		// Ignore ports which are not PublishModeIngress
-		if portConfig.PublishMode != api.PublishModeIngress {
-			continue
-		}
-		if portConfig.PublishedPort == 0 && portStates.delState(portConfig) == nil {
-			return false
-		}
+		switch portConfig.PublishMode {
+		case api.PublishModeIngress:
+			if portConfig.PublishedPort == 0 && portStates.delState(portConfig) == nil {
+				return false
+			}
 
-		// If SwarmPort was not defined by user and the func
-		// is called during allocator initialization state then
-		// we are not allocated.
-		if portConfig.PublishedPort == 0 && onInit {
-			return false
+			// If SwarmPort was not defined by user and the func
+			// is called during allocator initialization state then
+			// we are not allocated.
+			if portConfig.PublishedPort == 0 && onInit {
+				return false
+			}
+		case api.PublishModeHost:
+			// check if the target port is already in the port config. if it
+			// isn't, then it's our problem.
+			if _, ok := hostTargetPorts[portConfig.TargetPort]; !ok {
+				return false
+			}
+			// NOTE(dperny) there could be a further case where we check if
+			// there are host ports in the config that aren't in the spec, but
+			// that's only possible if there's a mismatch in the number of
+			// ports, which is handled by a length check earlier in the code
 		}
 	}
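
The portallocator change above reduces to a set-membership test: collect the
host-mode target ports already present on the service's endpoint, then report
that allocation is still needed whenever the spec names a host-mode target
port outside that set (other changes to a matching port are left to
HostPublishPortsNeedUpdate, per the comment). A simplified sketch with a
stand-in portConfig type rather than swarmkit's api.PortConfig:

    // Sketch of the host-mode port check added to isPortsAllocatedOnInit.
    package main

    import "fmt"

    type portConfig struct {
        PublishMode string // "ingress" or "host"
        TargetPort  uint32
    }

    // needsHostPortAllocation reports whether the spec names a host-mode
    // target port that the service's endpoint does not carry yet.
    func needsHostPortAllocation(endpoint, spec []portConfig) bool {
        hostTargetPorts := map[uint32]struct{}{}
        for _, p := range endpoint {
            if p.PublishMode == "host" {
                hostTargetPorts[p.TargetPort] = struct{}{}
            }
        }
        for _, p := range spec {
            if p.PublishMode != "host" {
                continue
            }
            if _, ok := hostTargetPorts[p.TargetPort]; !ok {
                return true // a host port in the spec is missing from the service
            }
        }
        return false
    }

    func main() {
        endpoint := []portConfig{{PublishMode: "host", TargetPort: 80}}
        spec := []portConfig{
            {PublishMode: "host", TargetPort: 80},
            {PublishMode: "host", TargetPort: 443}, // not allocated yet
        }
        fmt.Println("needs allocation:", needsHostPortAllocation(endpoint, spec)) // true
    }
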
diff --git a/components/engine/vendor/github.com/docker/swarmkit/manager/orchestrator/replicated/slot.go b/components/engine/vendor/github.com/docker/swarmkit/manager/orchestrator/replicated/slot.go
index bdc25d9d76..cee9fe10a0 100644
--- a/components/engine/vendor/github.com/docker/swarmkit/manager/orchestrator/replicated/slot.go
+++ b/components/engine/vendor/github.com/docker/swarmkit/manager/orchestrator/replicated/slot.go
@@ -12,6 +12,8 @@ type slotsByRunningState []orchestrator.Slot
 func (is slotsByRunningState) Len() int      { return len(is) }
 func (is slotsByRunningState) Swap(i, j int) { is[i], is[j] = is[j], is[i] }
 
+// Less returns true if the first task should be preferred over the second task,
+// all other things being equal in terms of node balance.
 func (is slotsByRunningState) Less(i, j int) bool {
 	iRunning := false
 	jRunning := false
@@ -29,7 +31,19 @@ func (is slotsByRunningState) Less(i, j int) bool {
 		}
 	}
 
-	return iRunning && !jRunning
+	if iRunning && !jRunning {
+		return true
+	}
+
+	if !iRunning && jRunning {
+		return false
+	}
+
+	// Use Slot number as a tie-breaker to prefer to remove tasks in reverse
+	// order of Slot number. This would help us avoid unnecessary master
+	// migration when scaling down a stateful service because the master
+	// task of a stateful service is usually in a low numbered Slot.
+	return is[i][0].Slot < is[j][0].Slot
 }
 
 type slotWithIndex struct {
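
The new Less above keeps the running/non-running ordering and adds slot number
as a tie-breaker so that, per the comment in the patch, tasks are preferred for
removal in reverse order of slot number when a service scales down. The
comparator's shape can be seen with sort.Slice and a simplified slot type
standing in for orchestrator.Slot:

    // Sketch of the two-key comparator introduced in slot.go.
    package main

    import (
        "fmt"
        "sort"
    )

    type slot struct {
        Number  uint64
        Running bool
    }

    func main() {
        slots := []slot{
            {Number: 3, Running: true},
            {Number: 1, Running: true},
            {Number: 2, Running: false},
            {Number: 4, Running: true},
        }

        sort.Slice(slots, func(i, j int) bool {
            // Primary key: running slots sort ahead of non-running ones.
            if slots[i].Running != slots[j].Running {
                return slots[i].Running
            }
            // Tie-breaker: lower slot numbers first, mirroring the new Less.
            return slots[i].Number < slots[j].Number
        })

        fmt.Println(slots) // [{1 true} {3 true} {4 true} {2 false}]
    }

Within each running state the order is now deterministic, which is what lets
the orchestrator avoid unnecessary migration of a stateful service's
low-numbered "master" slot during scale-down.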