From 87c07240764e3355de6baff4074ebc36a5ca8efc Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Sun, 20 Aug 2017 13:50:52 +1000 Subject: [PATCH 1/2] devicemapper: remove container rootfs mountPath after umount libdm currently has a fairly substantial DoS bug that makes certain operations fail on a libdm device if the device has active references through mountpoints. This is a significant problem with the advent of mount namespaces and MS_PRIVATE, and can cause certain --volume mounts to cause libdm to no longer be able to remove containers: % docker run -d --name testA busybox top % docker run -d --name testB -v /var/lib/docker:/docker busybox top % docker rm -f testA [fails on libdm with dm_task_run errors.] This also solves the problem of unprivileged users being able to DoS docker by using unprivileged mount namespaces to preseve mounts that Docker has dropped. Signed-off-by: Aleksa Sarai Upstream-commit: 92e45b81e0a8b68d9567a2068247460a1ba59600 Component: engine --- .../engine/daemon/graphdriver/devmapper/deviceset.go | 12 ++++++++++++ .../engine/daemon/graphdriver/devmapper/driver.go | 4 +++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/components/engine/daemon/graphdriver/devmapper/deviceset.go b/components/engine/daemon/graphdriver/devmapper/deviceset.go index 32b35c9acd..5f239a126c 100644 --- a/components/engine/daemon/graphdriver/devmapper/deviceset.go +++ b/components/engine/daemon/graphdriver/devmapper/deviceset.go @@ -2416,6 +2416,18 @@ func (devices *DeviceSet) UnmountDevice(hash, mountPath string) error { } logrus.Debug("devmapper: Unmount done") + // Remove the mountpoint here. Removing the mountpoint (in newer kernels) + // will cause all other instances of this mount in other mount namespaces + // to be killed (this is an anti-DoS measure that is necessary for things + // like devicemapper). This is necessary to avoid cases where a libdm mount + // that is present in another namespace will cause subsequent RemoveDevice + // operations to fail. We ignore any errors here because this may fail on + // older kernels which don't have + // torvalds/linux@8ed936b5671bfb33d89bc60bdcc7cf0470ba52fe applied. + if err := os.Remove(mountPath); err != nil { + logrus.Debugf("devmapper: error doing a remove on unmounted device %s: %v", mountPath, err) + } + return devices.deactivateDevice(info) } diff --git a/components/engine/daemon/graphdriver/devmapper/driver.go b/components/engine/daemon/graphdriver/devmapper/driver.go index 02ee0124f1..1efd5f44e0 100644 --- a/components/engine/daemon/graphdriver/devmapper/driver.go +++ b/components/engine/daemon/graphdriver/devmapper/driver.go @@ -228,10 +228,12 @@ func (d *Driver) Put(id string) error { if count := d.ctr.Decrement(mp); count > 0 { return nil } + err := d.DeviceSet.UnmountDevice(id, mp) if err != nil { - logrus.Errorf("devmapper: Error unmounting device %s: %s", id, err) + logrus.Errorf("devmapper: Error unmounting device %s: %v", id, err) } + return err } From 95cc30e089cff3194127625925d6211cf4aead73 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Wed, 8 Nov 2017 10:02:58 +1100 Subject: [PATCH 2/2] devmapper: add a test for mount leak workaround In order to avoid reverting our fix for mount leakage in devicemapper, add a test which checks that devicemapper's Get() and Put() cycle can survive having a command running in an rprivate mount propagation setup in-between. While this is quite rudimentary, it should be sufficient. We have to skip this test for pre-3.18 kernels. Signed-off-by: Aleksa Sarai Upstream-commit: 1af8ea681fba1935c60c11edbbe19b894c9b286f Component: engine --- .../graphdriver/devmapper/devmapper_test.go | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/components/engine/daemon/graphdriver/devmapper/devmapper_test.go b/components/engine/daemon/graphdriver/devmapper/devmapper_test.go index 7501397fdc..24e535869e 100644 --- a/components/engine/daemon/graphdriver/devmapper/devmapper_test.go +++ b/components/engine/daemon/graphdriver/devmapper/devmapper_test.go @@ -5,12 +5,15 @@ package devmapper import ( "fmt" "os" + "os/exec" "syscall" "testing" "time" "github.com/docker/docker/daemon/graphdriver" "github.com/docker/docker/daemon/graphdriver/graphtest" + "github.com/docker/docker/pkg/parsers/kernel" + "golang.org/x/sys/unix" ) func init() { @@ -150,3 +153,53 @@ func TestDevmapperLockReleasedDeviceDeletion(t *testing.T) { case <-doneChan: } } + +// Ensure that mounts aren't leakedriver. It's non-trivial for us to test the full +// reproducer of #34573 in a unit test, but we can at least make sure that a +// simple command run in a new namespace doesn't break things horribly. +func TestDevmapperMountLeaks(t *testing.T) { + if !kernel.CheckKernelVersion(3, 18, 0) { + t.Skipf("kernel version <3.18.0 and so is missing torvalds/linux@8ed936b5671bfb33d89bc60bdcc7cf0470ba52fe.") + } + + driver := graphtest.GetDriver(t, "devicemapper", "dm.use_deferred_removal=false", "dm.use_deferred_deletion=false").(*graphtest.Driver).Driver.(*graphdriver.NaiveDiffDriver).ProtoDriver.(*Driver) + defer graphtest.PutDriver(t) + + // We need to create a new (dummy) device. + if err := driver.Create("some-layer", "", nil); err != nil { + t.Fatalf("setting up some-layer: %v", err) + } + + // Mount the device. + _, err := driver.Get("some-layer", "") + if err != nil { + t.Fatalf("mounting some-layer: %v", err) + } + + // Create a new subprocess which will inherit our mountpoint, then + // intentionally leak it and stick around. We can't do this entirely within + // Go because forking and namespaces in Go are really not handled well at + // all. + cmd := exec.Cmd{ + Path: "/bin/sh", + Args: []string{ + "/bin/sh", "-c", + "mount --make-rprivate / && sleep 1000s", + }, + SysProcAttr: &syscall.SysProcAttr{ + Unshareflags: syscall.CLONE_NEWNS, + }, + } + if err := cmd.Start(); err != nil { + t.Fatalf("starting sub-command: %v", err) + } + defer func() { + unix.Kill(cmd.Process.Pid, unix.SIGKILL) + cmd.Wait() + }() + + // Now try to "drop" the device. + if err := driver.Put("some-layer"); err != nil { + t.Fatalf("unmounting some-layer: %v", err) + } +}