diff --git a/components/engine/daemon/execdriver/lxc/driver.go b/components/engine/daemon/execdriver/lxc/driver.go index 6ee7f3c1dd..92a79ff5a5 100644 --- a/components/engine/daemon/execdriver/lxc/driver.go +++ b/components/engine/daemon/execdriver/lxc/driver.go @@ -2,11 +2,6 @@ package lxc import ( "fmt" - "github.com/dotcloud/docker/daemon/execdriver" - "github.com/dotcloud/docker/pkg/cgroups" - "github.com/dotcloud/docker/pkg/label" - "github.com/dotcloud/docker/pkg/system" - "github.com/dotcloud/docker/utils" "io/ioutil" "log" "os" @@ -17,6 +12,13 @@ import ( "strings" "syscall" "time" + + "github.com/dotcloud/docker/daemon/execdriver" + "github.com/dotcloud/docker/pkg/cgroups" + "github.com/dotcloud/docker/pkg/label" + "github.com/dotcloud/docker/pkg/libcontainer/security/restrict" + "github.com/dotcloud/docker/pkg/system" + "github.com/dotcloud/docker/utils" ) const DriverName = "lxc" @@ -26,27 +28,26 @@ func init() { if err := setupEnv(args); err != nil { return err } - if err := setupHostname(args); err != nil { return err } - if err := setupNetworking(args); err != nil { return err } - + if !args.Privileged { + if err := restrict.Restrict(); err != nil { + return err + } + } if err := setupCapabilities(args); err != nil { return err } - if err := setupWorkingDirectory(args); err != nil { return err } - if err := system.CloseFdsFrom(3); err != nil { return err } - if err := changeUser(args); err != nil { return err } @@ -64,10 +65,9 @@ func init() { } type driver struct { - root string // root path for the driver to use - apparmor bool - sharedRoot bool - restrictionPath string + root string // root path for the driver to use + apparmor bool + sharedRoot bool } func NewDriver(root string, apparmor bool) (*driver, error) { @@ -75,15 +75,10 @@ func NewDriver(root string, apparmor bool) (*driver, error) { if err := linkLxcStart(root); err != nil { return nil, err } - restrictionPath := filepath.Join(root, "empty") - if err := os.MkdirAll(restrictionPath, 0700); err != nil { - return nil, err - } return &driver{ - apparmor: apparmor, - root: root, - sharedRoot: rootIsShared(), - restrictionPath: restrictionPath, + apparmor: apparmor, + root: root, + sharedRoot: rootIsShared(), }, nil } @@ -414,16 +409,14 @@ func (d *driver) generateLXCConfig(c *execdriver.Command) (string, error) { if err := LxcTemplateCompiled.Execute(fo, struct { *execdriver.Command - AppArmor bool - ProcessLabel string - MountLabel string - RestrictionSource string + AppArmor bool + ProcessLabel string + MountLabel string }{ - Command: c, - AppArmor: d.apparmor, - ProcessLabel: process, - MountLabel: mount, - RestrictionSource: d.restrictionPath, + Command: c, + AppArmor: d.apparmor, + ProcessLabel: process, + MountLabel: mount, }); err != nil { return "", err } diff --git a/components/engine/daemon/execdriver/lxc/lxc_template.go b/components/engine/daemon/execdriver/lxc/lxc_template.go index bc94e7a19d..19fa43c4c2 100644 --- a/components/engine/daemon/execdriver/lxc/lxc_template.go +++ b/components/engine/daemon/execdriver/lxc/lxc_template.go @@ -1,10 +1,11 @@ package lxc import ( - "github.com/dotcloud/docker/daemon/execdriver" - "github.com/dotcloud/docker/pkg/label" "strings" "text/template" + + "github.com/dotcloud/docker/daemon/execdriver" + "github.com/dotcloud/docker/pkg/label" ) const LxcTemplate = ` @@ -82,15 +83,12 @@ lxc.pivotdir = lxc_putold # NOTICE: These mounts must be applied within the namespace -# WARNING: procfs is a known attack vector and should probably be disabled -# if your userspace allows it. eg. see http://blog.zx2c4.com/749 +# WARNING: mounting procfs and/or sysfs read-write is a known attack vector. +# See e.g. http://blog.zx2c4.com/749 and http://bit.ly/T9CkqJ +# We mount them read-write here, but later, dockerinit will call the Restrict() function to remount them read-only. +# We cannot mount them directly read-only, because that would prevent loading AppArmor profiles. lxc.mount.entry = proc {{escapeFstabSpaces $ROOTFS}}/proc proc nosuid,nodev,noexec 0 0 - -# WARNING: sysfs is a known attack vector and should probably be disabled -# if your userspace allows it. eg. see http://bit.ly/T9CkqJ -{{if .Privileged}} lxc.mount.entry = sysfs {{escapeFstabSpaces $ROOTFS}}/sys sysfs nosuid,nodev,noexec 0 0 -{{end}} {{if .Tty}} lxc.mount.entry = {{.Console}} {{escapeFstabSpaces $ROOTFS}}/dev/console none bind,rw 0 0 @@ -111,15 +109,8 @@ lxc.mount.entry = {{$value.Source}} {{escapeFstabSpaces $ROOTFS}}/{{escapeFstabS {{if .AppArmor}} lxc.aa_profile = unconfined {{else}} -# not unconfined +# Let AppArmor normal confinement take place (i.e., not unconfined) {{end}} -{{else}} -# restrict access to proc -lxc.mount.entry = {{.RestrictionSource}} {{escapeFstabSpaces $ROOTFS}}/proc/sys none bind,ro 0 0 -lxc.mount.entry = {{.RestrictionSource}} {{escapeFstabSpaces $ROOTFS}}/proc/irq none bind,ro 0 0 -lxc.mount.entry = {{.RestrictionSource}} {{escapeFstabSpaces $ROOTFS}}/proc/acpi none bind,ro 0 0 -lxc.mount.entry = {{escapeFstabSpaces $ROOTFS}}/dev/null {{escapeFstabSpaces $ROOTFS}}/proc/sysrq-trigger none bind,ro 0 0 -lxc.mount.entry = {{escapeFstabSpaces $ROOTFS}}/dev/null {{escapeFstabSpaces $ROOTFS}}/proc/kcore none bind,ro 0 0 {{end}} # limits diff --git a/components/engine/daemon/execdriver/native/create.go b/components/engine/daemon/execdriver/native/create.go index 00e6fc4b26..5562d08986 100644 --- a/components/engine/daemon/execdriver/native/create.go +++ b/components/engine/daemon/execdriver/native/create.go @@ -24,7 +24,7 @@ func (d *driver) createContainer(c *execdriver.Command) (*libcontainer.Container container.Cgroups.Name = c.ID // check to see if we are running in ramdisk to disable pivot root container.NoPivotRoot = os.Getenv("DOCKER_RAMDISK") != "" - container.Context["restriction_path"] = d.restrictionPath + container.Context["restrictions"] = "true" if err := d.createNetwork(container, c); err != nil { return nil, err @@ -84,9 +84,7 @@ func (d *driver) setPrivileged(container *libcontainer.Container) error { } container.Cgroups.DeviceAccess = true - // add sysfs as a mount for privileged containers - container.Mounts = append(container.Mounts, libcontainer.Mount{Type: "sysfs"}) - delete(container.Context, "restriction_path") + delete(container.Context, "restrictions") if apparmor.IsEnabled() { container.Context["apparmor_profile"] = "unconfined" diff --git a/components/engine/daemon/execdriver/native/driver.go b/components/engine/daemon/execdriver/native/driver.go index a397387f11..e674d57333 100644 --- a/components/engine/daemon/execdriver/native/driver.go +++ b/components/engine/daemon/execdriver/native/driver.go @@ -57,7 +57,6 @@ type driver struct { root string initPath string activeContainers map[string]*exec.Cmd - restrictionPath string } func NewDriver(root, initPath string) (*driver, error) { @@ -68,14 +67,8 @@ func NewDriver(root, initPath string) (*driver, error) { if err := apparmor.InstallDefaultProfile(filepath.Join(root, "../..", BackupApparmorProfilePath)); err != nil { return nil, err } - restrictionPath := filepath.Join(root, "empty") - if err := os.MkdirAll(restrictionPath, 0700); err != nil { - return nil, err - } - return &driver{ root: root, - restrictionPath: restrictionPath, initPath: initPath, activeContainers: make(map[string]*exec.Cmd), }, nil diff --git a/components/engine/integration-cli/docker_cli_run_test.go b/components/engine/integration-cli/docker_cli_run_test.go index 83867267ae..b9737feeea 100644 --- a/components/engine/integration-cli/docker_cli_run_test.go +++ b/components/engine/integration-cli/docker_cli_run_test.go @@ -725,24 +725,46 @@ func TestUnPrivilegedCannotMount(t *testing.T) { logDone("run - test un-privileged cannot mount") } -func TestSysNotAvaliableInNonPrivilegedContainers(t *testing.T) { - cmd := exec.Command(dockerBinary, "run", "busybox", "ls", "/sys/kernel") +func TestSysNotWritableInNonPrivilegedContainers(t *testing.T) { + cmd := exec.Command(dockerBinary, "run", "busybox", "touch", "/sys/kernel/profiling") if code, err := runCommand(cmd); err == nil || code == 0 { - t.Fatal("sys should not be available in a non privileged container") + t.Fatal("sys should not be writable in a non privileged container") } deleteAllContainers() - logDone("run - sys not avaliable in non privileged container") + logDone("run - sys not writable in non privileged container") } -func TestSysAvaliableInPrivilegedContainers(t *testing.T) { - cmd := exec.Command(dockerBinary, "run", "--privileged", "busybox", "ls", "/sys/kernel") +func TestSysWritableInPrivilegedContainers(t *testing.T) { + cmd := exec.Command(dockerBinary, "run", "--privileged", "busybox", "touch", "/sys/kernel/profiling") if code, err := runCommand(cmd); err != nil || code != 0 { - t.Fatalf("sys should be available in privileged container") + t.Fatalf("sys should be writable in privileged container") } deleteAllContainers() - logDone("run - sys avaliable in privileged container") + logDone("run - sys writable in privileged container") +} + +func TestProcNotWritableInNonPrivilegedContainers(t *testing.T) { + cmd := exec.Command(dockerBinary, "run", "busybox", "touch", "/proc/sysrq-trigger") + if code, err := runCommand(cmd); err == nil || code == 0 { + t.Fatal("proc should not be writable in a non privileged container") + } + + deleteAllContainers() + + logDone("run - proc not writable in non privileged container") +} + +func TestProcWritableInPrivilegedContainers(t *testing.T) { + cmd := exec.Command(dockerBinary, "run", "--privileged", "busybox", "touch", "/proc/sysrq-trigger") + if code, err := runCommand(cmd); err != nil || code != 0 { + t.Fatalf("proc should be writable in privileged container") + } + + deleteAllContainers() + + logDone("run - proc writable in privileged container") } diff --git a/components/engine/pkg/apparmor/apparmor.go b/components/engine/pkg/apparmor/apparmor.go index 6fdb1f8958..704ee29ed0 100644 --- a/components/engine/pkg/apparmor/apparmor.go +++ b/components/engine/pkg/apparmor/apparmor.go @@ -20,7 +20,7 @@ func IsEnabled() bool { return false } -func ApplyProfile(pid int, name string) error { +func ApplyProfile(name string) error { if name == "" { return nil } diff --git a/components/engine/pkg/apparmor/apparmor_disabled.go b/components/engine/pkg/apparmor/apparmor_disabled.go index 77543e4a87..8d86ce9d4a 100644 --- a/components/engine/pkg/apparmor/apparmor_disabled.go +++ b/components/engine/pkg/apparmor/apparmor_disabled.go @@ -2,12 +2,10 @@ package apparmor -import () - func IsEnabled() bool { return false } -func ApplyProfile(pid int, name string) error { +func ApplyProfile(name string) error { return nil } diff --git a/components/engine/pkg/libcontainer/console/console.go b/components/engine/pkg/libcontainer/console/console.go index 05cd08a92e..5f06aea225 100644 --- a/components/engine/pkg/libcontainer/console/console.go +++ b/components/engine/pkg/libcontainer/console/console.go @@ -4,11 +4,12 @@ package console import ( "fmt" - "github.com/dotcloud/docker/pkg/label" - "github.com/dotcloud/docker/pkg/system" "os" "path/filepath" "syscall" + + "github.com/dotcloud/docker/pkg/label" + "github.com/dotcloud/docker/pkg/system" ) // Setup initializes the proper /dev/console inside the rootfs path diff --git a/components/engine/pkg/libcontainer/mount/init.go b/components/engine/pkg/libcontainer/mount/init.go index 735970cded..6a54f2444e 100644 --- a/components/engine/pkg/libcontainer/mount/init.go +++ b/components/engine/pkg/libcontainer/mount/init.go @@ -11,7 +11,6 @@ import ( "github.com/dotcloud/docker/pkg/label" "github.com/dotcloud/docker/pkg/libcontainer" "github.com/dotcloud/docker/pkg/libcontainer/mount/nodes" - "github.com/dotcloud/docker/pkg/libcontainer/security/restrict" "github.com/dotcloud/docker/pkg/system" ) @@ -51,11 +50,6 @@ func InitializeMountNamespace(rootfs, console string, container *libcontainer.Co if err := nodes.CopyN(rootfs, nodes.DefaultNodes); err != nil { return fmt.Errorf("copy dev nodes %s", err) } - if restrictionPath := container.Context["restriction_path"]; restrictionPath != "" { - if err := restrict.Restrict(rootfs, restrictionPath); err != nil { - return fmt.Errorf("restrict %s", err) - } - } if err := SetupPtmx(rootfs, console, container.Context["mount_label"]); err != nil { return err } @@ -124,22 +118,17 @@ func setupBindmounts(rootfs string, bindMounts libcontainer.Mounts) error { } // TODO: this is crappy right now and should be cleaned up with a better way of handling system and -// standard bind mounts allowing them to be more dymanic +// standard bind mounts allowing them to be more dynamic func newSystemMounts(rootfs, mountLabel string, mounts libcontainer.Mounts) []mount { systemMounts := []mount{ {source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags}, + {source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags}, + {source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)}, + {source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)}, } if len(mounts.OfType("devtmpfs")) == 1 { systemMounts = append(systemMounts, mount{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)}) } - systemMounts = append(systemMounts, - mount{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)}, - mount{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)}, - ) - - if len(mounts.OfType("sysfs")) == 1 { - systemMounts = append(systemMounts, mount{source: "sysfs", path: filepath.Join(rootfs, "sys"), device: "sysfs", flags: defaultMountFlags}) - } return systemMounts } diff --git a/components/engine/pkg/libcontainer/nsinit/init.go b/components/engine/pkg/libcontainer/nsinit/init.go index faec12af32..22345f603f 100644 --- a/components/engine/pkg/libcontainer/nsinit/init.go +++ b/components/engine/pkg/libcontainer/nsinit/init.go @@ -16,6 +16,7 @@ import ( "github.com/dotcloud/docker/pkg/libcontainer/mount" "github.com/dotcloud/docker/pkg/libcontainer/network" "github.com/dotcloud/docker/pkg/libcontainer/security/capabilities" + "github.com/dotcloud/docker/pkg/libcontainer/security/restrict" "github.com/dotcloud/docker/pkg/libcontainer/utils" "github.com/dotcloud/docker/pkg/system" "github.com/dotcloud/docker/pkg/user" @@ -68,18 +69,23 @@ func Init(container *libcontainer.Container, uncleanRootfs, consolePath string, if err := system.Sethostname(container.Hostname); err != nil { return fmt.Errorf("sethostname %s", err) } - if err := FinalizeNamespace(container); err != nil { - return fmt.Errorf("finalize namespace %s", err) - } runtime.LockOSThread() - if err := apparmor.ApplyProfile(os.Getpid(), container.Context["apparmor_profile"]); err != nil { - return err + if err := apparmor.ApplyProfile(container.Context["apparmor_profile"]); err != nil { + return fmt.Errorf("set apparmor profile %s: %s", container.Context["apparmor_profile"], err) } if err := label.SetProcessLabel(container.Context["process_label"]); err != nil { return fmt.Errorf("set process label %s", err) } + if container.Context["restrictions"] != "" { + if err := restrict.Restrict(); err != nil { + return err + } + } + if err := FinalizeNamespace(container); err != nil { + return fmt.Errorf("finalize namespace %s", err) + } return system.Execv(args[0], args[0:], container.Env) } diff --git a/components/engine/pkg/libcontainer/security/restrict/restrict.go b/components/engine/pkg/libcontainer/security/restrict/restrict.go index 291d6ca5dc..cfff09f512 100644 --- a/components/engine/pkg/libcontainer/security/restrict/restrict.go +++ b/components/engine/pkg/libcontainer/security/restrict/restrict.go @@ -1,51 +1,25 @@ +// +build linux + package restrict import ( "fmt" - "os" - "path/filepath" "syscall" "github.com/dotcloud/docker/pkg/system" ) -const flags = syscall.MS_BIND | syscall.MS_REC | syscall.MS_RDONLY - -var restrictions = map[string]string{ - // dirs - "/proc/sys": "", - "/proc/irq": "", - "/proc/acpi": "", - - // files - "/proc/sysrq-trigger": "/dev/null", - "/proc/kcore": "/dev/null", -} - -// Restrict locks down access to many areas of proc -// by using the asumption that the user does not have mount caps to -// revert the changes made here -func Restrict(rootfs, empty string) error { - for dest, source := range restrictions { - dest = filepath.Join(rootfs, dest) - - // we don't have a "/dev/null" for dirs so have the requester pass a dir - // for us to bind mount - switch source { - case "": - source = empty - default: - source = filepath.Join(rootfs, source) - } - if err := system.Mount(source, dest, "bind", flags, ""); err != nil { - if os.IsNotExist(err) { - continue - } - return fmt.Errorf("unable to mount %s over %s %s", source, dest, err) - } - if err := system.Mount("", dest, "bind", flags|syscall.MS_REMOUNT, ""); err != nil { - return fmt.Errorf("unable to mount %s over %s %s", source, dest, err) +// This has to be called while the container still has CAP_SYS_ADMIN (to be able to perform mounts). +// However, afterwards, CAP_SYS_ADMIN should be dropped (otherwise the user will be able to revert those changes). +func Restrict() error { + // remount proc and sys as readonly + for _, dest := range []string{"proc", "sys"} { + if err := system.Mount("", dest, "", syscall.MS_REMOUNT|syscall.MS_RDONLY, ""); err != nil { + return fmt.Errorf("unable to remount %s readonly: %s", dest, err) } } + if err := system.Mount("/dev/null", "/proc/kcore", "", syscall.MS_BIND, ""); err != nil { + return fmt.Errorf("unable to bind-mount /dev/null over /proc/kcore") + } return nil } diff --git a/components/engine/pkg/libcontainer/security/restrict/unsupported.go b/components/engine/pkg/libcontainer/security/restrict/unsupported.go new file mode 100644 index 0000000000..464e8d498d --- /dev/null +++ b/components/engine/pkg/libcontainer/security/restrict/unsupported.go @@ -0,0 +1,9 @@ +// +build !linux + +package restrict + +import "fmt" + +func Restrict() error { + return fmt.Errorf("not supported") +}