Files
docker-cli/components/engine/daemon/execdriver/native/create.go
Eric Windisch 9f8e7b5fed Introduce a dedicated unconfined AA policy
By using the 'unconfined' policy for privileged
containers, we have inherited the host's apparmor
policies, which really make no sense in the
context of the container's filesystem.

For instance, policies written against
the paths of binaries such as '/usr/sbin/tcpdump'
can be easily circumvented by moving the binary
within the container filesystem.

Fixes GH#5490

Signed-off-by: Eric Windisch <eric@windisch.us>
Upstream-commit: 87376c3add7dcd48830060652554e7ae43d11881
Component: engine
2015-07-22 11:28:32 -04:00

267 lines
6.2 KiB
Go

// +build linux,cgo
package native
import (
"errors"
"fmt"
"net"
"strings"
"syscall"
"github.com/docker/docker/daemon/execdriver"
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices"
"github.com/opencontainers/runc/libcontainer/utils"
)
// createContainer builds the libcontainer configuration for the command c.
//
// Starting from execdriver.InitContainer(c), it applies, in order: the IPC,
// PID, UTS and network namespace settings; either the privileged adjustments
// or the capability tweaks; additional groups; an explicit AppArmor profile
// from c (which replaces any profile chosen earlier); cgroups; read-only
// rootfs mount flags; user mounts; labels; and rlimits.
//
// It returns the finished configuration, or nil together with the error of
// the first step that failed.
func (d *driver) createContainer(c *execdriver.Command) (*configs.Config, error) {
	container := execdriver.InitContainer(c)

	// Namespace configuration; the order of the steps is preserved as-is.
	namespaceSteps := []func(*configs.Config, *execdriver.Command) error{
		d.createIpc,
		d.createPid,
		d.createUTS,
		d.createNetwork,
	}
	for _, step := range namespaceSteps {
		if err := step(container, c); err != nil {
			return nil, err
		}
	}

	if !c.ProcessConfig.Privileged {
		if err := d.setCapabilities(container, c); err != nil {
			return nil, err
		}
	} else {
		keepReadonly := container.Readonlyfs
		for _, m := range container.Mounts {
			// /sys only becomes writable when the rootfs is not read-only.
			if !keepReadonly && m.Destination == "/sys" {
				m.Flags &^= syscall.MS_RDONLY
			}
			// cgroup mounts always lose the read-only flag when privileged.
			if m.Device == "cgroup" {
				m.Flags &^= syscall.MS_RDONLY
			}
		}
		if !keepReadonly {
			container.ReadonlyPaths = nil
		}
		container.MaskPaths = nil

		if err := d.setPrivileged(container); err != nil {
			return nil, err
		}
	}

	container.AdditionalGroups = c.GroupAdd

	// A profile given on the command overrides whatever was set above.
	if profile := c.AppArmorProfile; profile != "" {
		container.AppArmorProfile = profile
	}

	if err := execdriver.SetupCgroups(container, c); err != nil {
		return nil, err
	}

	if container.Readonlyfs {
		for _, m := range container.Mounts {
			// These three destinations are not flagged here; /proc and /dev
			// are added to ReadonlyPaths below instead.
			if m.Destination == "/proc" || m.Destination == "/dev" || m.Destination == "/dev/pts" {
				continue
			}
			m.Flags |= syscall.MS_RDONLY
		}

		// These paths must be remounted as r/o.
		container.ReadonlyPaths = append(container.ReadonlyPaths, "/proc", "/dev")
	}

	if err := d.setupMounts(container, c); err != nil {
		return nil, err
	}

	d.setupLabels(container, c)
	d.setupRlimits(container, c)

	return container, nil
}
// generateIfaceName returns a random interface name with the "veth" prefix
// for which net.InterfaceByName reports a "no such ..." error, i.e. a name
// that does not appear to be in use.
//
// Up to ten candidates are tried. A candidate is skipped when name generation
// fails or when an interface with that name already exists. Any lookup error
// that does not contain "no such" is returned immediately. If every attempt
// is used up, a generic error is returned.
func generateIfaceName() (string, error) {
	const maxAttempts = 10

	for attempt := 0; attempt < maxAttempts; attempt++ {
		candidate, genErr := utils.GenerateRandomName("veth", 7)
		if genErr != nil {
			// Generation failures are not reported; just try again.
			continue
		}

		_, lookupErr := net.InterfaceByName(candidate)
		switch {
		case lookupErr == nil:
			// An interface with this name exists; pick another candidate.
		case strings.Contains(lookupErr.Error(), "no such"):
			return candidate, nil
		default:
			return "", lookupErr
		}
	}

	return "", errors.New("Failed to find name for new interface")
}
// createNetwork configures the network namespace entry of container.
//
//   - If c.Network is nil, nothing is changed.
//   - If c.Network.ContainerID is set, the NEWNET namespace path recorded in
//     that active container's state is added to the configuration; an error is
//     returned when the container is not in d.activeContainers or its state
//     cannot be read.
//   - Otherwise c.Network.NamespacePath is added, and must be non-empty.
func (d *driver) createNetwork(container *configs.Config, c *execdriver.Command) error {
	netCfg := c.Network
	if netCfg == nil {
		return nil
	}

	// No container to join: use the explicitly supplied namespace path.
	if netCfg.ContainerID == "" {
		if netCfg.NamespacePath == "" {
			return fmt.Errorf("network namespace path is empty")
		}
		container.Namespaces.Add(configs.NEWNET, netCfg.NamespacePath)
		return nil
	}

	// The lock is held only for the map lookup.
	d.Lock()
	peer := d.activeContainers[netCfg.ContainerID]
	d.Unlock()

	if peer == nil {
		return fmt.Errorf("%s is not a valid running container to join", netCfg.ContainerID)
	}

	peerState, err := peer.State()
	if err != nil {
		return err
	}

	container.Namespaces.Add(configs.NEWNET, peerState.NamespacePaths[configs.NEWNET])
	return nil
}
// createIpc configures the IPC namespace entry of container.
//
// When c.Ipc.HostIpc is set, the NEWIPC namespace is removed from the
// configuration. Otherwise, when c.Ipc.ContainerID is set, the NEWIPC
// namespace path recorded in that active container's state is added; an error
// is returned if the container is not in d.activeContainers or its state
// cannot be read. With neither set, the configuration is left unchanged.
func (d *driver) createIpc(container *configs.Config, c *execdriver.Command) error {
	switch {
	case c.Ipc.HostIpc:
		container.Namespaces.Remove(configs.NEWIPC)

	case c.Ipc.ContainerID != "":
		// The lock is held only for the map lookup.
		d.Lock()
		peer := d.activeContainers[c.Ipc.ContainerID]
		d.Unlock()

		if peer == nil {
			return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID)
		}

		peerState, err := peer.State()
		if err != nil {
			return err
		}
		container.Namespaces.Add(configs.NEWIPC, peerState.NamespacePaths[configs.NEWIPC])
	}

	return nil
}
// createPid removes the NEWPID namespace from container when c.Pid.HostPid
// is set, and otherwise leaves the configuration untouched. It always
// returns nil.
func (d *driver) createPid(container *configs.Config, c *execdriver.Command) error {
	if useHost := c.Pid.HostPid; useHost {
		container.Namespaces.Remove(configs.NEWPID)
	}
	return nil
}
// createUTS removes the NEWUTS namespace from container and clears its
// Hostname when c.UTS.HostUTS is set; otherwise the configuration is left
// untouched. It always returns nil.
func (d *driver) createUTS(container *configs.Config, c *execdriver.Command) error {
	if useHost := c.UTS.HostUTS; useHost {
		container.Hostname = ""
		container.Namespaces.Remove(configs.NEWUTS)
	}
	return nil
}
// setPrivileged applies the privileged-container settings to container:
// every capability from execdriver.GetAllCapabilities, access to all devices
// in the cgroup, the device list from devices.HostDevices, and, when AppArmor
// is enabled, the "docker-unconfined" profile.
//
// If the host devices cannot be listed, that error is returned. The
// capabilities and AllowAllDevices have already been set at that point, while
// Devices and AppArmorProfile are left as they were.
func (d *driver) setPrivileged(container *configs.Config) (err error) {
	container.Capabilities = execdriver.GetAllCapabilities()
	container.Cgroups.AllowAllDevices = true

	allDevices, lookupErr := devices.HostDevices()
	if lookupErr != nil {
		return lookupErr
	}
	container.Devices = allDevices

	if !apparmor.IsEnabled() {
		return nil
	}
	container.AppArmorProfile = "docker-unconfined"
	return nil
}
// setCapabilities replaces container.Capabilities with the result of
// execdriver.TweakCapabilities applied to the current capabilities and the
// command's CapAdd / CapDrop lists. The result is stored even when
// TweakCapabilities reports an error, and that error is returned.
func (d *driver) setCapabilities(container *configs.Config, c *execdriver.Command) (err error) {
	tweaked, tweakErr := execdriver.TweakCapabilities(container.Capabilities, c.CapAdd, c.CapDrop)
	container.Capabilities = tweaked
	return tweakErr
}
// setupRlimits appends one configs.Rlimit to container.Rlimits for each entry
// in c.Resources.Rlimits, copying its Type, Hard and Soft fields. It does
// nothing when c.Resources is nil.
func (d *driver) setupRlimits(container *configs.Config, c *execdriver.Command) {
	resources := c.Resources
	if resources == nil {
		return
	}

	for _, src := range resources.Rlimits {
		limit := configs.Rlimit{
			Type: src.Type,
			Hard: src.Hard,
			Soft: src.Soft,
		}
		container.Rlimits = append(container.Rlimits, limit)
	}
}
// setupMounts merges the user-supplied mounts from c into container.Mounts.
//
// Mounts already in the configuration are dropped when a user mount has the
// same destination. When the user mounts "/dev", every existing mount whose
// destination lies under "/dev/" is dropped as well. Each user mount is then
// appended as a recursive bind mount, read-only unless it is marked Writable
// and with MS_SLAVE set when it is marked Slave.
//
// It always returns nil.
func (d *driver) setupMounts(container *configs.Config, c *execdriver.Command) error {
	// Set of destinations claimed by user mounts.
	claimed := make(map[string]struct{})
	for _, um := range c.Mounts {
		claimed[um.Destination] = struct{}{}
	}
	_, userOwnsDev := claimed["/dev"]

	// Filter out mounts that are overridden by user supplied mounts.
	var kept []*configs.Mount
	for _, existing := range container.Mounts {
		if _, shadowed := claimed[existing.Destination]; shadowed {
			continue
		}
		if userOwnsDev && strings.HasPrefix(existing.Destination, "/dev/") {
			continue
		}
		kept = append(kept, existing)
	}
	container.Mounts = kept

	for _, um := range c.Mounts {
		bind := &configs.Mount{
			Source:      um.Source,
			Destination: um.Destination,
			Device:      "bind",
			Flags:       syscall.MS_BIND | syscall.MS_REC,
		}
		if !um.Writable {
			bind.Flags |= syscall.MS_RDONLY
		}
		if um.Slave {
			bind.Flags |= syscall.MS_SLAVE
		}
		container.Mounts = append(container.Mounts, bind)
	}

	return nil
}
// setupLabels copies the process and mount labels from the command c onto
// the container configuration.
func (d *driver) setupLabels(container *configs.Config, c *execdriver.Command) {
	container.ProcessLabel, container.MountLabel = c.ProcessLabel, c.MountLabel
}