Files
docker-cli/components/engine/daemon/execdriver/native/create.go
Vivek Goyal a9fa7f4c75 Check Propagation properties of source mount point
Whether a shared/slave volume propagation will work or not also depends on
where source directory is mounted on and what are the propagation properties
of that mount point. For example, for shared volume mount to work, source
mount point should be shared. For slave volume mount to work, source mount
point should be either shared/slave.

This patch determines the mount point on which directory is mounted and
checks for desired minimum propagation properties of that mount point. It
errors out of configuration does not seem right.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Upstream-commit: d4b4ce2588d02acd3602d42b788c6b36ab9b01e5
Component: engine
2015-12-14 10:39:53 -05:00

508 lines
13 KiB
Go

// +build linux,cgo
package native
import (
"fmt"
"path/filepath"
"strings"
"syscall"
"github.com/docker/docker/daemon/execdriver"
derr "github.com/docker/docker/errors"
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/volume"
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices"
)
// createContainer populates and configures the container type with the
// data provided by the execdriver.Command
func (d *Driver) createContainer(c *execdriver.Command, hooks execdriver.Hooks) (container *configs.Config, err error) {
container = execdriver.InitContainer(c)
if err := d.createIpc(container, c); err != nil {
return nil, err
}
if err := d.createPid(container, c); err != nil {
return nil, err
}
if err := d.createUTS(container, c); err != nil {
return nil, err
}
if err := d.setupRemappedRoot(container, c); err != nil {
return nil, err
}
if err := d.createNetwork(container, c, hooks); err != nil {
return nil, err
}
if c.ProcessConfig.Privileged {
if !container.Readonlyfs {
// clear readonly for /sys
for i := range container.Mounts {
if container.Mounts[i].Destination == "/sys" {
container.Mounts[i].Flags &= ^syscall.MS_RDONLY
}
}
container.ReadonlyPaths = nil
}
// clear readonly for cgroup
for i := range container.Mounts {
if container.Mounts[i].Device == "cgroup" {
container.Mounts[i].Flags &= ^syscall.MS_RDONLY
}
}
container.MaskPaths = nil
if err := d.setPrivileged(container); err != nil {
return nil, err
}
} else {
if err := d.setCapabilities(container, c); err != nil {
return nil, err
}
}
// add CAP_ prefix to all caps for new libcontainer update to match
// the spec format.
for i, s := range container.Capabilities {
if !strings.HasPrefix(s, "CAP_") {
container.Capabilities[i] = fmt.Sprintf("CAP_%s", s)
}
}
container.AdditionalGroups = c.GroupAdd
if c.AppArmorProfile != "" {
container.AppArmorProfile = c.AppArmorProfile
}
if c.SeccompProfile != "" {
container.Seccomp, err = loadSeccompProfile(c.SeccompProfile)
if err != nil {
return nil, err
}
}
if err := execdriver.SetupCgroups(container, c); err != nil {
return nil, err
}
container.OomScoreAdj = c.OomScoreAdj
if container.Readonlyfs {
for i := range container.Mounts {
switch container.Mounts[i].Destination {
case "/proc", "/dev", "/dev/pts":
continue
}
container.Mounts[i].Flags |= syscall.MS_RDONLY
}
/* These paths must be remounted as r/o */
container.ReadonlyPaths = append(container.ReadonlyPaths, "/dev")
}
if err := d.setupMounts(container, c); err != nil {
return nil, err
}
d.setupLabels(container, c)
d.setupRlimits(container, c)
return container, nil
}
func (d *Driver) createNetwork(container *configs.Config, c *execdriver.Command, hooks execdriver.Hooks) error {
if c.Network == nil {
return nil
}
if c.Network.ContainerID != "" {
d.Lock()
active := d.activeContainers[c.Network.ContainerID]
d.Unlock()
if active == nil {
return fmt.Errorf("%s is not a valid running container to join", c.Network.ContainerID)
}
state, err := active.State()
if err != nil {
return err
}
container.Namespaces.Add(configs.NEWNET, state.NamespacePaths[configs.NEWNET])
return nil
}
if c.Network.NamespacePath != "" {
container.Namespaces.Add(configs.NEWNET, c.Network.NamespacePath)
return nil
}
// only set up prestart hook if the namespace path is not set (this should be
// all cases *except* for --net=host shared networking)
container.Hooks = &configs.Hooks{
Prestart: []configs.Hook{
configs.NewFunctionHook(func(s configs.HookState) error {
if len(hooks.PreStart) > 0 {
for _, fnHook := range hooks.PreStart {
// A closed channel for OOM is returned here as it will be
// non-blocking and return the correct result when read.
chOOM := make(chan struct{})
close(chOOM)
if err := fnHook(&c.ProcessConfig, s.Pid, chOOM); err != nil {
return err
}
}
}
return nil
}),
},
}
return nil
}
func (d *Driver) createIpc(container *configs.Config, c *execdriver.Command) error {
if c.Ipc.HostIpc {
container.Namespaces.Remove(configs.NEWIPC)
return nil
}
if c.Ipc.ContainerID != "" {
d.Lock()
active := d.activeContainers[c.Ipc.ContainerID]
d.Unlock()
if active == nil {
return fmt.Errorf("%s is not a valid running container to join", c.Ipc.ContainerID)
}
state, err := active.State()
if err != nil {
return err
}
container.Namespaces.Add(configs.NEWIPC, state.NamespacePaths[configs.NEWIPC])
}
return nil
}
func (d *Driver) createPid(container *configs.Config, c *execdriver.Command) error {
if c.Pid.HostPid {
container.Namespaces.Remove(configs.NEWPID)
return nil
}
return nil
}
func (d *Driver) createUTS(container *configs.Config, c *execdriver.Command) error {
if c.UTS.HostUTS {
container.Namespaces.Remove(configs.NEWUTS)
container.Hostname = ""
return nil
}
return nil
}
func (d *Driver) setupRemappedRoot(container *configs.Config, c *execdriver.Command) error {
if c.RemappedRoot.UID == 0 {
container.Namespaces.Remove(configs.NEWUSER)
return nil
}
// convert the Docker daemon id map to the libcontainer variant of the same struct
// this keeps us from having to import libcontainer code across Docker client + daemon packages
cuidMaps := []configs.IDMap{}
cgidMaps := []configs.IDMap{}
for _, idMap := range c.UIDMapping {
cuidMaps = append(cuidMaps, configs.IDMap(idMap))
}
for _, idMap := range c.GIDMapping {
cgidMaps = append(cgidMaps, configs.IDMap(idMap))
}
container.UidMappings = cuidMaps
container.GidMappings = cgidMaps
for _, node := range container.Devices {
node.Uid = uint32(c.RemappedRoot.UID)
node.Gid = uint32(c.RemappedRoot.GID)
}
// TODO: until a kernel/mount solution exists for handling remount in a user namespace,
// we must clear the readonly flag for the cgroups mount (@mrunalp concurs)
for i := range container.Mounts {
if container.Mounts[i].Device == "cgroup" {
container.Mounts[i].Flags &= ^syscall.MS_RDONLY
}
}
return nil
}
func (d *Driver) setPrivileged(container *configs.Config) (err error) {
container.Capabilities = execdriver.GetAllCapabilities()
container.Cgroups.AllowAllDevices = true
hostDevices, err := devices.HostDevices()
if err != nil {
return err
}
container.Devices = hostDevices
if apparmor.IsEnabled() {
container.AppArmorProfile = "unconfined"
}
return nil
}
func (d *Driver) setCapabilities(container *configs.Config, c *execdriver.Command) (err error) {
container.Capabilities, err = execdriver.TweakCapabilities(container.Capabilities, c.CapAdd, c.CapDrop)
return err
}
func (d *Driver) setupRlimits(container *configs.Config, c *execdriver.Command) {
if c.Resources == nil {
return
}
for _, rlimit := range c.Resources.Rlimits {
container.Rlimits = append(container.Rlimits, configs.Rlimit{
Type: rlimit.Type,
Hard: rlimit.Hard,
Soft: rlimit.Soft,
})
}
}
// If rootfs mount propagation is RPRIVATE, that means all the volumes are
// going to be private anyway. There is no need to apply per volume
// propagation on top. This is just an optimzation so that cost of per volume
// propagation is paid only if user decides to make some volume non-private
// which will force rootfs mount propagation to be non RPRIVATE.
func checkResetVolumePropagation(container *configs.Config) {
if container.RootPropagation != mount.RPRIVATE {
return
}
for _, m := range container.Mounts {
m.PropagationFlags = nil
}
}
func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
for _, m := range mountinfo {
if m.Mountpoint == dir {
return m
}
}
return nil
}
// Get the source mount point of directory passed in as argument. Also return
// optional fields.
func getSourceMount(source string) (string, string, error) {
// Ensure any symlinks are resolved.
sourcePath, err := filepath.EvalSymlinks(source)
if err != nil {
return "", "", err
}
mountinfos, err := mount.GetMounts()
if err != nil {
return "", "", err
}
mountinfo := getMountInfo(mountinfos, sourcePath)
if mountinfo != nil {
return sourcePath, mountinfo.Optional, nil
}
path := sourcePath
for {
path = filepath.Dir(path)
mountinfo = getMountInfo(mountinfos, path)
if mountinfo != nil {
return path, mountinfo.Optional, nil
}
if path == "/" {
break
}
}
// If we are here, we did not find parent mount. Something is wrong.
return "", "", fmt.Errorf("Could not find source mount of %s", source)
}
// Ensure mount point on which path is mouted, is shared.
func ensureShared(path string) error {
sharedMount := false
sourceMount, optionalOpts, err := getSourceMount(path)
if err != nil {
return err
}
// Make sure source mount point is shared.
optsSplit := strings.Split(optionalOpts, " ")
for _, opt := range optsSplit {
if strings.HasPrefix(opt, "shared:") {
sharedMount = true
break
}
}
if !sharedMount {
return fmt.Errorf("Path %s is mounted on %s but it is not a shared mount.", path, sourceMount)
}
return nil
}
// Ensure mount point on which path is mounted, is either shared or slave.
func ensureSharedOrSlave(path string) error {
sharedMount := false
slaveMount := false
sourceMount, optionalOpts, err := getSourceMount(path)
if err != nil {
return err
}
// Make sure source mount point is shared.
optsSplit := strings.Split(optionalOpts, " ")
for _, opt := range optsSplit {
if strings.HasPrefix(opt, "shared:") {
sharedMount = true
break
} else if strings.HasPrefix(opt, "master:") {
slaveMount = true
break
}
}
if !sharedMount && !slaveMount {
return fmt.Errorf("Path %s is mounted on %s but it is not a shared or slave mount.", path, sourceMount)
}
return nil
}
func (d *Driver) setupMounts(container *configs.Config, c *execdriver.Command) error {
userMounts := make(map[string]struct{})
for _, m := range c.Mounts {
userMounts[m.Destination] = struct{}{}
}
// Filter out mounts that are overriden by user supplied mounts
var defaultMounts []*configs.Mount
_, mountDev := userMounts["/dev"]
for _, m := range container.Mounts {
if _, ok := userMounts[m.Destination]; !ok {
if mountDev && strings.HasPrefix(m.Destination, "/dev/") {
container.Devices = nil
continue
}
defaultMounts = append(defaultMounts, m)
}
}
container.Mounts = defaultMounts
mountPropagationMap := map[string]int{
"private": mount.PRIVATE,
"rprivate": mount.RPRIVATE,
"shared": mount.SHARED,
"rshared": mount.RSHARED,
"slave": mount.SLAVE,
"rslave": mount.RSLAVE,
}
for _, m := range c.Mounts {
for _, cm := range container.Mounts {
if cm.Destination == m.Destination {
return derr.ErrorCodeMountDup.WithArgs(m.Destination)
}
}
if m.Source == "tmpfs" {
var (
data = "size=65536k"
flags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
err error
)
fulldest := filepath.Join(c.Rootfs, m.Destination)
if m.Data != "" {
flags, data, err = mount.ParseTmpfsOptions(m.Data)
if err != nil {
return err
}
}
container.Mounts = append(container.Mounts, &configs.Mount{
Source: m.Source,
Destination: m.Destination,
Data: data,
Device: "tmpfs",
Flags: flags,
PremountCmds: genTmpfsPremountCmd(c.TmpDir, fulldest, m.Destination),
PostmountCmds: genTmpfsPostmountCmd(c.TmpDir, fulldest, m.Destination),
PropagationFlags: []int{mountPropagationMap[volume.DefaultPropagationMode]},
})
continue
}
flags := syscall.MS_BIND | syscall.MS_REC
var pFlag int
if !m.Writable {
flags |= syscall.MS_RDONLY
}
// Determine property of RootPropagation based on volume
// properties. If a volume is shared, then keep root propagtion
// shared. This should work for slave and private volumes too.
//
// For slave volumes, it can be either [r]shared/[r]slave.
//
// For private volumes any root propagation value should work.
pFlag = mountPropagationMap[m.Propagation]
if pFlag == mount.SHARED || pFlag == mount.RSHARED {
if err := ensureShared(m.Source); err != nil {
return err
}
rootpg := container.RootPropagation
if rootpg != mount.SHARED && rootpg != mount.RSHARED {
execdriver.SetRootPropagation(container, mount.SHARED)
}
} else if pFlag == mount.SLAVE || pFlag == mount.RSLAVE {
if err := ensureSharedOrSlave(m.Source); err != nil {
return err
}
rootpg := container.RootPropagation
if rootpg != mount.SHARED && rootpg != mount.RSHARED && rootpg != mount.SLAVE && rootpg != mount.RSLAVE {
execdriver.SetRootPropagation(container, mount.RSLAVE)
}
}
mount := &configs.Mount{
Source: m.Source,
Destination: m.Destination,
Device: "bind",
Flags: flags,
}
if pFlag != 0 {
mount.PropagationFlags = []int{pFlag}
}
container.Mounts = append(container.Mounts, mount)
}
checkResetVolumePropagation(container)
return nil
}
func (d *Driver) setupLabels(container *configs.Config, c *execdriver.Command) {
container.ProcessLabel = c.ProcessLabel
container.MountLabel = c.MountLabel
}