From 898ac34e337af5b527bac4c382996e0f88bcdf0f Mon Sep 17 00:00:00 2001 From: Phil Estes Date: Thu, 7 Jan 2016 22:43:11 -0500 Subject: [PATCH 1/3] Move userns support out of experimental into master Adds the `--userns-remap` flag to the master build Docker-DCO-1.1-Signed-off-by: Phil Estes (github: estesp) Upstream-commit: 557c7cb888ad8e2f1f378c9cf34e5fba14551904 Component: engine --- .../engine/daemon/config_experimental.go | 113 +----------- components/engine/daemon/config_unix.go | 1 + .../engine/daemon/daemon_experimental.go | 82 +-------- components/engine/daemon/daemon_stub.go | 21 +-- components/engine/daemon/daemon_unix.go | 173 ++++++++++++++++++ components/engine/daemon/daemon_windows.go | 15 ++ components/engine/hack/make.sh | 2 +- .../docker_api_containers_test.go | 8 +- 8 files changed, 199 insertions(+), 216 deletions(-) diff --git a/components/engine/daemon/config_experimental.go b/components/engine/daemon/config_experimental.go index f1c4bb925d..ceb7c38225 100644 --- a/components/engine/daemon/config_experimental.go +++ b/components/engine/daemon/config_experimental.go @@ -2,118 +2,7 @@ package daemon -import ( - "fmt" - "strconv" - "strings" - - "github.com/docker/docker/pkg/idtools" - flag "github.com/docker/docker/pkg/mflag" - "github.com/opencontainers/runc/libcontainer/user" -) +import flag "github.com/docker/docker/pkg/mflag" func (config *Config) attachExperimentalFlags(cmd *flag.FlagSet, usageFn func(string) string) { - cmd.StringVar(&config.RemappedRoot, []string{"-userns-remap"}, "", usageFn("User/Group setting for user namespaces")) -} - -const ( - defaultIDSpecifier string = "default" - defaultRemappedID string = "dockremap" -) - -// Parse the remapped root (user namespace) option, which can be one of: -// username - valid username from /etc/passwd -// username:groupname - valid username; valid groupname from /etc/group -// uid - 32-bit unsigned int valid Linux UID value -// uid:gid - uid value; 32-bit unsigned int Linux GID value -// -// 
If no groupname is specified, and a username is specified, an attempt -// will be made to lookup a gid for that username as a groupname -// -// If names are used, they are verified to exist in passwd/group -func parseRemappedRoot(usergrp string) (string, string, error) { - - var ( - userID, groupID int - username, groupname string - ) - - idparts := strings.Split(usergrp, ":") - if len(idparts) > 2 { - return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp) - } - - if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil { - // must be a uid; take it as valid - userID = int(uid) - luser, err := user.LookupUid(userID) - if err != nil { - return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err) - } - username = luser.Name - if len(idparts) == 1 { - // if the uid was numeric and no gid was specified, take the uid as the gid - groupID = userID - lgrp, err := user.LookupGid(groupID) - if err != nil { - return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err) - } - groupname = lgrp.Name - } - } else { - lookupName := idparts[0] - // special case: if the user specified "default", they want Docker to create or - // use (after creation) the "dockremap" user/group for root remapping - if lookupName == defaultIDSpecifier { - lookupName = defaultRemappedID - } - luser, err := user.LookupUser(lookupName) - if err != nil && idparts[0] != defaultIDSpecifier { - // error if the name requested isn't the special "dockremap" ID - return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err) - } else if err != nil { - // special case-- if the username == "default", then we have been asked - // to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid} - // ranges will be used for the user and group mappings in user namespaced containers - _, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID) - if err == nil { - return defaultRemappedID, 
defaultRemappedID, nil - } - return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err) - } - userID = luser.Uid - username = luser.Name - if len(idparts) == 1 { - // we only have a string username, and no group specified; look up gid from username as group - group, err := user.LookupGroup(lookupName) - if err != nil { - return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err) - } - groupID = group.Gid - groupname = group.Name - } - } - - if len(idparts) == 2 { - // groupname or gid is separately specified and must be resolved - // to a unsigned 32-bit gid - if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil { - // must be a gid, take it as valid - groupID = int(gid) - lgrp, err := user.LookupGid(groupID) - if err != nil { - return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err) - } - groupname = lgrp.Name - } else { - // not a number; attempt a lookup - group, err := user.LookupGroup(idparts[1]) - if err != nil { - return "", "", fmt.Errorf("Error during gid lookup for %q: %v", idparts[1], err) - } - groupID = group.Gid - groupname = idparts[1] - } - } - return username, groupname, nil } diff --git a/components/engine/daemon/config_unix.go b/components/engine/daemon/config_unix.go index ce14f9fe54..a25df90704 100644 --- a/components/engine/daemon/config_unix.go +++ b/components/engine/daemon/config_unix.go @@ -79,6 +79,7 @@ func (config *Config) InstallFlags(cmd *flag.FlagSet, usageFn func(string) strin cmd.BoolVar(&config.EnableCors, []string{"#api-enable-cors", "#-api-enable-cors"}, false, usageFn("Enable CORS headers in the remote API, this is deprecated by --api-cors-header")) cmd.StringVar(&config.CorsHeaders, []string{"-api-cors-header"}, "", usageFn("Set CORS headers in the remote API")) cmd.StringVar(&config.CgroupParent, []string{"-cgroup-parent"}, "", usageFn("Set parent cgroup for all containers")) + cmd.StringVar(&config.RemappedRoot, []string{"-userns-remap"}, 
"", usageFn("User/Group setting for user namespaces")) config.attachExperimentalFlags(cmd, usageFn) } diff --git a/components/engine/daemon/daemon_experimental.go b/components/engine/daemon/daemon_experimental.go index cc3852c853..3fd0e765da 100644 --- a/components/engine/daemon/daemon_experimental.go +++ b/components/engine/daemon/daemon_experimental.go @@ -2,88 +2,8 @@ package daemon -import ( - "fmt" - "os" - "path/filepath" - "runtime" - - "github.com/Sirupsen/logrus" - "github.com/docker/docker/pkg/idtools" - "github.com/docker/engine-api/types/container" -) - -func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) { - if runtime.GOOS != "linux" && config.RemappedRoot != "" { - return nil, nil, fmt.Errorf("User namespaces are only supported on Linux") - } - - // if the daemon was started with remapped root option, parse - // the config option to the int uid,gid values - var ( - uidMaps, gidMaps []idtools.IDMap - ) - if config.RemappedRoot != "" { - username, groupname, err := parseRemappedRoot(config.RemappedRoot) - if err != nil { - return nil, nil, err - } - if username == "root" { - // Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op - // effectively - logrus.Warnf("User namespaces: root cannot be remapped with itself; user namespaces are OFF") - return uidMaps, gidMaps, nil - } - logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname) - // update remapped root setting now that we have resolved them to actual names - config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname) - - uidMaps, gidMaps, err = idtools.CreateIDMappings(username, groupname) - if err != nil { - return nil, nil, fmt.Errorf("Can't create ID mappings: %v", err) - } - } - return uidMaps, gidMaps, nil -} - -func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error { - config.Root = rootDir - // the docker root metadata directory needs to have execute 
permissions for all users (o+x) - // so that syscalls executing as non-root, operating on subdirectories of the graph root - // (e.g. mounted layers of a container) can traverse this path. - // The user namespace support will create subdirectories for the remapped root host uid:gid - // pair owned by that same uid:gid pair for proper write access to those needed metadata and - // layer content subtrees. - if _, err := os.Stat(rootDir); err == nil { - // root current exists; verify the access bits are correct by setting them - if err = os.Chmod(rootDir, 0701); err != nil { - return err - } - } else if os.IsNotExist(err) { - // no root exists yet, create it 0701 with root:root ownership - if err := os.MkdirAll(rootDir, 0701); err != nil { - return err - } - } - - // if user namespaces are enabled we will create a subtree underneath the specified root - // with any/all specified remapped root uid/gid options on the daemon creating - // a new subdirectory with ownership set to the remapped uid/gid (so as to allow - // `chdir()` to work for containers namespaced to that uid/gid) - if config.RemappedRoot != "" { - config.Root = filepath.Join(rootDir, fmt.Sprintf("%d.%d", rootUID, rootGID)) - logrus.Debugf("Creating user namespaced daemon root: %s", config.Root) - // Create the root directory if it doesn't exists - if err := idtools.MkdirAllAs(config.Root, 0700, rootUID, rootGID); err != nil { - return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err) - } - } - return nil -} +import "github.com/docker/engine-api/types/container" func (daemon *Daemon) verifyExperimentalContainerSettings(hostConfig *container.HostConfig, config *container.Config) ([]string, error) { - if hostConfig.Privileged && daemon.configStore.RemappedRoot != "" { - return nil, fmt.Errorf("Privileged mode is incompatible with user namespace mappings") - } return nil, nil } diff --git a/components/engine/daemon/daemon_stub.go b/components/engine/daemon/daemon_stub.go index 
d60f063847..40e8ddc881 100644 --- a/components/engine/daemon/daemon_stub.go +++ b/components/engine/daemon/daemon_stub.go @@ -2,26 +2,7 @@ package daemon -import ( - "os" - - "github.com/docker/docker/pkg/idtools" - "github.com/docker/docker/pkg/system" - "github.com/docker/engine-api/types/container" -) - -func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) { - return nil, nil, nil -} - -func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error { - config.Root = rootDir - // Create the root directory if it doesn't exists - if err := system.MkdirAll(config.Root, 0700); err != nil && !os.IsExist(err) { - return err - } - return nil -} +import "github.com/docker/engine-api/types/container" func (daemon *Daemon) verifyExperimentalContainerSettings(hostConfig *container.HostConfig, config *container.Config) ([]string, error) { return nil, nil diff --git a/components/engine/daemon/daemon_unix.go b/components/engine/daemon/daemon_unix.go index 78ca7595ac..be673bcf08 100644 --- a/components/engine/daemon/daemon_unix.go +++ b/components/engine/daemon/daemon_unix.go @@ -7,6 +7,7 @@ import ( "net" "os" "path/filepath" + "runtime" "strconv" "strings" "syscall" @@ -33,6 +34,7 @@ import ( "github.com/docker/libnetwork/types" blkiodev "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/label" + "github.com/opencontainers/runc/libcontainer/user" ) const ( @@ -42,6 +44,9 @@ const ( platformSupported = true // It's not kernel limit, we want this 4M limit to supply a reasonable functional container linuxMinMemory = 4194304 + // constants for remapped root settings + defaultIDSpecifier string = "default" + defaultRemappedID string = "dockremap" ) func getBlkioWeightDevices(config *containertypes.HostConfig) ([]*blkiodev.WeightDevice, error) { @@ -375,6 +380,9 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes. 
warnings = append(warnings, "IPv4 forwarding is disabled. Networking will not work.") logrus.Warnf("IPv4 forwarding is disabled. Networking will not work") } + if hostConfig.Privileged && daemon.configStore.RemappedRoot != "" { + return warnings, fmt.Errorf("Privileged mode is incompatible with user namespace mappings") + } return warnings, nil } @@ -674,6 +682,171 @@ func setupInitLayer(initLayer string, rootUID, rootGID int) error { return nil } +// Parse the remapped root (user namespace) option, which can be one of: +// username - valid username from /etc/passwd +// username:groupname - valid username; valid groupname from /etc/group +// uid - 32-bit unsigned int valid Linux UID value +// uid:gid - uid value; 32-bit unsigned int Linux GID value +// +// If no groupname is specified, and a username is specified, an attempt +// will be made to lookup a gid for that username as a groupname +// +// If names are used, they are verified to exist in passwd/group +func parseRemappedRoot(usergrp string) (string, string, error) { + + var ( + userID, groupID int + username, groupname string + ) + + idparts := strings.Split(usergrp, ":") + if len(idparts) > 2 { + return "", "", fmt.Errorf("Invalid user/group specification in --userns-remap: %q", usergrp) + } + + if uid, err := strconv.ParseInt(idparts[0], 10, 32); err == nil { + // must be a uid; take it as valid + userID = int(uid) + luser, err := user.LookupUid(userID) + if err != nil { + return "", "", fmt.Errorf("Uid %d has no entry in /etc/passwd: %v", userID, err) + } + username = luser.Name + if len(idparts) == 1 { + // if the uid was numeric and no gid was specified, take the uid as the gid + groupID = userID + lgrp, err := user.LookupGid(groupID) + if err != nil { + return "", "", fmt.Errorf("Gid %d has no entry in /etc/group: %v", groupID, err) + } + groupname = lgrp.Name + } + } else { + lookupName := idparts[0] + // special case: if the user specified "default", they want Docker to create or + // use (after 
creation) the "dockremap" user/group for root remapping + if lookupName == defaultIDSpecifier { + lookupName = defaultRemappedID + } + luser, err := user.LookupUser(lookupName) + if err != nil && idparts[0] != defaultIDSpecifier { + // error if the name requested isn't the special "dockremap" ID + return "", "", fmt.Errorf("Error during uid lookup for %q: %v", lookupName, err) + } else if err != nil { + // special case-- if the username == "default", then we have been asked + // to create a new entry pair in /etc/{passwd,group} for which the /etc/sub{uid,gid} + // ranges will be used for the user and group mappings in user namespaced containers + _, _, err := idtools.AddNamespaceRangesUser(defaultRemappedID) + if err == nil { + return defaultRemappedID, defaultRemappedID, nil + } + return "", "", fmt.Errorf("Error during %q user creation: %v", defaultRemappedID, err) + } + userID = luser.Uid + username = luser.Name + if len(idparts) == 1 { + // we only have a string username, and no group specified; look up gid from username as group + group, err := user.LookupGroup(lookupName) + if err != nil { + return "", "", fmt.Errorf("Error during gid lookup for %q: %v", lookupName, err) + } + groupID = group.Gid + groupname = group.Name + } + } + + if len(idparts) == 2 { + // groupname or gid is separately specified and must be resolved + // to a unsigned 32-bit gid + if gid, err := strconv.ParseInt(idparts[1], 10, 32); err == nil { + // must be a gid, take it as valid + groupID = int(gid) + lgrp, err := user.LookupGid(groupID) + if err != nil { + return "", "", fmt.Errorf("Gid %d has no entry in /etc/passwd: %v", groupID, err) + } + groupname = lgrp.Name + } else { + // not a number; attempt a lookup + group, err := user.LookupGroup(idparts[1]) + if err != nil { + return "", "", fmt.Errorf("Error during gid lookup for %q: %v", idparts[1], err) + } + groupID = group.Gid + groupname = idparts[1] + } + } + return username, groupname, nil +} + +func setupRemappedRoot(config 
*Config) ([]idtools.IDMap, []idtools.IDMap, error) { + if runtime.GOOS != "linux" && config.RemappedRoot != "" { + return nil, nil, fmt.Errorf("User namespaces are only supported on Linux") + } + + // if the daemon was started with remapped root option, parse + // the config option to the int uid,gid values + var ( + uidMaps, gidMaps []idtools.IDMap + ) + if config.RemappedRoot != "" { + username, groupname, err := parseRemappedRoot(config.RemappedRoot) + if err != nil { + return nil, nil, err + } + if username == "root" { + // Cannot setup user namespaces with a 1-to-1 mapping; "--root=0:0" is a no-op + // effectively + logrus.Warnf("User namespaces: root cannot be remapped with itself; user namespaces are OFF") + return uidMaps, gidMaps, nil + } + logrus.Infof("User namespaces: ID ranges will be mapped to subuid/subgid ranges of: %s:%s", username, groupname) + // update remapped root setting now that we have resolved them to actual names + config.RemappedRoot = fmt.Sprintf("%s:%s", username, groupname) + + uidMaps, gidMaps, err = idtools.CreateIDMappings(username, groupname) + if err != nil { + return nil, nil, fmt.Errorf("Can't create ID mappings: %v", err) + } + } + return uidMaps, gidMaps, nil +} + +func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error { + config.Root = rootDir + // the docker root metadata directory needs to have execute permissions for all users (o+x) + // so that syscalls executing as non-root, operating on subdirectories of the graph root + // (e.g. mounted layers of a container) can traverse this path. + // The user namespace support will create subdirectories for the remapped root host uid:gid + // pair owned by that same uid:gid pair for proper write access to those needed metadata and + // layer content subtrees. 
+ if _, err := os.Stat(rootDir); err == nil { + // root current exists; verify the access bits are correct by setting them + if err = os.Chmod(rootDir, 0701); err != nil { + return err + } + } else if os.IsNotExist(err) { + // no root exists yet, create it 0701 with root:root ownership + if err := os.MkdirAll(rootDir, 0701); err != nil { + return err + } + } + + // if user namespaces are enabled we will create a subtree underneath the specified root + // with any/all specified remapped root uid/gid options on the daemon creating + // a new subdirectory with ownership set to the remapped uid/gid (so as to allow + // `chdir()` to work for containers namespaced to that uid/gid) + if config.RemappedRoot != "" { + config.Root = filepath.Join(rootDir, fmt.Sprintf("%d.%d", rootUID, rootGID)) + logrus.Debugf("Creating user namespaced daemon root: %s", config.Root) + // Create the root directory if it doesn't exists + if err := idtools.MkdirAllAs(config.Root, 0700, rootUID, rootGID); err != nil { + return fmt.Errorf("Cannot create daemon root: %s: %v", config.Root, err) + } + } + return nil +} + // registerLinks writes the links to a file. 
func (daemon *Daemon) registerLinks(container *container.Container, hostConfig *containertypes.HostConfig) error { if hostConfig == nil || hostConfig.Links == nil { diff --git a/components/engine/daemon/daemon_windows.go b/components/engine/daemon/daemon_windows.go index 1e36892e01..3b571b6c1b 100644 --- a/components/engine/daemon/daemon_windows.go +++ b/components/engine/daemon/daemon_windows.go @@ -4,6 +4,7 @@ import ( "encoding/json" "errors" "fmt" + "os" "path/filepath" "runtime" "strings" @@ -18,6 +19,7 @@ import ( containertypes "github.com/docker/engine-api/types/container" // register the windows graph driver "github.com/docker/docker/daemon/graphdriver/windows" + "github.com/docker/docker/pkg/idtools" "github.com/docker/docker/pkg/system" "github.com/docker/libnetwork" blkiodev "github.com/opencontainers/runc/libcontainer/configs" @@ -135,6 +137,19 @@ func (daemon *Daemon) cleanupMounts() error { return nil } +func setupRemappedRoot(config *Config) ([]idtools.IDMap, []idtools.IDMap, error) { + return nil, nil, nil +} + +func setupDaemonRoot(config *Config, rootDir string, rootUID, rootGID int) error { + config.Root = rootDir + // Create the root directory if it doesn't exists + if err := system.MkdirAll(config.Root, 0700); err != nil && !os.IsExist(err) { + return err + } + return nil +} + // conditionalMountOnStart is a platform specific helper function during the // container start to call mount. func (daemon *Daemon) conditionalMountOnStart(container *container.Container) error { diff --git a/components/engine/hack/make.sh b/components/engine/hack/make.sh index 5aa044dddd..78f25cdcbc 100755 --- a/components/engine/hack/make.sh +++ b/components/engine/hack/make.sh @@ -99,7 +99,7 @@ if [ ! "$GOPATH" ]; then exit 1 fi -if [ "$DOCKER_EXPERIMENTAL" ] || [ "$DOCKER_REMAP_ROOT" ]; then +if [ "$DOCKER_EXPERIMENTAL" ]; then echo >&2 '# WARNING! 
DOCKER_EXPERIMENTAL is set: building experimental features' echo >&2 DOCKER_BUILDTAGS+=" experimental pkcs11" diff --git a/components/engine/integration-cli/docker_api_containers_test.go b/components/engine/integration-cli/docker_api_containers_test.go index 568beca238..c11229f4b7 100644 --- a/components/engine/integration-cli/docker_api_containers_test.go +++ b/components/engine/integration-cli/docker_api_containers_test.go @@ -652,10 +652,14 @@ func (s *DockerSuite) TestContainerApiCreateWithDomainName(c *check.C) { c.Assert(containerJSON.Config.Domainname, checker.Equals, domainName, check.Commentf("Mismatched Domainname")) } -func (s *DockerSuite) TestContainerApiCreateNetworkMode(c *check.C) { +func (s *DockerSuite) TestContainerApiCreateBridgeNetworkMode(c *check.C) { testRequires(c, DaemonIsLinux) - UtilCreateNetworkMode(c, "host") UtilCreateNetworkMode(c, "bridge") +} + +func (s *DockerSuite) TestContainerApiCreateOtherNetworkModes(c *check.C) { + testRequires(c, DaemonIsLinux, NotUserNamespace) + UtilCreateNetworkMode(c, "host") UtilCreateNetworkMode(c, "container:web1") } From 664cab4653225b94e848e87e85afa676bbc9963b Mon Sep 17 00:00:00 2001 From: Phil Estes Date: Fri, 8 Jan 2016 09:03:17 -0500 Subject: [PATCH 2/3] Properly report conflicting namespace options when using userns This prevents strange errors and clarifies which namespace options are incompatible with user namespaces (at this time). 
Docker-DCO-1.1-Signed-off-by: Phil Estes (github: estesp) Upstream-commit: d5743a3a5c0864686a300b1fe5f58b89a36bb2f6 Component: engine --- components/engine/daemon/daemon_unix.go | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/components/engine/daemon/daemon_unix.go b/components/engine/daemon/daemon_unix.go index be673bcf08..21ac089923 100644 --- a/components/engine/daemon/daemon_unix.go +++ b/components/engine/daemon/daemon_unix.go @@ -380,8 +380,23 @@ func verifyPlatformContainerSettings(daemon *Daemon, hostConfig *containertypes. warnings = append(warnings, "IPv4 forwarding is disabled. Networking will not work.") logrus.Warnf("IPv4 forwarding is disabled. Networking will not work") } - if hostConfig.Privileged && daemon.configStore.RemappedRoot != "" { - return warnings, fmt.Errorf("Privileged mode is incompatible with user namespace mappings") + // check for various conflicting options with user namespaces + if daemon.configStore.RemappedRoot != "" { + if hostConfig.Privileged { + return warnings, fmt.Errorf("Privileged mode is incompatible with user namespaces.") + } + if hostConfig.NetworkMode.IsHost() || hostConfig.NetworkMode.IsContainer() { + return warnings, fmt.Errorf("Cannot share the host or a container's network namespace when user namespaces are enabled.") + } + if hostConfig.PidMode.IsHost() { + return warnings, fmt.Errorf("Cannot share the host PID namespace when user namespaces are enabled.") + } + if hostConfig.IpcMode.IsContainer() { + return warnings, fmt.Errorf("Cannot share a container's IPC namespace when user namespaces are enabled.") + } + if hostConfig.ReadonlyRootfs { + return warnings, fmt.Errorf("Cannot use the --read-only option when user namespaces are enabled.") + } } return warnings, nil } From cae7fb6550bd37c68f2b416bede025a8e324a82f Mon Sep 17 00:00:00 2001 From: Phil Estes Date: Fri, 8 Jan 2016 10:07:46 -0500 Subject: [PATCH 3/3] Add daemon documentation on user namespaces feature Remove 
the experimental docs for user namespaces and add similar content to the `docker daemon` command documentation. Docker-DCO-1.1-Signed-off-by: Phil Estes (github: estesp) Upstream-commit: cc63db4fd19f99372a84cc97a87a023fa9193734 Component: engine --- .../docs/reference/commandline/daemon.md | 128 ++++++++++++++++++ components/engine/experimental/README.md | 2 +- components/engine/experimental/userns.md | 119 ---------------- components/engine/man/docker-daemon.8.md | 4 + 4 files changed, 133 insertions(+), 120 deletions(-) delete mode 100644 components/engine/experimental/userns.md diff --git a/components/engine/docs/reference/commandline/daemon.md b/components/engine/docs/reference/commandline/daemon.md index 38ed09bc75..cc74b17af0 100644 --- a/components/engine/docs/reference/commandline/daemon.md +++ b/components/engine/docs/reference/commandline/daemon.md @@ -62,6 +62,7 @@ weight = -1 --tlscert="~/.docker/cert.pem" Path to TLS certificate file --tlskey="~/.docker/key.pem" Path to TLS key file --tlsverify Use TLS and verify the remote + --userns-remap="default" Enable user namespace remapping --userland-proxy=true Use userland proxy for loopback traffic Options with [] may be specified multiple times. @@ -628,6 +629,133 @@ For information about how to create an authorization plugin, see [authorization plugin](../../extend/authorization.md) section in the Docker extend section of this documentation. +## Daemon user namespace options + +The Linux kernel [user namespace support](http://man7.org/linux/man-pages/man7/user_namespaces.7.html) provides additional security by enabling +a process, and therefore a container, to have a unique range of user and +group IDs which are outside the traditional user and group range utilized by +the host system. 
Potentially the most important security improvement is that, +by default, container processes running as the `root` user will have expected +administrative privilege (with some restrictions) inside the container but will +effectively be mapped to an unprivileged `uid` on the host. + +When user namespace support is enabled, Docker creates a single daemon-wide mapping +for all containers running on the same engine instance. The mappings will +utilize the existing subordinate user and group ID feature available on all modern +Linux distributions. +The [`/etc/subuid`](http://man7.org/linux/man-pages/man5/subuid.5.html) and +[`/etc/subgid`](http://man7.org/linux/man-pages/man5/subgid.5.html) files will be +read for the user, and optional group, specified to the `--userns-remap` +parameter. If you do not wish to specify your own user and/or group, you can +provide `default` as the value to this flag, and a user will be created on your behalf +and provided subordinate uid and gid ranges. This default user will be named +`dockremap`, and entries will be created for it in `/etc/passwd` and +`/etc/group` using your distro's standard user and group creation tools. + +> **Note**: The single mapping per-daemon restriction is in place for now +> because Docker shares image layers from its local cache across all +> containers running on the engine instance. Since file ownership must be +> the same for all containers sharing the same layer content, the decision +> was made to map the file ownership on `docker pull` to the daemon's user and +> group mappings so that there is no delay for running containers once the +> content is downloaded. This design preserves the same performance for `docker +> pull`, `docker push`, and container startup as users expect with +> user namespaces disabled. 
+ +### Starting the daemon with user namespaces enabled + +To enable user namespace support, start the daemon with the +`--userns-remap` flag, which accepts values in the following formats: + + - uid + - uid:gid + - username + - username:groupname + +If numeric IDs are provided, translation back to valid user or group names +will occur so that the subordinate uid and gid information can be read, given +these resources are name-based, not id-based. If the numeric ID information +provided does not exist as entries in `/etc/passwd` or `/etc/group`, daemon +startup will fail with an error message. + +*Example: starting with default Docker user management:* + +``` + $ docker daemon --userns-remap=default +``` +When `default` is provided, Docker will create - or find the existing - user and group +named `dockremap`. If the user is created, and the Linux distribution has +appropriate support, the `/etc/subuid` and `/etc/subgid` files will be populated +with a contiguous 65536 length range of subordinate user and group IDs, starting +at an offset based on prior entries in those files. For example, Ubuntu will +create the following range, based on an existing user named `user1` already owning +the first 65536 range: + +``` + $ cat /etc/subuid + user1:100000:65536 + dockremap:165536:65536 +``` + +> **Note:** On a fresh Fedora install, we had to `touch` the +> `/etc/subuid` and `/etc/subgid` files to have ranges assigned when users +> were created. Once these files existed, range assignment on user creation +> worked properly. + +If you have a preferred/self-managed user with subordinate ID mappings already +configured, you can provide that username or uid to the `--userns-remap` flag. +If you have a group that doesn't match the username, you may provide the `gid` +or group name as well; otherwise the username will be used as the group name +when querying the system for the subordinate group ID range. 
+ +### Detailed information on `subuid`/`subgid` ranges + +Given potential advanced use of the subordinate ID ranges by power users, the +following paragraphs define how the Docker daemon currently uses the range entries +found within the subordinate range files. + +The simplest case is that only one contiguous range is defined for the +provided user or group. In this case, Docker will use that entire contiguous +range for the mapping of host uids and gids to the container process. This +means that the first ID in the range will be the remapped root user, and the +IDs above that initial ID will map host ID 1 through the end of the range. + +From the example `/etc/subuid` content shown above, the remapped root +user would be uid 165536. + +If the system administrator has set up multiple ranges for a single user or +group, the Docker daemon will read all the available ranges and use the +following algorithm to create the mapping ranges: + +1. The range segments found for the particular user will be sorted by *start ID* ascending. +2. Map segments will be created from each range in increasing value with a length matching the length of each segment. Therefore the range segment with the lowest numeric starting value will be equal to the remapped root, and continue up through host uid/gid equal to the range segment length. As an example, if the lowest segment starts at ID 1000 and has a length of 100, then a map of 1000 -> 0 (the remapped root) up through 1099 -> 99 will be created from this segment. If the next segment starts at ID 10000, then the next map will start with mapping 10000 -> 100 up to the length of this second segment. This will continue until no more segments are found in the subordinate files for this user. +3. If more than five range segments exist for a single user, only the first five will be utilized, matching the kernel's limitation of only five entries in `/proc/self/uid_map` and `/proc/self/gid_map`. 
+ +### User namespace known restrictions + +The following standard Docker features are currently incompatible when +running a Docker daemon with user namespaces enabled: + + - sharing PID or NET namespaces with the host (`--pid=host` or `--net=host`) + - sharing a network namespace with an existing container (`--net=container:*other*`) + - sharing an IPC namespace with an existing container (`--ipc=container:*other*`) + - A `--read-only` container filesystem (this is a Linux kernel restriction against remounting with modified flags of a currently mounted filesystem when inside a user namespace) + - external (volume or graph) drivers which are unaware/incapable of using daemon user mappings + - Using `--privileged` mode flag on `docker run` + +In general, user namespaces are an advanced feature and will require +coordination with other capabilities. For example, if volumes are mounted from +the host, file ownership will have to be pre-arranged if the user or +administrator wishes the containers to have expected access to the volume +contents. + +Finally, while the `root` user inside a user namespaced container process has +many of the expected admin privileges that go along with being the superuser, the +Linux kernel has restrictions based on internal knowledge that this is a user namespaced +process. The most notable restriction that we are aware of at this time is the +inability to use `mknod`. Permission will be denied for device creation even as +container `root` inside a user namespace. 
+ ## Miscellaneous options IP masquerading uses address translation to allow containers without a public diff --git a/components/engine/experimental/README.md b/components/engine/experimental/README.md index d2eff37d8d..659780e3fa 100644 --- a/components/engine/experimental/README.md +++ b/components/engine/experimental/README.md @@ -72,7 +72,7 @@ to build a Docker binary with the experimental features enabled: ## Current experimental features * [External graphdriver plugins](plugins_graphdriver.md) - * [User namespaces](userns.md) + * The user namespaces feature has graduated from experimental. ## How to comment on an experimental feature diff --git a/components/engine/experimental/userns.md b/components/engine/experimental/userns.md deleted file mode 100644 index cb713f7d65..0000000000 --- a/components/engine/experimental/userns.md +++ /dev/null @@ -1,119 +0,0 @@ -# Experimental: User namespace support - -Linux kernel [user namespace support](http://man7.org/linux/man-pages/man7/user_namespaces.7.html) provides additional security by enabling -a process--and therefore a container--to have a unique range of user and -group IDs which are outside the traditional user and group range utilized by -the host system. Potentially the most important security improvement is that, -by default, container processes running as the `root` user will have expected -administrative privilege (with some restrictions) inside the container but will -effectively be mapped to an unprivileged `uid` on the host. - -In this experimental phase, the Docker daemon creates a single daemon-wide mapping -for all containers running on the same engine instance. The mappings will -utilize the existing subordinate user and group ID feature available on all modern -Linux distributions. 
-The [`/etc/subuid`](http://man7.org/linux/man-pages/man5/subuid.5.html) and -[`/etc/subgid`](http://man7.org/linux/man-pages/man5/subgid.5.html) files will be -read for the user, and optional group, specified to the `--userns-remap` -parameter. If you do not wish to specify your own user and/or group, you can -provide `default` as the value to this flag, and a user will be created on your behalf -and provided subordinate uid and gid ranges. This default user will be named -`dockremap`, and entries will be created for it in `/etc/passwd` and -`/etc/group` using your distro's standard user and group creation tools. - -> **Note**: The single mapping per-daemon restriction exists for this experimental -> phase because Docker shares image layers from its local cache across all -> containers running on the engine instance. Since file ownership must be -> the same for all containers sharing the same layer content, the decision -> was made to map the file ownership on `docker pull` to the daemon's user and -> group mappings so that there is no delay for running containers once the -> content is downloaded--exactly the same performance characteristics as with -> user namespaces disabled. - -## Starting the daemon with user namespaces enabled -To enable this experimental user namespace support for a Docker daemon instance, -start the daemon with the aforementioned `--userns-remap` flag, which accepts -values in the following formats: - - - uid - - uid:gid - - username - - username:groupname - -If numeric IDs are provided, translation back to valid user or group names -will occur so that the subordinate uid and gid information can be read, given -these resources are name-based, not id-based. If the numeric ID information -provided does not exist as entries in `/etc/passwd` or `/etc/group`, daemon -startup will fail with an error message. 
- -*An example: starting with default Docker user management:* - -``` - $ docker daemon --userns-remap=default -``` -In this case, Docker will create--or find the existing--user and group -named `dockremap`. If the user is created, and the Linux distribution has -appropriate support, the `/etc/subuid` and `/etc/subgid` files will be populated -with a contiguous 65536 length range of subordinate user and group IDs, starting -at an offset based on prior entries in those files. For example, Ubuntu will -create the following range, based on an existing user already having the first -65536 range: - -``` - $ cat /etc/subuid - user1:100000:65536 - dockremap:165536:65536 -``` - -> **Note:** On a fresh Fedora install, we found that we had to `touch` the -> `/etc/subuid` and `/etc/subgid` files to have ranges assigned when users -> were created. Once these files existed, range assignment on user creation -> worked properly. - -If you have a preferred/self-managed user with subordinate ID mappings already -configured, you can provide that username or uid to the `--userns-remap` flag. -If you have a group that doesn't match the username, you may provide the `gid` -or group name as well; otherwise the username will be used as the group name -when querying the system for the subordinate group ID range. - -## Detailed information on `subuid`/`subgid` ranges - -Given there may be advanced use of the subordinate ID ranges by power users, we will -describe how the Docker daemon uses the range entries within these files under the -current experimental user namespace support. - -The simplest case exists where only one contiguous range is defined for the -provided user or group. In this case, Docker will use that entire contiguous -range for the mapping of host uids and gids to the container process. This -means that the first ID in the range will be the remapped root user, and the -IDs above that initial ID will map host ID 1 through the end of the range. 
- -From the example `/etc/subid` content shown above, that means the remapped root -user would be uid 165536. - -If the system administrator has set up multiple ranges for a single user or -group, the Docker daemon will read all the available ranges and use the -following algorithm to create the mapping ranges: - -1. The ranges will be sorted by *start ID* ascending -2. Maps will be created from each range with where the host ID will increment starting at 0 for the first range, 0+*range1* length for the second, and so on. This means that the lowest range start ID will be the remapped root, and all further ranges will map IDs from 1 through the uid or gid that equals the sum of all range lengths. -3. Ranges segments above five will be ignored as the kernel ignores any ID maps after five (in `/proc/self/{u,g}id_map`) - -## User namespace known restrictions - -The following standard Docker features are currently incompatible when -running a Docker daemon with experimental user namespaces enabled: - - - sharing namespaces with the host (--pid=host, --net=host, etc.) 
- - sharing namespaces with other containers (--net=container:*other*) - - A `--readonly` container filesystem (a Linux kernel restriction on remount with new flags of a currently mounted filesystem when inside a user namespace) - - external (volume/graph) drivers which are unaware/incapable of using daemon user mappings - - Using `--privileged` mode containers - - volume use without pre-arranging proper file ownership in mounted volumes - -Additionally, while the `root` user inside a user namespaced container -process has many of the privileges of the administrative root user, the -following operations will fail: - - - Use of `mknod` - permission is denied for device creation by the container root - - others will be listed here when fully tested diff --git a/components/engine/man/docker-daemon.8.md b/components/engine/man/docker-daemon.8.md index 8001c72d65..8e4a3acc0b 100644 --- a/components/engine/man/docker-daemon.8.md +++ b/components/engine/man/docker-daemon.8.md @@ -53,6 +53,7 @@ docker-daemon - Enable daemon mode [**--tlskey**[=*~/.docker/key.pem*]] [**--tlsverify**] [**--userland-proxy**[=*true*]] +[**--userns-remap**[=*default*]] # DESCRIPTION **docker** has two distinct functions. It is used for starting the Docker @@ -223,6 +224,9 @@ unix://[/path/to/socket] to use. **--userland-proxy**=*true*|*false* Rely on a userland proxy implementation for inter-container and outside-to-container loopback communications. Default is true. +**--userns-remap**=*default*|*uid:gid*|*user:group*|*user*|*uid* + Enable user namespaces for containers on the daemon. Specifying "default" will cause a new user and group to be created to handle UID and GID range remapping for the user namespace mappings used for contained processes. Specifying a user (or uid) and optionally a group (or gid) will cause the daemon to look up the user and group's subordinate ID ranges for use as the user namespace mappings for contained processes. 
+ # STORAGE DRIVER OPTIONS Docker uses storage backends (known as "graphdrivers" in the Docker