diff --git a/components/engine/daemon/stats_linux.go b/components/engine/daemon/stats_linux.go index 146f09491d..9db3f17fe7 100644 --- a/components/engine/daemon/stats_linux.go +++ b/components/engine/daemon/stats_linux.go @@ -52,10 +52,10 @@ func convertStatsToAPITypes(ls *libcontainer.Stats) *types.Stats { } mem := cs.MemoryStats s.MemoryStats = types.MemoryStats{ - Usage: mem.Usage, - MaxUsage: mem.MaxUsage, + Usage: mem.Usage.Usage, + MaxUsage: mem.Usage.MaxUsage, Stats: mem.Stats, - Failcnt: mem.Failcnt, + Failcnt: mem.Usage.Failcnt, } } diff --git a/components/engine/hack/vendor.sh b/components/engine/hack/vendor.sh index 182f76c2be..faf08a6c41 100755 --- a/components/engine/hack/vendor.sh +++ b/components/engine/hack/vendor.sh @@ -30,7 +30,7 @@ clone git github.com/hashicorp/consul v0.5.2 # get distribution packages clone git github.com/docker/distribution b9eeb328080d367dbde850ec6e94f1e4ac2b5efe -clone git github.com/docker/libcontainer v2.1.1 +clone git github.com/docker/libcontainer v2.2.1 # libcontainer deps (see src/github.com/docker/libcontainer/update-vendor.sh) clone git github.com/coreos/go-systemd v2 clone git github.com/godbus/dbus v2 diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/README.md b/components/engine/vendor/src/github.com/docker/libcontainer/README.md index 8072bde40f..26bb82dde1 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/README.md +++ b/components/engine/vendor/src/github.com/docker/libcontainer/README.md @@ -1,4 +1,4 @@ -## libcontainer - reference implementation for containers [![Build Status](https://jenkins.dockerproject.com/buildStatus/icon?job=Libcontainer Master)](https://jenkins.dockerproject.com/job/Libcontainer%20Master/) +## libcontainer - reference implementation for containers [![Build Status](https://jenkins.dockerproject.org/buildStatus/icon?job=Libcontainer%20Master)](https://jenkins.dockerproject.org/job/Libcontainer%20Master/) Libcontainer provides a native Go 
implementation for creating containers with namespaces, cgroups, capabilities, and filesystem access controls. diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/SPEC.md b/components/engine/vendor/src/github.com/docker/libcontainer/SPEC.md index 5d37fe935a..430a31fe08 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/SPEC.md +++ b/components/engine/vendor/src/github.com/docker/libcontainer/SPEC.md @@ -47,14 +47,14 @@ unmount all the mounts that were setup within that namespace. For a container to execute properly there are certain filesystems that are required to be mounted within the rootfs that the runtime will setup. -| Path | Type | Flags | Data | -| ----------- | ------ | -------------------------------------- | --------------------------------------- | -| /proc | proc | MS_NOEXEC,MS_NOSUID,MS_NODEV | | -| /dev | tmpfs | MS_NOEXEC,MS_STRICTATIME | mode=755 | -| /dev/shm | shm | MS_NOEXEC,MS_NOSUID,MS_NODEV | mode=1777,size=65536k | -| /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV | | -| /dev/pts | devpts | MS_NOEXEC,MS_NOSUID | newinstance,ptmxmode=0666,mode=620,gid5 | -| /sys | sysfs | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY | | +| Path | Type | Flags | Data | +| ----------- | ------ | -------------------------------------- | ---------------------------------------- | +| /proc | proc | MS_NOEXEC,MS_NOSUID,MS_NODEV | | +| /dev | tmpfs | MS_NOEXEC,MS_STRICTATIME | mode=755 | +| /dev/shm | tmpfs | MS_NOEXEC,MS_NOSUID,MS_NODEV | mode=1777,size=65536k | +| /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV | | +| /dev/pts | devpts | MS_NOEXEC,MS_NOSUID | newinstance,ptmxmode=0666,mode=620,gid=5 | +| /sys | sysfs | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY | | After a container's filesystems are mounted within the newly created diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/fs/apply_raw.go b/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/fs/apply_raw.go index 
8a68618e09..b272182b99 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/fs/apply_raw.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/fs/apply_raw.go @@ -30,6 +30,7 @@ var ( "freezer": &FreezerGroup{}, } CgroupProcesses = "cgroup.procs" + HugePageSizes, _ = cgroups.GetHugePageSize() ) type subsystem interface { @@ -44,6 +45,7 @@ type subsystem interface { } type Manager struct { + mu sync.Mutex Cgroups *configs.Cgroup Paths map[string]string } @@ -82,7 +84,6 @@ type data struct { } func (m *Manager) Apply(pid int) error { - if m.Cgroups == nil { return nil } @@ -128,14 +129,25 @@ func (m *Manager) Apply(pid int) error { } func (m *Manager) Destroy() error { - return cgroups.RemovePaths(m.Paths) + m.mu.Lock() + defer m.mu.Unlock() + if err := cgroups.RemovePaths(m.Paths); err != nil { + return err + } + m.Paths = make(map[string]string) + return nil } func (m *Manager) GetPaths() map[string]string { - return m.Paths + m.mu.Lock() + paths := m.Paths + m.mu.Unlock() + return paths } func (m *Manager) GetStats() (*cgroups.Stats, error) { + m.mu.Lock() + defer m.mu.Unlock() stats := cgroups.NewStats() for name, path := range m.Paths { sys, ok := subsystems[name] diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/fs/hugetlb.go b/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/fs/hugetlb.go index 277e87fe89..4b82649334 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/fs/hugetlb.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/fs/hugetlb.go @@ -3,6 +3,10 @@ package fs import ( + "fmt" + "strconv" + "strings" + "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" ) @@ -11,14 +15,25 @@ type HugetlbGroup struct { } func (s *HugetlbGroup) Apply(d *data) error { - // we just want to join this group even though we don't set anything - if _, err := d.join("hugetlb"); err != nil && 
!cgroups.IsNotFound(err) { + dir, err := d.join("hugetlb") + if err != nil && !cgroups.IsNotFound(err) { return err } + + if err := s.Set(dir, d.c); err != nil { + return err + } + return nil } func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error { + for _, hugetlb := range cgroup.HugetlbLimit { + if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.Itoa(hugetlb.Limit)); err != nil { + return err + } + } + return nil } @@ -27,5 +42,31 @@ func (s *HugetlbGroup) Remove(d *data) error { } func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error { + hugetlbStats := cgroups.HugetlbStats{} + for _, pageSize := range HugePageSizes { + usage := strings.Join([]string{"hugetlb", pageSize, "usage_in_bytes"}, ".") + value, err := getCgroupParamUint(path, usage) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", usage, err) + } + hugetlbStats.Usage = value + + maxUsage := strings.Join([]string{"hugetlb", pageSize, "max_usage_in_bytes"}, ".") + value, err = getCgroupParamUint(path, maxUsage) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", maxUsage, err) + } + hugetlbStats.MaxUsage = value + + failcnt := strings.Join([]string{"hugetlb", pageSize, "failcnt"}, ".") + value, err = getCgroupParamUint(path, failcnt) + if err != nil { + return fmt.Errorf("failed to parse %s - %v", failcnt, err) + } + hugetlbStats.Failcnt = value + + stats.HugetlbStats[pageSize] = hugetlbStats + } + return nil } diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/fs/memory.go b/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/fs/memory.go index bff7cafd76..dccdee6953 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/fs/memory.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/fs/memory.go @@ -8,6 +8,7 @@ import ( "os" "path/filepath" "strconv" + "strings" 
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" @@ -45,12 +46,6 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error { return err } } - // By default, MemorySwap is set to twice the size of Memory. - if cgroup.MemorySwap == 0 && cgroup.Memory != 0 { - if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Memory*2, 10)); err != nil { - return err - } - } if cgroup.MemorySwap > 0 { if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.MemorySwap, 10)); err != nil { return err @@ -67,6 +62,11 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error { return err } } + if cgroup.MemorySwappiness >= 0 && cgroup.MemorySwappiness <= 100 { + if err := writeFile(path, "memory.swappiness", strconv.FormatInt(cgroup.MemorySwappiness, 10)); err != nil { + return err + } + } return nil } @@ -94,24 +94,62 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error { } stats.MemoryStats.Stats[t] = v } - - // Set memory usage and max historical usage. 
- value, err := getCgroupParamUint(path, "memory.usage_in_bytes") - if err != nil { - return fmt.Errorf("failed to parse memory.usage_in_bytes - %v", err) - } - stats.MemoryStats.Usage = value stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"] - value, err = getCgroupParamUint(path, "memory.max_usage_in_bytes") + + memoryUsage, err := getMemoryData(path, "") if err != nil { - return fmt.Errorf("failed to parse memory.max_usage_in_bytes - %v", err) + return err } - stats.MemoryStats.MaxUsage = value - value, err = getCgroupParamUint(path, "memory.failcnt") + stats.MemoryStats.Usage = memoryUsage + swapUsage, err := getMemoryData(path, "memsw") if err != nil { - return fmt.Errorf("failed to parse memory.failcnt - %v", err) + return err } - stats.MemoryStats.Failcnt = value + stats.MemoryStats.SwapUsage = swapUsage + kernelUsage, err := getMemoryData(path, "kmem") + if err != nil { + return err + } + stats.MemoryStats.KernelUsage = kernelUsage return nil } + +func getMemoryData(path, name string) (cgroups.MemoryData, error) { + memoryData := cgroups.MemoryData{} + + moduleName := "memory" + if name != "" { + moduleName = strings.Join([]string{"memory", name}, ".") + } + usage := strings.Join([]string{moduleName, "usage_in_bytes"}, ".") + maxUsage := strings.Join([]string{moduleName, "max_usage_in_bytes"}, ".") + failcnt := strings.Join([]string{moduleName, "failcnt"}, ".") + + value, err := getCgroupParamUint(path, usage) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err) + } + memoryData.Usage = value + value, err = getCgroupParamUint(path, maxUsage) + if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err) + } + memoryData.MaxUsage = value + value, err = getCgroupParamUint(path, failcnt) + 
+ if err != nil { + if moduleName != "memory" && os.IsNotExist(err) { + return cgroups.MemoryData{}, nil + } + return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err) + } + memoryData.Failcnt = value + + return memoryData, nil +} diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/stats.go b/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/stats.go index 6d81a12e10..bda32b20c3 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/stats.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/stats.go @@ -32,18 +32,21 @@ type CpuStats struct { ThrottlingData ThrottlingData `json:"throttling_data,omitempty"` } +type MemoryData struct { + Usage uint64 `json:"usage,omitempty"` + MaxUsage uint64 `json:"max_usage,omitempty"` + Failcnt uint64 `json:"failcnt"` +} type MemoryStats struct { - // current res_counter usage for memory - Usage uint64 `json:"usage,omitempty"` // memory used for cache Cache uint64 `json:"cache,omitempty"` - // maximum usage ever recorded. - MaxUsage uint64 `json:"max_usage,omitempty"` - // TODO(vishh): Export these as stronger types. - // all the stats exported via memory.stat. - Stats map[string]uint64 `json:"stats,omitempty"` - // number of times memory usage hits limits. - Failcnt uint64 `json:"failcnt"` + // usage of memory + Usage MemoryData `json:"usage,omitempty"` + // usage of memory + swap + SwapUsage MemoryData `json:"swap_usage,omitempty"` + // usage of kernel memory + KernelUsage MemoryData `json:"kernel_usage,omitempty"` + Stats map[string]uint64 `json:"stats,omitempty"` } type BlkioStatEntry struct { @@ -65,13 +68,25 @@ type BlkioStats struct { SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"` } +type HugetlbStats struct { + // current res_counter usage for hugetlb + Usage uint64 `json:"usage,omitempty"` + // maximum usage ever recorded. 
+ MaxUsage uint64 `json:"max_usage,omitempty"` + // number of times hugetlb allocation failed. + Failcnt uint64 `json:"failcnt"` +} + type Stats struct { CpuStats CpuStats `json:"cpu_stats,omitempty"` MemoryStats MemoryStats `json:"memory_stats,omitempty"` BlkioStats BlkioStats `json:"blkio_stats,omitempty"` + // the map is in the format "size of hugepage: stats of the hugepage" + HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"` } func NewStats() *Stats { memoryStats := MemoryStats{Stats: make(map[string]uint64)} - return &Stats{MemoryStats: memoryStats} + hugetlbStats := make(map[string]HugetlbStats) + return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats} } diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/systemd/apply_systemd.go b/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/systemd/apply_systemd.go index c2782285e3..fd7f680b50 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/systemd/apply_systemd.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/systemd/apply_systemd.go @@ -20,6 +20,7 @@ import ( ) type Manager struct { + mu sync.Mutex Cgroups *configs.Cgroup Paths map[string]string } @@ -222,6 +223,9 @@ func (m *Manager) Apply(pid int) error { return err } + if err := joinHugetlb(c, pid); err != nil { + return err + } // FIXME: Systemd does have `BlockIODeviceWeight` property, but we got problem // using that (at least on systemd 208, see https://github.com/docker/libcontainer/pull/354), // so use fs work around for now. 
@@ -253,11 +257,21 @@ func (m *Manager) Apply(pid int) error { } func (m *Manager) Destroy() error { - return cgroups.RemovePaths(m.Paths) + m.mu.Lock() + defer m.mu.Unlock() + theConn.StopUnit(getUnitName(m.Cgroups), "replace") + if err := cgroups.RemovePaths(m.Paths); err != nil { + return err + } + m.Paths = make(map[string]string) + return nil } func (m *Manager) GetPaths() map[string]string { - return m.Paths + m.mu.Lock() + paths := m.Paths + m.mu.Unlock() + return paths } func writeFile(dir, file, data string) error { @@ -391,6 +405,8 @@ func (m *Manager) GetPids() ([]int, error) { } func (m *Manager) GetStats() (*cgroups.Stats, error) { + m.mu.Lock() + defer m.mu.Unlock() stats := cgroups.NewStats() for name, path := range m.Paths { sys, ok := subsystems[name] @@ -453,14 +469,8 @@ func joinMemory(c *configs.Cgroup, pid int) error { } // -1 disables memoryswap - if c.Memory != 0 && c.MemorySwap >= 0 { - memorySwap := c.MemorySwap - - if memorySwap == 0 { - // By default, MemorySwap is set to twice the size of RAM. 
- memorySwap = c.Memory * 2 - } - err = writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(memorySwap, 10)) + if c.MemorySwap > 0 { + err = writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.MemorySwap, 10)) if err != nil { return err } @@ -472,6 +482,12 @@ func joinMemory(c *configs.Cgroup, pid int) error { return err } } + if c.MemorySwappiness >= 0 && c.MemorySwappiness <= 100 { + err = writeFile(path, "memory.swappiness", strconv.FormatInt(c.MemorySwappiness, 10)) + if err != nil { + return err + } + } return nil } @@ -526,3 +542,13 @@ func joinBlkio(c *configs.Cgroup, pid int) error { return nil } + +func joinHugetlb(c *configs.Cgroup, pid int) error { + path, err := join(c, "hugetlb", pid) + if err != nil && !cgroups.IsNotFound(err) { + return err + } + + hugetlb := subsystems["hugetlb"] + return hugetlb.Set(path, c) +} diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/utils.go b/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/utils.go index 5486883996..8ab80a7f2c 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/utils.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/cgroups/utils.go @@ -14,24 +14,28 @@ import ( "time" "github.com/docker/docker/pkg/mount" + "github.com/docker/docker/pkg/units" ) // https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt func FindCgroupMountpoint(subsystem string) (string, error) { - mounts, err := mount.GetMounts() + f, err := os.Open("/proc/self/mountinfo") if err != nil { return "", err } - - for _, mount := range mounts { - if mount.Fstype == "cgroup" { - for _, opt := range strings.Split(mount.VfsOpts, ",") { - if opt == subsystem { - return mount.Mountpoint, nil - } + scanner := bufio.NewScanner(f) + for scanner.Scan() { + txt := scanner.Text() + fields := strings.Split(txt, " ") + for _, opt := range strings.Split(fields[len(fields)-1], ",") { + if opt == subsystem { + return 
fields[4], nil } } } + if err := scanner.Err(); err != nil { + return "", err + } return "", NewNotFoundError(subsystem) } @@ -238,3 +242,23 @@ func RemovePaths(paths map[string]string) (err error) { } return fmt.Errorf("Failed to remove paths: %s", paths) } + +func GetHugePageSize() ([]string, error) { + var pageSizes []string + sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"} + files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages") + if err != nil { + return pageSizes, err + } + for _, st := range files { + nameArray := strings.Split(st.Name(), "-") + pageSize, err := units.RAMInBytes(nameArray[1]) + if err != nil { + return []string{}, err + } + sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList) + pageSizes = append(pageSizes, sizeString) + } + + return pageSizes, nil +} diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/configs/cgroup.go b/components/engine/vendor/src/github.com/docker/libcontainer/configs/cgroup.go index 55a81ded2f..140b530d66 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/configs/cgroup.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/configs/cgroup.go @@ -78,12 +78,18 @@ type Cgroup struct { // set the freeze value for the process Freezer FreezerState `json:"freezer"` + // Hugetlb limit (in bytes) + HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"` + // Parent slice to use for systemd TODO: remove in favor or parent Slice string `json:"slice"` // Whether to disable OOM Killer OomKillDisable bool `json:"oom_kill_disable"` + // Tuning swappiness behaviour per cgroup + MemorySwappiness int64 `json:"memory_swappiness"` + // Set priority of network traffic for container NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"` diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/configs/config.go b/components/engine/vendor/src/github.com/docker/libcontainer/configs/config.go index 293af0a9b2..04ea91ffd0 100644 --- 
a/components/engine/vendor/src/github.com/docker/libcontainer/configs/config.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/configs/config.go @@ -13,6 +13,40 @@ type IDMap struct { Size int `json:"size"` } +type Seccomp struct { + Syscalls []*Syscall `json:"syscalls"` +} + +type Action int + +const ( + Kill Action = iota - 3 + Trap + Allow +) + +type Operator int + +const ( + EqualTo Operator = iota + NotEqualTo + GreatherThan + LessThan + MaskEqualTo +) + +type Arg struct { + Index int `json:"index"` + Value uint32 `json:"value"` + Op Operator `json:"op"` +} + +type Syscall struct { + Value int `json:"value"` + Action Action `json:"action"` + Args []*Arg `json:"args"` +} + // TODO Windows. Many of these fields should be factored out into those parts // which are common across platforms, and those which are platform specific. @@ -85,7 +119,7 @@ type Config struct { // AdditionalGroups specifies the gids that should be added to supplementary groups // in addition to those that the user belongs to. - AdditionalGroups []int `json:"additional_groups"` + AdditionalGroups []string `json:"additional_groups"` // UidMappings is an array of User ID mappings for User Namespaces UidMappings []IDMap `json:"uid_mappings"` @@ -104,4 +138,9 @@ type Config struct { // SystemProperties is a map of properties and their values. It is the equivalent of using // sysctl -w my.property.name value in Linux. SystemProperties map[string]string `json:"system_properties"` + + // Seccomp allows actions to be taken whenever a syscall is made within the container. + // By default, all syscalls are allowed with actions to allow, trap, kill, or return an errno + // can be specified on a per syscall basis. 
+ Seccomp *Seccomp `json:"seccomp"` } diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/configs/config_linux.go b/components/engine/vendor/src/github.com/docker/libcontainer/configs/config_unix.go similarity index 98% rename from components/engine/vendor/src/github.com/docker/libcontainer/configs/config_linux.go rename to components/engine/vendor/src/github.com/docker/libcontainer/configs/config_unix.go index 97544b2abc..89f580bfa3 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/configs/config_linux.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/configs/config_unix.go @@ -1,3 +1,5 @@ +// +build freebsd linux + package configs import "fmt" diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/configs/device_defaults.go b/components/engine/vendor/src/github.com/docker/libcontainer/configs/device_defaults.go index b0966b9754..0ce040fd34 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/configs/device_defaults.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/configs/device_defaults.go @@ -1,4 +1,4 @@ -// +build linux +// +build linux freebsd package configs diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/configs/hugepage_limit.go b/components/engine/vendor/src/github.com/docker/libcontainer/configs/hugepage_limit.go new file mode 100644 index 0000000000..1cce8d09be --- /dev/null +++ b/components/engine/vendor/src/github.com/docker/libcontainer/configs/hugepage_limit.go @@ -0,0 +1,9 @@ +package configs + +type HugepageLimit struct { + // which type of hugepage to limit. + Pagesize string `json:"page_size"` + + // usage limit for hugepage. 
+ Limit int `json:"limit"` +} diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/configs/namespaces_linux.go b/components/engine/vendor/src/github.com/docker/libcontainer/configs/namespaces_unix.go similarity index 98% rename from components/engine/vendor/src/github.com/docker/libcontainer/configs/namespaces_linux.go rename to components/engine/vendor/src/github.com/docker/libcontainer/configs/namespaces_unix.go index c937b49ff4..7bc9085468 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/configs/namespaces_linux.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/configs/namespaces_unix.go @@ -1,4 +1,4 @@ -// +build linux +// +build linux freebsd package configs diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/console_freebsd.go b/components/engine/vendor/src/github.com/docker/libcontainer/console_freebsd.go new file mode 100644 index 0000000000..4d20b8da40 --- /dev/null +++ b/components/engine/vendor/src/github.com/docker/libcontainer/console_freebsd.go @@ -0,0 +1,13 @@ +// +build freebsd + +package libcontainer + +import ( + "errors" +) + +// newConsole returns an initialized console that can be used within a container by copying bytes +// from the master side to the slave that is attached as the tty for the container's init process. 
+func newConsole(uid, gid int) (Console, error) { + return nil, errors.New("libcontainer console is not supported on FreeBSD") +} diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/console_linux.go b/components/engine/vendor/src/github.com/docker/libcontainer/console_linux.go index 5eaf03169b..e35ac529db 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/console_linux.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/console_linux.go @@ -92,7 +92,7 @@ func (c *linuxConsole) mount(rootfs, mountLabel string, uid, gid int) error { return syscall.Mount(c.slavePath, dest, "bind", syscall.MS_BIND, "") } -// dupStdio opens the slavePath for the console and dup2s the fds to the current +// dupStdio opens the slavePath for the console and dups the fds to the current // processes stdio, fd 0,1,2. func (c *linuxConsole) dupStdio() error { slave, err := c.open(syscall.O_RDWR) @@ -101,7 +101,7 @@ func (c *linuxConsole) dupStdio() error { } fd := int(slave.Fd()) for _, i := range []int{0, 1, 2} { - if err := syscall.Dup2(fd, i); err != nil { + if err := syscall.Dup3(fd, i, 0); err != nil { return err } } diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/devices/devices_linux.go b/components/engine/vendor/src/github.com/docker/libcontainer/devices/devices_unix.go similarity index 98% rename from components/engine/vendor/src/github.com/docker/libcontainer/devices/devices_linux.go rename to components/engine/vendor/src/github.com/docker/libcontainer/devices/devices_unix.go index 7a11eaf11b..a4df06c3a8 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/devices/devices_linux.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/devices/devices_unix.go @@ -1,3 +1,5 @@ +// +build linux freebsd + package devices import ( diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/devices/number.go 
b/components/engine/vendor/src/github.com/docker/libcontainer/devices/number.go index e9c3e516a1..885b6e5dd9 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/devices/number.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/devices/number.go @@ -1,4 +1,4 @@ -// +build linux +// +build linux freebsd package devices diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/init_linux.go b/components/engine/vendor/src/github.com/docker/libcontainer/init_linux.go index 1771fd1930..f36e354f2e 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/init_linux.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/init_linux.go @@ -13,6 +13,7 @@ import ( "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/netlink" + "github.com/docker/libcontainer/seccomp" "github.com/docker/libcontainer/system" "github.com/docker/libcontainer/user" "github.com/docker/libcontainer/utils" @@ -176,10 +177,20 @@ func setupUser(config *initConfig) error { if err != nil { return err } - suppGroups := append(execUser.Sgids, config.Config.AdditionalGroups...) + + var addGroups []int + if len(config.Config.AdditionalGroups) > 0 { + addGroups, err = user.GetAdditionalGroupsPath(config.Config.AdditionalGroups, groupPath) + if err != nil { + return err + } + } + + suppGroups := append(execUser.Sgids, addGroups...) 
if err := syscall.Setgroups(suppGroups); err != nil { return err } + if err := system.Setgid(execUser.Gid); err != nil { return err } @@ -259,3 +270,61 @@ func killCgroupProcesses(m cgroups.Manager) error { } return nil } + +func finalizeSeccomp(config *initConfig) error { + if config.Config.Seccomp == nil { + return nil + } + context := seccomp.New() + for _, s := range config.Config.Seccomp.Syscalls { + ss := &seccomp.Syscall{ + Value: uint32(s.Value), + Action: seccompAction(s.Action), + } + if len(s.Args) > 0 { + ss.Args = seccompArgs(s.Args) + } + context.Add(ss) + } + return context.Load() +} + +func seccompAction(a configs.Action) seccomp.Action { + switch a { + case configs.Kill: + return seccomp.Kill + case configs.Trap: + return seccomp.Trap + case configs.Allow: + return seccomp.Allow + } + return seccomp.Error(syscall.Errno(int(a))) +} + +func seccompArgs(args []*configs.Arg) seccomp.Args { + var sa []seccomp.Arg + for _, a := range args { + sa = append(sa, seccomp.Arg{ + Index: uint32(a.Index), + Op: seccompOperator(a.Op), + Value: uint(a.Value), + }) + } + return seccomp.Args{sa} +} + +func seccompOperator(o configs.Operator) seccomp.Operator { + switch o { + case configs.EqualTo: + return seccomp.EqualTo + case configs.NotEqualTo: + return seccomp.NotEqualTo + case configs.GreatherThan: + return seccomp.GreatherThan + case configs.LessThan: + return seccomp.LessThan + case configs.MaskEqualTo: + return seccomp.MaskEqualTo + } + return 0 +} diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_arm.go b/components/engine/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_armppc64.go similarity index 70% rename from components/engine/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_arm.go rename to components/engine/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_armppc64.go index 779e58a771..965e0bfbc7 100644 --- 
a/components/engine/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_arm.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_armppc64.go @@ -1,3 +1,5 @@ +// +build arm ppc64 ppc64le + package netlink func ifrDataByte(b byte) uint8 { diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_notarm.go b/components/engine/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_notarm.go index f151722a1b..7446279892 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_notarm.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/netlink/netlink_linux_notarm.go @@ -1,4 +1,4 @@ -// +build !arm +// +build !arm,!ppc64,!ppc64le package netlink diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/nsenter/nsexec.c b/components/engine/vendor/src/github.com/docker/libcontainer/nsenter/nsexec.c index d8e45f3cda..d78e1691c6 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/nsenter/nsexec.c +++ b/components/engine/vendor/src/github.com/docker/libcontainer/nsenter/nsexec.c @@ -148,15 +148,15 @@ void nsexec() pr_perror("ioctl TIOCSCTTY failed"); exit(1); } - if (dup2(consolefd, STDIN_FILENO) != STDIN_FILENO) { + if (dup3(consolefd, STDIN_FILENO, 0) != STDIN_FILENO) { pr_perror("Failed to dup 0"); exit(1); } - if (dup2(consolefd, STDOUT_FILENO) != STDOUT_FILENO) { + if (dup3(consolefd, STDOUT_FILENO, 0) != STDOUT_FILENO) { pr_perror("Failed to dup 1"); exit(1); } - if (dup2(consolefd, STDERR_FILENO) != STDERR_FILENO) { + if (dup3(consolefd, STDERR_FILENO, 0) != STDERR_FILENO) { pr_perror("Failed to dup 2"); exit(1); } diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/rootfs_linux.go b/components/engine/vendor/src/github.com/docker/libcontainer/rootfs_linux.go index 4ddfff1fe2..0b0c3815cb 100644 --- 
a/components/engine/vendor/src/github.com/docker/libcontainer/rootfs_linux.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/rootfs_linux.go @@ -272,7 +272,7 @@ func reOpenDevNull(rootfs string) error { } if stat.Rdev == devNullStat.Rdev { // Close and re-open the fd. - if err := syscall.Dup2(int(file.Fd()), fd); err != nil { + if err := syscall.Dup3(int(file.Fd()), fd, 0); err != nil { return err } } diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/seccomp/bpf.go b/components/engine/vendor/src/github.com/docker/libcontainer/seccomp/bpf.go new file mode 100644 index 0000000000..a4b3bdf7a5 --- /dev/null +++ b/components/engine/vendor/src/github.com/docker/libcontainer/seccomp/bpf.go @@ -0,0 +1,32 @@ +package seccomp + +import "strings" + +type bpfLabel struct { + label string + location uint32 +} + +type bpfLabels []bpfLabel + +// labelIndex returns the index for the label if it exists in the slice. +// if it does not exist in the slice it appends the label lb to the end +// of the slice and returns the index. 
+func labelIndex(labels *bpfLabels, lb string) uint32 { + var id uint32 + for id = 0; id < uint32(len(*labels)); id++ { + if strings.EqualFold(lb, (*labels)[id].label) { + return id + } + } + *labels = append(*labels, bpfLabel{lb, 0xffffffff}) + return id +} + +func scmpBpfStmt(code uint16, k uint32) sockFilter { + return sockFilter{code, 0, 0, k} +} + +func scmpBpfJump(code uint16, k uint32, jt, jf uint8) sockFilter { + return sockFilter{code, jt, jf, k} +} diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/seccomp/context.go b/components/engine/vendor/src/github.com/docker/libcontainer/seccomp/context.go new file mode 100644 index 0000000000..c8d4e73144 --- /dev/null +++ b/components/engine/vendor/src/github.com/docker/libcontainer/seccomp/context.go @@ -0,0 +1,144 @@ +package seccomp + +import ( + "errors" + "syscall" +) + +const labelTemplate = "lb-%d-%d" + +// Action is the type of action that will be taken when a +// syscall is performed. +type Action int + +const ( + Kill Action = iota - 3 // Kill the calling process of the syscall. + Trap // Trap and coredump the calling process of the syscall. + Allow // Allow the syscall to be completed. +) + +// Syscall is the specified syscall, action, and any type of arguments +// to filter on. +type Syscall struct { + // Value is the syscall number. + Value uint32 + // Action is the action to perform when the specified syscall is made. + Action Action + // Args are filters that can be specified on the arguments to the syscall. + Args Args +} + +func (s *Syscall) scmpAction() uint32 { + switch s.Action { + case Allow: + return retAllow + case Trap: + return retTrap + case Kill: + return retKill + } + return actionErrno(uint32(s.Action)) +} + +// Arg represents an argument to the syscall with the argument's index, +// the operator to apply when matching, and the argument's value at that time. 
+type Arg struct { + Index uint32 // index of args which start from zero + Op Operator // operation, such as EQ/NE/GE/LE + Value uint // the value of arg +} + +type Args [][]Arg + +var ( + ErrUnresolvedLabel = errors.New("seccomp: unresolved label") + ErrDuplicateLabel = errors.New("seccomp: duplicate label use") + ErrUnsupportedOperation = errors.New("seccomp: unsupported operation for argument") +) + +// Error returns an Action that will be used to send the calling +// process the specified errno when the syscall is made. +func Error(code syscall.Errno) Action { + return Action(code) +} + +// New returns a new syscall context for use. +func New() *Context { + return &Context{ + syscalls: make(map[uint32]*Syscall), + } +} + +// Context holds syscalls for the current process to limit the type of +// actions the calling process can make. +type Context struct { + syscalls map[uint32]*Syscall +} + +// Add will add the specified syscall, action, and arguments to the seccomp +// Context. +func (c *Context) Add(s *Syscall) { + c.syscalls[s.Value] = s +} + +// Remove removes the specified syscall configuration from the Context. +func (c *Context) Remove(call uint32) { + delete(c.syscalls, call) +} + +// Load will apply the Context to the calling process, making any seccomp process changes +// apply after the context is loaded. 
+func (c *Context) Load() error { + filter, err := c.newFilter() + if err != nil { + return err + } + if err := prctl(prSetNoNewPrivileges, 1, 0, 0, 0); err != nil { + return err + } + prog := newSockFprog(filter) + return prog.set() +} + +func (c *Context) newFilter() ([]sockFilter, error) { + var ( + labels bpfLabels + f = newFilter() + ) + for _, s := range c.syscalls { + f.addSyscall(s, &labels) + } + f.allow() + // process args for the syscalls + for _, s := range c.syscalls { + if err := f.addArguments(s, &labels); err != nil { + return nil, err + } + } + // apply labels for arguments + idx := int32(len(*f) - 1) + for ; idx >= 0; idx-- { + lf := &(*f)[idx] + if lf.code != (syscall.BPF_JMP + syscall.BPF_JA) { + continue + } + rel := int32(lf.jt)<<8 | int32(lf.jf) + if ((jumpJT << 8) | jumpJF) == rel { + if labels[lf.k].location == 0xffffffff { + return nil, ErrUnresolvedLabel + } + lf.k = labels[lf.k].location - uint32(idx+1) + lf.jt = 0 + lf.jf = 0 + } else if ((labelJT << 8) | labelJF) == rel { + if labels[lf.k].location != 0xffffffff { + return nil, ErrDuplicateLabel + } + labels[lf.k].location = uint32(idx) + lf.k = 0 + lf.jt = 0 + lf.jf = 0 + } + } + return *f, nil +} diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/seccomp/filter.go b/components/engine/vendor/src/github.com/docker/libcontainer/seccomp/filter.go new file mode 100644 index 0000000000..370cdf087e --- /dev/null +++ b/components/engine/vendor/src/github.com/docker/libcontainer/seccomp/filter.go @@ -0,0 +1,116 @@ +package seccomp + +import ( + "fmt" + "syscall" + "unsafe" +) + +type sockFilter struct { + code uint16 + jt uint8 + jf uint8 + k uint32 +} + +func newFilter() *filter { + var f filter + f = append(f, sockFilter{ + pfLD + syscall.BPF_W + syscall.BPF_ABS, + 0, + 0, + uint32(unsafe.Offsetof(secData.nr)), + }) + return &f +} + +type filter []sockFilter + +func (f *filter) addSyscall(s *Syscall, labels *bpfLabels) { + if len(s.Args) == 0 { + f.call(s.Value, 
scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction())) + } else { + if len(s.Args[0]) > 0 { + lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[0][0].Index) + f.call(s.Value, + scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), + jumpJT, jumpJF)) + } + } +} + +func (f *filter) addArguments(s *Syscall, labels *bpfLabels) error { + for i := 0; len(s.Args) > i; i++ { + if len(s.Args[i]) > 0 { + lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[i][0].Index) + f.label(labels, lb) + f.arg(s.Args[i][0].Index) + } + for j := 0; j < len(s.Args[i]); j++ { + var jf sockFilter + if len(s.Args)-1 > i && len(s.Args[i+1]) > 0 { + lbj := fmt.Sprintf(labelTemplate, s.Value, s.Args[i+1][0].Index) + jf = scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, + labelIndex(labels, lbj), jumpJT, jumpJF) + } else { + jf = scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction()) + } + if err := f.op(s.Args[i][j].Op, s.Args[i][j].Value, jf); err != nil { + return err + } + } + f.allow() + } + return nil +} + +func (f *filter) label(labels *bpfLabels, lb string) { + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), labelJT, labelJF)) +} + +func (f *filter) call(nr uint32, jt sockFilter) { + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, nr, 0, 1)) + *f = append(*f, jt) +} + +func (f *filter) allow() { + *f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retAllow)) +} + +func (f *filter) deny() { + *f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retTrap)) +} + +func (f *filter) arg(index uint32) { + arg(f, index) +} + +func (f *filter) op(operation Operator, v uint, jf sockFilter) error { + switch operation { + case EqualTo: + jumpEqualTo(f, v, jf) + case NotEqualTo: + jumpNotEqualTo(f, v, jf) + case GreatherThan: + jumpGreaterThan(f, v, jf) + case LessThan: + jumpLessThan(f, v, jf) + case MaskEqualTo: + jumpMaskEqualTo(f, v, jf) + default: + return ErrUnsupportedOperation + } + return nil +} + +func 
arg(f *filter, idx uint32) { + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.low(idx))) + *f = append(*f, scmpBpfStmt(syscall.BPF_ST, 0)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.hi(idx))) + *f = append(*f, scmpBpfStmt(syscall.BPF_ST, 1)) +} + +func jump(f *filter, labels *bpfLabels, lb string) { + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), + jumpJT, jumpJF)) +} diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/seccomp/jump_amd64.go b/components/engine/vendor/src/github.com/docker/libcontainer/seccomp/jump_amd64.go new file mode 100644 index 0000000000..f0d07716a4 --- /dev/null +++ b/components/engine/vendor/src/github.com/docker/libcontainer/seccomp/jump_amd64.go @@ -0,0 +1,68 @@ +// +build linux,amd64 + +package seccomp + +// Using BPF filters +// +// ref: http://www.gsp.com/cgi-bin/man.cgi?topic=bpf +import "syscall" + +func jumpGreaterThan(f *filter, v uint, jt sockFilter) { + lo := uint32(uint64(v) % 0x100000000) + hi := uint32(uint64(v) / 0x100000000) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 4, 0)) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0)) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGE+syscall.BPF_K, (lo), 0, 2)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) + *f = append(*f, jt) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) +} + +func jumpEqualTo(f *filter, v uint, jt sockFilter) { + lo := uint32(uint64(v) % 0x100000000) + hi := uint32(uint64(v) / 0x100000000) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0)) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (lo), 0, 2)) + *f = 
append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) + *f = append(*f, jt) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) +} + +func jumpLessThan(f *filter, v uint, jt sockFilter) { + lo := uint32(uint64(v) % 0x100000000) + hi := uint32(uint64(v) / 0x100000000) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 6, 0)) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 3)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0)) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (lo), 2, 0)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) + *f = append(*f, jt) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) +} + +func jumpNotEqualTo(f *filter, v uint, jt sockFilter) { + lo := uint32(uint64(v) % 0x100000000) + hi := uint32(uint64(v) / 0x100000000) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 5, 0)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0)) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 2, 0)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) + *f = append(*f, jt) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) +} + +// this checks for a value inside a mask. 
The evaluation is equal to doing +// CLONE_NEWUSER & syscallMask == CLONE_NEWUSER +func jumpMaskEqualTo(f *filter, v uint, jt sockFilter) { + lo := uint32(uint64(v) % 0x100000000) + hi := uint32(uint64(v) / 0x100000000) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 6)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0)) + *f = append(*f, scmpBpfStmt(syscall.BPF_ALU+syscall.BPF_AND, uint32(v))) + *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 0, 2)) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) + *f = append(*f, jt) + *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) +} diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/seccomp/seccomp.go b/components/engine/vendor/src/github.com/docker/libcontainer/seccomp/seccomp.go new file mode 100644 index 0000000000..78d7d85334 --- /dev/null +++ b/components/engine/vendor/src/github.com/docker/libcontainer/seccomp/seccomp.go @@ -0,0 +1,122 @@ +// Package seccomp provides native seccomp ( https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt ) support for go. +package seccomp + +import ( + "syscall" + "unsafe" +) + +// Operator that is used for argument comparison. 
+type Operator int
+
+const (
+	EqualTo Operator = iota
+	NotEqualTo
+	GreatherThan
+	LessThan
+	MaskEqualTo
+)
+
+const (
+	jumpJT  = 0xff // jt/jf marker on a BPF_JA placeholder: jump to a label
+	jumpJF  = 0xff
+	labelJT = 0xfe // jt/jf marker on a BPF_JA placeholder: label definition
+	labelJF = 0xfe
+)
+
+const (
+	pfLD                 = 0x0        // BPF_LD instruction class
+	retKill              = 0x00000000 // SECCOMP_RET_KILL
+	retTrap              = 0x00030000 // SECCOMP_RET_TRAP
+	retAllow             = 0x7fff0000 // SECCOMP_RET_ALLOW
+	modeFilter           = 0x2        // SECCOMP_MODE_FILTER
+	prSetNoNewPrivileges = 0x26       // PR_SET_NO_NEW_PRIVS
+)
+
+func actionErrno(errno uint32) uint32 { // SECCOMP_RET_ERRNO | low 16 bits of errno
+	return 0x00050000 | (errno & 0x0000ffff)
+}
+
+var (
+	secData = struct { // same field layout as the kernel's struct seccomp_data; used only for offsets
+		nr         int32
+		arch       uint32
+		insPointer uint64
+		args       [6]uint64
+	}{0, 0, 0, [6]uint64{0, 0, 0, 0, 0, 0}}
+)
+
+var isLittle = func() bool { // true when the first byte of an int in memory is its low-order byte
+	var (
+		x  = 0x1234
+		p  = unsafe.Pointer(&x)
+		p2 = (*[unsafe.Sizeof(0)]byte)(p)
+	)
+	if p2[0] == 0 {
+		return false
+	}
+	return true
+}()
+
+var endian endianSupport
+
+type endianSupport struct {
+}
+
+func (e endianSupport) hi(i uint32) uint32 { // offset in secData of the upper 32 bits of args[i]
+	if isLittle {
+		return e.little(i)
+	}
+	return e.big(i)
+}
+
+func (e endianSupport) low(i uint32) uint32 { // offset in secData of the lower 32 bits of args[i]
+	if isLittle {
+		return e.big(i)
+	}
+	return e.little(i)
+}
+
+func (endianSupport) big(idx uint32) uint32 { // offset of the first 4 bytes of args[idx]; 0 if idx is out of range
+	if idx >= 6 {
+		return 0
+	}
+	return uint32(unsafe.Offsetof(secData.args)) + 8*idx
+}
+
+func (endianSupport) little(idx uint32) uint32 { // offset of the second 4 bytes of args[idx]; 0 if idx is out of range
+	if idx >= 6 { // idx is unsigned, so only the upper bound can be violated
+		return 0
+	}
+	return uint32(unsafe.Offsetof(secData.args)) + // stride is the element size (8), as in big(), not its alignment
+		uint32(unsafe.Sizeof(secData.args[0]))*idx + uint32(unsafe.Sizeof(secData.arch))
+}
+
+func prctl(option int, arg2, arg3, arg4, arg5 uintptr) error {
+	_, _, err := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0)
+	if err != 0 {
+		return err
+	}
+	return nil
+}
+
+func newSockFprog(filter []sockFilter) *sockFprog {
+	return &sockFprog{
+		len:  uint16(len(filter)),
+		filt: filter,
+	}
+}
+
+type sockFprog struct {
+	len  uint16
+	filt []sockFilter
+}
+
+func (s *sockFprog) set() error {
+	_, _, err := syscall.Syscall(syscall.SYS_PRCTL, uintptr(syscall.PR_SET_SECCOMP),
+		uintptr(modeFilter), uintptr(unsafe.Pointer(s)))
+	if err != 0 {
+		return err
+	}
+
return nil +} diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/standard_init_linux.go b/components/engine/vendor/src/github.com/docker/libcontainer/standard_init_linux.go index 251c09f696..445c1fa29c 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/standard_init_linux.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/standard_init_linux.go @@ -99,5 +99,8 @@ func (l *linuxStandardInit) Init() error { if syscall.Getppid() != l.parentPid { return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) } + if err := finalizeSeccomp(l.config); err != nil { + return err + } return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) } diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/stats_freebsd.go b/components/engine/vendor/src/github.com/docker/libcontainer/stats_freebsd.go new file mode 100644 index 0000000000..f8d1d689ce --- /dev/null +++ b/components/engine/vendor/src/github.com/docker/libcontainer/stats_freebsd.go @@ -0,0 +1,5 @@ +package libcontainer + +type Stats struct { + Interfaces []*NetworkInterface +} diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/system/setns_linux.go b/components/engine/vendor/src/github.com/docker/libcontainer/system/setns_linux.go index a3c4cbb273..615ff4c827 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/system/setns_linux.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/system/setns_linux.go @@ -21,16 +21,20 @@ var setNsMap = map[string]uintptr{ "linux/s390x": 339, } +var sysSetns = setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)] + +func SysSetns() uint32 { + return uint32(sysSetns) +} + func Setns(fd uintptr, flags uintptr) error { ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)] if !exists { return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH) } - _, _, err := syscall.RawSyscall(ns, fd, flags, 0) if err != 
0 { return err } - return nil } diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/system/sysconfig.go b/components/engine/vendor/src/github.com/docker/libcontainer/system/sysconfig.go index b8434f1050..b3a07cba3e 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/system/sysconfig.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/system/sysconfig.go @@ -1,4 +1,4 @@ -// +build cgo,linux +// +build cgo,linux cgo,freebsd package system diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/user/user.go b/components/engine/vendor/src/github.com/docker/libcontainer/user/user.go index d7439f12e3..13226dbfa7 100644 --- a/components/engine/vendor/src/github.com/docker/libcontainer/user/user.go +++ b/components/engine/vendor/src/github.com/docker/libcontainer/user/user.go @@ -348,3 +348,60 @@ func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) ( return user, nil } + +// GetAdditionalGroupsPath looks up a list of groups by name or group id +// against the group file. If a group name cannot be found, an error will be +// returned. If a group id cannot be found, it will be returned as-is. 
+func GetAdditionalGroupsPath(additionalGroups []string, groupPath string) ([]int, error) {
+	groupReader, err := os.Open(groupPath)
+	if err != nil {
+		return nil, fmt.Errorf("Failed to open group file: %v", err)
+	}
+	defer groupReader.Close()
+
+	groups, err := ParseGroupFilter(groupReader, func(g Group) bool { // keep only entries named in additionalGroups
+		for _, ag := range additionalGroups {
+			if g.Name == ag || strconv.Itoa(g.Gid) == ag {
+				return true
+			}
+		}
+		return false
+	})
+	if err != nil {
+		return nil, fmt.Errorf("Unable to find additional groups %v: %v", additionalGroups, err)
+	}
+
+	gidMap := make(map[int]struct{}) // set of resolved gids; duplicates collapse
+	for _, ag := range additionalGroups {
+		var found bool
+		for _, g := range groups {
+			// Take the first group that matches by name or gid. The gid
+			// may already be in gidMap when the same group is requested
+			// more than once (e.g. twice by name); that is still a match
+			// and must not fall through to the numeric lookup below.
+			if g.Name == ag || strconv.Itoa(g.Gid) == ag {
+				gidMap[g.Gid] = struct{}{}
+				found = true
+				break
+			}
+		}
+		// we asked for a group but didn't find it. let's check to see
+		// if we wanted a numeric group
+		if !found {
+			gid, err := strconv.Atoi(ag)
+			if err != nil {
+				return nil, fmt.Errorf("Unable to find group %s", ag)
+			}
+			// Ensure gid is inside gid range.
+			if gid < minId || gid > maxId {
+				return nil, ErrRange
+			}
+			gidMap[gid] = struct{}{}
+		}
+	}
+	gids := []int{} // non-nil even when no groups were requested
+	for gid := range gidMap { // map iteration: the order of the result is unspecified
+		gids = append(gids, gid)
+	}
+	return gids, nil
+}
diff --git a/components/engine/vendor/src/github.com/docker/libcontainer/utils/utils.go b/components/engine/vendor/src/github.com/docker/libcontainer/utils/utils.go
index 094bce5300..26a0fb7d09 100644
--- a/components/engine/vendor/src/github.com/docker/libcontainer/utils/utils.go
+++ b/components/engine/vendor/src/github.com/docker/libcontainer/utils/utils.go
@@ -21,6 +21,9 @@ func GenerateRandomName(prefix string, size int) (string, error) {
 	if _, err := io.ReadFull(rand.Reader, id); err != nil {
 		return "", err
 	}
+	if size > 64 {
+		size = 64
+	}
 	return prefix + hex.EncodeToString(id)[:size], nil
 }