Although having a request ID available throughout the codebase is very valuable, the impact of requiring a Context as an argument to every function in the codepath of an API request is too significant and was not properly understood at the time of the review. Furthermore, mixing API-layer code with non-API-layer code makes the latter usable only by API-layer code (code that has a notion of Context).

This reverts commit de4164043546d2b9ee3bf323dbc41f4979c84480, reversing changes made to 7daeecd42d7bb112bfe01532c8c9a962bb0c7967.

Signed-off-by: Tibor Vass <tibor@docker.com>

Conflicts:
	api/server/container.go
	builder/internals.go
	daemon/container_unix.go
	daemon/create.go

Upstream-commit: b08f071e18043abe8ce15f56826d38dd26bedb78
Component: engine
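
For illustration only, a minimal sketch of the pattern this commit reverts; the handler and daemon function names below are hypothetical, not code from this change. It shows how threading a Context through every call couples non-API-layer code to an API-layer concept:

// Hypothetical sketch of the reverted pattern: a context carrying a request ID
// has to be forwarded through every function on an API request's path, so the
// daemon-layer code becomes usable only by callers that have such a Context.
package sketch

import "fmt"

// Context is a stand-in for the API-layer request context.
type Context struct{ RequestID string }

// API layer: owns the Context for the incoming request.
func postContainersCreate(ctx Context, name string) error {
	return createContainer(ctx, name) // must forward ctx down the call path
}

// Daemon layer: now requires an API-layer Context to be called at all.
func createContainer(ctx Context, name string) error {
	fmt.Printf("[%s] creating container %s\n", ctx.RequestID, name)
	return nil
}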

487 lines · 12 KiB · Go

// +build linux,cgo

package native

import (
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/Sirupsen/logrus"
	"github.com/docker/docker/daemon/execdriver"
	"github.com/docker/docker/pkg/parsers"
	"github.com/docker/docker/pkg/pools"
	"github.com/docker/docker/pkg/reexec"
	sysinfo "github.com/docker/docker/pkg/system"
	"github.com/docker/docker/pkg/term"
	"github.com/opencontainers/runc/libcontainer"
	"github.com/opencontainers/runc/libcontainer/apparmor"
	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
	"github.com/opencontainers/runc/libcontainer/configs"
	"github.com/opencontainers/runc/libcontainer/system"
	"github.com/opencontainers/runc/libcontainer/utils"
)

// Define constants for native driver
const (
	DriverName = "native"
	Version    = "0.2"
)

// Driver contains all information for native driver,
// it implements execdriver.Driver.
type Driver struct {
	root             string
	initPath         string
	activeContainers map[string]libcontainer.Container
	machineMemory    int64
	factory          libcontainer.Factory
	sync.Mutex
}

// NewDriver returns a new native driver, called from NewDriver of execdriver.
func NewDriver(root, initPath string, options []string) (*Driver, error) {
	meminfo, err := sysinfo.ReadMemInfo()
	if err != nil {
		return nil, err
	}

	if err := sysinfo.MkdirAll(root, 0700); err != nil {
		return nil, err
	}

	if apparmor.IsEnabled() {
		if err := installAppArmorProfile(); err != nil {
			apparmorProfiles := []string{"docker-default"}

			// Allow the daemon to run if loading the profile failed, as long
			// as the required profiles are already active (possibly loaded by
			// another run, manually, or at system startup).
			for _, policy := range apparmorProfiles {
				if err := hasAppArmorProfileLoaded(policy); err != nil {
					return nil, fmt.Errorf("AppArmor enabled on system but the %s profile could not be loaded.", policy)
				}
			}
		}
	}

	// Choose the cgroup manager. This makes sure there are no breaking
	// changes for people who upgrade from versions without the
	// native.cgroupdriver option.
	cgm := libcontainer.Cgroupfs
	if systemd.UseSystemd() {
		cgm = libcontainer.SystemdCgroups
	}

	// parse the options
	for _, option := range options {
		key, val, err := parsers.ParseKeyValueOpt(option)
		if err != nil {
			return nil, err
		}
		key = strings.ToLower(key)
		switch key {
		case "native.cgroupdriver":
			// override the default if they set options
			switch val {
			case "systemd":
				if systemd.UseSystemd() {
					cgm = libcontainer.SystemdCgroups
				} else {
					// warn them that they chose the wrong driver
					logrus.Warn("You cannot use systemd as native.cgroupdriver, using cgroupfs instead")
				}
			case "cgroupfs":
				cgm = libcontainer.Cgroupfs
			default:
				return nil, fmt.Errorf("Unknown native.cgroupdriver given %q. try cgroupfs or systemd", val)
			}
		default:
			return nil, fmt.Errorf("Unknown option %s\n", key)
		}
	}

	f, err := libcontainer.New(
		root,
		cgm,
		libcontainer.InitPath(reexec.Self(), DriverName),
	)
	if err != nil {
		return nil, err
	}

	return &Driver{
		root:             root,
		initPath:         initPath,
		activeContainers: make(map[string]libcontainer.Container),
		machineMemory:    meminfo.MemTotal,
		factory:          f,
	}, nil
}

type execOutput struct {
	exitCode int
	err      error
}

// Run implements the exec driver Driver interface,
// it calls libcontainer APIs to run a container.
func (d *Driver) Run(c *execdriver.Command, pipes *execdriver.Pipes, hooks execdriver.Hooks) (execdriver.ExitStatus, error) {
	// take the Command and populate the libcontainer.Config from it
	container, err := d.createContainer(c, hooks)
	if err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}

	p := &libcontainer.Process{
		Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
		Env:  c.ProcessConfig.Env,
		Cwd:  c.WorkingDir,
		User: c.ProcessConfig.User,
	}

	if err := setupPipes(container, &c.ProcessConfig, p, pipes); err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}

	cont, err := d.factory.Create(c.ID, container)
	if err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}
	d.Lock()
	d.activeContainers[c.ID] = cont
	d.Unlock()
	defer func() {
		cont.Destroy()
		d.cleanContainer(c.ID)
	}()

	if err := cont.Start(p); err != nil {
		return execdriver.ExitStatus{ExitCode: -1}, err
	}

	oom := notifyOnOOM(cont)
	if hooks.Start != nil {
		pid, err := p.Pid()
		if err != nil {
			p.Signal(os.Kill)
			p.Wait()
			return execdriver.ExitStatus{ExitCode: -1}, err
		}
		hooks.Start(&c.ProcessConfig, pid, oom)
	}

	waitF := p.Wait
	if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
		// We need this hack to track processes with inherited fds,
		// because cmd.Wait() waits for all streams to be copied.
		waitF = waitInPIDHost(p, cont)
	}
	ps, err := waitF()
	if err != nil {
		execErr, ok := err.(*exec.ExitError)
		if !ok {
			return execdriver.ExitStatus{ExitCode: -1}, err
		}
		ps = execErr.ProcessState
	}
	cont.Destroy()
	_, oomKill := <-oom
	return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
}

// notifyOnOOM returns a channel that signals if the container received an OOM notification
// for any process. If it is unable to subscribe to OOM notifications then a closed
// channel is returned as it will be non-blocking and return the correct result when read.
func notifyOnOOM(container libcontainer.Container) <-chan struct{} {
	oom, err := container.NotifyOOM()
	if err != nil {
		logrus.Warnf("Your kernel does not support OOM notifications: %s", err)
		c := make(chan struct{})
		close(c)
		return c
	}
	return oom
}

// killCgroupProcs pauses the container's cgroup, kills every process found in
// it, resumes the cgroup, and then reaps the killed processes.
func killCgroupProcs(c libcontainer.Container) {
	var procs []*os.Process
	if err := c.Pause(); err != nil {
		logrus.Warn(err)
	}
	pids, err := c.Processes()
	if err != nil {
		// Don't worry about children we cannot list; this mostly means the
		// cgroup has already been deleted.
		logrus.Warnf("Failed to get processes from container %s: %v", c.ID(), err)
	}
	for _, pid := range pids {
		if p, err := os.FindProcess(pid); err == nil {
			procs = append(procs, p)
			if err := p.Kill(); err != nil {
				logrus.Warn(err)
			}
		}
	}
	if err := c.Resume(); err != nil {
		logrus.Warn(err)
	}
	for _, p := range procs {
		if _, err := p.Wait(); err != nil {
			logrus.Warn(err)
		}
	}
}

func waitInPIDHost(p *libcontainer.Process, c libcontainer.Container) func() (*os.ProcessState, error) {
	return func() (*os.ProcessState, error) {
		pid, err := p.Pid()
		if err != nil {
			return nil, err
		}

		process, err := os.FindProcess(pid)
		s, err := process.Wait()
		if err != nil {
			execErr, ok := err.(*exec.ExitError)
			if !ok {
				return s, err
			}
			s = execErr.ProcessState
		}
		killCgroupProcs(c)
		p.Wait()
		return s, err
	}
}

// Kill implements the exec driver Driver interface.
func (d *Driver) Kill(c *execdriver.Command, sig int) error {
	d.Lock()
	active := d.activeContainers[c.ID]
	d.Unlock()
	if active == nil {
		return fmt.Errorf("active container for %s does not exist", c.ID)
	}
	state, err := active.State()
	if err != nil {
		return err
	}
	return syscall.Kill(state.InitProcessPid, syscall.Signal(sig))
}

// Pause implements the exec driver Driver interface,
// it calls libcontainer API to pause a container.
func (d *Driver) Pause(c *execdriver.Command) error {
	d.Lock()
	active := d.activeContainers[c.ID]
	d.Unlock()
	if active == nil {
		return fmt.Errorf("active container for %s does not exist", c.ID)
	}
	return active.Pause()
}

// Unpause implements the exec driver Driver interface,
// it calls libcontainer API to unpause a container.
func (d *Driver) Unpause(c *execdriver.Command) error {
	d.Lock()
	active := d.activeContainers[c.ID]
	d.Unlock()
	if active == nil {
		return fmt.Errorf("active container for %s does not exist", c.ID)
	}
	return active.Resume()
}

// Terminate implements the exec driver Driver interface.
func (d *Driver) Terminate(c *execdriver.Command) error {
	defer d.cleanContainer(c.ID)
	container, err := d.factory.Load(c.ID)
	if err != nil {
		return err
	}
	defer container.Destroy()
	state, err := container.State()
	if err != nil {
		return err
	}
	pid := state.InitProcessPid
	currentStartTime, err := system.GetProcessStartTime(pid)
	if err != nil {
		return err
	}
	if state.InitProcessStartTime == currentStartTime {
		err = syscall.Kill(pid, 9)
		syscall.Wait4(pid, nil, 0, nil)
	}
	return err
}

// Info implements the exec driver Driver interface.
func (d *Driver) Info(id string) execdriver.Info {
	return &info{
		ID:     id,
		driver: d,
	}
}

// Name implements the exec driver Driver interface.
func (d *Driver) Name() string {
	return fmt.Sprintf("%s-%s", DriverName, Version)
}

// GetPidsForContainer implements the exec driver Driver interface.
func (d *Driver) GetPidsForContainer(id string) ([]int, error) {
	d.Lock()
	active := d.activeContainers[id]
	d.Unlock()

	if active == nil {
		return nil, fmt.Errorf("active container for %s does not exist", id)
	}
	return active.Processes()
}

func (d *Driver) cleanContainer(id string) error {
	d.Lock()
	delete(d.activeContainers, id)
	d.Unlock()
	return os.RemoveAll(filepath.Join(d.root, id))
}

func (d *Driver) createContainerRoot(id string) error {
	return os.MkdirAll(filepath.Join(d.root, id), 0655)
}

// Clean implements the exec driver Driver interface.
func (d *Driver) Clean(id string) error {
	return os.RemoveAll(filepath.Join(d.root, id))
}

// Stats implements the exec driver Driver interface.
func (d *Driver) Stats(id string) (*execdriver.ResourceStats, error) {
	d.Lock()
	c := d.activeContainers[id]
	d.Unlock()
	if c == nil {
		return nil, execdriver.ErrNotRunning
	}
	now := time.Now()
	stats, err := c.Stats()
	if err != nil {
		return nil, err
	}
	memoryLimit := c.Config().Cgroups.Memory
	// If the container does not have any memory limit specified, set the
	// limit to the machine's total memory.
	if memoryLimit == 0 {
		memoryLimit = d.machineMemory
	}
	return &execdriver.ResourceStats{
		Stats:       stats,
		Read:        now,
		MemoryLimit: memoryLimit,
	}, nil
}

// TtyConsole implements the exec driver Terminal interface.
type TtyConsole struct {
	console libcontainer.Console
}

// NewTtyConsole returns a new TtyConsole struct.
func NewTtyConsole(console libcontainer.Console, pipes *execdriver.Pipes) (*TtyConsole, error) {
	tty := &TtyConsole{
		console: console,
	}

	if err := tty.AttachPipes(pipes); err != nil {
		tty.Close()
		return nil, err
	}

	return tty, nil
}

// Resize implements Resize method of Terminal interface
func (t *TtyConsole) Resize(h, w int) error {
	return term.SetWinsize(t.console.Fd(), &term.Winsize{Height: uint16(h), Width: uint16(w)})
}

// AttachPipes attaches given pipes to TtyConsole
func (t *TtyConsole) AttachPipes(pipes *execdriver.Pipes) error {
	go func() {
		if wb, ok := pipes.Stdout.(interface {
			CloseWriters() error
		}); ok {
			defer wb.CloseWriters()
		}

		pools.Copy(pipes.Stdout, t.console)
	}()

	if pipes.Stdin != nil {
		go func() {
			pools.Copy(t.console, pipes.Stdin)

			pipes.Stdin.Close()
		}()
	}

	return nil
}

// Close implements Close method of Terminal interface
func (t *TtyConsole) Close() error {
	return t.console.Close()
}

func setupPipes(container *configs.Config, processConfig *execdriver.ProcessConfig, p *libcontainer.Process, pipes *execdriver.Pipes) error {
	var term execdriver.Terminal
	var err error

	if processConfig.Tty {
		rootuid, err := container.HostUID()
		if err != nil {
			return err
		}
		cons, err := p.NewConsole(rootuid)
		if err != nil {
			return err
		}
		term, err = NewTtyConsole(cons, pipes)
	} else {
		p.Stdout = pipes.Stdout
		p.Stderr = pipes.Stderr
		r, w, err := os.Pipe()
		if err != nil {
			return err
		}
		if pipes.Stdin != nil {
			go func() {
				io.Copy(w, pipes.Stdin)
				w.Close()
			}()
			p.Stdin = r
		}
		term = &execdriver.StdConsole{}
	}
	if err != nil {
		return err
	}
	processConfig.Terminal = term
	return nil
}

// SupportsHooks implements the execdriver Driver interface.
// The libcontainer/runC-based native execdriver does exploit the hook mechanism
func (d *Driver) SupportsHooks() bool {
	return true
}