Files
docker-cli/components/engine/daemon/execdriver/native/seccomp_default.go
Justin Cormack b5183e0bab Allow use of robust list syscalls
The set_robust_list syscall registers the list of futexes that the
kernel cleans up on thread exit; it is needed to avoid mutexes
being held forever when a thread exits while still holding them.

See for example in Musl libc mutex handling:
http://git.musl-libc.org/cgit/musl/tree/src/thread/pthread_mutex_trylock.c#n22

Signed-off-by: Justin Cormack <justin.cormack@unikernel.com>
Upstream-commit: 7b133e7235593f8d46832045da339395e71e8148
Component: engine
2015-12-29 10:22:05 +00:00

308 lines
7.5 KiB
Go

// +build linux
package native
import "github.com/opencontainers/runc/libcontainer/configs"
// defaultSeccompProfile is a blacklist-style seccomp profile: every syscall
// is allowed by default (DefaultAction is configs.Allow) and each syscall
// listed below is instead answered with configs.Errno.
//
// An entry with an empty Args list denies the syscall unconditionally. An
// entry with Args denies it only when the argument comparisons match; all
// comparisons inside a single entry must hold at once (libseccomp ANDs them),
// so alternative conditions for one syscall are expressed as separate entries
// carrying the same Name.
var defaultSeccompProfile = &configs.Seccomp{
	DefaultAction: configs.Allow,
	Syscalls: []*configs.Syscall{
		{
			// Quota and Accounting syscalls which could let containers
			// disable their own resource limits or process accounting
			Name:   "acct",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Prevent containers from using the kernel keyring,
			// which is not namespaced
			Name:   "add_key",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Similar to clock_settime and settimeofday
			// Time/Date is not namespaced
			Name:   "adjtimex",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Time/Date is not namespaced
			Name:   "clock_settime",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		// Deny cloning new namespaces.
		//
		// Each namespace flag (values from sched.h) gets its own entry using
		// a masked comparison, (flags & Value) == ValueTwo, so the rule fires
		// whenever that bit is set regardless of the other clone flags.
		// Putting several comparisons on argument 0 into one entry would
		// require all of them to hold simultaneously and would never match.
		{
			// CLONE_NEWNS 0x00020000
			Name:   "clone",
			Action: configs.Errno,
			Args: []*configs.Arg{
				{
					Index:    0,
					Value:    uint64(0x00020000),
					ValueTwo: uint64(0x00020000),
					Op:       configs.MaskEqualTo,
				},
			},
		},
		{
			// CLONE_NEWUTS 0x04000000
			Name:   "clone",
			Action: configs.Errno,
			Args: []*configs.Arg{
				{
					Index:    0,
					Value:    uint64(0x04000000),
					ValueTwo: uint64(0x04000000),
					Op:       configs.MaskEqualTo,
				},
			},
		},
		{
			// CLONE_NEWIPC 0x08000000
			Name:   "clone",
			Action: configs.Errno,
			Args: []*configs.Arg{
				{
					Index:    0,
					Value:    uint64(0x08000000),
					ValueTwo: uint64(0x08000000),
					Op:       configs.MaskEqualTo,
				},
			},
		},
		{
			// CLONE_NEWUSER 0x10000000
			Name:   "clone",
			Action: configs.Errno,
			Args: []*configs.Arg{
				{
					Index:    0,
					Value:    uint64(0x10000000),
					ValueTwo: uint64(0x10000000),
					Op:       configs.MaskEqualTo,
				},
			},
		},
		{
			// CLONE_NEWPID 0x20000000
			Name:   "clone",
			Action: configs.Errno,
			Args: []*configs.Arg{
				{
					Index:    0,
					Value:    uint64(0x20000000),
					ValueTwo: uint64(0x20000000),
					Op:       configs.MaskEqualTo,
				},
			},
		},
		{
			// CLONE_NEWNET 0x40000000
			Name:   "clone",
			Action: configs.Errno,
			Args: []*configs.Arg{
				{
					Index:    0,
					Value:    uint64(0x40000000),
					ValueTwo: uint64(0x40000000),
					Op:       configs.MaskEqualTo,
				},
			},
		},
		{
			// Deny manipulation and functions on kernel modules.
			Name:   "create_module",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny manipulation and functions on kernel modules.
			Name:   "delete_module",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny retrieval of exported kernel and module symbols
			Name:   "get_kernel_syms",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Terrifying syscalls that modify kernel memory and NUMA settings.
			// They're gated by CAP_SYS_NICE,
			// which we do not retain by default in containers.
			Name:   "get_mempolicy",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny manipulation and functions on kernel modules.
			Name:   "init_module",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Prevent containers from modifying kernel I/O privilege levels.
			// Already restricted as containers drop CAP_SYS_RAWIO by default.
			Name:   "ioperm",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Prevent containers from modifying kernel I/O privilege levels.
			// Already restricted as containers drop CAP_SYS_RAWIO by default.
			Name:   "iopl",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Sister syscall of kexec_load that does the same thing,
			// slightly different arguments
			Name:   "kexec_file_load",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny loading a new kernel for later execution
			Name:   "kexec_load",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Prevent containers from using the kernel keyring,
			// which is not namespaced
			Name:   "keyctl",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Tracing/profiling syscalls,
			// which could leak a lot of information on the host
			Name:   "lookup_dcookie",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Terrifying syscalls that modify kernel memory and NUMA settings.
			// They're gated by CAP_SYS_NICE,
			// which we do not retain by default in containers.
			Name:   "mbind",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Terrifying syscalls that modify kernel memory and NUMA settings.
			// They're gated by CAP_SYS_NICE,
			// which we do not retain by default in containers.
			Name:   "migrate_pages",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Old syscall only used in 16-bit code,
			// and a potential information leak
			Name:   "modify_ldt",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny mount
			Name:   "mount",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Terrifying syscalls that modify kernel memory and NUMA settings.
			// They're gated by CAP_SYS_NICE,
			// which we do not retain by default in containers.
			Name:   "move_pages",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny interaction with the kernel nfs daemon
			Name:   "nfsservctl",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Cause of an old container breakout,
			// might as well restrict it to be on the safe side
			Name:   "open_by_handle_at",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Tracing/profiling syscalls,
			// which could leak a lot of information on the host
			Name:   "perf_event_open",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Prevent container from enabling BSD emulation.
			// Not inherently dangerous, but poorly tested,
			// potential for a lot of kernel vulns in this.
			Name:   "personality",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny pivot_root
			Name:   "pivot_root",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Already blocked by dropping CAP_SYS_PTRACE
			Name:   "ptrace",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny manipulation and functions on kernel modules.
			Name:   "query_module",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Quota and Accounting syscalls which could let containers
			// disable their own resource limits or process accounting
			Name:   "quotactl",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Probably a bad idea to let containers reboot the host
			Name:   "reboot",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Probably a bad idea to let containers restart
			// NOTE(review): restart_syscall(2) is what the kernel uses to
			// resume an interrupted syscall after a stop signal; it does not
			// restart the container or the host. Denying it may break
			// processes that are stopped and continued — confirm this entry
			// is really wanted.
			Name:   "restart_syscall",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Prevent containers from using the kernel keyring,
			// which is not namespaced
			Name:   "request_key",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// meta, deny seccomp
			Name:   "seccomp",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Terrifying syscalls that modify kernel memory and NUMA settings.
			// They're gated by CAP_SYS_NICE,
			// which we do not retain by default in containers.
			Name:   "set_mempolicy",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// deny associating a thread with a namespace
			Name:   "setns",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Time/Date is not namespaced
			Name:   "settimeofday",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny start/stop swapping to file/device
			Name:   "swapon",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny start/stop swapping to file/device
			Name:   "swapoff",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny read/write system parameters
			Name:   "_sysctl",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Deny umount
			Name:   "umount2",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Same as clone
			Name:   "unshare",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
		{
			// Older syscall related to shared libraries, unused for a long time
			Name:   "uselib",
			Action: configs.Errno,
			Args:   []*configs.Arg{},
		},
	},
}