The set_robust_list syscall sets the list of futexes that are cleaned up on thread exit; it is needed to avoid mutexes being held forever when a thread exits. See, for example, the mutex handling in musl libc: http://git.musl-libc.org/cgit/musl/tree/src/thread/pthread_mutex_trylock.c#n22

Signed-off-by: Justin Cormack <justin.cormack@unikernel.com>
Upstream-commit: 7b133e7235593f8d46832045da339395e71e8148
Component: engine
308 lines
7.5 KiB
Go
308 lines
7.5 KiB
Go
// +build linux
|
|
|
|
package native
|
|
|
|
import "github.com/opencontainers/runc/libcontainer/configs"
|
|
|
|
var defaultSeccompProfile = &configs.Seccomp{
|
|
DefaultAction: configs.Allow,
|
|
Syscalls: []*configs.Syscall{
|
|
{
|
|
// Quota and Accounting syscalls which could let containers
|
|
// disable their own resource limits or process accounting
|
|
Name: "acct",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Prevent containers from using the kernel keyring,
|
|
// which is not namespaced
|
|
Name: "add_key",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Similar to clock_settime and settimeofday
|
|
// Time/Date is not namespaced
|
|
Name: "adjtimex",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Time/Date is not namespaced
|
|
Name: "clock_settime",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Deny cloning new namespaces
|
|
Name: "clone",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{
|
|
{
|
|
// flags from sched.h
|
|
// CLONE_NEWUTS 0x04000000
|
|
// CLONE_NEWIPC 0x08000000
|
|
// CLONE_NEWUSER 0x10000000
|
|
// CLONE_NEWPID 0x20000000
|
|
// CLONE_NEWNET 0x40000000
|
|
Index: 0,
|
|
Value: uint64(0x04000000),
|
|
Op: configs.GreaterThanOrEqualTo,
|
|
},
|
|
{
|
|
// flags from sched.h
|
|
// CLONE_NEWNS 0x00020000
|
|
Index: 0,
|
|
Value: uint64(0x00020000),
|
|
Op: configs.EqualTo,
|
|
},
|
|
},
|
|
},
|
|
{
|
|
// Deny manipulation and functions on kernel modules.
|
|
Name: "create_module",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Deny manipulation and functions on kernel modules.
|
|
Name: "delete_module",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Deny retrieval of exported kernel and module symbols
|
|
Name: "get_kernel_syms",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Terrifying syscalls that modify kernel memory and NUMA settings.
|
|
// They're gated by CAP_SYS_NICE,
|
|
// which we do not retain by default in containers.
|
|
Name: "get_mempolicy",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Deny manipulation and functions on kernel modules.
|
|
Name: "init_module",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Prevent containers from modifying kernel I/O privilege levels.
|
|
// Already restricted as containers drop CAP_SYS_RAWIO by default.
|
|
Name: "ioperm",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Prevent containers from modifying kernel I/O privilege levels.
|
|
// Already restricted as containers drop CAP_SYS_RAWIO by default.
|
|
Name: "iopl",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Sister syscall of kexec_load that does the same thing,
|
|
// slightly different arguments
|
|
Name: "kexec_file_load",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Deny loading a new kernel for later execution
|
|
Name: "kexec_load",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Prevent containers from using the kernel keyring,
|
|
// which is not namespaced
|
|
Name: "keyctl",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Tracing/profiling syscalls,
|
|
// which could leak a lot of information on the host
|
|
Name: "lookup_dcookie",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Terrifying syscalls that modify kernel memory and NUMA settings.
|
|
// They're gated by CAP_SYS_NICE,
|
|
// which we do not retain by default in containers.
|
|
Name: "mbind",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Terrifying syscalls that modify kernel memory and NUMA settings.
|
|
// They're gated by CAP_SYS_NICE,
|
|
// which we do not retain by default in containers.
|
|
Name: "migrate_pages",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Old syscall only used in 16-bit code,
|
|
// and a potential information leak
|
|
Name: "modify_ldt",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Deny mount
|
|
Name: "mount",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Terrifying syscalls that modify kernel memory and NUMA settings.
|
|
// They're gated by CAP_SYS_NICE,
|
|
// which we do not retain by default in containers.
|
|
Name: "move_pages",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Deny interaction with the kernel nfs daemon
|
|
Name: "nfsservctl",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Cause of an old container breakout,
|
|
// might as well restrict it to be on the safe side
|
|
Name: "open_by_handle_at",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Tracing/profiling syscalls,
|
|
// which could leak a lot of information on the host
|
|
Name: "perf_event_open",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Prevent container from enabling BSD emulation.
|
|
// Not inherently dangerous, but poorly tested,
|
|
// potential for a lot of kernel vulns in this.
|
|
Name: "personality",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Deny pivot_root
|
|
Name: "pivot_root",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Already blocked by dropping CAP_PTRACE
|
|
Name: "ptrace",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Deny manipulation and functions on kernel modules.
|
|
Name: "query_module",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Quota and Accounting syscalls which could let containers
|
|
// disable their own resource limits or process accounting
|
|
Name: "quotactl",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Probably a bad idea to let containers reboot the host
|
|
Name: "reboot",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Probably a bad idea to let containers restart
|
|
Name: "restart_syscall",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Prevent containers from using the kernel keyring,
|
|
// which is not namespaced
|
|
Name: "request_key",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// meta, deny seccomp
|
|
Name: "seccomp",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Terrifying syscalls that modify kernel memory and NUMA settings.
|
|
// They're gated by CAP_SYS_NICE,
|
|
// which we do not retain by default in containers.
|
|
Name: "set_mempolicy",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// deny associating a thread with a namespace
|
|
Name: "setns",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Time/Date is not namespaced
|
|
Name: "settimeofday",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Deny start/stop swapping to file/device
|
|
Name: "swapon",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Deny start/stop swapping to file/device
|
|
Name: "swapoff",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Deny read/write system parameters
|
|
Name: "_sysctl",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Deny umount
|
|
Name: "umount2",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Same as clone
|
|
Name: "unshare",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
{
|
|
// Older syscall related to shared libraries, unused for a long time
|
|
Name: "uselib",
|
|
Action: configs.Errno,
|
|
Args: []*configs.Arg{},
|
|
},
|
|
},
|
|
}
|