From 43ccd4737c036616fbbe9b7a7fbbe7cd5b5fa30e Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Fri, 31 Aug 2018 12:43:32 +0200 Subject: [PATCH 1/7] systemd: no limit on core size set LimitCORE=infinity to ensure complete core creation, allows extraction of as much information as possible. Signed-off-by: Sebastiaan van Stijn (cherry picked from commit da69663b9ccd72d95ec60007ef707d82a35324fa) Signed-off-by: Eli Uriegas Upstream-commit: f93f4525173e3a32d315794344d44bae780de00e Component: packaging --- components/packaging/systemd/docker.service | 2 ++ 1 file changed, 2 insertions(+) diff --git a/components/packaging/systemd/docker.service b/components/packaging/systemd/docker.service index 23557a0089..ff042cb774 100644 --- a/components/packaging/systemd/docker.service +++ b/components/packaging/systemd/docker.service @@ -17,5 +17,7 @@ Restart=always # /opt/containerd/bin is in front so dockerd grabs the correct runc binary Environment="PATH=/opt/containerd/bin:/sbin:/usr/bin:/usr/local/bin:$PATH" +LimitCORE=infinity + [Install] WantedBy=multi-user.target From 39e2041c6947c654dbef50c751b4a9f39812e38b Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Fri, 31 Aug 2018 12:45:07 +0200 Subject: [PATCH 2/7] systemd: set Limit* to infinity There is a not-insignificant performance overhead for all containers (if containerd is a child of Docker, which is the current setup) if systemd sets rlimits on the main Docker daemon process (because the limits propagate to all children). 
Signed-off-by: Sebastiaan van Stijn (cherry picked from commit 68e15413dce8d6e8f428ac286641a9482d30aabc) Signed-off-by: Eli Uriegas Upstream-commit: 1246dedcd55058a2856c7042509fb0badcefbf91 Component: packaging --- components/packaging/systemd/docker.service | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/components/packaging/systemd/docker.service b/components/packaging/systemd/docker.service index ff042cb774..0c414c275b 100644 --- a/components/packaging/systemd/docker.service +++ b/components/packaging/systemd/docker.service @@ -17,6 +17,10 @@ Restart=always # /opt/containerd/bin is in front so dockerd grabs the correct runc binary Environment="PATH=/opt/containerd/bin:/sbin:/usr/bin:/usr/local/bin:$PATH" +# Having non-zero Limit*s causes performance problems due to accounting overhead +# in the kernel. We recommend using cgroups to do container-local accounting. +LimitNOFILE=infinity +LimitNPROC=infinity LimitCORE=infinity [Install] From 6d203c7711fa2c543a6b73ed635d3c9c1bea1ac7 Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Fri, 31 Aug 2018 12:50:05 +0200 Subject: [PATCH 3/7] systemd: don't limit tasks Systemd sets a default of 512 tasks, which is far too low to run many containers. Note that TasksMax is only supported on systemd 226 and above. Signed-off-by: Sebastiaan van Stijn (cherry picked from commit 82fe96733f551d36018c3840cf21d813807e9b76) Signed-off-by: Eli Uriegas Upstream-commit: c42e4736e055c472284fd09ca05d405d31c4516a Component: packaging --- components/packaging/systemd/docker.service | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/components/packaging/systemd/docker.service b/components/packaging/systemd/docker.service index 0c414c275b..5683409328 100644 --- a/components/packaging/systemd/docker.service +++ b/components/packaging/systemd/docker.service @@ -23,5 +23,9 @@ LimitNOFILE=infinity LimitNPROC=infinity LimitCORE=infinity +# Comment TasksMax if your systemd version does not supports it. 
+# Only systemd 226 and above support this option. +TasksMax=infinity + [Install] WantedBy=multi-user.target From 32fc71f1557d240d906d9e1375c7031f0c14e6ee Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Fri, 31 Aug 2018 12:53:49 +0200 Subject: [PATCH 4/7] systemd: add "Delegate=yes" to docker's service file We need to add delegate yes to docker's service file so that it can manage the cgroups of the processes that it launches without systemd interfering with them and moving the processes after it is reloaded. Delegate= Turns on delegation of further resource control partitioning to processes of the unit. For unprivileged services (i.e. those using the User= setting), this allows processes to create a subhierarchy beneath its control group path. For privileged services and scopes, this ensures the processes will have all control group controllers enabled. This is the proper fix for issue moby/moby#20152 Signed-off-by: Sebastiaan van Stijn (cherry picked from commit e134e666a585b1f13e9e5e371dd93e5ce04a4b34) Signed-off-by: Eli Uriegas Upstream-commit: 1120496ca0492d29731befd1460a934ac115e7f8 Component: packaging --- components/packaging/systemd/docker.service | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/components/packaging/systemd/docker.service b/components/packaging/systemd/docker.service index 5683409328..ad94533863 100644 --- a/components/packaging/systemd/docker.service +++ b/components/packaging/systemd/docker.service @@ -8,6 +8,10 @@ Wants=network-online.target [Service] # Install containerd-shim-process if it's not already installed ExecStartPre=/usr/libexec/containerd-offline-installer /var/lib/containerd-offline-installer/containerd-shim-process.tar docker.io/docker/containerd-shim-process + +# the default is not to use systemd for cgroups because the delegate issues still +# exists and systemd currently does not support the cgroup feature set required +# for containers run by docker ExecStart=/usr/bin/dockerd 
ExecStopPost=/usr/bin/dockerd post-stop TimeoutSec=0 @@ -27,5 +31,8 @@ LimitCORE=infinity # Only systemd 226 and above support this option. TasksMax=infinity +# set delegate yes so that systemd does not reset the cgroups of docker containers +Delegate=yes + [Install] WantedBy=multi-user.target From 350162b4b07d106d16cbc6368306c70e3978263d Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Fri, 31 Aug 2018 12:55:39 +0200 Subject: [PATCH 5/7] systemd: set systemd KillMode Change the kill mode to process so that systemd does not kill container processes when the daemon is shut down but only the docker daemon Signed-off-by: Sebastiaan van Stijn (cherry picked from commit d736ae9da7401f582469fa7e943adde9f2163024) Signed-off-by: Eli Uriegas Upstream-commit: 784c5936ec984b5525ac71eca3b7731e49eb6b37 Component: packaging --- components/packaging/systemd/docker.service | 3 +++ 1 file changed, 3 insertions(+) diff --git a/components/packaging/systemd/docker.service b/components/packaging/systemd/docker.service index ad94533863..6bfb873fd6 100644 --- a/components/packaging/systemd/docker.service +++ b/components/packaging/systemd/docker.service @@ -34,5 +34,8 @@ TasksMax=infinity # set delegate yes so that systemd does not reset the cgroups of docker containers Delegate=yes +# kill only the docker process, not all processes in the cgroup +KillMode=process + [Install] WantedBy=multi-user.target From 2e215339a193947a417f72aa24a5675e29001833 Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Fri, 31 Aug 2018 12:57:03 +0200 Subject: [PATCH 6/7] systemd: add support for reloading daemon configuration through systemd This adds support for reloading the docker daemon (SIGHUP) so that changes in '/etc/docker/daemon.json' can be loaded at runtime by reloading the service through systemd ('systemctl reload docker') Before this change, systemd would output an error that "reloading" is not supported for the docker service; systemctl reload docker Failed to reload 
docker.service: Job type reload is not applicable for unit docker.service. After this change, the docker daemon can be reloaded through 'systemctl reload docker', which reloads the configuration; journalctl -f -u docker.service May 02 03:49:20 testing systemd[1]: Reloading Docker Application Container Engine. May 02 03:49:20 testing docker[28496]: time="2016-05-02T03:49:20.143964103-04:00" level=info msg="Got signal to reload configuration, reloading from: /etc/docker/daemon.json" May 02 03:49:20 testing systemd[1]: Reloaded Docker Application Container Engine. Signed-off-by: Sebastiaan van Stijn (cherry picked from commit 3e1b508e5f70b35869d9c8417d3b65a141820af9) Signed-off-by: Eli Uriegas Upstream-commit: 127426fc693a882b670339391da5d12f08c1438d Component: packaging --- components/packaging/systemd/docker.service | 1 + 1 file changed, 1 insertion(+) diff --git a/components/packaging/systemd/docker.service b/components/packaging/systemd/docker.service index 6bfb873fd6..d6527e8a2e 100644 --- a/components/packaging/systemd/docker.service +++ b/components/packaging/systemd/docker.service @@ -13,6 +13,7 @@ ExecStartPre=/usr/libexec/containerd-offline-installer /var/lib/containerd-offli # exists and systemd currently does not support the cgroup feature set required # for containers run by docker ExecStart=/usr/bin/dockerd +ExecReload=/bin/kill -s HUP $MAINPID ExecStopPost=/usr/bin/dockerd post-stop TimeoutSec=0 RestartSec=2 From 73a59f6710fbfd73fb58275843ebf315a949ffe7 Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Fri, 31 Aug 2018 12:58:50 +0200 Subject: [PATCH 7/7] systemd: set start burst limits Note that StartLimit* options were moved from "Service" to "Unit" in systemd 229 (https://github.com/systemd/systemd/commit/6bf0f408e4833152197fb38fb10a9989c89f3a59) both the old, and new location are accepted by systemd 229 and up, so using the old location to make them work for either version of systemd. 
StartLimitInterval was renamed to StartLimitIntervalSec in systemd 230 (https://github.com/systemd/systemd/commit/f0367da7d1a61ad698a55d17b5c28ddce0dc265a) both the old, and new name are accepted by systemd 230 and up, so using the old name to make this option work for either version of systemd. Signed-off-by: Sebastiaan van Stijn (cherry picked from commit 2c2bfea5d009fb884f5c61b62e3c85bd88e7909e) Signed-off-by: Eli Uriegas Upstream-commit: 5963decd1502e5012692a1f958bfdc5403f02920 Component: packaging --- components/packaging/systemd/docker.service | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/components/packaging/systemd/docker.service b/components/packaging/systemd/docker.service index d6527e8a2e..b99640fc82 100644 --- a/components/packaging/systemd/docker.service +++ b/components/packaging/systemd/docker.service @@ -18,6 +18,17 @@ ExecStopPost=/usr/bin/dockerd post-stop TimeoutSec=0 RestartSec=2 Restart=always + +# Note that StartLimit* options were moved from "Service" to "Unit" in systemd 229. +# Both the old, and new location are accepted by systemd 229 and up, so using the old location +# to make them work for either version of systemd. +StartLimitBurst=3 + +# Note that StartLimitInterval was renamed to StartLimitIntervalSec in systemd 230. +# Both the old, and new name are accepted by systemd 230 and up, so using the old name to make +# this option work for either version of systemd. +StartLimitInterval=60s + # On RPM Based distributions PATH isn't defined so we define it here # /opt/containerd/bin is in front so dockerd grabs the correct runc binary Environment="PATH=/opt/containerd/bin:/sbin:/usr/bin:/usr/local/bin:$PATH"