diff --git a/components/engine/cmd/dockerd/daemon.go b/components/engine/cmd/dockerd/daemon.go index b8552973e8..98b783829e 100644 --- a/components/engine/cmd/dockerd/daemon.go +++ b/components/engine/cmd/dockerd/daemon.go @@ -257,6 +257,8 @@ func (cli *DaemonCli) start(opts *daemonOptions) (err error) { PluginBackend: d.PluginManager(), NetworkSubnetsProvider: d, DefaultAdvertiseAddr: cli.Config.SwarmDefaultAdvertiseAddr, + RaftHeartbeatTick: cli.Config.SwarmRaftHeartbeatTick, + RaftElectionTick: cli.Config.SwarmRaftElectionTick, RuntimeRoot: cli.getSwarmRunRoot(), WatchStream: watchStream, }) diff --git a/components/engine/daemon/cluster/cluster.go b/components/engine/daemon/cluster/cluster.go index cdd47ec0e7..85538fca3b 100644 --- a/components/engine/daemon/cluster/cluster.go +++ b/components/engine/daemon/cluster/cluster.go @@ -96,6 +96,13 @@ type Config struct { // WatchStream is a channel to pass watch API notifications to daemon WatchStream chan *swarmapi.WatchMessage + + // RaftHeartbeatTick is the number of ticks for heartbeat of quorum members + RaftHeartbeatTick uint32 + + // RaftElectionTick is the number of ticks to elapse before followers propose a new round of leader election + // This value should be 10x that of RaftHeartbeatTick + RaftElectionTick uint32 } // Cluster provides capabilities to participate in a cluster as a worker or a @@ -134,6 +141,14 @@ func New(config Config) (*Cluster, error) { if config.RuntimeRoot == "" { config.RuntimeRoot = root } + if config.RaftHeartbeatTick == 0 { + config.RaftHeartbeatTick = 1 + } + if config.RaftElectionTick == 0 { + // 10X heartbeat tick is the recommended ratio according to etcd docs. 
+ config.RaftElectionTick = 10 * config.RaftHeartbeatTick + } + if err := os.MkdirAll(config.RuntimeRoot, 0700); err != nil { return nil, err } diff --git a/components/engine/daemon/cluster/noderunner.go b/components/engine/daemon/cluster/noderunner.go index 989551a6ca..45221347d7 100644 --- a/components/engine/daemon/cluster/noderunner.go +++ b/components/engine/daemon/cluster/noderunner.go @@ -124,11 +124,11 @@ func (n *nodeRunner) start(conf nodeStartConfig) error { n.cluster.config.Backend, n.cluster.config.PluginBackend, n.cluster.config.ImageBackend), - HeartbeatTick: 1, + HeartbeatTick: n.cluster.config.RaftHeartbeatTick, // Recommended value in etcd/raft is 10 x (HeartbeatTick). // Lower values were seen to have caused instability because of // frequent leader elections when running on flakey networks. - ElectionTick: 10, + ElectionTick: n.cluster.config.RaftElectionTick, UnlockKey: conf.lockKey, AutoLockManagers: conf.autolock, PluginGetter: n.cluster.config.Backend.PluginGetter(), diff --git a/components/engine/daemon/config/config.go b/components/engine/daemon/config/config.go index 71457ed0f0..e86d025a5a 100644 --- a/components/engine/daemon/config/config.go +++ b/components/engine/daemon/config/config.go @@ -158,7 +158,18 @@ type CommonConfig struct { // given to the /swarm/init endpoint and no advertise address is // specified. SwarmDefaultAdvertiseAddr string `json:"swarm-default-advertise-addr"` - MetricsAddress string `json:"metrics-addr"` + + // SwarmRaftHeartbeatTick is the number of ticks for the swarm mode raft quorum heartbeat. + // The typical value is 1. + SwarmRaftHeartbeatTick uint32 `json:"swarm-raft-heartbeat-tick"` + + // SwarmRaftElectionTick is the number of ticks to elapse before followers in the quorum can propose + // a new round of leader election. The default is 10X SwarmRaftHeartbeatTick; the recommended value is at least 10X the heartbeat tick. 
+ // Higher values can make the quorum less sensitive to transient faults in the environment, but this also + // means it takes longer for the managers to detect a down leader. + SwarmRaftElectionTick uint32 `json:"swarm-raft-election-tick"` + + MetricsAddress string `json:"metrics-addr"` LogConfig BridgeConfig // bridgeConfig holds bridge network specific configuration.