Files
docker-cli/components/engine/daemon/networkdriver/bridge/driver.go
Michael Crosby 45221d6bfb Ensure that bridge driver does not use global mappers
This has a few hacks in it but it ensures that the bridge driver does
not use global state in the mappers, atleast as much as possible at this
point without further refactoring.  Some of the exported fields are
hacks to handle the daemon port mapping but this results in a much
cleaner approach and completely remove the global state from the mapper
and allocator.

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
Upstream-commit: d8c628cf082a50c0a2a5e381a21da8279a5462b4
Component: engine
2015-03-30 18:28:24 -07:00

726 lines
21 KiB
Go

package bridge
import (
"encoding/hex"
"errors"
"fmt"
"io/ioutil"
"net"
"os"
"strings"
"sync"
"github.com/Sirupsen/logrus"
"github.com/docker/docker/daemon/networkdriver"
"github.com/docker/docker/daemon/networkdriver/ipallocator"
"github.com/docker/docker/daemon/networkdriver/portmapper"
"github.com/docker/docker/engine"
"github.com/docker/docker/nat"
"github.com/docker/docker/pkg/iptables"
"github.com/docker/docker/pkg/parsers/kernel"
"github.com/docker/docker/pkg/resolvconf"
"github.com/docker/libcontainer/netlink"
)
const (
DefaultNetworkBridge = "docker0"
MaxAllocatedPortAttempts = 10
)
// Network interface represents the networking stack of a container
type networkInterface struct {
IP net.IP
IPv6 net.IP
PortMappings []net.Addr // There are mappings to the host interfaces
}
type ifaces struct {
c map[string]*networkInterface
sync.Mutex
}
func (i *ifaces) Set(key string, n *networkInterface) {
i.Lock()
i.c[key] = n
i.Unlock()
}
func (i *ifaces) Get(key string) *networkInterface {
i.Lock()
res := i.c[key]
i.Unlock()
return res
}
var (
addrs = []string{
// Here we don't follow the convention of using the 1st IP of the range for the gateway.
// This is to use the same gateway IPs as the /24 ranges, which predate the /16 ranges.
// In theory this shouldn't matter - in practice there's bound to be a few scripts relying
// on the internal addressing or other stupid things like that.
// They shouldn't, but hey, let's not break them unless we really have to.
"172.17.42.1/16", // Don't use 172.16.0.0/16, it conflicts with EC2 DNS 172.16.0.23
"10.0.42.1/16", // Don't even try using the entire /8, that's too intrusive
"10.1.42.1/16",
"10.42.42.1/16",
"172.16.42.1/24",
"172.16.43.1/24",
"172.16.44.1/24",
"10.0.42.1/24",
"10.0.43.1/24",
"192.168.42.1/24",
"192.168.43.1/24",
"192.168.44.1/24",
}
bridgeIface string
bridgeIPv4Network *net.IPNet
bridgeIPv6Addr net.IP
globalIPv6Network *net.IPNet
portMapper *portmapper.PortMapper
defaultBindingIP = net.ParseIP("0.0.0.0")
currentInterfaces = ifaces{c: make(map[string]*networkInterface)}
ipAllocator = ipallocator.New()
)
func InitDriver(job *engine.Job) error {
var (
networkv4 *net.IPNet
networkv6 *net.IPNet
addrv4 net.Addr
addrsv6 []net.Addr
enableIPTables = job.GetenvBool("EnableIptables")
enableIPv6 = job.GetenvBool("EnableIPv6")
icc = job.GetenvBool("InterContainerCommunication")
ipMasq = job.GetenvBool("EnableIpMasq")
ipForward = job.GetenvBool("EnableIpForward")
bridgeIP = job.Getenv("BridgeIP")
bridgeIPv6 = "fe80::1/64"
fixedCIDR = job.Getenv("FixedCIDR")
fixedCIDRv6 = job.Getenv("FixedCIDRv6")
)
portMapper = portmapper.New()
if defaultIP := job.Getenv("DefaultBindingIP"); defaultIP != "" {
defaultBindingIP = net.ParseIP(defaultIP)
}
bridgeIface = job.Getenv("BridgeIface")
usingDefaultBridge := false
if bridgeIface == "" {
usingDefaultBridge = true
bridgeIface = DefaultNetworkBridge
}
addrv4, addrsv6, err := networkdriver.GetIfaceAddr(bridgeIface)
if err != nil {
// No Bridge existent, create one
// If we're not using the default bridge, fail without trying to create it
if !usingDefaultBridge {
return err
}
// If the iface is not found, try to create it
if err := configureBridge(bridgeIP, bridgeIPv6, enableIPv6); err != nil {
return err
}
addrv4, addrsv6, err = networkdriver.GetIfaceAddr(bridgeIface)
if err != nil {
return err
}
if fixedCIDRv6 != "" {
// Setting route to global IPv6 subnet
logrus.Infof("Adding route to IPv6 network %q via device %q", fixedCIDRv6, bridgeIface)
if err := netlink.AddRoute(fixedCIDRv6, "", "", bridgeIface); err != nil {
logrus.Fatalf("Could not add route to IPv6 network %q via device %q", fixedCIDRv6, bridgeIface)
}
}
} else {
// Bridge exists already, getting info...
// Validate that the bridge ip matches the ip specified by BridgeIP
if bridgeIP != "" {
networkv4 = addrv4.(*net.IPNet)
bip, _, err := net.ParseCIDR(bridgeIP)
if err != nil {
return err
}
if !networkv4.IP.Equal(bip) {
return fmt.Errorf("Bridge ip (%s) does not match existing bridge configuration %s", networkv4.IP, bip)
}
}
// A bridge might exist but not have any IPv6 addr associated with it yet
// (for example, an existing Docker installation that has only been used
// with IPv4 and docker0 already is set up) In that case, we can perform
// the bridge init for IPv6 here, else we will error out below if --ipv6=true
if len(addrsv6) == 0 && enableIPv6 {
if err := setupIPv6Bridge(bridgeIPv6); err != nil {
return err
}
// Recheck addresses now that IPv6 is setup on the bridge
addrv4, addrsv6, err = networkdriver.GetIfaceAddr(bridgeIface)
if err != nil {
return err
}
}
// TODO: Check if route to fixedCIDRv6 is set
}
if enableIPv6 {
bip6, _, err := net.ParseCIDR(bridgeIPv6)
if err != nil {
return err
}
found := false
for _, addrv6 := range addrsv6 {
networkv6 = addrv6.(*net.IPNet)
if networkv6.IP.Equal(bip6) {
found = true
break
}
}
if !found {
return fmt.Errorf("Bridge IPv6 does not match existing bridge configuration %s", bip6)
}
}
networkv4 = addrv4.(*net.IPNet)
if enableIPv6 {
if len(addrsv6) == 0 {
return errors.New("IPv6 enabled but no IPv6 detected")
}
bridgeIPv6Addr = networkv6.IP
}
// Configure iptables for link support
if enableIPTables {
if err := setupIPTables(addrv4, icc, ipMasq); err != nil {
return err
}
}
if ipForward {
// Enable IPv4 forwarding
if err := ioutil.WriteFile("/proc/sys/net/ipv4/ip_forward", []byte{'1', '\n'}, 0644); err != nil {
logrus.Warnf("WARNING: unable to enable IPv4 forwarding: %s\n", err)
}
if fixedCIDRv6 != "" {
// Enable IPv6 forwarding
if err := ioutil.WriteFile("/proc/sys/net/ipv6/conf/default/forwarding", []byte{'1', '\n'}, 0644); err != nil {
logrus.Warnf("WARNING: unable to enable IPv6 default forwarding: %s\n", err)
}
if err := ioutil.WriteFile("/proc/sys/net/ipv6/conf/all/forwarding", []byte{'1', '\n'}, 0644); err != nil {
logrus.Warnf("WARNING: unable to enable IPv6 all forwarding: %s\n", err)
}
}
}
// We can always try removing the iptables
if err := iptables.RemoveExistingChain("DOCKER", iptables.Nat); err != nil {
return err
}
if enableIPTables {
_, err := iptables.NewChain("DOCKER", bridgeIface, iptables.Nat)
if err != nil {
return err
}
chain, err := iptables.NewChain("DOCKER", bridgeIface, iptables.Filter)
if err != nil {
return err
}
portMapper.SetIptablesChain(chain)
}
bridgeIPv4Network = networkv4
if fixedCIDR != "" {
_, subnet, err := net.ParseCIDR(fixedCIDR)
if err != nil {
return err
}
logrus.Debugf("Subnet: %v", subnet)
if err := ipAllocator.RegisterSubnet(bridgeIPv4Network, subnet); err != nil {
return err
}
}
if fixedCIDRv6 != "" {
_, subnet, err := net.ParseCIDR(fixedCIDRv6)
if err != nil {
return err
}
logrus.Debugf("Subnet: %v", subnet)
if err := ipAllocator.RegisterSubnet(subnet, subnet); err != nil {
return err
}
globalIPv6Network = subnet
}
// Block BridgeIP in IP allocator
ipAllocator.RequestIP(bridgeIPv4Network, bridgeIPv4Network.IP)
// https://github.com/docker/docker/issues/2768
job.Eng.HackSetGlobalVar("httpapi.bridgeIP", bridgeIPv4Network.IP)
for name, f := range map[string]engine.Handler{
"allocate_interface": Allocate,
"release_interface": Release,
"allocate_port": AllocatePort,
"link": LinkContainers,
} {
if err := job.Eng.Register(name, f); err != nil {
return err
}
}
return nil
}
func setupIPTables(addr net.Addr, icc, ipmasq bool) error {
// Enable NAT
if ipmasq {
natArgs := []string{"-s", addr.String(), "!", "-o", bridgeIface, "-j", "MASQUERADE"}
if !iptables.Exists(iptables.Nat, "POSTROUTING", natArgs...) {
if output, err := iptables.Raw(append([]string{
"-t", string(iptables.Nat), "-I", "POSTROUTING"}, natArgs...)...); err != nil {
return fmt.Errorf("Unable to enable network bridge NAT: %s", err)
} else if len(output) != 0 {
return &iptables.ChainError{Chain: "POSTROUTING", Output: output}
}
}
}
var (
args = []string{"-i", bridgeIface, "-o", bridgeIface, "-j"}
acceptArgs = append(args, "ACCEPT")
dropArgs = append(args, "DROP")
)
if !icc {
iptables.Raw(append([]string{"-D", "FORWARD"}, acceptArgs...)...)
if !iptables.Exists(iptables.Filter, "FORWARD", dropArgs...) {
logrus.Debugf("Disable inter-container communication")
if output, err := iptables.Raw(append([]string{"-I", "FORWARD"}, dropArgs...)...); err != nil {
return fmt.Errorf("Unable to prevent intercontainer communication: %s", err)
} else if len(output) != 0 {
return fmt.Errorf("Error disabling intercontainer communication: %s", output)
}
}
} else {
iptables.Raw(append([]string{"-D", "FORWARD"}, dropArgs...)...)
if !iptables.Exists(iptables.Filter, "FORWARD", acceptArgs...) {
logrus.Debugf("Enable inter-container communication")
if output, err := iptables.Raw(append([]string{"-I", "FORWARD"}, acceptArgs...)...); err != nil {
return fmt.Errorf("Unable to allow intercontainer communication: %s", err)
} else if len(output) != 0 {
return fmt.Errorf("Error enabling intercontainer communication: %s", output)
}
}
}
// Accept all non-intercontainer outgoing packets
outgoingArgs := []string{"-i", bridgeIface, "!", "-o", bridgeIface, "-j", "ACCEPT"}
if !iptables.Exists(iptables.Filter, "FORWARD", outgoingArgs...) {
if output, err := iptables.Raw(append([]string{"-I", "FORWARD"}, outgoingArgs...)...); err != nil {
return fmt.Errorf("Unable to allow outgoing packets: %s", err)
} else if len(output) != 0 {
return &iptables.ChainError{Chain: "FORWARD outgoing", Output: output}
}
}
// Accept incoming packets for existing connections
existingArgs := []string{"-o", bridgeIface, "-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED", "-j", "ACCEPT"}
if !iptables.Exists(iptables.Filter, "FORWARD", existingArgs...) {
if output, err := iptables.Raw(append([]string{"-I", "FORWARD"}, existingArgs...)...); err != nil {
return fmt.Errorf("Unable to allow incoming packets: %s", err)
} else if len(output) != 0 {
return &iptables.ChainError{Chain: "FORWARD incoming", Output: output}
}
}
return nil
}
func RequestPort(ip net.IP, proto string, port int) (int, error) {
return portMapper.Allocator.RequestPort(ip, proto, port)
}
// configureBridge attempts to create and configure a network bridge interface named `bridgeIface` on the host
// If bridgeIP is empty, it will try to find a non-conflicting IP from the Docker-specified private ranges
// If the bridge `bridgeIface` already exists, it will only perform the IP address association with the existing
// bridge (fixes issue #8444)
// If an address which doesn't conflict with existing interfaces can't be found, an error is returned.
func configureBridge(bridgeIP string, bridgeIPv6 string, enableIPv6 bool) error {
nameservers := []string{}
resolvConf, _ := resolvconf.Get()
// We don't check for an error here, because we don't really care
// if we can't read /etc/resolv.conf. So instead we skip the append
// if resolvConf is nil. It either doesn't exist, or we can't read it
// for some reason.
if resolvConf != nil {
nameservers = append(nameservers, resolvconf.GetNameserversAsCIDR(resolvConf)...)
}
var ifaceAddr string
if len(bridgeIP) != 0 {
_, _, err := net.ParseCIDR(bridgeIP)
if err != nil {
return err
}
ifaceAddr = bridgeIP
} else {
for _, addr := range addrs {
_, dockerNetwork, err := net.ParseCIDR(addr)
if err != nil {
return err
}
if err := networkdriver.CheckNameserverOverlaps(nameservers, dockerNetwork); err == nil {
if err := networkdriver.CheckRouteOverlaps(dockerNetwork); err == nil {
ifaceAddr = addr
break
} else {
logrus.Debugf("%s %s", addr, err)
}
}
}
}
if ifaceAddr == "" {
return fmt.Errorf("Could not find a free IP address range for interface '%s'. Please configure its address manually and run 'docker -b %s'", bridgeIface, bridgeIface)
}
logrus.Debugf("Creating bridge %s with network %s", bridgeIface, ifaceAddr)
if err := createBridgeIface(bridgeIface); err != nil {
// The bridge may already exist, therefore we can ignore an "exists" error
if !os.IsExist(err) {
return err
}
}
iface, err := net.InterfaceByName(bridgeIface)
if err != nil {
return err
}
ipAddr, ipNet, err := net.ParseCIDR(ifaceAddr)
if err != nil {
return err
}
if err := netlink.NetworkLinkAddIp(iface, ipAddr, ipNet); err != nil {
return fmt.Errorf("Unable to add private network: %s", err)
}
if enableIPv6 {
if err := setupIPv6Bridge(bridgeIPv6); err != nil {
return err
}
}
if err := netlink.NetworkLinkUp(iface); err != nil {
return fmt.Errorf("Unable to start network bridge: %s", err)
}
return nil
}
func setupIPv6Bridge(bridgeIPv6 string) error {
iface, err := net.InterfaceByName(bridgeIface)
if err != nil {
return err
}
// Enable IPv6 on the bridge
procFile := "/proc/sys/net/ipv6/conf/" + iface.Name + "/disable_ipv6"
if err := ioutil.WriteFile(procFile, []byte{'0', '\n'}, 0644); err != nil {
return fmt.Errorf("Unable to enable IPv6 addresses on bridge: %v", err)
}
ipAddr6, ipNet6, err := net.ParseCIDR(bridgeIPv6)
if err != nil {
return fmt.Errorf("Unable to parse bridge IPv6 address: %q, error: %v", bridgeIPv6, err)
}
if err := netlink.NetworkLinkAddIp(iface, ipAddr6, ipNet6); err != nil {
return fmt.Errorf("Unable to add private IPv6 network: %v", err)
}
return nil
}
func createBridgeIface(name string) error {
kv, err := kernel.GetKernelVersion()
// Only set the bridge's mac address if the kernel version is > 3.3
// before that it was not supported
setBridgeMacAddr := err == nil && (kv.Kernel >= 3 && kv.Major >= 3)
logrus.Debugf("setting bridge mac address = %v", setBridgeMacAddr)
return netlink.CreateBridge(name, setBridgeMacAddr)
}
// Generate a IEEE802 compliant MAC address from the given IP address.
//
// The generator is guaranteed to be consistent: the same IP will always yield the same
// MAC address. This is to avoid ARP cache issues.
func generateMacAddr(ip net.IP) net.HardwareAddr {
hw := make(net.HardwareAddr, 6)
// The first byte of the MAC address has to comply with these rules:
// 1. Unicast: Set the least-significant bit to 0.
// 2. Address is locally administered: Set the second-least-significant bit (U/L) to 1.
// 3. As "small" as possible: The veth address has to be "smaller" than the bridge address.
hw[0] = 0x02
// The first 24 bits of the MAC represent the Organizationally Unique Identifier (OUI).
// Since this address is locally administered, we can do whatever we want as long as
// it doesn't conflict with other addresses.
hw[1] = 0x42
// Insert the IP address into the last 32 bits of the MAC address.
// This is a simple way to guarantee the address will be consistent and unique.
copy(hw[2:], ip.To4())
return hw
}
func linkLocalIPv6FromMac(mac string) (string, error) {
hx := strings.Replace(mac, ":", "", -1)
hw, err := hex.DecodeString(hx)
if err != nil {
return "", errors.New("Could not parse MAC address " + mac)
}
hw[0] ^= 0x2
return fmt.Sprintf("fe80::%x%x:%xff:fe%x:%x%x/64", hw[0], hw[1], hw[2], hw[3], hw[4], hw[5]), nil
}
// Allocate a network interface
func Allocate(job *engine.Job) error {
var (
ip net.IP
mac net.HardwareAddr
err error
id = job.Args[0]
requestedIP = net.ParseIP(job.Getenv("RequestedIP"))
requestedIPv6 = net.ParseIP(job.Getenv("RequestedIPv6"))
globalIPv6 net.IP
)
ip, err = ipAllocator.RequestIP(bridgeIPv4Network, requestedIP)
if err != nil {
return err
}
// If no explicit mac address was given, generate a random one.
if mac, err = net.ParseMAC(job.Getenv("RequestedMac")); err != nil {
mac = generateMacAddr(ip)
}
if globalIPv6Network != nil {
// If globalIPv6Network Size is at least a /80 subnet generate IPv6 address from MAC address
netmaskOnes, _ := globalIPv6Network.Mask.Size()
if requestedIPv6 == nil && netmaskOnes <= 80 {
requestedIPv6 = make(net.IP, len(globalIPv6Network.IP))
copy(requestedIPv6, globalIPv6Network.IP)
for i, h := range mac {
requestedIPv6[i+10] = h
}
}
globalIPv6, err = ipAllocator.RequestIP(globalIPv6Network, requestedIPv6)
if err != nil {
logrus.Errorf("Allocator: RequestIP v6: %v", err)
return err
}
logrus.Infof("Allocated IPv6 %s", globalIPv6)
}
out := engine.Env{}
out.Set("IP", ip.String())
out.Set("Mask", bridgeIPv4Network.Mask.String())
out.Set("Gateway", bridgeIPv4Network.IP.String())
out.Set("MacAddress", mac.String())
out.Set("Bridge", bridgeIface)
size, _ := bridgeIPv4Network.Mask.Size()
out.SetInt("IPPrefixLen", size)
// If linklocal IPv6
localIPv6Net, err := linkLocalIPv6FromMac(mac.String())
if err != nil {
return err
}
localIPv6, _, _ := net.ParseCIDR(localIPv6Net)
out.Set("LinkLocalIPv6", localIPv6.String())
out.Set("MacAddress", mac.String())
if globalIPv6Network != nil {
out.Set("GlobalIPv6", globalIPv6.String())
sizev6, _ := globalIPv6Network.Mask.Size()
out.SetInt("GlobalIPv6PrefixLen", sizev6)
out.Set("IPv6Gateway", bridgeIPv6Addr.String())
}
currentInterfaces.Set(id, &networkInterface{
IP: ip,
IPv6: globalIPv6,
})
out.WriteTo(job.Stdout)
return nil
}
// Release an interface for a select ip
func Release(job *engine.Job) error {
var (
id = job.Args[0]
containerInterface = currentInterfaces.Get(id)
)
if containerInterface == nil {
return fmt.Errorf("No network information to release for %s", id)
}
for _, nat := range containerInterface.PortMappings {
if err := portMapper.Unmap(nat); err != nil {
logrus.Infof("Unable to unmap port %s: %s", nat, err)
}
}
if err := ipAllocator.ReleaseIP(bridgeIPv4Network, containerInterface.IP); err != nil {
logrus.Infof("Unable to release IPv4 %s", err)
}
if globalIPv6Network != nil {
if err := ipAllocator.ReleaseIP(globalIPv6Network, containerInterface.IPv6); err != nil {
logrus.Infof("Unable to release IPv6 %s", err)
}
}
return nil
}
// Allocate an external port and map it to the interface
func AllocatePort(job *engine.Job) error {
var (
err error
ip = defaultBindingIP
id = job.Args[0]
hostIP = job.Getenv("HostIP")
hostPort = job.GetenvInt("HostPort")
containerPort = job.GetenvInt("ContainerPort")
proto = job.Getenv("Proto")
network = currentInterfaces.Get(id)
)
if hostIP != "" {
ip = net.ParseIP(hostIP)
if ip == nil {
return fmt.Errorf("Bad parameter: invalid host ip %s", hostIP)
}
}
// host ip, proto, and host port
var container net.Addr
switch proto {
case "tcp":
container = &net.TCPAddr{IP: network.IP, Port: containerPort}
case "udp":
container = &net.UDPAddr{IP: network.IP, Port: containerPort}
default:
return fmt.Errorf("unsupported address type %s", proto)
}
//
// Try up to 10 times to get a port that's not already allocated.
//
// In the event of failure to bind, return the error that portmapper.Map
// yields.
//
var host net.Addr
for i := 0; i < MaxAllocatedPortAttempts; i++ {
if host, err = portMapper.Map(container, ip, hostPort); err == nil {
break
}
// There is no point in immediately retrying to map an explicitly
// chosen port.
if hostPort != 0 {
logrus.Warnf("Failed to allocate and map port %d: %s", hostPort, err)
break
}
logrus.Warnf("Failed to allocate and map port: %s, retry: %d", err, i+1)
}
if err != nil {
return err
}
network.PortMappings = append(network.PortMappings, host)
out := engine.Env{}
switch netAddr := host.(type) {
case *net.TCPAddr:
out.Set("HostIP", netAddr.IP.String())
out.SetInt("HostPort", netAddr.Port)
case *net.UDPAddr:
out.Set("HostIP", netAddr.IP.String())
out.SetInt("HostPort", netAddr.Port)
}
if _, err := out.WriteTo(job.Stdout); err != nil {
return err
}
return nil
}
func LinkContainers(job *engine.Job) error {
var (
action = job.Args[0]
nfAction iptables.Action
childIP = job.Getenv("ChildIP")
parentIP = job.Getenv("ParentIP")
ignoreErrors = job.GetenvBool("IgnoreErrors")
ports = job.GetenvList("Ports")
)
switch action {
case "-A":
nfAction = iptables.Append
case "-I":
nfAction = iptables.Insert
case "-D":
nfAction = iptables.Delete
default:
return fmt.Errorf("Invalid action '%s' specified", action)
}
ip1 := net.ParseIP(parentIP)
if ip1 == nil {
return fmt.Errorf("Parent IP '%s' is invalid", parentIP)
}
ip2 := net.ParseIP(childIP)
if ip2 == nil {
return fmt.Errorf("Child IP '%s' is invalid", childIP)
}
chain := iptables.Chain{Name: "DOCKER", Bridge: bridgeIface}
for _, p := range ports {
port := nat.Port(p)
if err := chain.Link(nfAction, ip1, ip2, port.Int(), port.Proto()); !ignoreErrors && err != nil {
return err
}
}
return nil
}