From d6aaa8daf198a3f3fe3460f897bbbed792fb8b0e Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 3 May 2019 09:59:54 -0700 Subject: [PATCH 1/4] layer: protect mountedLayer.references Add a mutex to protect concurrent access to mountedLayer.references map. Signed-off-by: Kir Kolyshkin (cherry picked from commit f73b5cb4e8b9a23ad6700577840582d66017a733) Signed-off-by: Sebastiaan van Stijn Upstream-commit: bb80a60be2a9236b0db27222310f568e1d3fba11 Component: engine --- components/engine/layer/mounted_layer.go | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/components/engine/layer/mounted_layer.go b/components/engine/layer/mounted_layer.go index d6858c662c..c5d9e0e488 100644 --- a/components/engine/layer/mounted_layer.go +++ b/components/engine/layer/mounted_layer.go @@ -2,6 +2,7 @@ package layer // import "github.com/docker/docker/layer" import ( "io" + "sync" "github.com/docker/docker/pkg/archive" "github.com/docker/docker/pkg/containerfs" @@ -15,6 +16,7 @@ type mountedLayer struct { path string layerStore *layerStore + sync.Mutex references map[RWLayer]*referencedRWLayer } @@ -62,16 +64,24 @@ func (ml *mountedLayer) getReference() RWLayer { ref := &referencedRWLayer{ mountedLayer: ml, } + ml.Lock() ml.references[ref] = ref + ml.Unlock() return ref } func (ml *mountedLayer) hasReferences() bool { - return len(ml.references) > 0 + ml.Lock() + ret := len(ml.references) > 0 + ml.Unlock() + + return ret } func (ml *mountedLayer) deleteReference(ref RWLayer) error { + ml.Lock() + defer ml.Unlock() if _, ok := ml.references[ref]; !ok { return ErrLayerNotRetained } @@ -81,7 +91,9 @@ func (ml *mountedLayer) deleteReference(ref RWLayer) error { func (ml *mountedLayer) retakeReference(r RWLayer) { if ref, ok := r.(*referencedRWLayer); ok { + ml.Lock() ml.references[ref] = ref + ml.Unlock() } } From 89f76f3cc6036a3ad400690ebe56be9f6efd5c91 Mon Sep 17 00:00:00 2001 From: Xinfeng Liu Date: Tue, 23 Apr 2019 02:33:20 +0000 Subject: [PATCH 2/4] layer: optimize layerStore mountL Goroutine stack analisys shown some lock contention while doing massively (100 instances of `docker rm`) parallel image removal, with many goroutines waiting for the mountL mutex. Optimize it. With this commit, the above operation is about 3x faster, with no noticeable change to container creation times (tested on aufs and overlay2). kolyshkin@: - squashed commits - added description - protected CreateRWLayer against name collisions by temporary assiging nil to ls.mounts[name], and treating nil as "non-existent" in all the other functions. Signed-off-by: Kir Kolyshkin (cherry picked from commit 05250a4f0094e6802dd7d338d632ea632d0c7e34) Signed-off-by: Sebastiaan van Stijn Upstream-commit: 78cbf4d1388a4eef155f08da0d2422e3a2cad11f Component: engine --- components/engine/layer/layer_store.go | 50 ++++++++++++++++---------- components/engine/layer/migration.go | 6 ++-- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/components/engine/layer/layer_store.go b/components/engine/layer/layer_store.go index 1601465c04..737283cc2d 100644 --- a/components/engine/layer/layer_store.go +++ b/components/engine/layer/layer_store.go @@ -189,7 +189,9 @@ func (ls *layerStore) loadLayer(layer ChainID) (*roLayer, error) { } func (ls *layerStore) loadMount(mount string) error { - if _, ok := ls.mounts[mount]; ok { + ls.mountL.Lock() + defer ls.mountL.Unlock() + if m := ls.mounts[mount]; m != nil { return nil } @@ -477,7 +479,7 @@ func (ls *layerStore) Release(l Layer) ([]Metadata, error) { return ls.releaseLayer(layer) } -func (ls *layerStore) CreateRWLayer(name string, parent ChainID, opts *CreateRWLayerOpts) (RWLayer, error) { +func (ls *layerStore) CreateRWLayer(name string, parent ChainID, opts *CreateRWLayerOpts) (_ RWLayer, err error) { var ( storageOpt map[string]string initFunc MountInit @@ -491,13 +493,21 @@ func (ls *layerStore) CreateRWLayer(name string, parent ChainID, opts *CreateRWL } ls.mountL.Lock() - defer ls.mountL.Unlock() - m, ok := ls.mounts[name] - if ok { + if _, ok := ls.mounts[name]; ok { + ls.mountL.Unlock() return nil, ErrMountNameConflict } + // Avoid name collision by temporary assigning nil + ls.mounts[name] = nil + ls.mountL.Unlock() + defer func() { + if err != nil { + ls.mountL.Lock() + delete(ls.mounts, name) + ls.mountL.Unlock() + } + }() - var err error var pid string var p *roLayer if string(parent) != "" { @@ -517,7 +527,7 @@ func (ls *layerStore) CreateRWLayer(name string, parent ChainID, opts *CreateRWL }() } - m = &mountedLayer{ + m := &mountedLayer{ name: name, parent: p, mountID: ls.mountID(name), @@ -528,7 +538,7 @@ func (ls *layerStore) CreateRWLayer(name string, parent ChainID, opts *CreateRWL if initFunc != nil { pid, err = ls.initMount(m.mountID, pid, mountLabel, initFunc, storageOpt) if err != nil { - return nil, err + return } m.initID = pid } @@ -538,10 +548,10 @@ func (ls *layerStore) CreateRWLayer(name string, parent ChainID, opts *CreateRWL } if err = ls.driver.CreateReadWrite(m.mountID, pid, createOpts); err != nil { - return nil, err + return } if err = ls.saveMount(m); err != nil { - return nil, err + return } return m.getReference(), nil @@ -549,9 +559,9 @@ func (ls *layerStore) CreateRWLayer(name string, parent ChainID, opts *CreateRWL func (ls *layerStore) GetRWLayer(id string) (RWLayer, error) { ls.mountL.Lock() - defer ls.mountL.Unlock() - mount, ok := ls.mounts[id] - if !ok { + mount := ls.mounts[id] + ls.mountL.Unlock() + if mount == nil { return nil, ErrMountDoesNotExist } @@ -561,8 +571,8 @@ func (ls *layerStore) GetRWLayer(id string) (RWLayer, error) { func (ls *layerStore) GetMountID(id string) (string, error) { ls.mountL.Lock() defer ls.mountL.Unlock() - mount, ok := ls.mounts[id] - if !ok { + mount := ls.mounts[id] + if mount == nil { return "", ErrMountDoesNotExist } logrus.Debugf("GetMountID id: %s -> mountID: %s", id, mount.mountID) @@ -572,9 +582,9 @@ func (ls *layerStore) GetMountID(id string) (string, error) { func (ls *layerStore) ReleaseRWLayer(l RWLayer) ([]Metadata, error) { ls.mountL.Lock() - defer ls.mountL.Unlock() - m, ok := ls.mounts[l.Name()] - if !ok { + m := ls.mounts[l.Name()] + ls.mountL.Unlock() + if m == nil { return []Metadata{}, nil } @@ -606,7 +616,9 @@ func (ls *layerStore) ReleaseRWLayer(l RWLayer) ([]Metadata, error) { return nil, err } + ls.mountL.Lock() delete(ls.mounts, m.Name()) + ls.mountL.Unlock() ls.layerL.Lock() defer ls.layerL.Unlock() @@ -634,7 +646,9 @@ func (ls *layerStore) saveMount(mount *mountedLayer) error { } } + ls.mountL.Lock() ls.mounts[mount.name] = mount + ls.mountL.Unlock() return nil } diff --git a/components/engine/layer/migration.go b/components/engine/layer/migration.go index 2668ea96bb..775e552fee 100644 --- a/components/engine/layer/migration.go +++ b/components/engine/layer/migration.go @@ -18,9 +18,9 @@ import ( // after migration the layer may be retrieved by the given name. func (ls *layerStore) CreateRWLayerByGraphID(name, graphID string, parent ChainID) (err error) { ls.mountL.Lock() - defer ls.mountL.Unlock() - m, ok := ls.mounts[name] - if ok { + m := ls.mounts[name] + ls.mountL.Unlock() + if m != nil { if m.parent.chainID != parent { return errors.New("name conflict, mismatched parent") } From 8b3a1bef0d23ea84e15406d4780c4b92f4a5bc86 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 16 May 2019 13:51:15 -0700 Subject: [PATCH 3/4] layer/CreateRWLayerByGraphID: remove This is an additon to commit 1fea38856a ("Remove v1.10 migrator") aka PR #38265. Since that one, CreateRWLayerByGraphID() is not used anywhere, so let's drop it. Signed-off-by: Kir Kolyshkin (cherry picked from commit b4e9b507655e7dbdfb44d4ee284dcd658b859b3f) Signed-off-by: Sebastiaan van Stijn Upstream-commit: 1576eaba3357b7aab5121b51b9ebc34e731082fe Component: engine --- components/engine/layer/migration.go | 59 -------- components/engine/layer/migration_test.go | 160 ---------------------- 2 files changed, 219 deletions(-) diff --git a/components/engine/layer/migration.go b/components/engine/layer/migration.go index 775e552fee..12500694f0 100644 --- a/components/engine/layer/migration.go +++ b/components/engine/layer/migration.go @@ -3,7 +3,6 @@ package layer // import "github.com/docker/docker/layer" import ( "compress/gzip" "errors" - "fmt" "io" "os" @@ -13,64 +12,6 @@ import ( "github.com/vbatts/tar-split/tar/storage" ) -// CreateRWLayerByGraphID creates a RWLayer in the layer store using -// the provided name with the given graphID. To get the RWLayer -// after migration the layer may be retrieved by the given name. -func (ls *layerStore) CreateRWLayerByGraphID(name, graphID string, parent ChainID) (err error) { - ls.mountL.Lock() - m := ls.mounts[name] - ls.mountL.Unlock() - if m != nil { - if m.parent.chainID != parent { - return errors.New("name conflict, mismatched parent") - } - if m.mountID != graphID { - return errors.New("mount already exists") - } - - return nil - } - - if !ls.driver.Exists(graphID) { - return fmt.Errorf("graph ID does not exist: %q", graphID) - } - - var p *roLayer - if string(parent) != "" { - p = ls.get(parent) - if p == nil { - return ErrLayerDoesNotExist - } - - // Release parent chain if error - defer func() { - if err != nil { - ls.layerL.Lock() - ls.releaseLayer(p) - ls.layerL.Unlock() - } - }() - } - - // TODO: Ensure graphID has correct parent - - m = &mountedLayer{ - name: name, - parent: p, - mountID: graphID, - layerStore: ls, - references: map[RWLayer]*referencedRWLayer{}, - } - - // Check for existing init layer - initID := fmt.Sprintf("%s-init", graphID) - if ls.driver.Exists(initID) { - m.initID = initID - } - - return ls.saveMount(m) -} - func (ls *layerStore) ChecksumForGraphID(id, parent, oldTarDataPath, newTarDataPath string) (diffID DiffID, size int64, err error) { defer func() { if err != nil { diff --git a/components/engine/layer/migration_test.go b/components/engine/layer/migration_test.go index 923166371c..2b5c3301f8 100644 --- a/components/engine/layer/migration_test.go +++ b/components/engine/layer/migration_test.go @@ -3,7 +3,6 @@ package layer // import "github.com/docker/docker/layer" import ( "bytes" "compress/gzip" - "fmt" "io" "io/ioutil" "os" @@ -12,7 +11,6 @@ import ( "testing" "github.com/docker/docker/daemon/graphdriver" - "github.com/docker/docker/pkg/archive" "github.com/docker/docker/pkg/stringid" "github.com/vbatts/tar-split/tar/asm" "github.com/vbatts/tar-split/tar/storage" @@ -269,161 +267,3 @@ func TestLayerMigrationNoTarsplit(t *testing.T) { assertMetadata(t, metadata, createMetadata(layer2a)) } - -func TestMountMigration(t *testing.T) { - // TODO Windows: Figure out why this is failing (obvious - paths... needs porting) - if runtime.GOOS == "windows" { - t.Skip("Failing on Windows") - } - ls, _, cleanup := newTestStore(t) - defer cleanup() - - baseFiles := []FileApplier{ - newTestFile("/root/.bashrc", []byte("# Boring configuration"), 0644), - newTestFile("/etc/profile", []byte("# Base configuration"), 0644), - } - initFiles := []FileApplier{ - newTestFile("/etc/hosts", []byte{}, 0644), - newTestFile("/etc/resolv.conf", []byte{}, 0644), - } - mountFiles := []FileApplier{ - newTestFile("/etc/hosts", []byte("localhost 127.0.0.1"), 0644), - newTestFile("/root/.bashrc", []byte("# Updated configuration"), 0644), - newTestFile("/root/testfile1.txt", []byte("nothing valuable"), 0644), - } - - initTar, err := tarFromFiles(initFiles...) - if err != nil { - t.Fatal(err) - } - - mountTar, err := tarFromFiles(mountFiles...) - if err != nil { - t.Fatal(err) - } - - graph := ls.(*layerStore).driver - - layer1, err := createLayer(ls, "", initWithFiles(baseFiles...)) - if err != nil { - t.Fatal(err) - } - - graphID1 := layer1.(*referencedCacheLayer).cacheID - - containerID := stringid.GenerateRandomID() - containerInit := fmt.Sprintf("%s-init", containerID) - - if err := graph.Create(containerInit, graphID1, nil); err != nil { - t.Fatal(err) - } - if _, err := graph.ApplyDiff(containerInit, graphID1, bytes.NewReader(initTar)); err != nil { - t.Fatal(err) - } - - if err := graph.Create(containerID, containerInit, nil); err != nil { - t.Fatal(err) - } - if _, err := graph.ApplyDiff(containerID, containerInit, bytes.NewReader(mountTar)); err != nil { - t.Fatal(err) - } - - if err := ls.(*layerStore).CreateRWLayerByGraphID("migration-mount", containerID, layer1.ChainID()); err != nil { - t.Fatal(err) - } - - rwLayer1, err := ls.GetRWLayer("migration-mount") - if err != nil { - t.Fatal(err) - } - - if _, err := rwLayer1.Mount(""); err != nil { - t.Fatal(err) - } - - changes, err := rwLayer1.Changes() - if err != nil { - t.Fatal(err) - } - - if expected := 5; len(changes) != expected { - t.Logf("Changes %#v", changes) - t.Fatalf("Wrong number of changes %d, expected %d", len(changes), expected) - } - - sortChanges(changes) - - assertChange(t, changes[0], archive.Change{ - Path: "/etc", - Kind: archive.ChangeModify, - }) - assertChange(t, changes[1], archive.Change{ - Path: "/etc/hosts", - Kind: archive.ChangeModify, - }) - assertChange(t, changes[2], archive.Change{ - Path: "/root", - Kind: archive.ChangeModify, - }) - assertChange(t, changes[3], archive.Change{ - Path: "/root/.bashrc", - Kind: archive.ChangeModify, - }) - assertChange(t, changes[4], archive.Change{ - Path: "/root/testfile1.txt", - Kind: archive.ChangeAdd, - }) - - if _, err := ls.CreateRWLayer("migration-mount", layer1.ChainID(), nil); err == nil { - t.Fatal("Expected error creating mount with same name") - } else if err != ErrMountNameConflict { - t.Fatal(err) - } - - rwLayer2, err := ls.GetRWLayer("migration-mount") - if err != nil { - t.Fatal(err) - } - - if getMountLayer(rwLayer1) != getMountLayer(rwLayer2) { - t.Fatal("Expected same layer from get with same name as from migrate") - } - - if _, err := rwLayer2.Mount(""); err != nil { - t.Fatal(err) - } - - if _, err := rwLayer2.Mount(""); err != nil { - t.Fatal(err) - } - - if metadata, err := ls.Release(layer1); err != nil { - t.Fatal(err) - } else if len(metadata) > 0 { - t.Fatalf("Expected no layers to be deleted, deleted %#v", metadata) - } - - if err := rwLayer1.Unmount(); err != nil { - t.Fatal(err) - } - - if _, err := ls.ReleaseRWLayer(rwLayer1); err != nil { - t.Fatal(err) - } - - if err := rwLayer2.Unmount(); err != nil { - t.Fatal(err) - } - if err := rwLayer2.Unmount(); err != nil { - t.Fatal(err) - } - metadata, err := ls.ReleaseRWLayer(rwLayer2) - if err != nil { - t.Fatal(err) - } - if len(metadata) == 0 { - t.Fatal("Expected base layer to be deleted when deleting mount") - } - - assertMetadata(t, metadata, createMetadata(layer1)) -} From e0456faac20cfc17d301da046d427e5b1641dce3 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Mon, 20 May 2019 14:46:54 -0700 Subject: [PATCH 4/4] layer: protect from same-name races As pointed out by Tonis, there's a race between ReleaseRWLayer() and GetRWLayer(): ``` ----- goroutine 1 ----- ----- goroutine 2 ----- ReleaseRWLayer() m := ls.mounts[l.Name()] ... m.deleteReference(l) m.hasReferences() ... GetRWLayer() ... mount := ls.mounts[id] ls.driver.Remove(m.mountID) ls.store.RemoveMount(m.name) return mount.getReference() delete(ls.mounts, m.Name()) ----------------------- ----------------------- ``` When something like this happens, GetRWLayer will return an RWLayer without a storage. Oops. There might be more races like this, and it seems the best solution is to lock by layer id/name by using pkg/locker. With this in place, name collision could not happen, so remove the part of previous commit that protected against it in CreateRWLayer (temporary nil assigmment and associated rollback). So, now we have * layerStore.mountL sync.Mutex to protect layerStore.mount map[] (against concurrent access); * mountedLayer's embedded `sync.Mutex` to protect its references map[]; * layerStore.layerL (which I haven't touched); * per-id locker, to avoid name conflicts and concurrent operations on the same rw layer. The whole rig seems to look more readable now (mutexes use is straightforward, no nested locks). Reported-by: Tonis Tiigi Signed-off-by: Kir Kolyshkin Signed-off-by: Kir Kolyshkin (cherry picked from commit af433dd200f8287305b1531d5058780be36b7e2e) Signed-off-by: Sebastiaan van Stijn Upstream-commit: 1ebe324c6a481de86a5b84494057a639773624f1 Component: engine --- components/engine/layer/layer_store.go | 42 +++++++++++++++----------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/components/engine/layer/layer_store.go b/components/engine/layer/layer_store.go index 737283cc2d..81730e9d92 100644 --- a/components/engine/layer/layer_store.go +++ b/components/engine/layer/layer_store.go @@ -10,6 +10,7 @@ import ( "github.com/docker/distribution" "github.com/docker/docker/daemon/graphdriver" "github.com/docker/docker/pkg/idtools" + "github.com/docker/docker/pkg/locker" "github.com/docker/docker/pkg/plugingetter" "github.com/docker/docker/pkg/stringid" "github.com/docker/docker/pkg/system" @@ -36,7 +37,11 @@ type layerStore struct { mounts map[string]*mountedLayer mountL sync.Mutex - os string + + // protect *RWLayer() methods from operating on the same name/id + locker *locker.Locker + + os string } // StoreOptions are the options used to create a new Store instance @@ -92,6 +97,7 @@ func newStoreFromGraphDriver(root string, driver graphdriver.Driver, os string) driver: driver, layerMap: map[ChainID]*roLayer{}, mounts: map[string]*mountedLayer{}, + locker: locker.New(), useTarSplit: !caps.ReproducesExactDiffs, os: os, } @@ -191,7 +197,7 @@ func (ls *layerStore) loadLayer(layer ChainID) (*roLayer, error) { func (ls *layerStore) loadMount(mount string) error { ls.mountL.Lock() defer ls.mountL.Unlock() - if m := ls.mounts[mount]; m != nil { + if _, ok := ls.mounts[mount]; ok { return nil } @@ -492,21 +498,15 @@ func (ls *layerStore) CreateRWLayer(name string, parent ChainID, opts *CreateRWL initFunc = opts.InitFunc } + ls.locker.Lock(name) + defer ls.locker.Unlock(name) + ls.mountL.Lock() - if _, ok := ls.mounts[name]; ok { - ls.mountL.Unlock() + _, ok := ls.mounts[name] + ls.mountL.Unlock() + if ok { return nil, ErrMountNameConflict } - // Avoid name collision by temporary assigning nil - ls.mounts[name] = nil - ls.mountL.Unlock() - defer func() { - if err != nil { - ls.mountL.Lock() - delete(ls.mounts, name) - ls.mountL.Unlock() - } - }() var pid string var p *roLayer @@ -558,6 +558,9 @@ func (ls *layerStore) CreateRWLayer(name string, parent ChainID, opts *CreateRWL } func (ls *layerStore) GetRWLayer(id string) (RWLayer, error) { + ls.locker.Lock(id) + defer ls.locker.Unlock(id) + ls.mountL.Lock() mount := ls.mounts[id] ls.mountL.Unlock() @@ -570,8 +573,9 @@ func (ls *layerStore) GetRWLayer(id string) (RWLayer, error) { func (ls *layerStore) GetMountID(id string) (string, error) { ls.mountL.Lock() - defer ls.mountL.Unlock() mount := ls.mounts[id] + ls.mountL.Unlock() + if mount == nil { return "", ErrMountDoesNotExist } @@ -581,8 +585,12 @@ func (ls *layerStore) GetMountID(id string) (string, error) { } func (ls *layerStore) ReleaseRWLayer(l RWLayer) ([]Metadata, error) { + name := l.Name() + ls.locker.Lock(name) + defer ls.locker.Unlock(name) + ls.mountL.Lock() - m := ls.mounts[l.Name()] + m := ls.mounts[name] ls.mountL.Unlock() if m == nil { return []Metadata{}, nil @@ -617,7 +625,7 @@ func (ls *layerStore) ReleaseRWLayer(l RWLayer) ([]Metadata, error) { } ls.mountL.Lock() - delete(ls.mounts, m.Name()) + delete(ls.mounts, name) ls.mountL.Unlock() ls.layerL.Lock()