Merge pull request #16381 from rhvgoyal/deferred_deletion

devicemapper: Implement deferred deletion capability
Upstream-commit: 8cee3018740374ae44ae5f71b8032cc1b3d98cb4
Component: engine
Alexander Morozov
2015-10-07 09:27:41 -07:00
5 changed files with 280 additions and 36 deletions


@ -40,6 +40,7 @@ var (
logLevel = devicemapper.LogLevelFatal
driverDeferredRemovalSupport = false
enableDeferredRemoval = false
enableDeferredDeletion = false
)
const deviceSetMetaFile string = "deviceset-metadata"
@ -57,6 +58,7 @@ type devInfo struct {
Size uint64 `json:"size"`
TransactionID uint64 `json:"transaction_id"`
Initialized bool `json:"initialized"`
Deleted bool `json:"deleted"`
devices *DeviceSet
mountCount int
@ -106,7 +108,10 @@ type DeviceSet struct {
transaction `json:"-"`
overrideUdevSyncCheck bool
deferredRemove bool // use deferred removal
deferredDelete bool // use deferred deletion
BaseDeviceUUID string // save UUID of base device
nrDeletedDevices uint // number of deleted devices
deletionWorkerTicker *time.Ticker
}
// DiskUsage contains information about disk usage and is used when reporting Status of a device.
@ -141,6 +146,13 @@ type Status struct {
UdevSyncSupported bool
// DeferredRemoveEnabled is true if deferred removal is enabled.
DeferredRemoveEnabled bool
// True if deferred deletion is enabled. This is different from
// deferred removal: "removal" means the device-mapper device is
// deactivated but the thin device stays in the thin pool and can be
// activated again, while "deletion" means the thin device is deleted
// from the thin pool and cannot be activated again.
DeferredDeleteEnabled bool
DeferredDeletedDeviceCount uint
}
// Structure used to export image/container metadata in docker inspect.
@ -375,6 +387,18 @@ func (devices *DeviceSet) lookupDeviceWithLock(hash string) (*devInfo, error) {
return info, err
}
// This function assumes that the device hash map has been loaded in advance.
// Should be called with devices.Lock() held.
func (devices *DeviceSet) constructDeviceIDMap() {
logrus.Debugf("[deviceset] constructDeviceIDMap()")
defer logrus.Debugf("[deviceset] constructDeviceIDMap() END")
for _, info := range devices.Devices {
devices.markDeviceIDUsed(info.DeviceID)
logrus.Debugf("Added deviceId=%d to DeviceIdMap", info.DeviceID)
}
}
func (devices *DeviceSet) deviceFileWalkFunction(path string, finfo os.FileInfo) error {
// Skip some of the meta files which are not device files.
@ -405,19 +429,18 @@ func (devices *DeviceSet) deviceFileWalkFunction(path string, finfo os.FileInfo)
hash = ""
}
dinfo := devices.loadMetadata(hash)
if dinfo == nil {
return fmt.Errorf("Error loading device metadata file %s", hash)
// Include deleted devices as well: the cleanup logic walks the
// device list to find any devices marked deleted.
if _, err := devices.lookupDevice(hash); err != nil {
return fmt.Errorf("Error looking up device %s:%v", hash, err)
}
devices.markDeviceIDUsed(dinfo.DeviceID)
logrus.Debugf("Added deviceID=%d to DeviceIDMap", dinfo.DeviceID)
return nil
}
func (devices *DeviceSet) constructDeviceIDMap() error {
logrus.Debugf("[deviceset] constructDeviceIDMap()")
defer logrus.Debugf("[deviceset] constructDeviceIDMap() END")
func (devices *DeviceSet) loadDeviceFilesOnStart() error {
logrus.Debugf("[deviceset] loadDeviceFilesOnStart()")
defer logrus.Debugf("[deviceset] loadDeviceFilesOnStart() END")
var scan = func(path string, info os.FileInfo, err error) error {
if err != nil {
@ -477,9 +500,13 @@ func (devices *DeviceSet) registerDevice(id int, hash string, size uint64, trans
return info, nil
}
func (devices *DeviceSet) activateDeviceIfNeeded(info *devInfo) error {
func (devices *DeviceSet) activateDeviceIfNeeded(info *devInfo, ignoreDeleted bool) error {
logrus.Debugf("activateDeviceIfNeeded(%v)", info.Hash)
if info.Deleted && !ignoreDeleted {
return fmt.Errorf("devmapper: Can't activate device %v as it is marked for deletion", info.Hash)
}
// Make sure deferred removal on device is canceled, if one was
// scheduled.
if err := devices.cancelDeferredRemoval(info); err != nil {
@ -553,6 +580,61 @@ func (devices *DeviceSet) migrateOldMetaData() error {
return nil
}
// Clean up deleted devices. Assumes all devices have already been
// loaded into the hash table.
func (devices *DeviceSet) cleanupDeletedDevices() error {
devices.Lock()
// If there are no deleted devices, there is nothing to do.
if devices.nrDeletedDevices == 0 {
return nil
}
var deletedDevices []*devInfo
for _, info := range devices.Devices {
if !info.Deleted {
continue
}
logrus.Debugf("devmapper: Found deleted device %s.", info.Hash)
deletedDevices = append(deletedDevices, info)
}
// Delete the deleted devices. DeleteDevice() first takes the info lock
// and then devices.Lock(), so drop devices.Lock() here to avoid deadlock.
devices.Unlock()
for _, info := range deletedDevices {
// This will again try deferred deletion.
if err := devices.DeleteDevice(info.Hash, false); err != nil {
logrus.Warnf("devmapper: Deletion of device %s, device_id=%v failed:%v", info.Hash, info.DeviceID, err)
}
}
return nil
}
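
The lock handling in `cleanupDeletedDevices()` is worth spelling out: the candidate list is snapshotted while holding `devices.Lock()`, and the lock is released before calling `DeleteDevice()`, which re-acquires it. A minimal standalone sketch of that pattern follows; the `registry` type and its methods are illustrative stand-ins, not driver code.

```go
package main

import (
	"fmt"
	"sync"
)

// registry is a hypothetical stand-in for DeviceSet.
type registry struct {
	mu      sync.Mutex
	deleted map[string]bool // true == marked for deletion
}

// remove re-acquires the lock, as DeleteDevice() does.
func (r *registry) remove(key string) error {
	r.mu.Lock()
	defer r.mu.Unlock()
	delete(r.deleted, key)
	return nil
}

func (r *registry) cleanup() {
	r.mu.Lock()
	var candidates []string
	for k, marked := range r.deleted {
		if marked {
			candidates = append(candidates, k)
		}
	}
	// remove() takes r.mu itself, so drop the lock before calling it;
	// Go mutexes are not re-entrant and a second Lock() would deadlock.
	r.mu.Unlock()

	for _, k := range candidates {
		if err := r.remove(k); err != nil {
			fmt.Printf("removing %s failed: %v (retried next pass)\n", k, err)
		}
	}
}

func main() {
	r := &registry{deleted: map[string]bool{"a": true, "b": false}}
	r.cleanup()
	fmt.Println(len(r.deleted), "entries left") // prints: 1 entries left
}
```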
func (devices *DeviceSet) countDeletedDevices() {
for _, info := range devices.Devices {
if !info.Deleted {
continue
}
devices.nrDeletedDevices++
}
}
func (devices *DeviceSet) startDeviceDeletionWorker() {
// Deferred deletion is not enabled. Don't do anything.
if !devices.deferredDelete {
return
}
logrus.Debugf("devmapper: Worker to cleanup deleted devices started")
for range devices.deletionWorkerTicker.C {
devices.cleanupDeletedDevices()
}
}
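
`startDeviceDeletionWorker()` is a standard `time.Ticker` loop. One subtlety the `Shutdown()` path relies on: `ticker.Stop()` stops new ticks but does not close the channel, so after `Stop()` the `for range` goroutine simply blocks forever rather than exiting, and no further cleanup passes run. Below is a minimal sketch of the pattern, assuming the 30-second interval set in `NewDeviceSet()`; `startWorker` is a hypothetical name.

```go
package main

import (
	"log"
	"time"
)

// startWorker runs cleanup once per tick until the ticker is stopped.
func startWorker(cleanup func()) *time.Ticker {
	ticker := time.NewTicker(30 * time.Second)
	go func() {
		// After ticker.Stop() no new ticks arrive; this goroutine
		// then blocks here for the life of the process, which is
		// acceptable on a shutdown path.
		for range ticker.C {
			cleanup()
		}
	}()
	return ticker
}

func main() {
	t := startWorker(func() { log.Println("cleanup pass") })
	time.Sleep(95 * time.Second) // let a few passes run
	t.Stop()                     // no further passes are scheduled
}
```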
func (devices *DeviceSet) initMetaData() error {
devices.Lock()
defer devices.Unlock()
@ -568,13 +650,19 @@ func (devices *DeviceSet) initMetaData() error {
devices.TransactionID = transactionID
if err := devices.constructDeviceIDMap(); err != nil {
return err
if err := devices.loadDeviceFilesOnStart(); err != nil {
return fmt.Errorf("devmapper: Failed to load device files:%v", err)
}
devices.constructDeviceIDMap()
devices.countDeletedDevices()
if err := devices.processPendingTransaction(); err != nil {
return err
}
// Start a goroutine to clean up deleted devices.
go devices.startDeviceDeletionWorker()
return nil
}
@ -739,7 +827,7 @@ func (devices *DeviceSet) verifyBaseDeviceUUID(baseInfo *devInfo) error {
devices.Lock()
defer devices.Unlock()
if err := devices.activateDeviceIfNeeded(baseInfo); err != nil {
if err := devices.activateDeviceIfNeeded(baseInfo, false); err != nil {
return err
}
@ -761,7 +849,7 @@ func (devices *DeviceSet) saveBaseDeviceUUID(baseInfo *devInfo) error {
devices.Lock()
defer devices.Unlock()
if err := devices.activateDeviceIfNeeded(baseInfo); err != nil {
if err := devices.activateDeviceIfNeeded(baseInfo, false); err != nil {
return err
}
@ -788,7 +876,7 @@ func (devices *DeviceSet) createBaseImage() error {
logrus.Debugf("Creating filesystem on base device-mapper thin volume")
if err := devices.activateDeviceIfNeeded(info); err != nil {
if err := devices.activateDeviceIfNeeded(info, false); err != nil {
return err
}
@ -851,7 +939,7 @@ func (devices *DeviceSet) setupBaseImage() error {
// fresh.
if oldInfo != nil {
if oldInfo.Initialized {
if oldInfo.Initialized && !oldInfo.Deleted {
if err := devices.setupVerifyBaseImageUUID(oldInfo); err != nil {
return err
}
@ -860,7 +948,10 @@ func (devices *DeviceSet) setupBaseImage() error {
}
logrus.Debugf("Removing uninitialized base image")
if err := devices.DeleteDevice(""); err != nil {
// If the previous base device is in the deferred-delete state, it
// needs to be cleaned up first, so don't try deferred deletion here.
if err := devices.DeleteDevice("", true); err != nil {
return err
}
}
@ -1303,13 +1394,27 @@ func (devices *DeviceSet) initDevmapper(doInit bool) error {
return graphdriver.ErrNotSupported
}
// If user asked for deferred removal and both library and driver
// supports deferred removal use it.
if enableDeferredRemoval && driverDeferredRemovalSupport && devicemapper.LibraryDeferredRemovalSupport == true {
// If the user asked for deferred removal, check that both the libdm
// library and the kernel driver support it; otherwise error out.
if enableDeferredRemoval {
if !driverDeferredRemovalSupport {
return fmt.Errorf("devmapper: Deferred removal can not be enabled as kernel does not support it")
}
if !devicemapper.LibraryDeferredRemovalSupport {
return fmt.Errorf("devmapper: Deferred removal can not be enabled as libdm does not support it")
}
logrus.Debugf("devmapper: Deferred removal support enabled.")
devices.deferredRemove = true
}
if enableDeferredDeletion {
if !devices.deferredRemove {
return fmt.Errorf("devmapper: Deferred deletion can not be enabled as deferred removal is not enabled. Enable deferred removal using --storage-opt dm.use_deferred_removal=true parameter")
}
logrus.Debugf("devmapper: Deferred deletion support enabled.")
devices.deferredDelete = true
}
// https://github.com/docker/docker/issues/4036
if supported := devicemapper.UdevSetSyncSupport(true); !supported {
logrus.Warn("Udev sync is not supported. This will lead to unexpected behavior, data loss and errors. For more information, see https://docs.docker.com/reference/commandline/daemon/#daemon-storage-driver-option")
@ -1480,19 +1585,26 @@ func (devices *DeviceSet) AddDevice(hash, baseHash string) error {
logrus.Debugf("[deviceset] AddDevice(hash=%s basehash=%s)", hash, baseHash)
defer logrus.Debugf("[deviceset] AddDevice(hash=%s basehash=%s) END", hash, baseHash)
// If the base device has been marked for deletion, return an error.
baseInfo, err := devices.lookupDeviceWithLock(baseHash)
if err != nil {
return err
}
if baseInfo.Deleted {
return fmt.Errorf("devmapper: Base device %v has been marked for deferred deletion", baseInfo.Hash)
}
baseInfo.lock.Lock()
defer baseInfo.lock.Unlock()
devices.Lock()
defer devices.Unlock()
// Also include deleted devices, in case the hash of the new device
// matches that of a deleted device.
if info, _ := devices.lookupDevice(hash); info != nil {
return fmt.Errorf("device %s already exists", hash)
return fmt.Errorf("device %s already exists. Deleted=%v", hash, info.Deleted)
}
if err := devices.createRegisterSnapDevice(hash, baseInfo); err != nil {
@ -1502,8 +1614,28 @@ func (devices *DeviceSet) AddDevice(hash, baseHash string) error {
return nil
}
func (devices *DeviceSet) markForDeferredDeletion(info *devInfo) error {
// If device is already in deleted state, there is nothing to be done.
if info.Deleted {
return nil
}
logrus.Debugf("devmapper: Marking device %s for deferred deletion.", info.Hash)
info.Deleted = true
// Save device metadata to reflect the deleted state.
if err := devices.saveMetadata(info); err != nil {
info.Deleted = false
return err
}
devices.nrDeletedDevices++
return nil
}
// Should be called with devices.Lock() held.
func (devices *DeviceSet) deleteTransaction(info *devInfo) error {
func (devices *DeviceSet) deleteTransaction(info *devInfo, syncDelete bool) error {
if err := devices.openTransaction(info.Hash, info.DeviceID); err != nil {
logrus.Debugf("Error opening transaction hash = %s deviceId = %d", "", info.DeviceID)
return err
@ -1511,13 +1643,31 @@ func (devices *DeviceSet) deleteTransaction(info *devInfo) error {
defer devices.closeTransaction()
if err := devicemapper.DeleteDevice(devices.getPoolDevName(), info.DeviceID); err != nil {
logrus.Debugf("Error deleting device: %s", err)
return err
err := devicemapper.DeleteDevice(devices.getPoolDevName(), info.DeviceID)
if err != nil {
// If syncDelete is true, return the error. If deferred deletion is
// not enabled, return the error. If the error is anything other
// than EBUSY, return the error.
if syncDelete || !devices.deferredDelete || err != devicemapper.ErrBusy {
logrus.Debugf("Error deleting device: %s", err)
return err
}
}
if err := devices.unregisterDevice(info.DeviceID, info.Hash); err != nil {
return err
if err == nil {
if err := devices.unregisterDevice(info.DeviceID, info.Hash); err != nil {
return err
}
// If the device was already in the deferred-delete state, this was
// a retry of the deletion. Reduce the deleted device count.
if info.Deleted {
devices.nrDeletedDevices--
}
} else {
if err := devices.markForDeferredDeletion(info); err != nil {
return err
}
}
return nil
@ -1529,8 +1679,10 @@ func (devices *DeviceSet) issueDiscard(info *devInfo) error {
defer logrus.Debugf("devmapper: issueDiscard(device: %s). END", info.Hash)
// This is a workaround for the kernel not discarding blocks on the
// thin pool when we remove a thinp device, so we do it
// manually
if err := devices.activateDeviceIfNeeded(info); err != nil {
// manually.
// Even if the device has been deferred-deleted, activate it and
// issue discards.
if err := devices.activateDeviceIfNeeded(info, true); err != nil {
return err
}
@ -1551,7 +1703,7 @@ func (devices *DeviceSet) issueDiscard(info *devInfo) error {
}
// Should be called with devices.Lock() held.
func (devices *DeviceSet) deleteDevice(info *devInfo) error {
func (devices *DeviceSet) deleteDevice(info *devInfo, syncDelete bool) error {
if devices.doBlkDiscard {
devices.issueDiscard(info)
}
@ -1562,7 +1714,7 @@ func (devices *DeviceSet) deleteDevice(info *devInfo) error {
return err
}
if err := devices.deleteTransaction(info); err != nil {
if err := devices.deleteTransaction(info, syncDelete); err != nil {
return err
}
@ -1571,8 +1723,12 @@ func (devices *DeviceSet) deleteDevice(info *devInfo) error {
return nil
}
// DeleteDevice deletes a device from the hash.
func (devices *DeviceSet) DeleteDevice(hash string) error {
// DeleteDevice will return success if the device has been marked for
// deferred deletion. To override that and have DeleteDevice() fail when
// the device is busy and cannot be deleted, set syncDelete=true.
func (devices *DeviceSet) DeleteDevice(hash string, syncDelete bool) error {
logrus.Debugf("devmapper: DeleteDevice(hash=%v syncDelete=%v) START", hash, syncDelete)
defer logrus.Debugf("devmapper: DeleteDevice(hash=%v syncDelete=%v) END", hash, syncDelete)
info, err := devices.lookupDeviceWithLock(hash)
if err != nil {
return err
@ -1591,7 +1747,7 @@ func (devices *DeviceSet) DeleteDevice(hash string) error {
return fmt.Errorf("devmapper: Can't delete device %v as it is still mounted. mntCount=%v", info.Hash, info.mountCount)
}
return devices.deleteDevice(info)
return devices.deleteDevice(info, syncDelete)
}
func (devices *DeviceSet) deactivatePool() error {
@ -1716,6 +1872,13 @@ func (devices *DeviceSet) Shutdown() error {
var devs []*devInfo
// Stop the deletion worker. This stops delivery of new events on the
// ticker channel, so no new instance of cleanupDeletedDevices() will
// run after this call. If one instance is already running at the time
// of the call, it must be holding devices.Lock(), and we will block on
// that lock until the cleanup function exits.
devices.deletionWorkerTicker.Stop()
devices.Lock()
// Save DeviceSet Metadata first. Docker kills all threads if they
// don't finish in certain time. It is possible that Shutdown()
@ -1778,6 +1941,10 @@ func (devices *DeviceSet) MountDevice(hash, path, mountLabel string) error {
return err
}
if info.Deleted {
return fmt.Errorf("devmapper: Can't mount device %v as it has been marked for deferred deletion", info.Hash)
}
info.lock.Lock()
defer info.lock.Unlock()
@ -1793,7 +1960,7 @@ func (devices *DeviceSet) MountDevice(hash, path, mountLabel string) error {
return nil
}
if err := devices.activateDeviceIfNeeded(info); err != nil {
if err := devices.activateDeviceIfNeeded(info, false); err != nil {
return fmt.Errorf("Error activating devmapper device for '%s': %s", hash, err)
}
@ -1913,7 +2080,7 @@ func (devices *DeviceSet) GetDeviceStatus(hash string) (*DevStatus, error) {
TransactionID: info.TransactionID,
}
if err := devices.activateDeviceIfNeeded(info); err != nil {
if err := devices.activateDeviceIfNeeded(info, false); err != nil {
return nil, fmt.Errorf("Error activating devmapper device for '%s': %s", hash, err)
}
@ -1985,6 +2152,8 @@ func (devices *DeviceSet) Status() *Status {
status.MetadataLoopback = devices.metadataLoopFile
status.UdevSyncSupported = devicemapper.UdevSyncSupported()
status.DeferredRemoveEnabled = devices.deferredRemove
status.DeferredDeleteEnabled = devices.deferredDelete
status.DeferredDeletedDeviceCount = devices.nrDeletedDevices
totalSizeInSectors, _, dataUsed, dataTotal, metadataUsed, metadataTotal, err := devices.poolStatus()
if err == nil {
@ -2049,6 +2218,7 @@ func NewDeviceSet(root string, doInit bool, options []string) (*DeviceSet, error
doBlkDiscard: true,
thinpBlockSize: defaultThinpBlockSize,
deviceIDMap: make([]byte, deviceIDMapSz),
deletionWorkerTicker: time.NewTicker(time.Second * 30),
}
foundBlkDiscard := false
@ -2117,6 +2287,12 @@ func NewDeviceSet(root string, doInit bool, options []string) (*DeviceSet, error
return nil, err
}
case "dm.use_deferred_deletion":
enableDeferredDeletion, err = strconv.ParseBool(val)
if err != nil {
return nil, err
}
default:
return nil, fmt.Errorf("Unknown option %s\n", key)
}


@ -84,6 +84,8 @@ func (d *Driver) Status() [][2]string {
{"Metadata Space Available", fmt.Sprintf("%s", units.HumanSize(float64(s.Metadata.Available)))},
{"Udev Sync Supported", fmt.Sprintf("%v", s.UdevSyncSupported)},
{"Deferred Removal Enabled", fmt.Sprintf("%v", s.DeferredRemoveEnabled)},
{"Deferred Deletion Enabled", fmt.Sprintf("%v", s.DeferredDeleteEnabled)},
{"Deferred Deleted Device Count", fmt.Sprintf("%v", s.DeferredDeletedDeviceCount)},
}
if len(s.DataLoopback) > 0 {
status = append(status, [2]string{"Data loop file", s.DataLoopback})
@ -142,7 +144,7 @@ func (d *Driver) Remove(id string) error {
}
// This assumes the device has been properly Get/Put:ed and thus is unmounted
if err := d.DeviceSet.DeleteDevice(id); err != nil {
if err := d.DeviceSet.DeleteDevice(id, false); err != nil {
return err
}


@ -368,6 +368,46 @@ options for `zfs` start with `zfs`.
> Otherwise, set this flag for migrating existing Docker daemons to
> a daemon with a supported environment.
* `dm.use_deferred_removal`
Enables use of deferred device removal if `libdm` and the kernel driver
support the mechanism.
Deferred device removal means that if a device is busy when it is being
removed/deactivated, a deferred removal is scheduled for it, and the
device is removed automatically when its last user exits.
For example, when a container exits, its associated thin device is removed.
If that device has leaked into some other mount namespace and can't be
removed, the container exit still succeeds and this option causes the
system to schedule the device for deferred removal. It does not wait in a
loop trying to remove a busy device.
Example use: `docker daemon --storage-opt dm.use_deferred_removal=true`
* `dm.use_deferred_deletion`
Enables use of deferred device deletion for thin pool devices. By default,
thin pool device deletion is synchronous. Before a container is deleted,
the Docker daemon removes any associated devices. If the storage driver
cannot remove a device, the container deletion fails and the daemon
returns an error:
`Error deleting container: Error response from daemon: Cannot destroy container`
To avoid this failure, enable both deferred device deletion and deferred
device removal on the daemon.
`docker daemon --storage-opt dm.use_deferred_deletion=true --storage-opt dm.use_deferred_removal=true`
With these two options enabled, if a device is busy when the driver is
deleting a container, the driver marks the device as deleted. Later, when
the device isn't in use, the driver deletes it.
In general it should be safe to enable this option by default. It helps
when mount points are unintentionally leaked across multiple mount
namespaces.
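Assuming the daemon came up with both options set, you can confirm the
configuration from the storage driver section of `docker info`, which
reports the `Deferred Deletion Enabled` and `Deferred Deleted Device Count`
fields added by this change.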
Currently supported options of `zfs`:
* `zfs.fsname`


@ -302,6 +302,28 @@ device.
Example use: `docker daemon --storage-opt dm.use_deferred_removal=true`
#### dm.use_deferred_deletion
Enables use of deferred device deletion for thin pool devices. By default,
thin pool device deletion is synchronous. Before a container is deleted, the
Docker daemon removes any associated devices. If the storage driver cannot
remove a device, the container deletion fails and the daemon returns an
error:
`Error deleting container: Error response from daemon: Cannot destroy container`
To avoid this failure, enable both deferred device deletion and deferred
device removal on the daemon.
`docker daemon --storage-opt dm.use_deferred_deletion=true --storage-opt dm.use_deferred_removal=true`
With these two options enabled, if a device is busy when the driver is
deleting a container, the driver marks the device as deleted. Later, when the
device isn't in use, the driver deletes it.
In general it should be safe to enable this option by default. It helps
when mount points are unintentionally leaked across multiple mount
namespaces.
#### dm.loopdatasize
**Note**: This option configures devicemapper loopback, which should not be used in production.


@ -750,7 +750,11 @@ func DeleteDevice(poolName string, deviceID int) error {
return fmt.Errorf("Can't set message %s", err)
}
dmSawBusy = false
if err := task.run(); err != nil {
if dmSawBusy {
return ErrBusy
}
return fmt.Errorf("Error running DeleteDevice %s", err)
}
return nil
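
With `ErrBusy` exposed as a sentinel, callers can distinguish a busy thin
device from a hard failure, which is what `deleteTransaction()` above does.
Below is a minimal sketch of that caller-side pattern; `tryDelete` is a
hypothetical wrapper, and only `devicemapper.DeleteDevice` and
`devicemapper.ErrBusy` come from the real package.

```go
package example

import "github.com/docker/docker/pkg/devicemapper"

// tryDelete treats EBUSY as "defer the deletion" when deferred deletion
// is enabled, and as a hard error otherwise.
func tryDelete(poolName string, deviceID int, deferredDelete bool) error {
	err := devicemapper.DeleteDevice(poolName, deviceID)
	if err == nil {
		return nil // device is gone; safe to unregister it
	}
	if deferredDelete && err == devicemapper.ErrBusy {
		// Leave the device marked as deleted; the periodic cleanup
		// worker retries it on a later pass.
		return nil
	}
	return err
}
```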