From cc3c0d322a05b9daa67d9643c7a809eb4681e18b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?To=CC=83nis=20Tiigi?= Date: Thu, 22 Sep 2016 14:38:00 -0700 Subject: [PATCH] Implement build cache based on history array Based on work by KJ Tsanaktsidis Signed-off-by: Tonis Tiigi Signed-off-by: KJ Tsanaktsidis Upstream-commit: 690882c2e79c3f3742c709cf158584e61594ba00 Component: engine --- .../api/server/router/build/build_routes.go | 15 +- components/engine/api/types/client.go | 3 + components/engine/builder/builder.go | 10 +- .../engine/builder/dockerfile/builder.go | 6 + .../engine/builder/dockerfile/internals.go | 10 +- components/engine/cli/command/image/build.go | 3 + components/engine/client/image_build.go | 7 + components/engine/daemon/cache.go | 254 ++++++++++++++++++ components/engine/daemon/image.go | 53 ---- .../docs/reference/api/docker_remote_api.md | 1 + .../reference/api/docker_remote_api_v1.25.md | 1 + components/engine/docs/reference/builder.md | 7 + .../docs/reference/commandline/build.md | 1 + 13 files changed, 307 insertions(+), 64 deletions(-) create mode 100644 components/engine/daemon/cache.go diff --git a/components/engine/api/server/router/build/build_routes.go b/components/engine/api/server/router/build/build_routes.go index 7f66fdd35a..17aa6226e4 100644 --- a/components/engine/api/server/router/build/build_routes.go +++ b/components/engine/api/server/router/build/build_routes.go @@ -70,7 +70,7 @@ func newImageBuildOptions(ctx context.Context, r *http.Request) (*types.ImageBui var buildUlimits = []*units.Ulimit{} ulimitsJSON := r.FormValue("ulimits") if ulimitsJSON != "" { - if err := json.NewDecoder(strings.NewReader(ulimitsJSON)).Decode(&buildUlimits); err != nil { + if err := json.Unmarshal([]byte(ulimitsJSON), &buildUlimits); err != nil { return nil, err } options.Ulimits = buildUlimits @@ -79,7 +79,7 @@ func newImageBuildOptions(ctx context.Context, r *http.Request) (*types.ImageBui var buildArgs = map[string]string{} buildArgsJSON := 
r.FormValue("buildargs") if buildArgsJSON != "" { - if err := json.NewDecoder(strings.NewReader(buildArgsJSON)).Decode(&buildArgs); err != nil { + if err := json.Unmarshal([]byte(buildArgsJSON), &buildArgs); err != nil { return nil, err } options.BuildArgs = buildArgs @@ -87,12 +87,21 @@ func newImageBuildOptions(ctx context.Context, r *http.Request) (*types.ImageBui var labels = map[string]string{} labelsJSON := r.FormValue("labels") if labelsJSON != "" { - if err := json.NewDecoder(strings.NewReader(labelsJSON)).Decode(&labels); err != nil { + if err := json.Unmarshal([]byte(labelsJSON), &labels); err != nil { return nil, err } options.Labels = labels } + var cacheFrom = []string{} + cacheFromJSON := r.FormValue("cachefrom") + if cacheFromJSON != "" { + if err := json.Unmarshal([]byte(cacheFromJSON), &cacheFrom); err != nil { + return nil, err + } + options.CacheFrom = cacheFrom + } + return options, nil } diff --git a/components/engine/api/types/client.go b/components/engine/api/types/client.go index 636797719d..e39ea72ccb 100644 --- a/components/engine/api/types/client.go +++ b/components/engine/api/types/client.go @@ -151,6 +151,9 @@ type ImageBuildOptions struct { // preserves the original image and creates a new one from the parent with all // the changes applied to a single layer Squash bool + // CacheFrom specifies images that are used for matching cache. Images + // specified here do not need to have a valid parent chain to match cache. + CacheFrom []string } // ImageBuildResponse holds information diff --git a/components/engine/builder/builder.go b/components/engine/builder/builder.go index 1445c26c31..2c91c56045 100644 --- a/components/engine/builder/builder.go +++ b/components/engine/builder/builder.go @@ -153,10 +153,16 @@ type Image interface { RunConfig() *container.Config } -// ImageCache abstracts an image cache store. +// ImageCacheBuilder represents a generator for stateful image cache. 
+type ImageCacheBuilder interface { + // MakeImageCache creates a stateful image cache. + MakeImageCache(cacheFrom []string) ImageCache +} + +// ImageCache abstracts an image cache. // (parent image, child runconfig) -> child image type ImageCache interface { // GetCachedImageOnBuild returns a reference to a cached image whose parent equals `parent` // and runconfig equals `cfg`. A cache miss is expected to return an empty ID and a nil error. - GetCachedImageOnBuild(parentID string, cfg *container.Config) (imageID string, err error) + GetCache(parentID string, cfg *container.Config) (imageID string, err error) } diff --git a/components/engine/builder/dockerfile/builder.go b/components/engine/builder/dockerfile/builder.go index ec067eaa2e..89c8862d39 100644 --- a/components/engine/builder/dockerfile/builder.go +++ b/components/engine/builder/dockerfile/builder.go @@ -75,6 +75,8 @@ type Builder struct { // TODO: remove once docker.Commit can receive a tag id string + + imageCache builder.ImageCache } // BuildManager implements builder.Backend and is shared across all Builder objects. @@ -136,6 +138,10 @@ func NewBuilder(clientCtx context.Context, config *types.ImageBuildOptions, back LookingForDirectives: true, }, } + if icb, ok := backend.(builder.ImageCacheBuilder); ok { + b.imageCache = icb.MakeImageCache(config.CacheFrom) + } + parser.SetEscapeToken(parser.DefaultEscapeToken, &b.directive) // Assume the default token for escape if dockerfile != nil { diff --git a/components/engine/builder/dockerfile/internals.go b/components/engine/builder/dockerfile/internals.go index 46fe9b7fcb..f31d863012 100644 --- a/components/engine/builder/dockerfile/internals.go +++ b/components/engine/builder/dockerfile/internals.go @@ -438,18 +438,16 @@ func (b *Builder) processImageFrom(img builder.Image) error { return nil } -// probeCache checks if `b.docker` implements builder.ImageCache and image-caching -// is enabled (`b.UseCache`). 
-// If so attempts to look up the current `b.image` and `b.runConfig` pair with `b.docker`. +// probeCache checks if cache match can be found for current build instruction. // If an image is found, probeCache returns `(true, nil)`. // If no image is found, it returns `(false, nil)`. // If there is any error, it returns `(false, err)`. func (b *Builder) probeCache() (bool, error) { - c, ok := b.docker.(builder.ImageCache) - if !ok || b.options.NoCache || b.cacheBusted { + c := b.imageCache + if c == nil || b.options.NoCache || b.cacheBusted { return false, nil } - cache, err := c.GetCachedImageOnBuild(b.image, b.runConfig) + cache, err := c.GetCache(b.image, b.runConfig) if err != nil { return false, err } diff --git a/components/engine/cli/command/image/build.go b/components/engine/cli/command/image/build.go index 17be405bd5..51d0ea9f08 100644 --- a/components/engine/cli/command/image/build.go +++ b/components/engine/cli/command/image/build.go @@ -55,6 +55,7 @@ type buildOptions struct { rm bool forceRm bool pull bool + cacheFrom []string } // NewBuildCommand creates a new `docker build` command @@ -98,6 +99,7 @@ func NewBuildCommand(dockerCli *command.DockerCli) *cobra.Command { flags.BoolVar(&options.forceRm, "force-rm", false, "Always remove intermediate containers") flags.BoolVarP(&options.quiet, "quiet", "q", false, "Suppress the build output and print image ID on success") flags.BoolVar(&options.pull, "pull", false, "Always attempt to pull a newer version of the image") + flags.StringSliceVar(&options.cacheFrom, "cache-from", []string{}, "Images to consider as cache sources") command.AddTrustedFlags(flags, true) @@ -289,6 +291,7 @@ func runBuild(dockerCli *command.DockerCli, options buildOptions) error { BuildArgs: runconfigopts.ConvertKVStringsToMap(options.buildArgs.GetAll()), AuthConfigs: authConfig, Labels: runconfigopts.ConvertKVStringsToMap(options.labels), + CacheFrom: options.cacheFrom, } response, err := dockerCli.Client().ImageBuild(ctx, body, 
buildOptions) diff --git a/components/engine/client/image_build.go b/components/engine/client/image_build.go index a84bf57821..0094602a6e 100644 --- a/components/engine/client/image_build.go +++ b/components/engine/client/image_build.go @@ -110,6 +110,13 @@ func imageBuildOptionsToQuery(options types.ImageBuildOptions) (url.Values, erro return query, err } query.Set("labels", string(labelsJSON)) + + cacheFromJSON, err := json.Marshal(options.CacheFrom) + if err != nil { + return query, err + } + query.Set("cachefrom", string(cacheFromJSON)) + return query, nil } diff --git a/components/engine/daemon/cache.go b/components/engine/daemon/cache.go new file mode 100644 index 0000000000..a2c2c137f5 --- /dev/null +++ b/components/engine/daemon/cache.go @@ -0,0 +1,254 @@ +package daemon + +import ( + "encoding/json" + "fmt" + "reflect" + "strings" + + "github.com/Sirupsen/logrus" + containertypes "github.com/docker/docker/api/types/container" + "github.com/docker/docker/builder" + "github.com/docker/docker/dockerversion" + "github.com/docker/docker/image" + "github.com/docker/docker/layer" + "github.com/docker/docker/runconfig" + "github.com/pkg/errors" +) + +// getLocalCachedImage returns the most recent created image that is a child +// of the image with imgID, that had the same config when it was +// created. nil is returned if a child cannot be found. An error is +// returned if the parent image cannot be found. 
+func (daemon *Daemon) getLocalCachedImage(imgID image.ID, config *containertypes.Config) (*image.Image, error) { + // Loop on the children of the given image and check the config + getMatch := func(siblings []image.ID) (*image.Image, error) { + var match *image.Image + for _, id := range siblings { + img, err := daemon.imageStore.Get(id) + if err != nil { + return nil, fmt.Errorf("unable to find image %q", id) + } + + if runconfig.Compare(&img.ContainerConfig, config) { + // check for the most up to date match + if match == nil || match.Created.Before(img.Created) { + match = img + } + } + } + return match, nil + } + + // In this case, this is `FROM scratch`, which isn't an actual image. + if imgID == "" { + images := daemon.imageStore.Map() + var siblings []image.ID + for id, img := range images { + if img.Parent == imgID { + siblings = append(siblings, id) + } + } + return getMatch(siblings) + } + + // find match from child images + siblings := daemon.imageStore.Children(imgID) + return getMatch(siblings) +} + +// MakeImageCache creates a stateful image cache. +func (daemon *Daemon) MakeImageCache(sourceRefs []string) builder.ImageCache { + if len(sourceRefs) == 0 { + return &localImageCache{daemon} + } + + cache := &imageCache{daemon: daemon, localImageCache: &localImageCache{daemon}} + + for _, ref := range sourceRefs { + img, err := daemon.GetImage(ref) + if err != nil { + logrus.Warnf("Could not look up %s for cache resolution, skipping: %+v", ref, err) + continue + } + cache.sources = append(cache.sources, img) + } + + return cache +} + +// localImageCache is cache based on parent chain. +type localImageCache struct { + daemon *Daemon +} + +func (lic *localImageCache) GetCache(imgID string, config *containertypes.Config) (string, error) { + return getImageIDAndError(lic.daemon.getLocalCachedImage(image.ID(imgID), config)) +} + +// imageCache is cache based on history objects. Requires initial set of images. 
+type imageCache struct { + sources []*image.Image + daemon *Daemon + localImageCache *localImageCache +} + +func (ic *imageCache) restoreCachedImage(parent, target *image.Image, cfg *containertypes.Config) (image.ID, error) { + var history []image.History + rootFS := image.NewRootFS() + lenHistory := 0 + if parent != nil { + history = parent.History + rootFS = parent.RootFS + lenHistory = len(parent.History) + } + history = append(history, target.History[lenHistory]) + if layer := getLayerForHistoryIndex(target, lenHistory); layer != "" { + rootFS.Append(layer) + } + + config, err := json.Marshal(&image.Image{ + V1Image: image.V1Image{ + DockerVersion: dockerversion.Version, + Config: cfg, + Architecture: target.Architecture, + OS: target.OS, + Author: target.Author, + Created: history[len(history)-1].Created, + }, + RootFS: rootFS, + History: history, + OSFeatures: target.OSFeatures, + OSVersion: target.OSVersion, + }) + if err != nil { + return "", errors.Wrap(err, "failed to marshal image config") + } + + imgID, err := ic.daemon.imageStore.Create(config) + if err != nil { + return "", errors.Wrap(err, "failed to create cache image") + } + + if parent != nil { + if err := ic.daemon.imageStore.SetParent(imgID, parent.ID()); err != nil { + return "", errors.Wrapf(err, "failed to set parent for %v to %v", target.ID(), parent.ID()) + } + } + return imgID, nil +} + +func (ic *imageCache) isParent(imgID, parentID image.ID) bool { + nextParent, err := ic.daemon.imageStore.GetParent(imgID) + if err != nil { + return false + } + if nextParent == parentID { + return true + } + return ic.isParent(nextParent, parentID) +} + +func (ic *imageCache) GetCache(parentID string, cfg *containertypes.Config) (string, error) { + imgID, err := ic.localImageCache.GetCache(parentID, cfg) + if err != nil { + return "", err + } + if imgID != "" { + for _, s := range ic.sources { + if ic.isParent(s.ID(), image.ID(imgID)) { + return imgID, nil + } + } + } + + var parent *image.Image + 
lenHistory := 0 + if parentID != "" { + parent, err = ic.daemon.imageStore.Get(image.ID(parentID)) + if err != nil { + return "", errors.Wrapf(err, "unable to find image %v", parentID) + } + lenHistory = len(parent.History) + } + + for _, target := range ic.sources { + if !isValidParent(target, parent) || !isValidConfig(cfg, target.History[lenHistory]) { + continue + } + + if len(target.History)-1 == lenHistory { // last + if parent != nil { + if err := ic.daemon.imageStore.SetParent(target.ID(), parent.ID()); err != nil { + return "", errors.Wrapf(err, "failed to set parent for %v to %v", target.ID(), parent.ID()) + } + } + return target.ID().String(), nil + } + + imgID, err := ic.restoreCachedImage(parent, target, cfg) + if err != nil { + return "", errors.Wrapf(err, "failed to restore cached image from %q to %v", parentID, target.ID()) + } + + ic.sources = []*image.Image{target} // avoid jumping to different target, tuned for safety atm + return imgID.String(), nil + } + + return "", nil +} + +func getImageIDAndError(img *image.Image, err error) (string, error) { + if img == nil || err != nil { + return "", err + } + return img.ID().String(), nil +} + +func isValidParent(img, parent *image.Image) bool { + if len(img.History) == 0 { + return false + } + if parent == nil || len(parent.History) == 0 && len(parent.RootFS.DiffIDs) == 0 { + return true + } + if len(parent.History) >= len(img.History) { + return false + } + if len(parent.RootFS.DiffIDs) >= len(img.RootFS.DiffIDs) { + return false + } + + for i, h := range parent.History { + if !reflect.DeepEqual(h, img.History[i]) { + return false + } + } + for i, d := range parent.RootFS.DiffIDs { + if d != img.RootFS.DiffIDs[i] { + return false + } + } + return true +} + +func getLayerForHistoryIndex(image *image.Image, index int) layer.DiffID { + layerIndex := 0 + for i, h := range image.History { + if i == index { + if h.EmptyLayer { + return "" + } + break + } + if !h.EmptyLayer { + layerIndex++ + } + } + return 
image.RootFS.DiffIDs[layerIndex] // validate? +} + +func isValidConfig(cfg *containertypes.Config, h image.History) bool { + // todo: make this format better than join that loses data + return strings.Join(cfg.Cmd, " ") == h.CreatedBy +} diff --git a/components/engine/daemon/image.go b/components/engine/daemon/image.go index fb2b997264..b7e5f07269 100644 --- a/components/engine/daemon/image.go +++ b/components/engine/daemon/image.go @@ -3,11 +3,9 @@ package daemon import ( "fmt" - containertypes "github.com/docker/docker/api/types/container" "github.com/docker/docker/builder" "github.com/docker/docker/image" "github.com/docker/docker/reference" - "github.com/docker/docker/runconfig" ) // ErrImageDoesNotExist is error returned when no image can be found for a reference. @@ -71,54 +69,3 @@ func (daemon *Daemon) GetImageOnBuild(name string) (builder.Image, error) { } return img, nil } - -// GetCachedImage returns the most recent created image that is a child -// of the image with imgID, that had the same config when it was -// created. nil is returned if a child cannot be found. An error is -// returned if the parent image cannot be found. -func (daemon *Daemon) GetCachedImage(imgID image.ID, config *containertypes.Config) (*image.Image, error) { - // Loop on the children of the given image and check the config - getMatch := func(siblings []image.ID) (*image.Image, error) { - var match *image.Image - for _, id := range siblings { - img, err := daemon.imageStore.Get(id) - if err != nil { - return nil, fmt.Errorf("unable to find image %q", id) - } - - if runconfig.Compare(&img.ContainerConfig, config) { - // check for the most up to date match - if match == nil || match.Created.Before(img.Created) { - match = img - } - } - } - return match, nil - } - - // In this case, this is `FROM scratch`, which isn't an actual image. 
- if imgID == "" { - images := daemon.imageStore.Map() - var siblings []image.ID - for id, img := range images { - if img.Parent == imgID { - siblings = append(siblings, id) - } - } - return getMatch(siblings) - } - - // find match from child images - siblings := daemon.imageStore.Children(imgID) - return getMatch(siblings) -} - -// GetCachedImageOnBuild returns a reference to a cached image whose parent equals `parent` -// and runconfig equals `cfg`. A cache miss is expected to return an empty ID and a nil error. -func (daemon *Daemon) GetCachedImageOnBuild(imgID string, cfg *containertypes.Config) (string, error) { - cache, err := daemon.GetCachedImage(image.ID(imgID), cfg) - if cache == nil || err != nil { - return "", err - } - return cache.ID().String(), nil -} diff --git a/components/engine/docs/reference/api/docker_remote_api.md b/components/engine/docs/reference/api/docker_remote_api.md index 5f8abec141..6053db96d6 100644 --- a/components/engine/docs/reference/api/docker_remote_api.md +++ b/components/engine/docs/reference/api/docker_remote_api.md @@ -124,6 +124,7 @@ This section lists each version from latest to oldest. Each listing includes a * `POST /containers/create` now validates IPAMConfig in NetworkingConfig, and returns error for invalid IPv4 and IPv6 addresses (`--ip` and `--ip6` in `docker create/run`). * `POST /containers/create` now takes a `Mounts` field in `HostConfig` which replaces `Binds` and `Volumes`. *note*: `Binds` and `Volumes` are still available but are exclusive with `Mounts` * `POST /build` now performs a preliminary validation of the `Dockerfile` before starting the build, and returns an error if the syntax is incorrect. Note that this change is _unversioned_ and applied to all API versions. +* `POST /build` accepts `cachefrom` parameter to specify images used for build cache. 
### v1.24 API changes diff --git a/components/engine/docs/reference/api/docker_remote_api_v1.25.md b/components/engine/docs/reference/api/docker_remote_api_v1.25.md index 94f55bd9f9..b71c1c9d67 100644 --- a/components/engine/docs/reference/api/docker_remote_api_v1.25.md +++ b/components/engine/docs/reference/api/docker_remote_api_v1.25.md @@ -1715,6 +1715,7 @@ or being killed. there must be a file with the corresponding path inside the tarball. - **q** – Suppress verbose build output. - **nocache** – Do not use the cache when building the image. +- **cachefrom** - JSON array of images used for build cache resolution. - **pull** - Attempt to pull the image even if an older image exists locally. - **rm** - Remove intermediate containers after a successful build (default behavior). - **forcerm** - Always remove intermediate containers (includes `rm`). diff --git a/components/engine/docs/reference/builder.md b/components/engine/docs/reference/builder.md index a4352bb914..bef2b85535 100644 --- a/components/engine/docs/reference/builder.md +++ b/components/engine/docs/reference/builder.md @@ -106,6 +106,13 @@ the `Using cache` message in the console output. ---> 7ea8aef582cc Successfully built 7ea8aef582cc +The build cache is only used from images that have a local parent chain. This means +that these images were created by previous builds or the whole chain of images +was loaded with `docker load`. If you wish to use the build cache of a specific +image, you can specify it with the `--cache-from` option. Images specified with +`--cache-from` do not need to have a parent chain and may be pulled from other +registries. + When you're done with your build, you're ready to look into [*Pushing a repository to its registry*](../tutorials/dockerrepos.md#contributing-to-docker-hub).
diff --git a/components/engine/docs/reference/commandline/build.md b/components/engine/docs/reference/commandline/build.md index 0f700c7a20..68e39dfd77 100644 --- a/components/engine/docs/reference/commandline/build.md +++ b/components/engine/docs/reference/commandline/build.md @@ -17,6 +17,7 @@ Build an image from a Dockerfile Options: --build-arg value Set build-time variables (default []) + --cache-from value Images to consider as cache sources (default []) --cgroup-parent string Optional parent cgroup for the container --cpu-period int Limit the CPU CFS (Completely Fair Scheduler) period --cpu-quota int Limit the CPU CFS (Completely Fair Scheduler) quota