From e778cd68a0bfdd166eec43a267bdc91e2b85b51e Mon Sep 17 00:00:00 2001 From: Josh Hawn Date: Thu, 2 Apr 2015 10:42:40 -0700 Subject: [PATCH 1/2] [builder] Make build cache ignore mtime Build cache uses pgk/tarsum to get a digest of content which is ADD'd or COPY'd during a build. The builder has always used v0 of the tarsum algorithm which includes mtimes however since the whole file is hashed anyway, the mtime doesn't really provide any extra information about whether the file has changed and many version control tools like Git strip mtime from files when they are cloned. This patch updates the build subsystem to use v1 of Tarsum which explicitly ignores mtime when calculating a digest. Now ADD and COPY will result in a cache hit if only the mtime and not the file contents have changed. NOTE: Tarsum is NOT a meant to be a cryptographically secure hash function. It is a best-effort approach to determining if two sets of filesystem content are different. Docker-DCO-1.1-Signed-off-by: Josh Hawn (github: jlhawn) Upstream-commit: 0e10507a1c5985e6fda0ff48e9313ba7a4de761b Component: engine --- components/engine/builder/internals.go | 4 +-- .../integration-cli/docker_cli_build_test.go | 31 +++++-------------- 2 files changed, 9 insertions(+), 26 deletions(-) diff --git a/components/engine/builder/internals.go b/components/engine/builder/internals.go index c167bb7300..6b1c54952a 100644 --- a/components/engine/builder/internals.go +++ b/components/engine/builder/internals.go @@ -50,7 +50,7 @@ func (b *Builder) readContext(context io.Reader) error { return err } - if b.context, err = tarsum.NewTarSum(decompressedStream, true, tarsum.Version0); err != nil { + if b.context, err = tarsum.NewTarSum(decompressedStream, true, tarsum.Version1); err != nil { return err } @@ -345,7 +345,7 @@ func calcCopyInfo(b *Builder, cmdName string, cInfos *[]*copyInfo, origPath stri if err != nil { return err } - tarSum, err := tarsum.NewTarSum(r, true, tarsum.Version0) + tarSum, err := tarsum.NewTarSum(r, true, tarsum.Version1) if err != nil { return err } diff --git a/components/engine/integration-cli/docker_cli_build_test.go b/components/engine/integration-cli/docker_cli_build_test.go index 2907c8084c..3839a0f25b 100644 --- a/components/engine/integration-cli/docker_cli_build_test.go +++ b/components/engine/integration-cli/docker_cli_build_test.go @@ -2768,7 +2768,6 @@ func (s *DockerSuite) TestBuildAddCurrentDirWithCache(c *check.C) { name2 := name + "2" name3 := name + "3" name4 := name + "4" - name5 := name + "5" dockerfile := ` FROM scratch MAINTAINER dockerio @@ -2806,7 +2805,8 @@ func (s *DockerSuite) TestBuildAddCurrentDirWithCache(c *check.C) { if id2 == id3 { c.Fatal("The cache should have been invalided but hasn't.") } - // Check that changing file to same content invalidate cache of "ADD ." + // Check that changing file to same content with different mtime does not + // invalidate cache of "ADD ." time.Sleep(1 * time.Second) // wait second because of mtime precision if err := ctx.Add("foo", "hello1"); err != nil { c.Fatal(err) @@ -2815,14 +2815,7 @@ func (s *DockerSuite) TestBuildAddCurrentDirWithCache(c *check.C) { if err != nil { c.Fatal(err) } - if id3 == id4 { - c.Fatal("The cache should have been invalided but hasn't.") - } - id5, err := buildImageFromContext(name5, ctx, true) - if err != nil { - c.Fatal(err) - } - if id4 != id5 { + if id3 != id4 { c.Fatal("The cache should have been used but hasn't.") } } @@ -2921,7 +2914,6 @@ func (s *DockerSuite) TestBuildAddRemoteFileMTime(c *check.C) { name := "testbuildaddremotefilemtime" name2 := name + "2" name3 := name + "3" - name4 := name + "4" files := map[string]string{"baz": "hello"} server, err := fakeStorage(files) @@ -2951,8 +2943,8 @@ func (s *DockerSuite) TestBuildAddRemoteFileMTime(c *check.C) { c.Fatal("The cache should have been used but wasn't - #1") } - // Now create a different server withsame contents (causes different mtim) - // This time the cache should not be used + // Now create a different server with same contents (causes different mtime) + // The cache should still be used // allow some time for clock to pass as mtime precision is only 1s time.Sleep(2 * time.Second) @@ -2974,17 +2966,8 @@ func (s *DockerSuite) TestBuildAddRemoteFileMTime(c *check.C) { if err != nil { c.Fatal(err) } - if id1 == id3 { - c.Fatal("The cache should not have been used but was") - } - - // And for good measure do it again and make sure cache is used this time - id4, err := buildImageFromContext(name4, ctx2, true) - if err != nil { - c.Fatal(err) - } - if id3 != id4 { - c.Fatal("The cache should have been used but wasn't - #2") + if id1 != id3 { + c.Fatal("The cache should have been used but wasn't") } } From 4b582bde62169b5239365735fc807b929d0e6057 Mon Sep 17 00:00:00 2001 From: Josh Hawn Date: Mon, 8 Jun 2015 11:42:00 -0700 Subject: [PATCH 2/2] [docs] Update builder docs on last-modified times Docker-DCO-1.1-Signed-off-by: Josh Hawn (github: jlhawn) Upstream-commit: cd7e2a6b2b529cb4180926352a3fe9e82040d073 Component: engine --- .../engine/docs/sources/articles/dockerfile_best-practices.md | 3 ++- components/engine/docs/sources/reference/builder.md | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/components/engine/docs/sources/articles/dockerfile_best-practices.md b/components/engine/docs/sources/articles/dockerfile_best-practices.md index 2604b22453..8c9c6ed515 100644 --- a/components/engine/docs/sources/articles/dockerfile_best-practices.md +++ b/components/engine/docs/sources/articles/dockerfile_best-practices.md @@ -103,7 +103,8 @@ a little more examination and explanation. being put into the image are examined. Specifically, a checksum is done of the file(s) and then that checksum is used during the cache lookup. If anything has changed in the file(s), including its metadata, -then the cache is invalidated. +then the cache is invalidated. The last-modified and last-accessed times of the +file(s) are not considered in these checksums. * Aside from the `ADD` and `COPY` commands cache checking will not look at the files in the container to determine a cache match. For example, when processing diff --git a/components/engine/docs/sources/reference/builder.md b/components/engine/docs/sources/reference/builder.md index fb4d5ce1d7..485ad39b88 100644 --- a/components/engine/docs/sources/reference/builder.md +++ b/components/engine/docs/sources/reference/builder.md @@ -519,8 +519,8 @@ All new files and directories are created with a UID and GID of 0. In the case where `` is a remote file URL, the destination will have permissions of 600. If the remote file being retrieved has an HTTP `Last-Modified` header, the timestamp from that header will be used -to set the `mtime` on the destination file. Then, like any other file -processed during an `ADD`, `mtime` will be included in the determination +to set the `mtime` on the destination file. However, like any other file +processed during an `ADD`, `mtime` will not be included in the determination of whether or not the file has changed and the cache should be updated. > **Note**: