From 87d83cfa00bd281a3f47ff51a87a3099a7052896 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Mon, 15 Sep 2014 14:45:53 -0400 Subject: [PATCH 1/3] archive: preserve hardlinks in Tar and Untar * integration test for preserving hardlinks Signed-off-by: Vincent Batts Signed-off-by: Vincent Batts Upstream-commit: f9f80443638fc2d703ee6205c8ef3db8e38db9a3 Component: engine --- .../integration-cli/docker_cli_commit_test.go | 52 +++++++++++++++++ components/engine/pkg/archive/archive.go | 52 ++++++++++++----- components/engine/pkg/archive/archive_test.go | 58 +++++++++++++++++++ components/engine/pkg/archive/changes.go | 18 +++--- 4 files changed, 160 insertions(+), 20 deletions(-) diff --git a/components/engine/integration-cli/docker_cli_commit_test.go b/components/engine/integration-cli/docker_cli_commit_test.go index ddc7a2e041..7715e81bf5 100644 --- a/components/engine/integration-cli/docker_cli_commit_test.go +++ b/components/engine/integration-cli/docker_cli_commit_test.go @@ -101,6 +101,58 @@ func TestCommitNewFile(t *testing.T) { logDone("commit - commit file and read") } +func TestCommitHardlink(t *testing.T) { + cmd := exec.Command(dockerBinary, "run", "-t", "--name", "hardlinks", "busybox", "sh", "-c", "touch file1 && ln file1 file2 && ls -di file1 file2") + firstOuput, _, err := runCommandWithOutput(cmd) + if err != nil { + t.Fatal(err) + } + + chunks := strings.Split(strings.TrimSpace(firstOuput), " ") + inode := chunks[0] + found := false + for _, chunk := range chunks[1:] { + if chunk == inode { + found = true + break + } + } + if !found { + t.Fatalf("Failed to create hardlink in a container. Expected to find %q in %q", inode, chunks[1:]) + } + + cmd = exec.Command(dockerBinary, "commit", "hardlinks", "hardlinks") + imageID, _, err := runCommandWithOutput(cmd) + if err != nil { + t.Fatal(imageID, err) + } + imageID = strings.Trim(imageID, "\r\n") + + cmd = exec.Command(dockerBinary, "run", "-t", "hardlinks", "ls", "-di", "file1", "file2") + secondOuput, _, err := runCommandWithOutput(cmd) + if err != nil { + t.Fatal(err) + } + + chunks = strings.Split(strings.TrimSpace(secondOuput), " ") + inode = chunks[0] + found = false + for _, chunk := range chunks[1:] { + if chunk == inode { + found = true + break + } + } + if !found { + t.Fatalf("Failed to create hardlink in a container. Expected to find %q in %q", inode, chunks[1:]) + } + + deleteAllContainers() + deleteImages(imageID) + + logDone("commit - commit hardlinks") +} + func TestCommitTTY(t *testing.T) { cmd := exec.Command(dockerBinary, "run", "-t", "--name", "tty", "busybox", "/bin/ls") if _, err := runCommand(cmd); err != nil { diff --git a/components/engine/pkg/archive/archive.go b/components/engine/pkg/archive/archive.go index 9c4d881cfa..dd14b778f2 100644 --- a/components/engine/pkg/archive/archive.go +++ b/components/engine/pkg/archive/archive.go @@ -153,7 +153,15 @@ func (compression *Compression) Extension() string { return "" } -func addTarFile(path, name string, tw *tar.Writer, twBuf *bufio.Writer) error { +type tarAppender struct { + TarWriter *tar.Writer + Buffer *bufio.Writer + + // for hardlink mapping + SeenFiles map[uint64]string +} + +func (ta *tarAppender) addTarFile(path, name string) error { fi, err := os.Lstat(path) if err != nil { return err @@ -188,13 +196,28 @@ func addTarFile(path, name string, tw *tar.Writer, twBuf *bufio.Writer) error { } + // if it's a regular file and has more than 1 link, + // it's hardlinked, so set the type flag accordingly + if fi.Mode().IsRegular() && stat.Nlink > 1 { + // a link should have a name that it links too + // and that linked name should be first in the tar archive + ino := uint64(stat.Ino) + if oldpath, ok := ta.SeenFiles[ino]; ok { + hdr.Typeflag = tar.TypeLink + hdr.Linkname = oldpath + hdr.Size = 0 // This Must be here for the writer math to add up! + } else { + ta.SeenFiles[ino] = name + } + } + capability, _ := system.Lgetxattr(path, "security.capability") if capability != nil { hdr.Xattrs = make(map[string]string) hdr.Xattrs["security.capability"] = string(capability) } - if err := tw.WriteHeader(hdr); err != nil { + if err := ta.TarWriter.WriteHeader(hdr); err != nil { return err } @@ -204,17 +227,17 @@ func addTarFile(path, name string, tw *tar.Writer, twBuf *bufio.Writer) error { return err } - twBuf.Reset(tw) - _, err = io.Copy(twBuf, file) + ta.Buffer.Reset(ta.TarWriter) + _, err = io.Copy(ta.Buffer, file) file.Close() if err != nil { return err } - err = twBuf.Flush() + err = ta.Buffer.Flush() if err != nil { return err } - twBuf.Reset(nil) + ta.Buffer.Reset(nil) } return nil @@ -345,9 +368,15 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) return nil, err } - tw := tar.NewWriter(compressWriter) - go func() { + ta := &tarAppender{ + TarWriter: tar.NewWriter(compressWriter), + Buffer: pools.BufioWriter32KPool.Get(nil), + SeenFiles: make(map[uint64]string), + } + // this buffer is needed for the duration of this piped stream + defer pools.BufioWriter32KPool.Put(ta.Buffer) + // In general we log errors here but ignore them because // during e.g. a diff operation the container can continue // mutating the filesystem and we can see transient errors @@ -357,9 +386,6 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) options.Includes = []string{"."} } - twBuf := pools.BufioWriter32KPool.Get(nil) - defer pools.BufioWriter32KPool.Put(twBuf) - var renamedRelFilePath string // For when tar.Options.Name is set for _, include := range options.Includes { filepath.Walk(filepath.Join(srcPath, include), func(filePath string, f os.FileInfo, err error) error { @@ -395,7 +421,7 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) relFilePath = strings.Replace(relFilePath, renamedRelFilePath, options.Name, 1) } - if err := addTarFile(filePath, relFilePath, tw, twBuf); err != nil { + if err := ta.addTarFile(filePath, relFilePath); err != nil { log.Debugf("Can't add file %s to tar: %s", srcPath, err) } return nil @@ -403,7 +429,7 @@ func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) } // Make sure to check the error on Close. - if err := tw.Close(); err != nil { + if err := ta.TarWriter.Close(); err != nil { log.Debugf("Can't close tar writer: %s", err) } if err := compressWriter.Close(); err != nil { diff --git a/components/engine/pkg/archive/archive_test.go b/components/engine/pkg/archive/archive_test.go index 900fff5f01..3516aca8f0 100644 --- a/components/engine/pkg/archive/archive_test.go +++ b/components/engine/pkg/archive/archive_test.go @@ -249,6 +249,64 @@ func TestUntarUstarGnuConflict(t *testing.T) { } } +func TestTarWithHardLink(t *testing.T) { + origin, err := ioutil.TempDir("", "docker-test-tar-hardlink") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(origin) + if err := ioutil.WriteFile(path.Join(origin, "1"), []byte("hello world"), 0700); err != nil { + t.Fatal(err) + } + if err := os.Link(path.Join(origin, "1"), path.Join(origin, "2")); err != nil { + t.Fatal(err) + } + + var i1, i2 uint64 + if i1, err = getNlink(path.Join(origin, "1")); err != nil { + t.Fatal(err) + } + // sanity check that we can hardlink + if i1 != 2 { + t.Skipf("skipping since hardlinks don't work here; expected 2 links, got %d", i1) + } + + dest, err := ioutil.TempDir("", "docker-test-tar-hardlink-dest") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(dest) + + // we'll do this in two steps to separate failure + fh, err := Tar(origin, Uncompressed) + if err != nil { + t.Fatal(err) + } + + // ensure we can read the whole thing with no error, before writing back out + buf, err := ioutil.ReadAll(fh) + if err != nil { + t.Fatal(err) + } + + bRdr := bytes.NewReader(buf) + err = Untar(bRdr, dest, &TarOptions{Compression: Uncompressed}) + if err != nil { + t.Fatal(err) + } + + if i1, err = getInode(path.Join(dest, "1")); err != nil { + t.Fatal(err) + } + if i2, err = getInode(path.Join(dest, "2")); err != nil { + t.Fatal(err) + } + + if i1 != i2 { + t.Errorf("expected matching inodes, but got %d and %d", i1, i2) + } +} + func getNlink(path string) (uint64, error) { stat, err := os.Stat(path) if err != nil { diff --git a/components/engine/pkg/archive/changes.go b/components/engine/pkg/archive/changes.go index 557b5db583..3e9ab45267 100644 --- a/components/engine/pkg/archive/changes.go +++ b/components/engine/pkg/archive/changes.go @@ -368,11 +368,15 @@ func minor(device uint64) uint64 { // ExportChanges produces an Archive from the provided changes, relative to dir. func ExportChanges(dir string, changes []Change) (Archive, error) { reader, writer := io.Pipe() - tw := tar.NewWriter(writer) - go func() { - twBuf := pools.BufioWriter32KPool.Get(nil) - defer pools.BufioWriter32KPool.Put(twBuf) + ta := &tarAppender{ + TarWriter: tar.NewWriter(writer), + Buffer: pools.BufioWriter32KPool.Get(nil), + SeenFiles: make(map[uint64]string), + } + // this buffer is needed for the duration of this piped stream + defer pools.BufioWriter32KPool.Put(ta.Buffer) + // In general we log errors here but ignore them because // during e.g. a diff operation the container can continue // mutating the filesystem and we can see transient errors @@ -390,19 +394,19 @@ func ExportChanges(dir string, changes []Change) (Archive, error) { AccessTime: timestamp, ChangeTime: timestamp, } - if err := tw.WriteHeader(hdr); err != nil { + if err := ta.TarWriter.WriteHeader(hdr); err != nil { log.Debugf("Can't write whiteout header: %s", err) } } else { path := filepath.Join(dir, change.Path) - if err := addTarFile(path, change.Path[1:], tw, twBuf); err != nil { + if err := ta.addTarFile(path, change.Path[1:]); err != nil { log.Debugf("Can't add file %s to tar: %s", path, err) } } } // Make sure to check the error on Close. - if err := tw.Close(); err != nil { + if err := ta.TarWriter.Close(); err != nil { log.Debugf("Can't close layer: %s", err) } writer.Close() From c5a11d2981bb830fb4a0fe27831e1925dd12b0b1 Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 28 Oct 2014 16:59:27 -0400 Subject: [PATCH 2/3] archive: example app for diffing directories By default is a demo of file differences, but can be used to create a tar of changes between an old and new path. Signed-off-by: Vincent Batts Signed-off-by: Vincent Batts Upstream-commit: f710a8d7746df5f54e2cbe7b90885e4eb75920b4 Component: engine --- .../engine/pkg/archive/example_changes.go | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 components/engine/pkg/archive/example_changes.go diff --git a/components/engine/pkg/archive/example_changes.go b/components/engine/pkg/archive/example_changes.go new file mode 100644 index 0000000000..cedd46a408 --- /dev/null +++ b/components/engine/pkg/archive/example_changes.go @@ -0,0 +1,97 @@ +// +build ignore + +// Simple tool to create an archive stream from an old and new directory +// +// By default it will stream the comparison of two temporary directories with junk files +package main + +import ( + "flag" + "fmt" + "io" + "io/ioutil" + "os" + "path" + + "github.com/Sirupsen/logrus" + "github.com/docker/docker/pkg/archive" +) + +var ( + flDebug = flag.Bool("D", false, "debugging output") + flNewDir = flag.String("newdir", "", "") + flOldDir = flag.String("olddir", "", "") + log = logrus.New() +) + +func main() { + flag.Usage = func() { + fmt.Println("Produce a tar from comparing two directory paths. By default a demo tar is created of around 200 files (including hardlinks)") + fmt.Printf("%s [OPTIONS]\n", os.Args[0]) + flag.PrintDefaults() + } + flag.Parse() + log.Out = os.Stderr + if (len(os.Getenv("DEBUG")) > 0) || *flDebug { + logrus.SetLevel(logrus.DebugLevel) + } + var newDir, oldDir string + + if len(*flNewDir) == 0 { + var err error + newDir, err = ioutil.TempDir("", "docker-test-newDir") + if err != nil { + log.Fatal(err) + } + defer os.RemoveAll(newDir) + if _, err := prepareUntarSourceDirectory(100, newDir, true); err != nil { + log.Fatal(err) + } + } else { + newDir = *flNewDir + } + + if len(*flOldDir) == 0 { + oldDir, err := ioutil.TempDir("", "docker-test-oldDir") + if err != nil { + log.Fatal(err) + } + defer os.RemoveAll(oldDir) + } else { + oldDir = *flOldDir + } + + changes, err := archive.ChangesDirs(newDir, oldDir) + if err != nil { + log.Fatal(err) + } + + a, err := archive.ExportChanges(newDir, changes) + if err != nil { + log.Fatal(err) + } + defer a.Close() + + i, err := io.Copy(os.Stdout, a) + if err != nil && err != io.EOF { + log.Fatal(err) + } + fmt.Fprintf(os.Stderr, "wrote archive of %d bytes", i) +} + +func prepareUntarSourceDirectory(numberOfFiles int, targetPath string, makeLinks bool) (int, error) { + fileData := []byte("fooo") + for n := 0; n < numberOfFiles; n++ { + fileName := fmt.Sprintf("file-%d", n) + if err := ioutil.WriteFile(path.Join(targetPath, fileName), fileData, 0700); err != nil { + return 0, err + } + if makeLinks { + if err := os.Link(path.Join(targetPath, fileName), path.Join(targetPath, fileName+"-link")); err != nil { + return 0, err + } + } + } + totalSize := numberOfFiles * len(fileData) + return totalSize, nil +} From 7239493b343354a11d9a5ab073dc2b4c45669d6f Mon Sep 17 00:00:00 2001 From: Vincent Batts Date: Tue, 28 Oct 2014 17:01:10 -0400 Subject: [PATCH 3/3] archive: cleanup and more information Signed-off-by: Vincent Batts Signed-off-by: Vincent Batts Upstream-commit: f14a9ed011d9b73104631310a13eab447d53be3a Component: engine --- components/engine/pkg/archive/archive.go | 3 +-- components/engine/pkg/archive/changes.go | 6 +++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/components/engine/pkg/archive/archive.go b/components/engine/pkg/archive/archive.go index dd14b778f2..37b312e5b0 100644 --- a/components/engine/pkg/archive/archive.go +++ b/components/engine/pkg/archive/archive.go @@ -193,7 +193,6 @@ func (ta *tarAppender) addTarFile(path, name string) error { hdr.Devmajor = int64(major(uint64(stat.Rdev))) hdr.Devminor = int64(minor(uint64(stat.Rdev))) } - } // if it's a regular file and has more than 1 link, @@ -228,6 +227,7 @@ func (ta *tarAppender) addTarFile(path, name string) error { } ta.Buffer.Reset(ta.TarWriter) + defer ta.Buffer.Reset(nil) _, err = io.Copy(ta.Buffer, file) file.Close() if err != nil { @@ -237,7 +237,6 @@ func (ta *tarAppender) addTarFile(path, name string) error { if err != nil { return err } - ta.Buffer.Reset(nil) } return nil diff --git a/components/engine/pkg/archive/changes.go b/components/engine/pkg/archive/changes.go index 3e9ab45267..0a1f741c41 100644 --- a/components/engine/pkg/archive/changes.go +++ b/components/engine/pkg/archive/changes.go @@ -333,6 +333,8 @@ func ChangesDirs(newDir, oldDir string) ([]Change, error) { newRoot, err2 = collectFileInfo(newDir) errs <- err2 }() + + // block until both routines have returned for i := 0; i < 2; i++ { if err := <-errs; err != nil { return nil, err @@ -409,7 +411,9 @@ func ExportChanges(dir string, changes []Change) (Archive, error) { if err := ta.TarWriter.Close(); err != nil { log.Debugf("Can't close layer: %s", err) } - writer.Close() + if err := writer.Close(); err != nil { + log.Debugf("failed close Changes writer: %s", err) + } }() return reader, nil }