From a4e38c5935e644425e1318abf4e9c12b37670996 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 5 Sep 2018 19:11:04 -0700 Subject: [PATCH 1/9] integration/build: add TestBuildHugeFile Add a test case for creating a 8GB file inside a container. Due to a bug in tar-split this was failing in Docker 18.06. The file being created is sparse, so there's not much I/O happening or disk space being used -- meaning the test is fast and does not require a lot of disk space. Signed-off-by: Kir Kolyshkin (cherry picked from commit b3165f5b2d7c1063c2a76a7ed5c2dd18f868276b) Upstream-commit: fc1d808c44f77e3929c4eeba7066501890aecd4e Component: engine --- .../engine/integration/build/build_test.go | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/components/engine/integration/build/build_test.go b/components/engine/integration/build/build_test.go index 25c5e635bd..9dde5d4557 100644 --- a/components/engine/integration/build/build_test.go +++ b/components/engine/integration/build/build_test.go @@ -423,6 +423,38 @@ RUN [ ! -f foo ] assert.Check(t, is.Contains(out.String(), "Successfully built")) } +// #37581 +func TestBuildWithHugeFile(t *testing.T) { + ctx := context.TODO() + defer setupTest(t)() + + dockerfile := `FROM busybox +# create a sparse file with size over 8GB +RUN for g in $(seq 0 8); do dd if=/dev/urandom of=rnd bs=1K count=1 seek=$((1024*1024*g)) status=none; done && \ + ls -la rnd && du -sk rnd` + + buf := bytes.NewBuffer(nil) + w := tar.NewWriter(buf) + writeTarRecord(t, w, "Dockerfile", dockerfile) + err := w.Close() + assert.NilError(t, err) + + apiclient := testEnv.APIClient() + resp, err := apiclient.ImageBuild(ctx, + buf, + types.ImageBuildOptions{ + Remove: true, + ForceRemove: true, + }) + + out := bytes.NewBuffer(nil) + assert.NilError(t, err) + _, err = io.Copy(out, resp.Body) + resp.Body.Close() + assert.NilError(t, err) + assert.Check(t, is.Contains(out.String(), "Successfully built")) +} + func writeTarRecord(t *testing.T, w *tar.Writer, fn, contents string) { err := w.WriteHeader(&tar.Header{ Name: fn, From f75eb8f2a77e59207b14f4a72389636abe369f01 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 6 Sep 2018 09:43:41 -0700 Subject: [PATCH 2/9] vendor: update tar-split To include https://github.com/vbatts/tar-split/pull/48 which fixes the issue of creating an image with >8GB file in it. Signed-off-by: Kir Kolyshkin (cherry picked from commit 92e75439037d205c218208945c70cfb3633e87aa) Upstream-commit: d7085abec2e445630bedd3e79782c5ec33f62682 Component: engine --- components/engine/vendor.conf | 2 +- .../github.com/vbatts/tar-split/README.md | 3 +- .../vbatts/tar-split/archive/tar/common.go | 689 ++++++-- .../vbatts/tar-split/archive/tar/format.go | 303 ++++ .../vbatts/tar-split/archive/tar/reader.go | 1463 ++++++++--------- .../tar/{stat_atim.go => stat_actime1.go} | 0 .../{stat_atimespec.go => stat_actime2.go} | 0 .../vbatts/tar-split/archive/tar/stat_unix.go | 72 +- .../vbatts/tar-split/archive/tar/strconv.go | 326 ++++ .../vbatts/tar-split/archive/tar/writer.go | 925 +++++++---- 10 files changed, 2477 insertions(+), 1306 deletions(-) create mode 100644 components/engine/vendor/github.com/vbatts/tar-split/archive/tar/format.go rename components/engine/vendor/github.com/vbatts/tar-split/archive/tar/{stat_atim.go => stat_actime1.go} (100%) rename components/engine/vendor/github.com/vbatts/tar-split/archive/tar/{stat_atimespec.go => stat_actime2.go} (100%) create mode 100644 components/engine/vendor/github.com/vbatts/tar-split/archive/tar/strconv.go diff --git a/components/engine/vendor.conf b/components/engine/vendor.conf index edcdffe6f9..349204387d 100644 --- a/components/engine/vendor.conf +++ b/components/engine/vendor.conf @@ -65,7 +65,7 @@ github.com/ishidawataru/sctp 07191f837fedd2f13d1ec7b5f885f0f3ec54b1cb # get graph and distribution packages github.com/docker/distribution 83389a148052d74ac602f5f1d62f86ff2f3c4aa5 -github.com/vbatts/tar-split v0.10.2 +github.com/vbatts/tar-split v0.11.0 github.com/opencontainers/go-digest v1.0.0-rc1 # get go-zfs packages diff --git a/components/engine/vendor/github.com/vbatts/tar-split/README.md b/components/engine/vendor/github.com/vbatts/tar-split/README.md index 03e3ec4308..fe997f69b5 100644 --- a/components/engine/vendor/github.com/vbatts/tar-split/README.md +++ b/components/engine/vendor/github.com/vbatts/tar-split/README.md @@ -67,7 +67,7 @@ Do not break the API of stdlib `archive/tar` in our fork (ideally find an upstre ## Std Version -The version of golang stdlib `archive/tar` is from go1.6 +The version of golang stdlib `archive/tar` is from go1.11 It is minimally extended to expose the raw bytes of the TAR, rather than just the marshalled headers and file stream. @@ -135,4 +135,3 @@ bytes-per-file rate for the storage implications. ## License See [LICENSE](LICENSE) - diff --git a/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/common.go b/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/common.go index 36f4e23980..dee9e47e4a 100644 --- a/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/common.go +++ b/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/common.go @@ -3,70 +3,528 @@ // license that can be found in the LICENSE file. // Package tar implements access to tar archives. -// It aims to cover most of the variations, including those produced -// by GNU and BSD tars. // -// References: -// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 -// http://www.gnu.org/software/tar/manual/html_node/Standard.html -// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html +// Tape archives (tar) are a file format for storing a sequence of files that +// can be read and written in a streaming manner. +// This package aims to cover most variations of the format, +// including those produced by GNU and BSD tar tools. package tar import ( - "bytes" "errors" "fmt" + "math" "os" "path" + "reflect" + "strconv" + "strings" "time" ) -const ( - blockSize = 512 +// BUG: Use of the Uid and Gid fields in Header could overflow on 32-bit +// architectures. If a large value is encountered when decoding, the result +// stored in Header will be the truncated version. - // Types - TypeReg = '0' // regular file - TypeRegA = '\x00' // regular file - TypeLink = '1' // hard link - TypeSymlink = '2' // symbolic link - TypeChar = '3' // character device node - TypeBlock = '4' // block device node - TypeDir = '5' // directory - TypeFifo = '6' // fifo node - TypeCont = '7' // reserved - TypeXHeader = 'x' // extended header - TypeXGlobalHeader = 'g' // global extended header - TypeGNULongName = 'L' // Next file has a long name - TypeGNULongLink = 'K' // Next file symlinks to a file w/ a long name - TypeGNUSparse = 'S' // sparse file +var ( + ErrHeader = errors.New("archive/tar: invalid tar header") + ErrWriteTooLong = errors.New("archive/tar: write too long") + ErrFieldTooLong = errors.New("archive/tar: header field too long") + ErrWriteAfterClose = errors.New("archive/tar: write after close") + errMissData = errors.New("archive/tar: sparse file references non-existent data") + errUnrefData = errors.New("archive/tar: sparse file contains unreferenced data") + errWriteHole = errors.New("archive/tar: write non-NUL byte in sparse hole") ) +type headerError []string + +func (he headerError) Error() string { + const prefix = "archive/tar: cannot encode header" + var ss []string + for _, s := range he { + if s != "" { + ss = append(ss, s) + } + } + if len(ss) == 0 { + return prefix + } + return fmt.Sprintf("%s: %v", prefix, strings.Join(ss, "; and ")) +} + +// Type flags for Header.Typeflag. +const ( + // Type '0' indicates a regular file. + TypeReg = '0' + TypeRegA = '\x00' // Deprecated: Use TypeReg instead. + + // Type '1' to '6' are header-only flags and may not have a data body. + TypeLink = '1' // Hard link + TypeSymlink = '2' // Symbolic link + TypeChar = '3' // Character device node + TypeBlock = '4' // Block device node + TypeDir = '5' // Directory + TypeFifo = '6' // FIFO node + + // Type '7' is reserved. + TypeCont = '7' + + // Type 'x' is used by the PAX format to store key-value records that + // are only relevant to the next file. + // This package transparently handles these types. + TypeXHeader = 'x' + + // Type 'g' is used by the PAX format to store key-value records that + // are relevant to all subsequent files. + // This package only supports parsing and composing such headers, + // but does not currently support persisting the global state across files. + TypeXGlobalHeader = 'g' + + // Type 'S' indicates a sparse file in the GNU format. + TypeGNUSparse = 'S' + + // Types 'L' and 'K' are used by the GNU format for a meta file + // used to store the path or link name for the next file. + // This package transparently handles these types. + TypeGNULongName = 'L' + TypeGNULongLink = 'K' +) + +// Keywords for PAX extended header records. +const ( + paxNone = "" // Indicates that no PAX key is suitable + paxPath = "path" + paxLinkpath = "linkpath" + paxSize = "size" + paxUid = "uid" + paxGid = "gid" + paxUname = "uname" + paxGname = "gname" + paxMtime = "mtime" + paxAtime = "atime" + paxCtime = "ctime" // Removed from later revision of PAX spec, but was valid + paxCharset = "charset" // Currently unused + paxComment = "comment" // Currently unused + + paxSchilyXattr = "SCHILY.xattr." + + // Keywords for GNU sparse files in a PAX extended header. + paxGNUSparse = "GNU.sparse." + paxGNUSparseNumBlocks = "GNU.sparse.numblocks" + paxGNUSparseOffset = "GNU.sparse.offset" + paxGNUSparseNumBytes = "GNU.sparse.numbytes" + paxGNUSparseMap = "GNU.sparse.map" + paxGNUSparseName = "GNU.sparse.name" + paxGNUSparseMajor = "GNU.sparse.major" + paxGNUSparseMinor = "GNU.sparse.minor" + paxGNUSparseSize = "GNU.sparse.size" + paxGNUSparseRealSize = "GNU.sparse.realsize" +) + +// basicKeys is a set of the PAX keys for which we have built-in support. +// This does not contain "charset" or "comment", which are both PAX-specific, +// so adding them as first-class features of Header is unlikely. +// Users can use the PAXRecords field to set it themselves. +var basicKeys = map[string]bool{ + paxPath: true, paxLinkpath: true, paxSize: true, paxUid: true, paxGid: true, + paxUname: true, paxGname: true, paxMtime: true, paxAtime: true, paxCtime: true, +} + // A Header represents a single header in a tar archive. // Some fields may not be populated. +// +// For forward compatibility, users that retrieve a Header from Reader.Next, +// mutate it in some ways, and then pass it back to Writer.WriteHeader +// should do so by creating a new Header and copying the fields +// that they are interested in preserving. type Header struct { - Name string // name of header file entry - Mode int64 // permission and mode bits - Uid int // user id of owner - Gid int // group id of owner - Size int64 // length in bytes - ModTime time.Time // modified time - Typeflag byte // type of header entry - Linkname string // target name of link - Uname string // user name of owner - Gname string // group name of owner - Devmajor int64 // major number of character or block device - Devminor int64 // minor number of character or block device - AccessTime time.Time // access time - ChangeTime time.Time // status change time - Xattrs map[string]string + // Typeflag is the type of header entry. + // The zero value is automatically promoted to either TypeReg or TypeDir + // depending on the presence of a trailing slash in Name. + Typeflag byte + + Name string // Name of file entry + Linkname string // Target name of link (valid for TypeLink or TypeSymlink) + + Size int64 // Logical file size in bytes + Mode int64 // Permission and mode bits + Uid int // User ID of owner + Gid int // Group ID of owner + Uname string // User name of owner + Gname string // Group name of owner + + // If the Format is unspecified, then Writer.WriteHeader rounds ModTime + // to the nearest second and ignores the AccessTime and ChangeTime fields. + // + // To use AccessTime or ChangeTime, specify the Format as PAX or GNU. + // To use sub-second resolution, specify the Format as PAX. + ModTime time.Time // Modification time + AccessTime time.Time // Access time (requires either PAX or GNU support) + ChangeTime time.Time // Change time (requires either PAX or GNU support) + + Devmajor int64 // Major device number (valid for TypeChar or TypeBlock) + Devminor int64 // Minor device number (valid for TypeChar or TypeBlock) + + // Xattrs stores extended attributes as PAX records under the + // "SCHILY.xattr." namespace. + // + // The following are semantically equivalent: + // h.Xattrs[key] = value + // h.PAXRecords["SCHILY.xattr."+key] = value + // + // When Writer.WriteHeader is called, the contents of Xattrs will take + // precedence over those in PAXRecords. + // + // Deprecated: Use PAXRecords instead. + Xattrs map[string]string + + // PAXRecords is a map of PAX extended header records. + // + // User-defined records should have keys of the following form: + // VENDOR.keyword + // Where VENDOR is some namespace in all uppercase, and keyword may + // not contain the '=' character (e.g., "GOLANG.pkg.version"). + // The key and value should be non-empty UTF-8 strings. + // + // When Writer.WriteHeader is called, PAX records derived from the + // other fields in Header take precedence over PAXRecords. + PAXRecords map[string]string + + // Format specifies the format of the tar header. + // + // This is set by Reader.Next as a best-effort guess at the format. + // Since the Reader liberally reads some non-compliant files, + // it is possible for this to be FormatUnknown. + // + // If the format is unspecified when Writer.WriteHeader is called, + // then it uses the first format (in the order of USTAR, PAX, GNU) + // capable of encoding this Header (see Format). + Format Format } -// File name constants from the tar spec. -const ( - fileNameSize = 100 // Maximum number of bytes in a standard tar name. - fileNamePrefixSize = 155 // Maximum number of ustar extension bytes. +// sparseEntry represents a Length-sized fragment at Offset in the file. +type sparseEntry struct{ Offset, Length int64 } + +func (s sparseEntry) endOffset() int64 { return s.Offset + s.Length } + +// A sparse file can be represented as either a sparseDatas or a sparseHoles. +// As long as the total size is known, they are equivalent and one can be +// converted to the other form and back. The various tar formats with sparse +// file support represent sparse files in the sparseDatas form. That is, they +// specify the fragments in the file that has data, and treat everything else as +// having zero bytes. As such, the encoding and decoding logic in this package +// deals with sparseDatas. +// +// However, the external API uses sparseHoles instead of sparseDatas because the +// zero value of sparseHoles logically represents a normal file (i.e., there are +// no holes in it). On the other hand, the zero value of sparseDatas implies +// that the file has no data in it, which is rather odd. +// +// As an example, if the underlying raw file contains the 10-byte data: +// var compactFile = "abcdefgh" +// +// And the sparse map has the following entries: +// var spd sparseDatas = []sparseEntry{ +// {Offset: 2, Length: 5}, // Data fragment for 2..6 +// {Offset: 18, Length: 3}, // Data fragment for 18..20 +// } +// var sph sparseHoles = []sparseEntry{ +// {Offset: 0, Length: 2}, // Hole fragment for 0..1 +// {Offset: 7, Length: 11}, // Hole fragment for 7..17 +// {Offset: 21, Length: 4}, // Hole fragment for 21..24 +// } +// +// Then the content of the resulting sparse file with a Header.Size of 25 is: +// var sparseFile = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 +type ( + sparseDatas []sparseEntry + sparseHoles []sparseEntry ) +// validateSparseEntries reports whether sp is a valid sparse map. +// It does not matter whether sp represents data fragments or hole fragments. +func validateSparseEntries(sp []sparseEntry, size int64) bool { + // Validate all sparse entries. These are the same checks as performed by + // the BSD tar utility. + if size < 0 { + return false + } + var pre sparseEntry + for _, cur := range sp { + switch { + case cur.Offset < 0 || cur.Length < 0: + return false // Negative values are never okay + case cur.Offset > math.MaxInt64-cur.Length: + return false // Integer overflow with large length + case cur.endOffset() > size: + return false // Region extends beyond the actual size + case pre.endOffset() > cur.Offset: + return false // Regions cannot overlap and must be in order + } + pre = cur + } + return true +} + +// alignSparseEntries mutates src and returns dst where each fragment's +// starting offset is aligned up to the nearest block edge, and each +// ending offset is aligned down to the nearest block edge. +// +// Even though the Go tar Reader and the BSD tar utility can handle entries +// with arbitrary offsets and lengths, the GNU tar utility can only handle +// offsets and lengths that are multiples of blockSize. +func alignSparseEntries(src []sparseEntry, size int64) []sparseEntry { + dst := src[:0] + for _, s := range src { + pos, end := s.Offset, s.endOffset() + pos += blockPadding(+pos) // Round-up to nearest blockSize + if end != size { + end -= blockPadding(-end) // Round-down to nearest blockSize + } + if pos < end { + dst = append(dst, sparseEntry{Offset: pos, Length: end - pos}) + } + } + return dst +} + +// invertSparseEntries converts a sparse map from one form to the other. +// If the input is sparseHoles, then it will output sparseDatas and vice-versa. +// The input must have been already validated. +// +// This function mutates src and returns a normalized map where: +// * adjacent fragments are coalesced together +// * only the last fragment may be empty +// * the endOffset of the last fragment is the total size +func invertSparseEntries(src []sparseEntry, size int64) []sparseEntry { + dst := src[:0] + var pre sparseEntry + for _, cur := range src { + if cur.Length == 0 { + continue // Skip empty fragments + } + pre.Length = cur.Offset - pre.Offset + if pre.Length > 0 { + dst = append(dst, pre) // Only add non-empty fragments + } + pre.Offset = cur.endOffset() + } + pre.Length = size - pre.Offset // Possibly the only empty fragment + return append(dst, pre) +} + +// fileState tracks the number of logical (includes sparse holes) and physical +// (actual in tar archive) bytes remaining for the current file. +// +// Invariant: LogicalRemaining >= PhysicalRemaining +type fileState interface { + LogicalRemaining() int64 + PhysicalRemaining() int64 +} + +// allowedFormats determines which formats can be used. +// The value returned is the logical OR of multiple possible formats. +// If the value is FormatUnknown, then the input Header cannot be encoded +// and an error is returned explaining why. +// +// As a by-product of checking the fields, this function returns paxHdrs, which +// contain all fields that could not be directly encoded. +// A value receiver ensures that this method does not mutate the source Header. +func (h Header) allowedFormats() (format Format, paxHdrs map[string]string, err error) { + format = FormatUSTAR | FormatPAX | FormatGNU + paxHdrs = make(map[string]string) + + var whyNoUSTAR, whyNoPAX, whyNoGNU string + var preferPAX bool // Prefer PAX over USTAR + verifyString := func(s string, size int, name, paxKey string) { + // NUL-terminator is optional for path and linkpath. + // Technically, it is required for uname and gname, + // but neither GNU nor BSD tar checks for it. + tooLong := len(s) > size + allowLongGNU := paxKey == paxPath || paxKey == paxLinkpath + if hasNUL(s) || (tooLong && !allowLongGNU) { + whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%q", name, s) + format.mustNotBe(FormatGNU) + } + if !isASCII(s) || tooLong { + canSplitUSTAR := paxKey == paxPath + if _, _, ok := splitUSTARPath(s); !canSplitUSTAR || !ok { + whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%q", name, s) + format.mustNotBe(FormatUSTAR) + } + if paxKey == paxNone { + whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%q", name, s) + format.mustNotBe(FormatPAX) + } else { + paxHdrs[paxKey] = s + } + } + if v, ok := h.PAXRecords[paxKey]; ok && v == s { + paxHdrs[paxKey] = v + } + } + verifyNumeric := func(n int64, size int, name, paxKey string) { + if !fitsInBase256(size, n) { + whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%d", name, n) + format.mustNotBe(FormatGNU) + } + if !fitsInOctal(size, n) { + whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%d", name, n) + format.mustNotBe(FormatUSTAR) + if paxKey == paxNone { + whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%d", name, n) + format.mustNotBe(FormatPAX) + } else { + paxHdrs[paxKey] = strconv.FormatInt(n, 10) + } + } + if v, ok := h.PAXRecords[paxKey]; ok && v == strconv.FormatInt(n, 10) { + paxHdrs[paxKey] = v + } + } + verifyTime := func(ts time.Time, size int, name, paxKey string) { + if ts.IsZero() { + return // Always okay + } + if !fitsInBase256(size, ts.Unix()) { + whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%v", name, ts) + format.mustNotBe(FormatGNU) + } + isMtime := paxKey == paxMtime + fitsOctal := fitsInOctal(size, ts.Unix()) + if (isMtime && !fitsOctal) || !isMtime { + whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%v", name, ts) + format.mustNotBe(FormatUSTAR) + } + needsNano := ts.Nanosecond() != 0 + if !isMtime || !fitsOctal || needsNano { + preferPAX = true // USTAR may truncate sub-second measurements + if paxKey == paxNone { + whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%v", name, ts) + format.mustNotBe(FormatPAX) + } else { + paxHdrs[paxKey] = formatPAXTime(ts) + } + } + if v, ok := h.PAXRecords[paxKey]; ok && v == formatPAXTime(ts) { + paxHdrs[paxKey] = v + } + } + + // Check basic fields. + var blk block + v7 := blk.V7() + ustar := blk.USTAR() + gnu := blk.GNU() + verifyString(h.Name, len(v7.Name()), "Name", paxPath) + verifyString(h.Linkname, len(v7.LinkName()), "Linkname", paxLinkpath) + verifyString(h.Uname, len(ustar.UserName()), "Uname", paxUname) + verifyString(h.Gname, len(ustar.GroupName()), "Gname", paxGname) + verifyNumeric(h.Mode, len(v7.Mode()), "Mode", paxNone) + verifyNumeric(int64(h.Uid), len(v7.UID()), "Uid", paxUid) + verifyNumeric(int64(h.Gid), len(v7.GID()), "Gid", paxGid) + verifyNumeric(h.Size, len(v7.Size()), "Size", paxSize) + verifyNumeric(h.Devmajor, len(ustar.DevMajor()), "Devmajor", paxNone) + verifyNumeric(h.Devminor, len(ustar.DevMinor()), "Devminor", paxNone) + verifyTime(h.ModTime, len(v7.ModTime()), "ModTime", paxMtime) + verifyTime(h.AccessTime, len(gnu.AccessTime()), "AccessTime", paxAtime) + verifyTime(h.ChangeTime, len(gnu.ChangeTime()), "ChangeTime", paxCtime) + + // Check for header-only types. + var whyOnlyPAX, whyOnlyGNU string + switch h.Typeflag { + case TypeReg, TypeChar, TypeBlock, TypeFifo, TypeGNUSparse: + // Exclude TypeLink and TypeSymlink, since they may reference directories. + if strings.HasSuffix(h.Name, "/") { + return FormatUnknown, nil, headerError{"filename may not have trailing slash"} + } + case TypeXHeader, TypeGNULongName, TypeGNULongLink: + return FormatUnknown, nil, headerError{"cannot manually encode TypeXHeader, TypeGNULongName, or TypeGNULongLink headers"} + case TypeXGlobalHeader: + h2 := Header{Name: h.Name, Typeflag: h.Typeflag, Xattrs: h.Xattrs, PAXRecords: h.PAXRecords, Format: h.Format} + if !reflect.DeepEqual(h, h2) { + return FormatUnknown, nil, headerError{"only PAXRecords should be set for TypeXGlobalHeader"} + } + whyOnlyPAX = "only PAX supports TypeXGlobalHeader" + format.mayOnlyBe(FormatPAX) + } + if !isHeaderOnlyType(h.Typeflag) && h.Size < 0 { + return FormatUnknown, nil, headerError{"negative size on header-only type"} + } + + // Check PAX records. + if len(h.Xattrs) > 0 { + for k, v := range h.Xattrs { + paxHdrs[paxSchilyXattr+k] = v + } + whyOnlyPAX = "only PAX supports Xattrs" + format.mayOnlyBe(FormatPAX) + } + if len(h.PAXRecords) > 0 { + for k, v := range h.PAXRecords { + switch _, exists := paxHdrs[k]; { + case exists: + continue // Do not overwrite existing records + case h.Typeflag == TypeXGlobalHeader: + paxHdrs[k] = v // Copy all records + case !basicKeys[k] && !strings.HasPrefix(k, paxGNUSparse): + paxHdrs[k] = v // Ignore local records that may conflict + } + } + whyOnlyPAX = "only PAX supports PAXRecords" + format.mayOnlyBe(FormatPAX) + } + for k, v := range paxHdrs { + if !validPAXRecord(k, v) { + return FormatUnknown, nil, headerError{fmt.Sprintf("invalid PAX record: %q", k+" = "+v)} + } + } + + // TODO(dsnet): Re-enable this when adding sparse support. + // See https://golang.org/issue/22735 + /* + // Check sparse files. + if len(h.SparseHoles) > 0 || h.Typeflag == TypeGNUSparse { + if isHeaderOnlyType(h.Typeflag) { + return FormatUnknown, nil, headerError{"header-only type cannot be sparse"} + } + if !validateSparseEntries(h.SparseHoles, h.Size) { + return FormatUnknown, nil, headerError{"invalid sparse holes"} + } + if h.Typeflag == TypeGNUSparse { + whyOnlyGNU = "only GNU supports TypeGNUSparse" + format.mayOnlyBe(FormatGNU) + } else { + whyNoGNU = "GNU supports sparse files only with TypeGNUSparse" + format.mustNotBe(FormatGNU) + } + whyNoUSTAR = "USTAR does not support sparse files" + format.mustNotBe(FormatUSTAR) + } + */ + + // Check desired format. + if wantFormat := h.Format; wantFormat != FormatUnknown { + if wantFormat.has(FormatPAX) && !preferPAX { + wantFormat.mayBe(FormatUSTAR) // PAX implies USTAR allowed too + } + format.mayOnlyBe(wantFormat) // Set union of formats allowed and format wanted + } + if format == FormatUnknown { + switch h.Format { + case FormatUSTAR: + err = headerError{"Format specifies USTAR", whyNoUSTAR, whyOnlyPAX, whyOnlyGNU} + case FormatPAX: + err = headerError{"Format specifies PAX", whyNoPAX, whyOnlyGNU} + case FormatGNU: + err = headerError{"Format specifies GNU", whyNoGNU, whyOnlyPAX} + default: + err = headerError{whyNoUSTAR, whyNoPAX, whyNoGNU, whyOnlyPAX, whyOnlyGNU} + } + } + return format, paxHdrs, err +} + // FileInfo returns an os.FileInfo for the Header. func (h *Header) FileInfo() os.FileInfo { return headerFileInfo{h} @@ -97,63 +555,43 @@ func (fi headerFileInfo) Mode() (mode os.FileMode) { // Set setuid, setgid and sticky bits. if fi.h.Mode&c_ISUID != 0 { - // setuid mode |= os.ModeSetuid } if fi.h.Mode&c_ISGID != 0 { - // setgid mode |= os.ModeSetgid } if fi.h.Mode&c_ISVTX != 0 { - // sticky mode |= os.ModeSticky } - // Set file mode bits. - // clear perm, setuid, setgid and sticky bits. - m := os.FileMode(fi.h.Mode) &^ 07777 - if m == c_ISDIR { - // directory + // Set file mode bits; clear perm, setuid, setgid, and sticky bits. + switch m := os.FileMode(fi.h.Mode) &^ 07777; m { + case c_ISDIR: mode |= os.ModeDir - } - if m == c_ISFIFO { - // named pipe (FIFO) + case c_ISFIFO: mode |= os.ModeNamedPipe - } - if m == c_ISLNK { - // symbolic link + case c_ISLNK: mode |= os.ModeSymlink - } - if m == c_ISBLK { - // device file + case c_ISBLK: mode |= os.ModeDevice - } - if m == c_ISCHR { - // Unix character device + case c_ISCHR: mode |= os.ModeDevice mode |= os.ModeCharDevice - } - if m == c_ISSOCK { - // Unix domain socket + case c_ISSOCK: mode |= os.ModeSocket } switch fi.h.Typeflag { case TypeSymlink: - // symbolic link mode |= os.ModeSymlink case TypeChar: - // character device node mode |= os.ModeDevice mode |= os.ModeCharDevice case TypeBlock: - // block device node mode |= os.ModeDevice case TypeDir: - // directory mode |= os.ModeDir case TypeFifo: - // fifo node mode |= os.ModeNamedPipe } @@ -163,11 +601,15 @@ func (fi headerFileInfo) Mode() (mode os.FileMode) { // sysStat, if non-nil, populates h from system-dependent fields of fi. var sysStat func(fi os.FileInfo, h *Header) error -// Mode constants from the tar spec. const ( - c_ISUID = 04000 // Set uid - c_ISGID = 02000 // Set gid - c_ISVTX = 01000 // Save text (sticky bit) + // Mode constants from the USTAR spec: + // See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06 + c_ISUID = 04000 // Set uid + c_ISGID = 02000 // Set gid + c_ISVTX = 01000 // Save text (sticky bit) + + // Common Unix mode constants; these are not defined in any common tar standard. + // Header.FileInfo understands these, but FileInfoHeader will never produce these. c_ISDIR = 040000 // Directory c_ISFIFO = 010000 // FIFO c_ISREG = 0100000 // Regular file @@ -177,33 +619,16 @@ const ( c_ISSOCK = 0140000 // Socket ) -// Keywords for the PAX Extended Header -const ( - paxAtime = "atime" - paxCharset = "charset" - paxComment = "comment" - paxCtime = "ctime" // please note that ctime is not a valid pax header. - paxGid = "gid" - paxGname = "gname" - paxLinkpath = "linkpath" - paxMtime = "mtime" - paxPath = "path" - paxSize = "size" - paxUid = "uid" - paxUname = "uname" - paxXattr = "SCHILY.xattr." - paxNone = "" -) - // FileInfoHeader creates a partially-populated Header from fi. // If fi describes a symlink, FileInfoHeader records link as the link target. // If fi describes a directory, a slash is appended to the name. -// Because os.FileInfo's Name method returns only the base name of -// the file it describes, it may be necessary to modify the Name field -// of the returned header to provide the full path name of the file. +// +// Since os.FileInfo's Name method only returns the base name of +// the file it describes, it may be necessary to modify Header.Name +// to provide the full path name of the file. func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { if fi == nil { - return nil, errors.New("tar: FileInfo is nil") + return nil, errors.New("archive/tar: FileInfo is nil") } fm := fi.Mode() h := &Header{ @@ -213,30 +638,24 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { } switch { case fm.IsRegular(): - h.Mode |= c_ISREG h.Typeflag = TypeReg h.Size = fi.Size() case fi.IsDir(): h.Typeflag = TypeDir - h.Mode |= c_ISDIR h.Name += "/" case fm&os.ModeSymlink != 0: h.Typeflag = TypeSymlink - h.Mode |= c_ISLNK h.Linkname = link case fm&os.ModeDevice != 0: if fm&os.ModeCharDevice != 0 { - h.Mode |= c_ISCHR h.Typeflag = TypeChar } else { - h.Mode |= c_ISBLK h.Typeflag = TypeBlock } case fm&os.ModeNamedPipe != 0: h.Typeflag = TypeFifo - h.Mode |= c_ISFIFO case fm&os.ModeSocket != 0: - h.Mode |= c_ISSOCK + return nil, fmt.Errorf("archive/tar: sockets not supported") default: return nil, fmt.Errorf("archive/tar: unknown file mode %v", fm) } @@ -272,6 +691,12 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { h.Size = 0 h.Linkname = sys.Linkname } + if sys.PAXRecords != nil { + h.PAXRecords = make(map[string]string) + for k, v := range sys.PAXRecords { + h.PAXRecords[k] = v + } + } } if sysStat != nil { return h, sysStat(fi, h) @@ -279,55 +704,6 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { return h, nil } -var zeroBlock = make([]byte, blockSize) - -// POSIX specifies a sum of the unsigned byte values, but the Sun tar uses signed byte values. -// We compute and return both. -func checksum(header []byte) (unsigned int64, signed int64) { - for i := 0; i < len(header); i++ { - if i == 148 { - // The chksum field (header[148:156]) is special: it should be treated as space bytes. - unsigned += ' ' * 8 - signed += ' ' * 8 - i += 7 - continue - } - unsigned += int64(header[i]) - signed += int64(int8(header[i])) - } - return -} - -type slicer []byte - -func (sp *slicer) next(n int) (b []byte) { - s := *sp - b, *sp = s[0:n], s[n:] - return -} - -func isASCII(s string) bool { - for _, c := range s { - if c >= 0x80 { - return false - } - } - return true -} - -func toASCII(s string) string { - if isASCII(s) { - return s - } - var buf bytes.Buffer - for _, c := range s { - if c < 0x80 { - buf.WriteByte(byte(c)) - } - } - return buf.String() -} - // isHeaderOnlyType checks if the given type flag is of the type that has no // data section even if a size is specified. func isHeaderOnlyType(flag byte) bool { @@ -338,3 +714,10 @@ func isHeaderOnlyType(flag byte) bool { return false } } + +func min(a, b int64) int64 { + if a < b { + return a + } + return b +} diff --git a/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/format.go b/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/format.go new file mode 100644 index 0000000000..1f89d0c59a --- /dev/null +++ b/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/format.go @@ -0,0 +1,303 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import "strings" + +// Format represents the tar archive format. +// +// The original tar format was introduced in Unix V7. +// Since then, there have been multiple competing formats attempting to +// standardize or extend the V7 format to overcome its limitations. +// The most common formats are the USTAR, PAX, and GNU formats, +// each with their own advantages and limitations. +// +// The following table captures the capabilities of each format: +// +// | USTAR | PAX | GNU +// ------------------+--------+-----------+---------- +// Name | 256B | unlimited | unlimited +// Linkname | 100B | unlimited | unlimited +// Size | uint33 | unlimited | uint89 +// Mode | uint21 | uint21 | uint57 +// Uid/Gid | uint21 | unlimited | uint57 +// Uname/Gname | 32B | unlimited | 32B +// ModTime | uint33 | unlimited | int89 +// AccessTime | n/a | unlimited | int89 +// ChangeTime | n/a | unlimited | int89 +// Devmajor/Devminor | uint21 | uint21 | uint57 +// ------------------+--------+-----------+---------- +// string encoding | ASCII | UTF-8 | binary +// sub-second times | no | yes | no +// sparse files | no | yes | yes +// +// The table's upper portion shows the Header fields, where each format reports +// the maximum number of bytes allowed for each string field and +// the integer type used to store each numeric field +// (where timestamps are stored as the number of seconds since the Unix epoch). +// +// The table's lower portion shows specialized features of each format, +// such as supported string encodings, support for sub-second timestamps, +// or support for sparse files. +// +// The Writer currently provides no support for sparse files. +type Format int + +// Constants to identify various tar formats. +const ( + // Deliberately hide the meaning of constants from public API. + _ Format = (1 << iota) / 4 // Sequence of 0, 0, 1, 2, 4, 8, etc... + + // FormatUnknown indicates that the format is unknown. + FormatUnknown + + // The format of the original Unix V7 tar tool prior to standardization. + formatV7 + + // FormatUSTAR represents the USTAR header format defined in POSIX.1-1988. + // + // While this format is compatible with most tar readers, + // the format has several limitations making it unsuitable for some usages. + // Most notably, it cannot support sparse files, files larger than 8GiB, + // filenames larger than 256 characters, and non-ASCII filenames. + // + // Reference: + // http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06 + FormatUSTAR + + // FormatPAX represents the PAX header format defined in POSIX.1-2001. + // + // PAX extends USTAR by writing a special file with Typeflag TypeXHeader + // preceding the original header. This file contains a set of key-value + // records, which are used to overcome USTAR's shortcomings, in addition to + // providing the ability to have sub-second resolution for timestamps. + // + // Some newer formats add their own extensions to PAX by defining their + // own keys and assigning certain semantic meaning to the associated values. + // For example, sparse file support in PAX is implemented using keys + // defined by the GNU manual (e.g., "GNU.sparse.map"). + // + // Reference: + // http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html + FormatPAX + + // FormatGNU represents the GNU header format. + // + // The GNU header format is older than the USTAR and PAX standards and + // is not compatible with them. The GNU format supports + // arbitrary file sizes, filenames of arbitrary encoding and length, + // sparse files, and other features. + // + // It is recommended that PAX be chosen over GNU unless the target + // application can only parse GNU formatted archives. + // + // Reference: + // https://www.gnu.org/software/tar/manual/html_node/Standard.html + FormatGNU + + // Schily's tar format, which is incompatible with USTAR. + // This does not cover STAR extensions to the PAX format; these fall under + // the PAX format. + formatSTAR + + formatMax +) + +func (f Format) has(f2 Format) bool { return f&f2 != 0 } +func (f *Format) mayBe(f2 Format) { *f |= f2 } +func (f *Format) mayOnlyBe(f2 Format) { *f &= f2 } +func (f *Format) mustNotBe(f2 Format) { *f &^= f2 } + +var formatNames = map[Format]string{ + formatV7: "V7", FormatUSTAR: "USTAR", FormatPAX: "PAX", FormatGNU: "GNU", formatSTAR: "STAR", +} + +func (f Format) String() string { + var ss []string + for f2 := Format(1); f2 < formatMax; f2 <<= 1 { + if f.has(f2) { + ss = append(ss, formatNames[f2]) + } + } + switch len(ss) { + case 0: + return "" + case 1: + return ss[0] + default: + return "(" + strings.Join(ss, " | ") + ")" + } +} + +// Magics used to identify various formats. +const ( + magicGNU, versionGNU = "ustar ", " \x00" + magicUSTAR, versionUSTAR = "ustar\x00", "00" + trailerSTAR = "tar\x00" +) + +// Size constants from various tar specifications. +const ( + blockSize = 512 // Size of each block in a tar stream + nameSize = 100 // Max length of the name field in USTAR format + prefixSize = 155 // Max length of the prefix field in USTAR format +) + +// blockPadding computes the number of bytes needed to pad offset up to the +// nearest block edge where 0 <= n < blockSize. +func blockPadding(offset int64) (n int64) { + return -offset & (blockSize - 1) +} + +var zeroBlock block + +type block [blockSize]byte + +// Convert block to any number of formats. +func (b *block) V7() *headerV7 { return (*headerV7)(b) } +func (b *block) GNU() *headerGNU { return (*headerGNU)(b) } +func (b *block) STAR() *headerSTAR { return (*headerSTAR)(b) } +func (b *block) USTAR() *headerUSTAR { return (*headerUSTAR)(b) } +func (b *block) Sparse() sparseArray { return (sparseArray)(b[:]) } + +// GetFormat checks that the block is a valid tar header based on the checksum. +// It then attempts to guess the specific format based on magic values. +// If the checksum fails, then FormatUnknown is returned. +func (b *block) GetFormat() Format { + // Verify checksum. + var p parser + value := p.parseOctal(b.V7().Chksum()) + chksum1, chksum2 := b.ComputeChecksum() + if p.err != nil || (value != chksum1 && value != chksum2) { + return FormatUnknown + } + + // Guess the magic values. + magic := string(b.USTAR().Magic()) + version := string(b.USTAR().Version()) + trailer := string(b.STAR().Trailer()) + switch { + case magic == magicUSTAR && trailer == trailerSTAR: + return formatSTAR + case magic == magicUSTAR: + return FormatUSTAR | FormatPAX + case magic == magicGNU && version == versionGNU: + return FormatGNU + default: + return formatV7 + } +} + +// SetFormat writes the magic values necessary for specified format +// and then updates the checksum accordingly. +func (b *block) SetFormat(format Format) { + // Set the magic values. + switch { + case format.has(formatV7): + // Do nothing. + case format.has(FormatGNU): + copy(b.GNU().Magic(), magicGNU) + copy(b.GNU().Version(), versionGNU) + case format.has(formatSTAR): + copy(b.STAR().Magic(), magicUSTAR) + copy(b.STAR().Version(), versionUSTAR) + copy(b.STAR().Trailer(), trailerSTAR) + case format.has(FormatUSTAR | FormatPAX): + copy(b.USTAR().Magic(), magicUSTAR) + copy(b.USTAR().Version(), versionUSTAR) + default: + panic("invalid format") + } + + // Update checksum. + // This field is special in that it is terminated by a NULL then space. + var f formatter + field := b.V7().Chksum() + chksum, _ := b.ComputeChecksum() // Possible values are 256..128776 + f.formatOctal(field[:7], chksum) // Never fails since 128776 < 262143 + field[7] = ' ' +} + +// ComputeChecksum computes the checksum for the header block. +// POSIX specifies a sum of the unsigned byte values, but the Sun tar used +// signed byte values. +// We compute and return both. +func (b *block) ComputeChecksum() (unsigned, signed int64) { + for i, c := range b { + if 148 <= i && i < 156 { + c = ' ' // Treat the checksum field itself as all spaces. + } + unsigned += int64(c) + signed += int64(int8(c)) + } + return unsigned, signed +} + +// Reset clears the block with all zeros. +func (b *block) Reset() { + *b = block{} +} + +type headerV7 [blockSize]byte + +func (h *headerV7) Name() []byte { return h[000:][:100] } +func (h *headerV7) Mode() []byte { return h[100:][:8] } +func (h *headerV7) UID() []byte { return h[108:][:8] } +func (h *headerV7) GID() []byte { return h[116:][:8] } +func (h *headerV7) Size() []byte { return h[124:][:12] } +func (h *headerV7) ModTime() []byte { return h[136:][:12] } +func (h *headerV7) Chksum() []byte { return h[148:][:8] } +func (h *headerV7) TypeFlag() []byte { return h[156:][:1] } +func (h *headerV7) LinkName() []byte { return h[157:][:100] } + +type headerGNU [blockSize]byte + +func (h *headerGNU) V7() *headerV7 { return (*headerV7)(h) } +func (h *headerGNU) Magic() []byte { return h[257:][:6] } +func (h *headerGNU) Version() []byte { return h[263:][:2] } +func (h *headerGNU) UserName() []byte { return h[265:][:32] } +func (h *headerGNU) GroupName() []byte { return h[297:][:32] } +func (h *headerGNU) DevMajor() []byte { return h[329:][:8] } +func (h *headerGNU) DevMinor() []byte { return h[337:][:8] } +func (h *headerGNU) AccessTime() []byte { return h[345:][:12] } +func (h *headerGNU) ChangeTime() []byte { return h[357:][:12] } +func (h *headerGNU) Sparse() sparseArray { return (sparseArray)(h[386:][:24*4+1]) } +func (h *headerGNU) RealSize() []byte { return h[483:][:12] } + +type headerSTAR [blockSize]byte + +func (h *headerSTAR) V7() *headerV7 { return (*headerV7)(h) } +func (h *headerSTAR) Magic() []byte { return h[257:][:6] } +func (h *headerSTAR) Version() []byte { return h[263:][:2] } +func (h *headerSTAR) UserName() []byte { return h[265:][:32] } +func (h *headerSTAR) GroupName() []byte { return h[297:][:32] } +func (h *headerSTAR) DevMajor() []byte { return h[329:][:8] } +func (h *headerSTAR) DevMinor() []byte { return h[337:][:8] } +func (h *headerSTAR) Prefix() []byte { return h[345:][:131] } +func (h *headerSTAR) AccessTime() []byte { return h[476:][:12] } +func (h *headerSTAR) ChangeTime() []byte { return h[488:][:12] } +func (h *headerSTAR) Trailer() []byte { return h[508:][:4] } + +type headerUSTAR [blockSize]byte + +func (h *headerUSTAR) V7() *headerV7 { return (*headerV7)(h) } +func (h *headerUSTAR) Magic() []byte { return h[257:][:6] } +func (h *headerUSTAR) Version() []byte { return h[263:][:2] } +func (h *headerUSTAR) UserName() []byte { return h[265:][:32] } +func (h *headerUSTAR) GroupName() []byte { return h[297:][:32] } +func (h *headerUSTAR) DevMajor() []byte { return h[329:][:8] } +func (h *headerUSTAR) DevMinor() []byte { return h[337:][:8] } +func (h *headerUSTAR) Prefix() []byte { return h[345:][:155] } + +type sparseArray []byte + +func (s sparseArray) Entry(i int) sparseElem { return (sparseElem)(s[i*24:]) } +func (s sparseArray) IsExtended() []byte { return s[24*s.MaxEntries():][:1] } +func (s sparseArray) MaxEntries() int { return len(s) / 24 } + +type sparseElem []byte + +func (s sparseElem) Offset() []byte { return s[00:][:12] } +func (s sparseElem) Length() []byte { return s[12:][:12] } diff --git a/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/reader.go b/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/reader.go index adf32122e1..ea64a38207 100644 --- a/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/reader.go +++ b/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/reader.go @@ -4,44 +4,38 @@ package tar -// TODO(dsymonds): -// - pax extensions - import ( "bytes" - "errors" "io" "io/ioutil" - "math" - "os" "strconv" "strings" "time" ) -var ( - ErrHeader = errors.New("archive/tar: invalid tar header") -) - -const maxNanoSecondIntSize = 9 - -// A Reader provides sequential access to the contents of a tar archive. -// A tar archive consists of a sequence of files. -// The Next method advances to the next file in the archive (including the first), -// and then it can be treated as an io.Reader to access the file's data. +// Reader provides sequential access to the contents of a tar archive. +// Reader.Next advances to the next file in the archive (including the first), +// and then Reader can be treated as an io.Reader to access the file's data. type Reader struct { - r io.Reader - err error - pad int64 // amount of padding (ignored) after current file entry - curr numBytesReader // reader for current file entry - hdrBuff [blockSize]byte // buffer to use in readHeader + r io.Reader + pad int64 // Amount of padding (ignored) after current file entry + curr fileReader // Reader for current file entry + blk block // Buffer to use as temporary local storage + + // err is a persistent error. + // It is only the responsibility of every exported method of Reader to + // ensure that this error is sticky. + err error RawAccounting bool // Whether to enable the access needed to reassemble the tar from raw bytes. Some performance/memory hit for this. rawBytes *bytes.Buffer // last raw bits } -type parser struct { - err error // Last error seen +type fileReader interface { + io.Reader + fileState + + WriteTo(io.Writer) (int64, error) } // RawBytes accesses the raw bytes of the archive, apart from the file payload itself. @@ -57,87 +51,35 @@ func (tr *Reader) RawBytes() []byte { if tr.rawBytes == nil { tr.rawBytes = bytes.NewBuffer(nil) } - // if we've read them, then flush them. - defer tr.rawBytes.Reset() + defer tr.rawBytes.Reset() // if we've read them, then flush them. + return tr.rawBytes.Bytes() + } -// A numBytesReader is an io.Reader with a numBytes method, returning the number -// of bytes remaining in the underlying encoded data. -type numBytesReader interface { - io.Reader - numBytes() int64 -} - -// A regFileReader is a numBytesReader for reading file data from a tar archive. -type regFileReader struct { - r io.Reader // underlying reader - nb int64 // number of unread bytes for current file entry -} - -// A sparseFileReader is a numBytesReader for reading sparse file data from a -// tar archive. -type sparseFileReader struct { - rfr numBytesReader // Reads the sparse-encoded file data - sp []sparseEntry // The sparse map for the file - pos int64 // Keeps track of file position - total int64 // Total size of the file -} - -// A sparseEntry holds a single entry in a sparse file's sparse map. -// -// Sparse files are represented using a series of sparseEntrys. -// Despite the name, a sparseEntry represents an actual data fragment that -// references data found in the underlying archive stream. All regions not -// covered by a sparseEntry are logically filled with zeros. -// -// For example, if the underlying raw file contains the 10-byte data: -// var compactData = "abcdefgh" -// -// And the sparse map has the following entries: -// var sp = []sparseEntry{ -// {offset: 2, numBytes: 5} // Data fragment for [2..7] -// {offset: 18, numBytes: 3} // Data fragment for [18..21] -// } -// -// Then the content of the resulting sparse file with a "real" size of 25 is: -// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 -type sparseEntry struct { - offset int64 // Starting position of the fragment - numBytes int64 // Length of the fragment -} - -// Keywords for GNU sparse files in a PAX extended header -const ( - paxGNUSparseNumBlocks = "GNU.sparse.numblocks" - paxGNUSparseOffset = "GNU.sparse.offset" - paxGNUSparseNumBytes = "GNU.sparse.numbytes" - paxGNUSparseMap = "GNU.sparse.map" - paxGNUSparseName = "GNU.sparse.name" - paxGNUSparseMajor = "GNU.sparse.major" - paxGNUSparseMinor = "GNU.sparse.minor" - paxGNUSparseSize = "GNU.sparse.size" - paxGNUSparseRealSize = "GNU.sparse.realsize" -) - -// Keywords for old GNU sparse headers -const ( - oldGNUSparseMainHeaderOffset = 386 - oldGNUSparseMainHeaderIsExtendedOffset = 482 - oldGNUSparseMainHeaderNumEntries = 4 - oldGNUSparseExtendedHeaderIsExtendedOffset = 504 - oldGNUSparseExtendedHeaderNumEntries = 21 - oldGNUSparseOffsetSize = 12 - oldGNUSparseNumBytesSize = 12 -) - // NewReader creates a new Reader reading from r. -func NewReader(r io.Reader) *Reader { return &Reader{r: r} } +func NewReader(r io.Reader) *Reader { + return &Reader{r: r, curr: ®FileReader{r, 0}} +} // Next advances to the next entry in the tar archive. +// The Header.Size determines how many bytes can be read for the next file. +// Any remaining data in the current file is automatically discarded. // // io.EOF is returned at the end of the input. func (tr *Reader) Next() (*Header, error) { + if tr.err != nil { + return nil, tr.err + } + hdr, err := tr.next() + tr.err = err + return hdr, err +} + +func (tr *Reader) next() (*Header, error) { + var paxHdrs map[string]string + var gnuLongName, gnuLongLink string + if tr.RawAccounting { if tr.rawBytes == nil { tr.rawBytes = bytes.NewBuffer(nil) @@ -146,250 +88,247 @@ func (tr *Reader) Next() (*Header, error) { } } - if tr.err != nil { - return nil, tr.err - } - - var hdr *Header - var extHdrs map[string]string - // Externally, Next iterates through the tar archive as if it is a series of // files. Internally, the tar format often uses fake "files" to add meta // data that describes the next file. These meta data "files" should not // normally be visible to the outside. As such, this loop iterates through // one or more "header files" until it finds a "normal file". -loop: + format := FormatUSTAR | FormatPAX | FormatGNU for { - tr.err = tr.skipUnread() - if tr.err != nil { - return nil, tr.err + // Discard the remainder of the file and any padding. + if err := discard(tr, tr.curr.PhysicalRemaining()); err != nil { + return nil, err } + n, err := tryReadFull(tr.r, tr.blk[:tr.pad]) + if err != nil { + return nil, err + } + if tr.RawAccounting { + tr.rawBytes.Write(tr.blk[:n]) + } + tr.pad = 0 - hdr = tr.readHeader() - if tr.err != nil { - return nil, tr.err + hdr, rawHdr, err := tr.readHeader() + if err != nil { + return nil, err } + if err := tr.handleRegularFile(hdr); err != nil { + return nil, err + } + format.mayOnlyBe(hdr.Format) + // Check for PAX/GNU special headers and files. switch hdr.Typeflag { - case TypeXHeader: - extHdrs, tr.err = parsePAX(tr) - if tr.err != nil { - return nil, tr.err + case TypeXHeader, TypeXGlobalHeader: + format.mayOnlyBe(FormatPAX) + paxHdrs, err = parsePAX(tr) + if err != nil { + return nil, err } - continue loop // This is a meta header affecting the next header + if hdr.Typeflag == TypeXGlobalHeader { + mergePAX(hdr, paxHdrs) + return &Header{ + Name: hdr.Name, + Typeflag: hdr.Typeflag, + Xattrs: hdr.Xattrs, + PAXRecords: hdr.PAXRecords, + Format: format, + }, nil + } + continue // This is a meta header affecting the next header case TypeGNULongName, TypeGNULongLink: - var realname []byte - realname, tr.err = ioutil.ReadAll(tr) - if tr.err != nil { - return nil, tr.err + format.mayOnlyBe(FormatGNU) + realname, err := ioutil.ReadAll(tr) + if err != nil { + return nil, err } if tr.RawAccounting { - if _, tr.err = tr.rawBytes.Write(realname); tr.err != nil { - return nil, tr.err - } + tr.rawBytes.Write(realname) } - // Convert GNU extensions to use PAX headers. - if extHdrs == nil { - extHdrs = make(map[string]string) - } var p parser switch hdr.Typeflag { case TypeGNULongName: - extHdrs[paxPath] = p.parseString(realname) + gnuLongName = p.parseString(realname) case TypeGNULongLink: - extHdrs[paxLinkpath] = p.parseString(realname) + gnuLongLink = p.parseString(realname) } - if p.err != nil { - tr.err = p.err - return nil, tr.err - } - continue loop // This is a meta header affecting the next header + continue // This is a meta header affecting the next header default: - mergePAX(hdr, extHdrs) + // The old GNU sparse format is handled here since it is technically + // just a regular file with additional attributes. - // Check for a PAX format sparse file - sp, err := tr.checkForGNUSparsePAXHeaders(hdr, extHdrs) - if err != nil { - tr.err = err + if err := mergePAX(hdr, paxHdrs); err != nil { return nil, err } - if sp != nil { - // Current file is a PAX format GNU sparse file. - // Set the current file reader to a sparse file reader. - tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) - if tr.err != nil { - return nil, tr.err + if gnuLongName != "" { + hdr.Name = gnuLongName + } + if gnuLongLink != "" { + hdr.Linkname = gnuLongLink + } + if hdr.Typeflag == TypeRegA { + if strings.HasSuffix(hdr.Name, "/") { + hdr.Typeflag = TypeDir // Legacy archives use trailing slash for directories + } else { + hdr.Typeflag = TypeReg } } - break loop // This is a file, so stop + + // The extended headers may have updated the size. + // Thus, setup the regFileReader again after merging PAX headers. + if err := tr.handleRegularFile(hdr); err != nil { + return nil, err + } + + // Sparse formats rely on being able to read from the logical data + // section; there must be a preceding call to handleRegularFile. + if err := tr.handleSparseFile(hdr, rawHdr); err != nil { + return nil, err + } + + // Set the final guess at the format. + if format.has(FormatUSTAR) && format.has(FormatPAX) { + format.mayOnlyBe(FormatUSTAR) + } + hdr.Format = format + return hdr, nil // This is a file, so stop } } - return hdr, nil } -// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then -// this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to -// be treated as a regular file. -func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) { - var sparseFormat string - - // Check for sparse format indicators - major, majorOk := headers[paxGNUSparseMajor] - minor, minorOk := headers[paxGNUSparseMinor] - sparseName, sparseNameOk := headers[paxGNUSparseName] - _, sparseMapOk := headers[paxGNUSparseMap] - sparseSize, sparseSizeOk := headers[paxGNUSparseSize] - sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize] - - // Identify which, if any, sparse format applies from which PAX headers are set - if majorOk && minorOk { - sparseFormat = major + "." + minor - } else if sparseNameOk && sparseMapOk { - sparseFormat = "0.1" - } else if sparseSizeOk { - sparseFormat = "0.0" - } else { - // Not a PAX format GNU sparse file. - return nil, nil +// handleRegularFile sets up the current file reader and padding such that it +// can only read the following logical data section. It will properly handle +// special headers that contain no data section. +func (tr *Reader) handleRegularFile(hdr *Header) error { + nb := hdr.Size + if isHeaderOnlyType(hdr.Typeflag) { + nb = 0 + } + if nb < 0 { + return ErrHeader } - // Check for unknown sparse format - if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" { - return nil, nil - } + tr.pad = blockPadding(nb) + tr.curr = ®FileReader{r: tr.r, nb: nb} + return nil +} - // Update hdr from GNU sparse PAX headers - if sparseNameOk { - hdr.Name = sparseName - } - if sparseSizeOk { - realSize, err := strconv.ParseInt(sparseSize, 10, 0) - if err != nil { - return nil, ErrHeader - } - hdr.Size = realSize - } else if sparseRealSizeOk { - realSize, err := strconv.ParseInt(sparseRealSize, 10, 0) - if err != nil { - return nil, ErrHeader - } - hdr.Size = realSize - } - - // Set up the sparse map, according to the particular sparse format in use - var sp []sparseEntry +// handleSparseFile checks if the current file is a sparse format of any type +// and sets the curr reader appropriately. +func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block) error { + var spd sparseDatas var err error - switch sparseFormat { - case "0.0", "0.1": - sp, err = readGNUSparseMap0x1(headers) - case "1.0": - sp, err = readGNUSparseMap1x0(tr.curr) + if hdr.Typeflag == TypeGNUSparse { + spd, err = tr.readOldGNUSparseMap(hdr, rawHdr) + } else { + spd, err = tr.readGNUSparsePAXHeaders(hdr) } - return sp, err + + // If sp is non-nil, then this is a sparse file. + // Note that it is possible for len(sp) == 0. + if err == nil && spd != nil { + if isHeaderOnlyType(hdr.Typeflag) || !validateSparseEntries(spd, hdr.Size) { + return ErrHeader + } + sph := invertSparseEntries(spd, hdr.Size) + tr.curr = &sparseFileReader{tr.curr, sph, 0} + } + return err } -// mergePAX merges well known headers according to PAX standard. -// In general headers with the same name as those found -// in the header struct overwrite those found in the header -// struct with higher precision or longer values. Esp. useful -// for name and linkname fields. -func mergePAX(hdr *Header, headers map[string]string) error { - for k, v := range headers { +// readGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. +// If they are found, then this function reads the sparse map and returns it. +// This assumes that 0.0 headers have already been converted to 0.1 headers +// by the PAX header parsing logic. +func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header) (sparseDatas, error) { + // Identify the version of GNU headers. + var is1x0 bool + major, minor := hdr.PAXRecords[paxGNUSparseMajor], hdr.PAXRecords[paxGNUSparseMinor] + switch { + case major == "0" && (minor == "0" || minor == "1"): + is1x0 = false + case major == "1" && minor == "0": + is1x0 = true + case major != "" || minor != "": + return nil, nil // Unknown GNU sparse PAX version + case hdr.PAXRecords[paxGNUSparseMap] != "": + is1x0 = false // 0.0 and 0.1 did not have explicit version records, so guess + default: + return nil, nil // Not a PAX format GNU sparse file. + } + hdr.Format.mayOnlyBe(FormatPAX) + + // Update hdr from GNU sparse PAX headers. + if name := hdr.PAXRecords[paxGNUSparseName]; name != "" { + hdr.Name = name + } + size := hdr.PAXRecords[paxGNUSparseSize] + if size == "" { + size = hdr.PAXRecords[paxGNUSparseRealSize] + } + if size != "" { + n, err := strconv.ParseInt(size, 10, 64) + if err != nil { + return nil, ErrHeader + } + hdr.Size = n + } + + // Read the sparse map according to the appropriate format. + if is1x0 { + return readGNUSparseMap1x0(tr.curr) + } + return readGNUSparseMap0x1(hdr.PAXRecords) +} + +// mergePAX merges paxHdrs into hdr for all relevant fields of Header. +func mergePAX(hdr *Header, paxHdrs map[string]string) (err error) { + for k, v := range paxHdrs { + if v == "" { + continue // Keep the original USTAR value + } + var id64 int64 switch k { case paxPath: hdr.Name = v case paxLinkpath: hdr.Linkname = v - case paxGname: - hdr.Gname = v case paxUname: hdr.Uname = v + case paxGname: + hdr.Gname = v case paxUid: - uid, err := strconv.ParseInt(v, 10, 0) - if err != nil { - return err - } - hdr.Uid = int(uid) + id64, err = strconv.ParseInt(v, 10, 64) + hdr.Uid = int(id64) // Integer overflow possible case paxGid: - gid, err := strconv.ParseInt(v, 10, 0) - if err != nil { - return err - } - hdr.Gid = int(gid) + id64, err = strconv.ParseInt(v, 10, 64) + hdr.Gid = int(id64) // Integer overflow possible case paxAtime: - t, err := parsePAXTime(v) - if err != nil { - return err - } - hdr.AccessTime = t + hdr.AccessTime, err = parsePAXTime(v) case paxMtime: - t, err := parsePAXTime(v) - if err != nil { - return err - } - hdr.ModTime = t + hdr.ModTime, err = parsePAXTime(v) case paxCtime: - t, err := parsePAXTime(v) - if err != nil { - return err - } - hdr.ChangeTime = t + hdr.ChangeTime, err = parsePAXTime(v) case paxSize: - size, err := strconv.ParseInt(v, 10, 0) - if err != nil { - return err - } - hdr.Size = int64(size) + hdr.Size, err = strconv.ParseInt(v, 10, 64) default: - if strings.HasPrefix(k, paxXattr) { + if strings.HasPrefix(k, paxSchilyXattr) { if hdr.Xattrs == nil { hdr.Xattrs = make(map[string]string) } - hdr.Xattrs[k[len(paxXattr):]] = v + hdr.Xattrs[k[len(paxSchilyXattr):]] = v } } + if err != nil { + return ErrHeader + } } + hdr.PAXRecords = paxHdrs return nil } -// parsePAXTime takes a string of the form %d.%d as described in -// the PAX specification. -func parsePAXTime(t string) (time.Time, error) { - buf := []byte(t) - pos := bytes.IndexByte(buf, '.') - var seconds, nanoseconds int64 - var err error - if pos == -1 { - seconds, err = strconv.ParseInt(t, 10, 0) - if err != nil { - return time.Time{}, err - } - } else { - seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0) - if err != nil { - return time.Time{}, err - } - nano_buf := string(buf[pos+1:]) - // Pad as needed before converting to a decimal. - // For example .030 -> .030000000 -> 30000000 nanoseconds - if len(nano_buf) < maxNanoSecondIntSize { - // Right pad - nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf)) - } else if len(nano_buf) > maxNanoSecondIntSize { - // Right truncate - nano_buf = nano_buf[:maxNanoSecondIntSize] - } - nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0) - if err != nil { - return time.Time{}, err - } - } - ts := time.Unix(seconds, nanoseconds) - return ts, nil -} - // parsePAX parses PAX headers. // If an extended header (type 'x') is invalid, ErrHeader is returned func parsePAX(r io.Reader) (map[string]string, error) { @@ -406,12 +345,11 @@ func parsePAX(r io.Reader) (map[string]string, error) { sbuf := string(buf) // For GNU PAX sparse format 0.0 support. - // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers. - var sparseMap bytes.Buffer + // This function transforms the sparse format 0.0 headers into format 0.1 + // headers since 0.0 headers were not PAX compliant. + var sparseMap []string - headers := make(map[string]string) - // Each record is constructed as - // "%d %s=%s\n", length, keyword, value + paxHdrs := make(map[string]string) for len(sbuf) > 0 { key, value, residual, err := parsePAXRecord(sbuf) if err != nil { @@ -419,422 +357,234 @@ func parsePAX(r io.Reader) (map[string]string, error) { } sbuf = residual - keyStr := string(key) - if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes { - // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map. - sparseMap.WriteString(value) - sparseMap.Write([]byte{','}) - } else { - // Normal key. Set the value in the headers map. - headers[keyStr] = string(value) - } - } - if sparseMap.Len() != 0 { - // Add sparse info to headers, chopping off the extra comma - sparseMap.Truncate(sparseMap.Len() - 1) - headers[paxGNUSparseMap] = sparseMap.String() - } - return headers, nil -} - -// parsePAXRecord parses the input PAX record string into a key-value pair. -// If parsing is successful, it will slice off the currently read record and -// return the remainder as r. -// -// A PAX record is of the following form: -// "%d %s=%s\n" % (size, key, value) -func parsePAXRecord(s string) (k, v, r string, err error) { - // The size field ends at the first space. - sp := strings.IndexByte(s, ' ') - if sp == -1 { - return "", "", s, ErrHeader - } - - // Parse the first token as a decimal integer. - n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int - if perr != nil || n < 5 || int64(len(s)) < n { - return "", "", s, ErrHeader - } - - // Extract everything between the space and the final newline. - rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] - if nl != "\n" { - return "", "", s, ErrHeader - } - - // The first equals separates the key from the value. - eq := strings.IndexByte(rec, '=') - if eq == -1 { - return "", "", s, ErrHeader - } - return rec[:eq], rec[eq+1:], rem, nil -} - -// parseString parses bytes as a NUL-terminated C-style string. -// If a NUL byte is not found then the whole slice is returned as a string. -func (*parser) parseString(b []byte) string { - n := 0 - for n < len(b) && b[n] != 0 { - n++ - } - return string(b[0:n]) -} - -// parseNumeric parses the input as being encoded in either base-256 or octal. -// This function may return negative numbers. -// If parsing fails or an integer overflow occurs, err will be set. -func (p *parser) parseNumeric(b []byte) int64 { - // Check for base-256 (binary) format first. - // If the first bit is set, then all following bits constitute a two's - // complement encoded number in big-endian byte order. - if len(b) > 0 && b[0]&0x80 != 0 { - // Handling negative numbers relies on the following identity: - // -a-1 == ^a - // - // If the number is negative, we use an inversion mask to invert the - // data bytes and treat the value as an unsigned number. - var inv byte // 0x00 if positive or zero, 0xff if negative - if b[0]&0x40 != 0 { - inv = 0xff - } - - var x uint64 - for i, c := range b { - c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing - if i == 0 { - c &= 0x7f // Ignore signal bit in first byte + switch key { + case paxGNUSparseOffset, paxGNUSparseNumBytes: + // Validate sparse header order and value. + if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) || + (len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) || + strings.Contains(value, ",") { + return nil, ErrHeader } - if (x >> 56) > 0 { - p.err = ErrHeader // Integer overflow - return 0 - } - x = x<<8 | uint64(c) - } - if (x >> 63) > 0 { - p.err = ErrHeader // Integer overflow - return 0 - } - if inv == 0xff { - return ^int64(x) - } - return int64(x) - } - - // Normal case is base-8 (octal) format. - return p.parseOctal(b) -} - -func (p *parser) parseOctal(b []byte) int64 { - // Because unused fields are filled with NULs, we need - // to skip leading NULs. Fields may also be padded with - // spaces or NULs. - // So we remove leading and trailing NULs and spaces to - // be sure. - b = bytes.Trim(b, " \x00") - - if len(b) == 0 { - return 0 - } - x, perr := strconv.ParseUint(p.parseString(b), 8, 64) - if perr != nil { - p.err = ErrHeader - } - return int64(x) -} - -// skipUnread skips any unread bytes in the existing file entry, as well as any -// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is -// encountered in the data portion; it is okay to hit io.EOF in the padding. -// -// Note that this function still works properly even when sparse files are being -// used since numBytes returns the bytes remaining in the underlying io.Reader. -func (tr *Reader) skipUnread() error { - dataSkip := tr.numBytes() // Number of data bytes to skip - totalSkip := dataSkip + tr.pad // Total number of bytes to skip - tr.curr, tr.pad = nil, 0 - if tr.RawAccounting { - _, tr.err = io.CopyN(tr.rawBytes, tr.r, totalSkip) - return tr.err - } - // If possible, Seek to the last byte before the end of the data section. - // Do this because Seek is often lazy about reporting errors; this will mask - // the fact that the tar stream may be truncated. We can rely on the - // io.CopyN done shortly afterwards to trigger any IO errors. - var seekSkipped int64 // Number of bytes skipped via Seek - if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 { - // Not all io.Seeker can actually Seek. For example, os.Stdin implements - // io.Seeker, but calling Seek always returns an error and performs - // no action. Thus, we try an innocent seek to the current position - // to see if Seek is really supported. - pos1, err := sr.Seek(0, os.SEEK_CUR) - if err == nil { - // Seek seems supported, so perform the real Seek. - pos2, err := sr.Seek(dataSkip-1, os.SEEK_CUR) - if err != nil { - tr.err = err - return tr.err - } - seekSkipped = pos2 - pos1 + sparseMap = append(sparseMap, value) + default: + paxHdrs[key] = value } } - - var copySkipped int64 // Number of bytes skipped via CopyN - copySkipped, tr.err = io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped) - if tr.err == io.EOF && seekSkipped+copySkipped < dataSkip { - tr.err = io.ErrUnexpectedEOF + if len(sparseMap) > 0 { + paxHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",") } - return tr.err -} - -func (tr *Reader) verifyChecksum(header []byte) bool { - if tr.err != nil { - return false - } - - var p parser - given := p.parseOctal(header[148:156]) - unsigned, signed := checksum(header) - return p.err == nil && (given == unsigned || given == signed) + return paxHdrs, nil } // readHeader reads the next block header and assumes that the underlying reader -// is already aligned to a block boundary. +// is already aligned to a block boundary. It returns the raw block of the +// header in case further processing is required. // // The err will be set to io.EOF only when one of the following occurs: // * Exactly 0 bytes are read and EOF is hit. // * Exactly 1 block of zeros is read and EOF is hit. // * At least 2 blocks of zeros are read. -func (tr *Reader) readHeader() *Header { - header := tr.hdrBuff[:] - copy(header, zeroBlock) - - if n, err := io.ReadFull(tr.r, header); err != nil { - tr.err = err - // because it could read some of the block, but reach EOF first - if tr.err == io.EOF && tr.RawAccounting { - if _, err := tr.rawBytes.Write(header[:n]); err != nil { - tr.err = err - } - } - return nil // io.EOF is okay here - } - if tr.RawAccounting { - if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { - return nil - } - } - +func (tr *Reader) readHeader() (*Header, *block, error) { // Two blocks of zero bytes marks the end of the archive. - if bytes.Equal(header, zeroBlock[0:blockSize]) { - if n, err := io.ReadFull(tr.r, header); err != nil { - tr.err = err - // because it could read some of the block, but reach EOF first - if tr.err == io.EOF && tr.RawAccounting { - if _, err := tr.rawBytes.Write(header[:n]); err != nil { - tr.err = err - } - } - return nil // io.EOF is okay here - } - if tr.RawAccounting { - if _, tr.err = tr.rawBytes.Write(header); tr.err != nil { - return nil - } - } - if bytes.Equal(header, zeroBlock[0:blockSize]) { - tr.err = io.EOF - } else { - tr.err = ErrHeader // zero block and then non-zero block - } - return nil + n, err := io.ReadFull(tr.r, tr.blk[:]) + if tr.RawAccounting && (err == nil || err == io.EOF) { + tr.rawBytes.Write(tr.blk[:n]) + } + if err != nil { + return nil, nil, err // EOF is okay here; exactly 0 bytes read } - if !tr.verifyChecksum(header) { - tr.err = ErrHeader - return nil + if bytes.Equal(tr.blk[:], zeroBlock[:]) { + n, err = io.ReadFull(tr.r, tr.blk[:]) + if tr.RawAccounting && (err == nil || err == io.EOF) { + tr.rawBytes.Write(tr.blk[:n]) + } + if err != nil { + return nil, nil, err // EOF is okay here; exactly 1 block of zeros read + } + if bytes.Equal(tr.blk[:], zeroBlock[:]) { + return nil, nil, io.EOF // normal EOF; exactly 2 block of zeros read + } + return nil, nil, ErrHeader // Zero block and then non-zero block + } + + // Verify the header matches a known format. + format := tr.blk.GetFormat() + if format == FormatUnknown { + return nil, nil, ErrHeader } - // Unpack var p parser hdr := new(Header) - s := slicer(header) - hdr.Name = p.parseString(s.next(100)) - hdr.Mode = p.parseNumeric(s.next(8)) - hdr.Uid = int(p.parseNumeric(s.next(8))) - hdr.Gid = int(p.parseNumeric(s.next(8))) - hdr.Size = p.parseNumeric(s.next(12)) - hdr.ModTime = time.Unix(p.parseNumeric(s.next(12)), 0) - s.next(8) // chksum - hdr.Typeflag = s.next(1)[0] - hdr.Linkname = p.parseString(s.next(100)) + // Unpack the V7 header. + v7 := tr.blk.V7() + hdr.Typeflag = v7.TypeFlag()[0] + hdr.Name = p.parseString(v7.Name()) + hdr.Linkname = p.parseString(v7.LinkName()) + hdr.Size = p.parseNumeric(v7.Size()) + hdr.Mode = p.parseNumeric(v7.Mode()) + hdr.Uid = int(p.parseNumeric(v7.UID())) + hdr.Gid = int(p.parseNumeric(v7.GID())) + hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0) - // The remainder of the header depends on the value of magic. - // The original (v7) version of tar had no explicit magic field, - // so its magic bytes, like the rest of the block, are NULs. - magic := string(s.next(8)) // contains version field as well. - var format string - switch { - case magic[:6] == "ustar\x00": // POSIX tar (1003.1-1988) - if string(header[508:512]) == "tar\x00" { - format = "star" - } else { - format = "posix" - } - case magic == "ustar \x00": // old GNU tar - format = "gnu" - } + // Unpack format specific fields. + if format > formatV7 { + ustar := tr.blk.USTAR() + hdr.Uname = p.parseString(ustar.UserName()) + hdr.Gname = p.parseString(ustar.GroupName()) + hdr.Devmajor = p.parseNumeric(ustar.DevMajor()) + hdr.Devminor = p.parseNumeric(ustar.DevMinor()) - switch format { - case "posix", "gnu", "star": - hdr.Uname = p.parseString(s.next(32)) - hdr.Gname = p.parseString(s.next(32)) - devmajor := s.next(8) - devminor := s.next(8) - if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { - hdr.Devmajor = p.parseNumeric(devmajor) - hdr.Devminor = p.parseNumeric(devminor) - } var prefix string - switch format { - case "posix", "gnu": - prefix = p.parseString(s.next(155)) - case "star": - prefix = p.parseString(s.next(131)) - hdr.AccessTime = time.Unix(p.parseNumeric(s.next(12)), 0) - hdr.ChangeTime = time.Unix(p.parseNumeric(s.next(12)), 0) + switch { + case format.has(FormatUSTAR | FormatPAX): + hdr.Format = format + ustar := tr.blk.USTAR() + prefix = p.parseString(ustar.Prefix()) + + // For Format detection, check if block is properly formatted since + // the parser is more liberal than what USTAR actually permits. + notASCII := func(r rune) bool { return r >= 0x80 } + if bytes.IndexFunc(tr.blk[:], notASCII) >= 0 { + hdr.Format = FormatUnknown // Non-ASCII characters in block. + } + nul := func(b []byte) bool { return int(b[len(b)-1]) == 0 } + if !(nul(v7.Size()) && nul(v7.Mode()) && nul(v7.UID()) && nul(v7.GID()) && + nul(v7.ModTime()) && nul(ustar.DevMajor()) && nul(ustar.DevMinor())) { + hdr.Format = FormatUnknown // Numeric fields must end in NUL + } + case format.has(formatSTAR): + star := tr.blk.STAR() + prefix = p.parseString(star.Prefix()) + hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0) + hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0) + case format.has(FormatGNU): + hdr.Format = format + var p2 parser + gnu := tr.blk.GNU() + if b := gnu.AccessTime(); b[0] != 0 { + hdr.AccessTime = time.Unix(p2.parseNumeric(b), 0) + } + if b := gnu.ChangeTime(); b[0] != 0 { + hdr.ChangeTime = time.Unix(p2.parseNumeric(b), 0) + } + + // Prior to Go1.8, the Writer had a bug where it would output + // an invalid tar file in certain rare situations because the logic + // incorrectly believed that the old GNU format had a prefix field. + // This is wrong and leads to an output file that mangles the + // atime and ctime fields, which are often left unused. + // + // In order to continue reading tar files created by former, buggy + // versions of Go, we skeptically parse the atime and ctime fields. + // If we are unable to parse them and the prefix field looks like + // an ASCII string, then we fallback on the pre-Go1.8 behavior + // of treating these fields as the USTAR prefix field. + // + // Note that this will not use the fallback logic for all possible + // files generated by a pre-Go1.8 toolchain. If the generated file + // happened to have a prefix field that parses as valid + // atime and ctime fields (e.g., when they are valid octal strings), + // then it is impossible to distinguish between an valid GNU file + // and an invalid pre-Go1.8 file. + // + // See https://golang.org/issues/12594 + // See https://golang.org/issues/21005 + if p2.err != nil { + hdr.AccessTime, hdr.ChangeTime = time.Time{}, time.Time{} + ustar := tr.blk.USTAR() + if s := p.parseString(ustar.Prefix()); isASCII(s) { + prefix = s + } + hdr.Format = FormatUnknown // Buggy file is not GNU + } } if len(prefix) > 0 { hdr.Name = prefix + "/" + hdr.Name } } - - if p.err != nil { - tr.err = p.err - return nil - } - - nb := hdr.Size - if isHeaderOnlyType(hdr.Typeflag) { - nb = 0 - } - if nb < 0 { - tr.err = ErrHeader - return nil - } - - // Set the current file reader. - tr.pad = -nb & (blockSize - 1) // blockSize is a power of two - tr.curr = ®FileReader{r: tr.r, nb: nb} - - // Check for old GNU sparse format entry. - if hdr.Typeflag == TypeGNUSparse { - // Get the real size of the file. - hdr.Size = p.parseNumeric(header[483:495]) - if p.err != nil { - tr.err = p.err - return nil - } - - // Read the sparse map. - sp := tr.readOldGNUSparseMap(header) - if tr.err != nil { - return nil - } - - // Current file is a GNU sparse file. Update the current file reader. - tr.curr, tr.err = newSparseFileReader(tr.curr, sp, hdr.Size) - if tr.err != nil { - return nil - } - } - - return hdr + return hdr, &tr.blk, p.err } -// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format. -// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries, -// then one or more extension headers are used to store the rest of the sparse map. -func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry { +// readOldGNUSparseMap reads the sparse map from the old GNU sparse format. +// The sparse map is stored in the tar header if it's small enough. +// If it's larger than four entries, then one or more extension headers are used +// to store the rest of the sparse map. +// +// The Header.Size does not reflect the size of any extended headers used. +// Thus, this function will read from the raw io.Reader to fetch extra headers. +// This method mutates blk in the process. +func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, error) { + // Make sure that the input format is GNU. + // Unfortunately, the STAR format also has a sparse header format that uses + // the same type flag but has a completely different layout. + if blk.GetFormat() != FormatGNU { + return nil, ErrHeader + } + hdr.Format.mayOnlyBe(FormatGNU) + var p parser - isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0 - spCap := oldGNUSparseMainHeaderNumEntries - if isExtended { - spCap += oldGNUSparseExtendedHeaderNumEntries + hdr.Size = p.parseNumeric(blk.GNU().RealSize()) + if p.err != nil { + return nil, p.err } - sp := make([]sparseEntry, 0, spCap) - s := slicer(header[oldGNUSparseMainHeaderOffset:]) - - // Read the four entries from the main tar header - for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ { - offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) - numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) - if p.err != nil { - tr.err = p.err - return nil - } - if offset == 0 && numBytes == 0 { - break - } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) - } - - for isExtended { - // There are more entries. Read an extension header and parse its entries. - sparseHeader := make([]byte, blockSize) - if _, tr.err = io.ReadFull(tr.r, sparseHeader); tr.err != nil { - return nil - } - if tr.RawAccounting { - if _, tr.err = tr.rawBytes.Write(sparseHeader); tr.err != nil { - return nil + s := blk.GNU().Sparse() + spd := make(sparseDatas, 0, s.MaxEntries()) + for { + for i := 0; i < s.MaxEntries(); i++ { + // This termination condition is identical to GNU and BSD tar. + if s.Entry(i).Offset()[0] == 0x00 { + break // Don't return, need to process extended headers (even if empty) } - } - - isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0 - s = slicer(sparseHeader) - for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ { - offset := p.parseNumeric(s.next(oldGNUSparseOffsetSize)) - numBytes := p.parseNumeric(s.next(oldGNUSparseNumBytesSize)) + offset := p.parseNumeric(s.Entry(i).Offset()) + length := p.parseNumeric(s.Entry(i).Length()) if p.err != nil { - tr.err = p.err - return nil + return nil, p.err } - if offset == 0 && numBytes == 0 { - break - } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) + spd = append(spd, sparseEntry{Offset: offset, Length: length}) } + + if s.IsExtended()[0] > 0 { + // There are more entries. Read an extension header and parse its entries. + if _, err := mustReadFull(tr.r, blk[:]); err != nil { + return nil, err + } + if tr.RawAccounting { + tr.rawBytes.Write(blk[:]) + } + s = blk.Sparse() + continue + } + return spd, nil // Done } - return sp } // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format // version 1.0. The format of the sparse map consists of a series of // newline-terminated numeric fields. The first field is the number of entries // and is always present. Following this are the entries, consisting of two -// fields (offset, numBytes). This function must stop reading at the end +// fields (offset, length). This function must stop reading at the end // boundary of the block containing the last newline. // // Note that the GNU manual says that numeric values should be encoded in octal // format. However, the GNU tar utility itself outputs these values in decimal. // As such, this library treats values as being encoded in decimal. -func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { - var cntNewline int64 - var buf bytes.Buffer - var blk = make([]byte, blockSize) +func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) { + var ( + cntNewline int64 + buf bytes.Buffer + blk block + ) - // feedTokens copies data in numBlock chunks from r into buf until there are + // feedTokens copies data in blocks from r into buf until there are // at least cnt newlines in buf. It will not read more blocks than needed. - var feedTokens = func(cnt int64) error { - for cntNewline < cnt { - if _, err := io.ReadFull(r, blk); err != nil { - if err == io.EOF { - err = io.ErrUnexpectedEOF - } + feedTokens := func(n int64) error { + for cntNewline < n { + if _, err := mustReadFull(r, blk[:]); err != nil { return err } - buf.Write(blk) + buf.Write(blk[:]) for _, c := range blk { if c == '\n' { cntNewline++ @@ -846,10 +596,10 @@ func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { // nextToken gets the next token delimited by a newline. This assumes that // at least one newline exists in the buffer. - var nextToken = func() string { + nextToken := func() string { cntNewline-- tok, _ := buf.ReadString('\n') - return tok[:len(tok)-1] // Cut off newline + return strings.TrimRight(tok, "\n") } // Parse for the number of entries. @@ -868,197 +618,306 @@ func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { if err := feedTokens(2 * numEntries); err != nil { return nil, err } - sp := make([]sparseEntry, 0, numEntries) + spd := make(sparseDatas, 0, numEntries) for i := int64(0); i < numEntries; i++ { - offset, err := strconv.ParseInt(nextToken(), 10, 64) - if err != nil { + offset, err1 := strconv.ParseInt(nextToken(), 10, 64) + length, err2 := strconv.ParseInt(nextToken(), 10, 64) + if err1 != nil || err2 != nil { return nil, ErrHeader } - numBytes, err := strconv.ParseInt(nextToken(), 10, 64) - if err != nil { - return nil, ErrHeader - } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) + spd = append(spd, sparseEntry{Offset: offset, Length: length}) } - return sp, nil + return spd, nil } // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format // version 0.1. The sparse map is stored in the PAX headers. -func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { +func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) { // Get number of entries. // Use integer overflow resistant math to check this. - numEntriesStr := extHdrs[paxGNUSparseNumBlocks] + numEntriesStr := paxHdrs[paxGNUSparseNumBlocks] numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { return nil, ErrHeader } // There should be two numbers in sparseMap for each entry. - sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",") + sparseMap := strings.Split(paxHdrs[paxGNUSparseMap], ",") + if len(sparseMap) == 1 && sparseMap[0] == "" { + sparseMap = sparseMap[:0] + } if int64(len(sparseMap)) != 2*numEntries { return nil, ErrHeader } // Loop through the entries in the sparse map. // numEntries is trusted now. - sp := make([]sparseEntry, 0, numEntries) - for i := int64(0); i < numEntries; i++ { - offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64) - if err != nil { + spd := make(sparseDatas, 0, numEntries) + for len(sparseMap) >= 2 { + offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64) + length, err2 := strconv.ParseInt(sparseMap[1], 10, 64) + if err1 != nil || err2 != nil { return nil, ErrHeader } - numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64) - if err != nil { - return nil, ErrHeader - } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) + spd = append(spd, sparseEntry{Offset: offset, Length: length}) + sparseMap = sparseMap[2:] } - return sp, nil + return spd, nil } -// numBytes returns the number of bytes left to read in the current file's entry -// in the tar archive, or 0 if there is no current file. -func (tr *Reader) numBytes() int64 { - if tr.curr == nil { - // No current file, so no bytes - return 0 - } - return tr.curr.numBytes() -} - -// Read reads from the current entry in the tar archive. -// It returns 0, io.EOF when it reaches the end of that entry, -// until Next is called to advance to the next entry. +// Read reads from the current file in the tar archive. +// It returns (0, io.EOF) when it reaches the end of that file, +// until Next is called to advance to the next file. // -// Calling Read on special types like TypeLink, TypeSymLink, TypeChar, -// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what +// If the current file is sparse, then the regions marked as a hole +// are read back as NUL-bytes. +// +// Calling Read on special types like TypeLink, TypeSymlink, TypeChar, +// TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what // the Header.Size claims. -func (tr *Reader) Read(b []byte) (n int, err error) { +func (tr *Reader) Read(b []byte) (int, error) { if tr.err != nil { return 0, tr.err } - if tr.curr == nil { - return 0, io.EOF - } - - n, err = tr.curr.Read(b) + n, err := tr.curr.Read(b) if err != nil && err != io.EOF { tr.err = err } - return -} - -func (rfr *regFileReader) Read(b []byte) (n int, err error) { - if rfr.nb == 0 { - // file consumed - return 0, io.EOF - } - if int64(len(b)) > rfr.nb { - b = b[0:rfr.nb] - } - n, err = rfr.r.Read(b) - rfr.nb -= int64(n) - - if err == io.EOF && rfr.nb > 0 { - err = io.ErrUnexpectedEOF - } - return -} - -// numBytes returns the number of bytes left to read in the file's data in the tar archive. -func (rfr *regFileReader) numBytes() int64 { - return rfr.nb -} - -// newSparseFileReader creates a new sparseFileReader, but validates all of the -// sparse entries before doing so. -func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) { - if total < 0 { - return nil, ErrHeader // Total size cannot be negative - } - - // Validate all sparse entries. These are the same checks as performed by - // the BSD tar utility. - for i, s := range sp { - switch { - case s.offset < 0 || s.numBytes < 0: - return nil, ErrHeader // Negative values are never okay - case s.offset > math.MaxInt64-s.numBytes: - return nil, ErrHeader // Integer overflow with large length - case s.offset+s.numBytes > total: - return nil, ErrHeader // Region extends beyond the "real" size - case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset: - return nil, ErrHeader // Regions can't overlap and must be in order - } - } - return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil -} - -// readHole reads a sparse hole ending at endOffset. -func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int { - n64 := endOffset - sfr.pos - if n64 > int64(len(b)) { - n64 = int64(len(b)) - } - n := int(n64) - for i := 0; i < n; i++ { - b[i] = 0 - } - sfr.pos += n64 - return n -} - -// Read reads the sparse file data in expanded form. -func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { - // Skip past all empty fragments. - for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 { - sfr.sp = sfr.sp[1:] - } - - // If there are no more fragments, then it is possible that there - // is one last sparse hole. - if len(sfr.sp) == 0 { - // This behavior matches the BSD tar utility. - // However, GNU tar stops returning data even if sfr.total is unmet. - if sfr.pos < sfr.total { - return sfr.readHole(b, sfr.total), nil - } - return 0, io.EOF - } - - // In front of a data fragment, so read a hole. - if sfr.pos < sfr.sp[0].offset { - return sfr.readHole(b, sfr.sp[0].offset), nil - } - - // In a data fragment, so read from it. - // This math is overflow free since we verify that offset and numBytes can - // be safely added when creating the sparseFileReader. - endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment - bytesLeft := endPos - sfr.pos // Bytes left in fragment - if int64(len(b)) > bytesLeft { - b = b[:bytesLeft] - } - - n, err = sfr.rfr.Read(b) - sfr.pos += int64(n) - if err == io.EOF { - if sfr.pos < endPos { - err = io.ErrUnexpectedEOF // There was supposed to be more data - } else if sfr.pos < sfr.total { - err = nil // There is still an implicit sparse hole at the end - } - } - - if sfr.pos == endPos { - sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it - } return n, err } -// numBytes returns the number of bytes left to read in the sparse file's -// sparse-encoded data in the tar archive. -func (sfr *sparseFileReader) numBytes() int64 { - return sfr.rfr.numBytes() +// writeTo writes the content of the current file to w. +// The bytes written matches the number of remaining bytes in the current file. +// +// If the current file is sparse and w is an io.WriteSeeker, +// then writeTo uses Seek to skip past holes defined in Header.SparseHoles, +// assuming that skipped regions are filled with NULs. +// This always writes the last byte to ensure w is the right size. +// +// TODO(dsnet): Re-export this when adding sparse file support. +// See https://golang.org/issue/22735 +func (tr *Reader) writeTo(w io.Writer) (int64, error) { + if tr.err != nil { + return 0, tr.err + } + n, err := tr.curr.WriteTo(w) + if err != nil { + tr.err = err + } + return n, err +} + +// regFileReader is a fileReader for reading data from a regular file entry. +type regFileReader struct { + r io.Reader // Underlying Reader + nb int64 // Number of remaining bytes to read +} + +func (fr *regFileReader) Read(b []byte) (n int, err error) { + if int64(len(b)) > fr.nb { + b = b[:fr.nb] + } + if len(b) > 0 { + n, err = fr.r.Read(b) + fr.nb -= int64(n) + } + switch { + case err == io.EOF && fr.nb > 0: + return n, io.ErrUnexpectedEOF + case err == nil && fr.nb == 0: + return n, io.EOF + default: + return n, err + } +} + +func (fr *regFileReader) WriteTo(w io.Writer) (int64, error) { + return io.Copy(w, struct{ io.Reader }{fr}) +} + +func (fr regFileReader) LogicalRemaining() int64 { + return fr.nb +} + +func (fr regFileReader) PhysicalRemaining() int64 { + return fr.nb +} + +// sparseFileReader is a fileReader for reading data from a sparse file entry. +type sparseFileReader struct { + fr fileReader // Underlying fileReader + sp sparseHoles // Normalized list of sparse holes + pos int64 // Current position in sparse file +} + +func (sr *sparseFileReader) Read(b []byte) (n int, err error) { + finished := int64(len(b)) >= sr.LogicalRemaining() + if finished { + b = b[:sr.LogicalRemaining()] + } + + b0 := b + endPos := sr.pos + int64(len(b)) + for endPos > sr.pos && err == nil { + var nf int // Bytes read in fragment + holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset() + if sr.pos < holeStart { // In a data fragment + bf := b[:min(int64(len(b)), holeStart-sr.pos)] + nf, err = tryReadFull(sr.fr, bf) + } else { // In a hole fragment + bf := b[:min(int64(len(b)), holeEnd-sr.pos)] + nf, err = tryReadFull(zeroReader{}, bf) + } + b = b[nf:] + sr.pos += int64(nf) + if sr.pos >= holeEnd && len(sr.sp) > 1 { + sr.sp = sr.sp[1:] // Ensure last fragment always remains + } + } + + n = len(b0) - len(b) + switch { + case err == io.EOF: + return n, errMissData // Less data in dense file than sparse file + case err != nil: + return n, err + case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0: + return n, errUnrefData // More data in dense file than sparse file + case finished: + return n, io.EOF + default: + return n, nil + } +} + +func (sr *sparseFileReader) WriteTo(w io.Writer) (n int64, err error) { + ws, ok := w.(io.WriteSeeker) + if ok { + if _, err := ws.Seek(0, io.SeekCurrent); err != nil { + ok = false // Not all io.Seeker can really seek + } + } + if !ok { + return io.Copy(w, struct{ io.Reader }{sr}) + } + + var writeLastByte bool + pos0 := sr.pos + for sr.LogicalRemaining() > 0 && !writeLastByte && err == nil { + var nf int64 // Size of fragment + holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset() + if sr.pos < holeStart { // In a data fragment + nf = holeStart - sr.pos + nf, err = io.CopyN(ws, sr.fr, nf) + } else { // In a hole fragment + nf = holeEnd - sr.pos + if sr.PhysicalRemaining() == 0 { + writeLastByte = true + nf-- + } + _, err = ws.Seek(nf, io.SeekCurrent) + } + sr.pos += nf + if sr.pos >= holeEnd && len(sr.sp) > 1 { + sr.sp = sr.sp[1:] // Ensure last fragment always remains + } + } + + // If the last fragment is a hole, then seek to 1-byte before EOF, and + // write a single byte to ensure the file is the right size. + if writeLastByte && err == nil { + _, err = ws.Write([]byte{0}) + sr.pos++ + } + + n = sr.pos - pos0 + switch { + case err == io.EOF: + return n, errMissData // Less data in dense file than sparse file + case err != nil: + return n, err + case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0: + return n, errUnrefData // More data in dense file than sparse file + default: + return n, nil + } +} + +func (sr sparseFileReader) LogicalRemaining() int64 { + return sr.sp[len(sr.sp)-1].endOffset() - sr.pos +} +func (sr sparseFileReader) PhysicalRemaining() int64 { + return sr.fr.PhysicalRemaining() +} + +type zeroReader struct{} + +func (zeroReader) Read(b []byte) (int, error) { + for i := range b { + b[i] = 0 + } + return len(b), nil +} + +// mustReadFull is like io.ReadFull except it returns +// io.ErrUnexpectedEOF when io.EOF is hit before len(b) bytes are read. +func mustReadFull(r io.Reader, b []byte) (int, error) { + n, err := tryReadFull(r, b) + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return n, err +} + +// tryReadFull is like io.ReadFull except it returns +// io.EOF when it is hit before len(b) bytes are read. +func tryReadFull(r io.Reader, b []byte) (n int, err error) { + for len(b) > n && err == nil { + var nn int + nn, err = r.Read(b[n:]) + n += nn + } + if len(b) == n && err == io.EOF { + err = nil + } + return n, err +} + +// discard skips n bytes in r, reporting an error if unable to do so. +func discard(tr *Reader, n int64) error { + var seekSkipped, copySkipped int64 + var err error + r := tr.r + if tr.RawAccounting { + + copySkipped, err = io.CopyN(tr.rawBytes, tr.r, n) + goto out + } + + // If possible, Seek to the last byte before the end of the data section. + // Do this because Seek is often lazy about reporting errors; this will mask + // the fact that the stream may be truncated. We can rely on the + // io.CopyN done shortly afterwards to trigger any IO errors. + if sr, ok := r.(io.Seeker); ok && n > 1 { + // Not all io.Seeker can actually Seek. For example, os.Stdin implements + // io.Seeker, but calling Seek always returns an error and performs + // no action. Thus, we try an innocent seek to the current position + // to see if Seek is really supported. + pos1, err := sr.Seek(0, io.SeekCurrent) + if pos1 >= 0 && err == nil { + // Seek seems supported, so perform the real Seek. + pos2, err := sr.Seek(n-1, io.SeekCurrent) + if pos2 < 0 || err != nil { + return err + } + seekSkipped = pos2 - pos1 + } + } + + copySkipped, err = io.CopyN(ioutil.Discard, r, n-seekSkipped) +out: + if err == io.EOF && seekSkipped+copySkipped < n { + err = io.ErrUnexpectedEOF + } + return err } diff --git a/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/stat_atim.go b/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/stat_actime1.go similarity index 100% rename from components/engine/vendor/github.com/vbatts/tar-split/archive/tar/stat_atim.go rename to components/engine/vendor/github.com/vbatts/tar-split/archive/tar/stat_actime1.go diff --git a/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/stat_atimespec.go b/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/stat_actime2.go similarity index 100% rename from components/engine/vendor/github.com/vbatts/tar-split/archive/tar/stat_atimespec.go rename to components/engine/vendor/github.com/vbatts/tar-split/archive/tar/stat_actime2.go diff --git a/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/stat_unix.go b/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/stat_unix.go index cb843db4cf..868105f338 100644 --- a/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/stat_unix.go +++ b/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/stat_unix.go @@ -8,6 +8,10 @@ package tar import ( "os" + "os/user" + "runtime" + "strconv" + "sync" "syscall" ) @@ -15,6 +19,10 @@ func init() { sysStat = statUnix } +// userMap and groupMap caches UID and GID lookups for performance reasons. +// The downside is that renaming uname or gname by the OS never takes effect. +var userMap, groupMap sync.Map // map[int]string + func statUnix(fi os.FileInfo, h *Header) error { sys, ok := fi.Sys().(*syscall.Stat_t) if !ok { @@ -22,11 +30,67 @@ func statUnix(fi os.FileInfo, h *Header) error { } h.Uid = int(sys.Uid) h.Gid = int(sys.Gid) - // TODO(bradfitz): populate username & group. os/user - // doesn't cache LookupId lookups, and lacks group - // lookup functions. + + // Best effort at populating Uname and Gname. + // The os/user functions may fail for any number of reasons + // (not implemented on that platform, cgo not enabled, etc). + if u, ok := userMap.Load(h.Uid); ok { + h.Uname = u.(string) + } else if u, err := user.LookupId(strconv.Itoa(h.Uid)); err == nil { + h.Uname = u.Username + userMap.Store(h.Uid, h.Uname) + } + if g, ok := groupMap.Load(h.Gid); ok { + h.Gname = g.(string) + } else if g, err := user.LookupGroupId(strconv.Itoa(h.Gid)); err == nil { + h.Gname = g.Name + groupMap.Store(h.Gid, h.Gname) + } + h.AccessTime = statAtime(sys) h.ChangeTime = statCtime(sys) - // TODO(bradfitz): major/minor device numbers? + + // Best effort at populating Devmajor and Devminor. + if h.Typeflag == TypeChar || h.Typeflag == TypeBlock { + dev := uint64(sys.Rdev) // May be int32 or uint32 + switch runtime.GOOS { + case "linux": + // Copied from golang.org/x/sys/unix/dev_linux.go. + major := uint32((dev & 0x00000000000fff00) >> 8) + major |= uint32((dev & 0xfffff00000000000) >> 32) + minor := uint32((dev & 0x00000000000000ff) >> 0) + minor |= uint32((dev & 0x00000ffffff00000) >> 12) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "darwin": + // Copied from golang.org/x/sys/unix/dev_darwin.go. + major := uint32((dev >> 24) & 0xff) + minor := uint32(dev & 0xffffff) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "dragonfly": + // Copied from golang.org/x/sys/unix/dev_dragonfly.go. + major := uint32((dev >> 8) & 0xff) + minor := uint32(dev & 0xffff00ff) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "freebsd": + // Copied from golang.org/x/sys/unix/dev_freebsd.go. + major := uint32((dev >> 8) & 0xff) + minor := uint32(dev & 0xffff00ff) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "netbsd": + // Copied from golang.org/x/sys/unix/dev_netbsd.go. + major := uint32((dev & 0x000fff00) >> 8) + minor := uint32((dev & 0x000000ff) >> 0) + minor |= uint32((dev & 0xfff00000) >> 12) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "openbsd": + // Copied from golang.org/x/sys/unix/dev_openbsd.go. + major := uint32((dev & 0x0000ff00) >> 8) + minor := uint32((dev & 0x000000ff) >> 0) + minor |= uint32((dev & 0xffff0000) >> 8) + h.Devmajor, h.Devminor = int64(major), int64(minor) + default: + // TODO: Implement solaris (see https://golang.org/issue/8106) + } + } return nil } diff --git a/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/strconv.go b/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/strconv.go new file mode 100644 index 0000000000..d144485a49 --- /dev/null +++ b/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/strconv.go @@ -0,0 +1,326 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package tar + +import ( + "bytes" + "fmt" + "strconv" + "strings" + "time" +) + +// hasNUL reports whether the NUL character exists within s. +func hasNUL(s string) bool { + return strings.IndexByte(s, 0) >= 0 +} + +// isASCII reports whether the input is an ASCII C-style string. +func isASCII(s string) bool { + for _, c := range s { + if c >= 0x80 || c == 0x00 { + return false + } + } + return true +} + +// toASCII converts the input to an ASCII C-style string. +// This a best effort conversion, so invalid characters are dropped. +func toASCII(s string) string { + if isASCII(s) { + return s + } + b := make([]byte, 0, len(s)) + for _, c := range s { + if c < 0x80 && c != 0x00 { + b = append(b, byte(c)) + } + } + return string(b) +} + +type parser struct { + err error // Last error seen +} + +type formatter struct { + err error // Last error seen +} + +// parseString parses bytes as a NUL-terminated C-style string. +// If a NUL byte is not found then the whole slice is returned as a string. +func (*parser) parseString(b []byte) string { + if i := bytes.IndexByte(b, 0); i >= 0 { + return string(b[:i]) + } + return string(b) +} + +// formatString copies s into b, NUL-terminating if possible. +func (f *formatter) formatString(b []byte, s string) { + if len(s) > len(b) { + f.err = ErrFieldTooLong + } + copy(b, s) + if len(s) < len(b) { + b[len(s)] = 0 + } + + // Some buggy readers treat regular files with a trailing slash + // in the V7 path field as a directory even though the full path + // recorded elsewhere (e.g., via PAX record) contains no trailing slash. + if len(s) > len(b) && b[len(b)-1] == '/' { + n := len(strings.TrimRight(s[:len(b)], "/")) + b[n] = 0 // Replace trailing slash with NUL terminator + } +} + +// fitsInBase256 reports whether x can be encoded into n bytes using base-256 +// encoding. Unlike octal encoding, base-256 encoding does not require that the +// string ends with a NUL character. Thus, all n bytes are available for output. +// +// If operating in binary mode, this assumes strict GNU binary mode; which means +// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is +// equivalent to the sign bit in two's complement form. +func fitsInBase256(n int, x int64) bool { + binBits := uint(n-1) * 8 + return n >= 9 || (x >= -1< 0 && b[0]&0x80 != 0 { + // Handling negative numbers relies on the following identity: + // -a-1 == ^a + // + // If the number is negative, we use an inversion mask to invert the + // data bytes and treat the value as an unsigned number. + var inv byte // 0x00 if positive or zero, 0xff if negative + if b[0]&0x40 != 0 { + inv = 0xff + } + + var x uint64 + for i, c := range b { + c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing + if i == 0 { + c &= 0x7f // Ignore signal bit in first byte + } + if (x >> 56) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + x = x<<8 | uint64(c) + } + if (x >> 63) > 0 { + p.err = ErrHeader // Integer overflow + return 0 + } + if inv == 0xff { + return ^int64(x) + } + return int64(x) + } + + // Normal case is base-8 (octal) format. + return p.parseOctal(b) +} + +// formatNumeric encodes x into b using base-8 (octal) encoding if possible. +// Otherwise it will attempt to use base-256 (binary) encoding. +func (f *formatter) formatNumeric(b []byte, x int64) { + if fitsInOctal(len(b), x) { + f.formatOctal(b, x) + return + } + + if fitsInBase256(len(b), x) { + for i := len(b) - 1; i >= 0; i-- { + b[i] = byte(x) + x >>= 8 + } + b[0] |= 0x80 // Highest bit indicates binary format + return + } + + f.formatOctal(b, 0) // Last resort, just write zero + f.err = ErrFieldTooLong +} + +func (p *parser) parseOctal(b []byte) int64 { + // Because unused fields are filled with NULs, we need + // to skip leading NULs. Fields may also be padded with + // spaces or NULs. + // So we remove leading and trailing NULs and spaces to + // be sure. + b = bytes.Trim(b, " \x00") + + if len(b) == 0 { + return 0 + } + x, perr := strconv.ParseUint(p.parseString(b), 8, 64) + if perr != nil { + p.err = ErrHeader + } + return int64(x) +} + +func (f *formatter) formatOctal(b []byte, x int64) { + if !fitsInOctal(len(b), x) { + x = 0 // Last resort, just write zero + f.err = ErrFieldTooLong + } + + s := strconv.FormatInt(x, 8) + // Add leading zeros, but leave room for a NUL. + if n := len(b) - len(s) - 1; n > 0 { + s = strings.Repeat("0", n) + s + } + f.formatString(b, s) +} + +// fitsInOctal reports whether the integer x fits in a field n-bytes long +// using octal encoding with the appropriate NUL terminator. +func fitsInOctal(n int, x int64) bool { + octBits := uint(n-1) * 3 + return x >= 0 && (n >= 22 || x < 1<= 0 { + ss, sn = s[:pos], s[pos+1:] + } + + // Parse the seconds. + secs, err := strconv.ParseInt(ss, 10, 64) + if err != nil { + return time.Time{}, ErrHeader + } + if len(sn) == 0 { + return time.Unix(secs, 0), nil // No sub-second values + } + + // Parse the nanoseconds. + if strings.Trim(sn, "0123456789") != "" { + return time.Time{}, ErrHeader + } + if len(sn) < maxNanoSecondDigits { + sn += strings.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad + } else { + sn = sn[:maxNanoSecondDigits] // Right truncate + } + nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed + if len(ss) > 0 && ss[0] == '-' { + return time.Unix(secs, -1*nsecs), nil // Negative correction + } + return time.Unix(secs, nsecs), nil +} + +// formatPAXTime converts ts into a time of the form %d.%d as described in the +// PAX specification. This function is capable of negative timestamps. +func formatPAXTime(ts time.Time) (s string) { + secs, nsecs := ts.Unix(), ts.Nanosecond() + if nsecs == 0 { + return strconv.FormatInt(secs, 10) + } + + // If seconds is negative, then perform correction. + sign := "" + if secs < 0 { + sign = "-" // Remember sign + secs = -(secs + 1) // Add a second to secs + nsecs = -(nsecs - 1E9) // Take that second away from nsecs + } + return strings.TrimRight(fmt.Sprintf("%s%d.%09d", sign, secs, nsecs), "0") +} + +// parsePAXRecord parses the input PAX record string into a key-value pair. +// If parsing is successful, it will slice off the currently read record and +// return the remainder as r. +func parsePAXRecord(s string) (k, v, r string, err error) { + // The size field ends at the first space. + sp := strings.IndexByte(s, ' ') + if sp == -1 { + return "", "", s, ErrHeader + } + + // Parse the first token as a decimal integer. + n, perr := strconv.ParseInt(s[:sp], 10, 0) // Intentionally parse as native int + if perr != nil || n < 5 || int64(len(s)) < n { + return "", "", s, ErrHeader + } + + // Extract everything between the space and the final newline. + rec, nl, rem := s[sp+1:n-1], s[n-1:n], s[n:] + if nl != "\n" { + return "", "", s, ErrHeader + } + + // The first equals separates the key from the value. + eq := strings.IndexByte(rec, '=') + if eq == -1 { + return "", "", s, ErrHeader + } + k, v = rec[:eq], rec[eq+1:] + + if !validPAXRecord(k, v) { + return "", "", s, ErrHeader + } + return k, v, rem, nil +} + +// formatPAXRecord formats a single PAX record, prefixing it with the +// appropriate length. +func formatPAXRecord(k, v string) (string, error) { + if !validPAXRecord(k, v) { + return "", ErrHeader + } + + const padding = 3 // Extra padding for ' ', '=', and '\n' + size := len(k) + len(v) + padding + size += len(strconv.Itoa(size)) + record := strconv.Itoa(size) + " " + k + "=" + v + "\n" + + // Final adjustment if adding size field increased the record size. + if len(record) != size { + size = len(record) + record = strconv.Itoa(size) + " " + k + "=" + v + "\n" + } + return record, nil +} + +// validPAXRecord reports whether the key-value pair is valid where each +// record is formatted as: +// "%d %s=%s\n" % (size, key, value) +// +// Keys and values should be UTF-8, but the number of bad writers out there +// forces us to be a more liberal. +// Thus, we only reject all keys with NUL, and only reject NULs in values +// for the PAX version of the USTAR string fields. +// The key must not contain an '=' character. +func validPAXRecord(k, v string) bool { + if k == "" || strings.IndexByte(k, '=') >= 0 { + return false + } + switch k { + case paxPath, paxLinkpath, paxUname, paxGname: + return !hasNUL(v) + default: + return !hasNUL(k) + } +} diff --git a/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/writer.go b/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/writer.go index 042638175c..e80498d03e 100644 --- a/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/writer.go +++ b/components/engine/vendor/github.com/vbatts/tar-split/archive/tar/writer.go @@ -4,295 +4,410 @@ package tar -// TODO(dsymonds): -// - catch more errors (no first header, etc.) - import ( - "bytes" - "errors" "fmt" "io" "path" "sort" - "strconv" "strings" "time" ) -var ( - ErrWriteTooLong = errors.New("archive/tar: write too long") - ErrFieldTooLong = errors.New("archive/tar: header field too long") - ErrWriteAfterClose = errors.New("archive/tar: write after close") - errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values") -) - -// A Writer provides sequential writing of a tar archive in POSIX.1 format. -// A tar archive consists of a sequence of files. -// Call WriteHeader to begin a new file, and then call Write to supply that file's data, -// writing at most hdr.Size bytes in total. +// Writer provides sequential writing of a tar archive. +// Write.WriteHeader begins a new file with the provided Header, +// and then Writer can be treated as an io.Writer to supply that file's data. type Writer struct { - w io.Writer - err error - nb int64 // number of unwritten bytes for current file entry - pad int64 // amount of padding to write after current file entry - closed bool - usedBinary bool // whether the binary numeric field extension was used - preferPax bool // use pax header instead of binary numeric header - hdrBuff [blockSize]byte // buffer to use in writeHeader when writing a regular header - paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header -} + w io.Writer + pad int64 // Amount of padding to write after current file entry + curr fileWriter // Writer for current file entry + hdr Header // Shallow copy of Header that is safe for mutations + blk block // Buffer to use as temporary local storage -type formatter struct { - err error // Last error seen + // err is a persistent error. + // It is only the responsibility of every exported method of Writer to + // ensure that this error is sticky. + err error } // NewWriter creates a new Writer writing to w. -func NewWriter(w io.Writer) *Writer { return &Writer{w: w} } - -// Flush finishes writing the current file (optional). -func (tw *Writer) Flush() error { - if tw.nb > 0 { - tw.err = fmt.Errorf("archive/tar: missed writing %d bytes", tw.nb) - return tw.err - } - - n := tw.nb + tw.pad - for n > 0 && tw.err == nil { - nr := n - if nr > blockSize { - nr = blockSize - } - var nw int - nw, tw.err = tw.w.Write(zeroBlock[0:nr]) - n -= int64(nw) - } - tw.nb = 0 - tw.pad = 0 - return tw.err +func NewWriter(w io.Writer) *Writer { + return &Writer{w: w, curr: ®FileWriter{w, 0}} } -// Write s into b, terminating it with a NUL if there is room. -func (f *formatter) formatString(b []byte, s string) { - if len(s) > len(b) { - f.err = ErrFieldTooLong - return - } - ascii := toASCII(s) - copy(b, ascii) - if len(ascii) < len(b) { - b[len(ascii)] = 0 - } +type fileWriter interface { + io.Writer + fileState + + ReadFrom(io.Reader) (int64, error) } -// Encode x as an octal ASCII string and write it into b with leading zeros. -func (f *formatter) formatOctal(b []byte, x int64) { - s := strconv.FormatInt(x, 8) - // leading zeros, but leave room for a NUL. - for len(s)+1 < len(b) { - s = "0" + s - } - f.formatString(b, s) -} - -// fitsInBase256 reports whether x can be encoded into n bytes using base-256 -// encoding. Unlike octal encoding, base-256 encoding does not require that the -// string ends with a NUL character. Thus, all n bytes are available for output. +// Flush finishes writing the current file's block padding. +// The current file must be fully written before Flush can be called. // -// If operating in binary mode, this assumes strict GNU binary mode; which means -// that the first byte can only be either 0x80 or 0xff. Thus, the first byte is -// equivalent to the sign bit in two's complement form. -func fitsInBase256(n int, x int64) bool { - var binBits = uint(n-1) * 8 - return n >= 9 || (x >= -1<= 0; i-- { - b[i] = byte(x) - x >>= 8 - } - b[0] |= 0x80 // Highest bit indicates binary format - return - } - - f.formatOctal(b, 0) // Last resort, just write zero - f.err = ErrFieldTooLong -} - -var ( - minTime = time.Unix(0, 0) - // There is room for 11 octal digits (33 bits) of mtime. - maxTime = minTime.Add((1<<33 - 1) * time.Second) -) - -// WriteHeader writes hdr and prepares to accept the file's contents. -// WriteHeader calls Flush if it is not the first header. -// Calling after a Close will return ErrWriteAfterClose. -func (tw *Writer) WriteHeader(hdr *Header) error { - return tw.writeHeader(hdr, true) -} - -// WriteHeader writes hdr and prepares to accept the file's contents. -// WriteHeader calls Flush if it is not the first header. -// Calling after a Close will return ErrWriteAfterClose. -// As this method is called internally by writePax header to allow it to -// suppress writing the pax header. -func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { - if tw.closed { - return ErrWriteAfterClose - } - if tw.err == nil { - tw.Flush() - } +// This is unnecessary as the next call to WriteHeader or Close +// will implicitly flush out the file's padding. +func (tw *Writer) Flush() error { if tw.err != nil { return tw.err } - - // a map to hold pax header records, if any are needed - paxHeaders := make(map[string]string) - - // TODO(shanemhansen): we might want to use PAX headers for - // subsecond time resolution, but for now let's just capture - // too long fields or non ascii characters - - var f formatter - var header []byte - - // We need to select which scratch buffer to use carefully, - // since this method is called recursively to write PAX headers. - // If allowPax is true, this is the non-recursive call, and we will use hdrBuff. - // If allowPax is false, we are being called by writePAXHeader, and hdrBuff is - // already being used by the non-recursive call, so we must use paxHdrBuff. - header = tw.hdrBuff[:] - if !allowPax { - header = tw.paxHdrBuff[:] + if nb := tw.curr.LogicalRemaining(); nb > 0 { + return fmt.Errorf("archive/tar: missed writing %d bytes", nb) } - copy(header, zeroBlock) - s := slicer(header) - - // Wrappers around formatter that automatically sets paxHeaders if the - // argument extends beyond the capacity of the input byte slice. - var formatString = func(b []byte, s string, paxKeyword string) { - needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s) - if needsPaxHeader { - paxHeaders[paxKeyword] = s - return - } - f.formatString(b, s) - } - var formatNumeric = func(b []byte, x int64, paxKeyword string) { - // Try octal first. - s := strconv.FormatInt(x, 8) - if len(s) < len(b) { - f.formatOctal(b, x) - return - } - - // If it is too long for octal, and PAX is preferred, use a PAX header. - if paxKeyword != paxNone && tw.preferPax { - f.formatOctal(b, 0) - s := strconv.FormatInt(x, 10) - paxHeaders[paxKeyword] = s - return - } - - tw.usedBinary = true - f.formatNumeric(b, x) - } - - // keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax - pathHeaderBytes := s.next(fileNameSize) - - formatString(pathHeaderBytes, hdr.Name, paxPath) - - // Handle out of range ModTime carefully. - var modTime int64 - if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) { - modTime = hdr.ModTime.Unix() - } - - f.formatOctal(s.next(8), hdr.Mode) // 100:108 - formatNumeric(s.next(8), int64(hdr.Uid), paxUid) // 108:116 - formatNumeric(s.next(8), int64(hdr.Gid), paxGid) // 116:124 - formatNumeric(s.next(12), hdr.Size, paxSize) // 124:136 - formatNumeric(s.next(12), modTime, paxNone) // 136:148 --- consider using pax for finer granularity - s.next(8) // chksum (148:156) - s.next(1)[0] = hdr.Typeflag // 156:157 - - formatString(s.next(100), hdr.Linkname, paxLinkpath) - - copy(s.next(8), []byte("ustar\x0000")) // 257:265 - formatString(s.next(32), hdr.Uname, paxUname) // 265:297 - formatString(s.next(32), hdr.Gname, paxGname) // 297:329 - formatNumeric(s.next(8), hdr.Devmajor, paxNone) // 329:337 - formatNumeric(s.next(8), hdr.Devminor, paxNone) // 337:345 - - // keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax - prefixHeaderBytes := s.next(155) - formatString(prefixHeaderBytes, "", paxNone) // 345:500 prefix - - // Use the GNU magic instead of POSIX magic if we used any GNU extensions. - if tw.usedBinary { - copy(header[257:265], []byte("ustar \x00")) - } - - _, paxPathUsed := paxHeaders[paxPath] - // try to use a ustar header when only the name is too long - if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed { - prefix, suffix, ok := splitUSTARPath(hdr.Name) - if ok { - // Since we can encode in USTAR format, disable PAX header. - delete(paxHeaders, paxPath) - - // Update the path fields - formatString(pathHeaderBytes, suffix, paxNone) - formatString(prefixHeaderBytes, prefix, paxNone) - } - } - - // The chksum field is terminated by a NUL and a space. - // This is different from the other octal fields. - chksum, _ := checksum(header) - f.formatOctal(header[148:155], chksum) // Never fails - header[155] = ' ' - - // Check if there were any formatting errors. - if f.err != nil { - tw.err = f.err + if _, tw.err = tw.w.Write(zeroBlock[:tw.pad]); tw.err != nil { return tw.err } + tw.pad = 0 + return nil +} - if allowPax { - for k, v := range hdr.Xattrs { - paxHeaders[paxXattr+k] = v +// WriteHeader writes hdr and prepares to accept the file's contents. +// The Header.Size determines how many bytes can be written for the next file. +// If the current file is not fully written, then this returns an error. +// This implicitly flushes any padding necessary before writing the header. +func (tw *Writer) WriteHeader(hdr *Header) error { + if err := tw.Flush(); err != nil { + return err + } + tw.hdr = *hdr // Shallow copy of Header + + // Avoid usage of the legacy TypeRegA flag, and automatically promote + // it to use TypeReg or TypeDir. + if tw.hdr.Typeflag == TypeRegA { + if strings.HasSuffix(tw.hdr.Name, "/") { + tw.hdr.Typeflag = TypeDir + } else { + tw.hdr.Typeflag = TypeReg } } - if len(paxHeaders) > 0 { - if !allowPax { - return errInvalidHeader + // Round ModTime and ignore AccessTime and ChangeTime unless + // the format is explicitly chosen. + // This ensures nominal usage of WriteHeader (without specifying the format) + // does not always result in the PAX format being chosen, which + // causes a 1KiB increase to every header. + if tw.hdr.Format == FormatUnknown { + tw.hdr.ModTime = tw.hdr.ModTime.Round(time.Second) + tw.hdr.AccessTime = time.Time{} + tw.hdr.ChangeTime = time.Time{} + } + + allowedFormats, paxHdrs, err := tw.hdr.allowedFormats() + switch { + case allowedFormats.has(FormatUSTAR): + tw.err = tw.writeUSTARHeader(&tw.hdr) + return tw.err + case allowedFormats.has(FormatPAX): + tw.err = tw.writePAXHeader(&tw.hdr, paxHdrs) + return tw.err + case allowedFormats.has(FormatGNU): + tw.err = tw.writeGNUHeader(&tw.hdr) + return tw.err + default: + return err // Non-fatal error + } +} + +func (tw *Writer) writeUSTARHeader(hdr *Header) error { + // Check if we can use USTAR prefix/suffix splitting. + var namePrefix string + if prefix, suffix, ok := splitUSTARPath(hdr.Name); ok { + namePrefix, hdr.Name = prefix, suffix + } + + // Pack the main header. + var f formatter + blk := tw.templateV7Plus(hdr, f.formatString, f.formatOctal) + f.formatString(blk.USTAR().Prefix(), namePrefix) + blk.SetFormat(FormatUSTAR) + if f.err != nil { + return f.err // Should never happen since header is validated + } + return tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag) +} + +func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error { + realName, realSize := hdr.Name, hdr.Size + + // TODO(dsnet): Re-enable this when adding sparse support. + // See https://golang.org/issue/22735 + /* + // Handle sparse files. + var spd sparseDatas + var spb []byte + if len(hdr.SparseHoles) > 0 { + sph := append([]sparseEntry{}, hdr.SparseHoles...) // Copy sparse map + sph = alignSparseEntries(sph, hdr.Size) + spd = invertSparseEntries(sph, hdr.Size) + + // Format the sparse map. + hdr.Size = 0 // Replace with encoded size + spb = append(strconv.AppendInt(spb, int64(len(spd)), 10), '\n') + for _, s := range spd { + hdr.Size += s.Length + spb = append(strconv.AppendInt(spb, s.Offset, 10), '\n') + spb = append(strconv.AppendInt(spb, s.Length, 10), '\n') + } + pad := blockPadding(int64(len(spb))) + spb = append(spb, zeroBlock[:pad]...) + hdr.Size += int64(len(spb)) // Accounts for encoded sparse map + + // Add and modify appropriate PAX records. + dir, file := path.Split(realName) + hdr.Name = path.Join(dir, "GNUSparseFile.0", file) + paxHdrs[paxGNUSparseMajor] = "1" + paxHdrs[paxGNUSparseMinor] = "0" + paxHdrs[paxGNUSparseName] = realName + paxHdrs[paxGNUSparseRealSize] = strconv.FormatInt(realSize, 10) + paxHdrs[paxSize] = strconv.FormatInt(hdr.Size, 10) + delete(paxHdrs, paxPath) // Recorded by paxGNUSparseName } - if err := tw.writePAXHeader(hdr, paxHeaders); err != nil { + */ + _ = realSize + + // Write PAX records to the output. + isGlobal := hdr.Typeflag == TypeXGlobalHeader + if len(paxHdrs) > 0 || isGlobal { + // Sort keys for deterministic ordering. + var keys []string + for k := range paxHdrs { + keys = append(keys, k) + } + sort.Strings(keys) + + // Write each record to a buffer. + var buf strings.Builder + for _, k := range keys { + rec, err := formatPAXRecord(k, paxHdrs[k]) + if err != nil { + return err + } + buf.WriteString(rec) + } + + // Write the extended header file. + var name string + var flag byte + if isGlobal { + name = realName + if name == "" { + name = "GlobalHead.0.0" + } + flag = TypeXGlobalHeader + } else { + dir, file := path.Split(realName) + name = path.Join(dir, "PaxHeaders.0", file) + flag = TypeXHeader + } + data := buf.String() + if err := tw.writeRawFile(name, data, flag, FormatPAX); err != nil || isGlobal { + return err // Global headers return here + } + } + + // Pack the main header. + var f formatter // Ignore errors since they are expected + fmtStr := func(b []byte, s string) { f.formatString(b, toASCII(s)) } + blk := tw.templateV7Plus(hdr, fmtStr, f.formatOctal) + blk.SetFormat(FormatPAX) + if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil { + return err + } + + // TODO(dsnet): Re-enable this when adding sparse support. + // See https://golang.org/issue/22735 + /* + // Write the sparse map and setup the sparse writer if necessary. + if len(spd) > 0 { + // Use tw.curr since the sparse map is accounted for in hdr.Size. + if _, err := tw.curr.Write(spb); err != nil { + return err + } + tw.curr = &sparseFileWriter{tw.curr, spd, 0} + } + */ + return nil +} + +func (tw *Writer) writeGNUHeader(hdr *Header) error { + // Use long-link files if Name or Linkname exceeds the field size. + const longName = "././@LongLink" + if len(hdr.Name) > nameSize { + data := hdr.Name + "\x00" + if err := tw.writeRawFile(longName, data, TypeGNULongName, FormatGNU); err != nil { + return err + } + } + if len(hdr.Linkname) > nameSize { + data := hdr.Linkname + "\x00" + if err := tw.writeRawFile(longName, data, TypeGNULongLink, FormatGNU); err != nil { return err } } - tw.nb = int64(hdr.Size) - tw.pad = (blockSize - (tw.nb % blockSize)) % blockSize - _, tw.err = tw.w.Write(header) - return tw.err + // Pack the main header. + var f formatter // Ignore errors since they are expected + var spd sparseDatas + var spb []byte + blk := tw.templateV7Plus(hdr, f.formatString, f.formatNumeric) + if !hdr.AccessTime.IsZero() { + f.formatNumeric(blk.GNU().AccessTime(), hdr.AccessTime.Unix()) + } + if !hdr.ChangeTime.IsZero() { + f.formatNumeric(blk.GNU().ChangeTime(), hdr.ChangeTime.Unix()) + } + // TODO(dsnet): Re-enable this when adding sparse support. + // See https://golang.org/issue/22735 + /* + if hdr.Typeflag == TypeGNUSparse { + sph := append([]sparseEntry{}, hdr.SparseHoles...) // Copy sparse map + sph = alignSparseEntries(sph, hdr.Size) + spd = invertSparseEntries(sph, hdr.Size) + + // Format the sparse map. + formatSPD := func(sp sparseDatas, sa sparseArray) sparseDatas { + for i := 0; len(sp) > 0 && i < sa.MaxEntries(); i++ { + f.formatNumeric(sa.Entry(i).Offset(), sp[0].Offset) + f.formatNumeric(sa.Entry(i).Length(), sp[0].Length) + sp = sp[1:] + } + if len(sp) > 0 { + sa.IsExtended()[0] = 1 + } + return sp + } + sp2 := formatSPD(spd, blk.GNU().Sparse()) + for len(sp2) > 0 { + var spHdr block + sp2 = formatSPD(sp2, spHdr.Sparse()) + spb = append(spb, spHdr[:]...) + } + + // Update size fields in the header block. + realSize := hdr.Size + hdr.Size = 0 // Encoded size; does not account for encoded sparse map + for _, s := range spd { + hdr.Size += s.Length + } + copy(blk.V7().Size(), zeroBlock[:]) // Reset field + f.formatNumeric(blk.V7().Size(), hdr.Size) + f.formatNumeric(blk.GNU().RealSize(), realSize) + } + */ + blk.SetFormat(FormatGNU) + if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil { + return err + } + + // Write the extended sparse map and setup the sparse writer if necessary. + if len(spd) > 0 { + // Use tw.w since the sparse map is not accounted for in hdr.Size. + if _, err := tw.w.Write(spb); err != nil { + return err + } + tw.curr = &sparseFileWriter{tw.curr, spd, 0} + } + return nil +} + +type ( + stringFormatter func([]byte, string) + numberFormatter func([]byte, int64) +) + +// templateV7Plus fills out the V7 fields of a block using values from hdr. +// It also fills out fields (uname, gname, devmajor, devminor) that are +// shared in the USTAR, PAX, and GNU formats using the provided formatters. +// +// The block returned is only valid until the next call to +// templateV7Plus or writeRawFile. +func (tw *Writer) templateV7Plus(hdr *Header, fmtStr stringFormatter, fmtNum numberFormatter) *block { + tw.blk.Reset() + + modTime := hdr.ModTime + if modTime.IsZero() { + modTime = time.Unix(0, 0) + } + + v7 := tw.blk.V7() + v7.TypeFlag()[0] = hdr.Typeflag + fmtStr(v7.Name(), hdr.Name) + fmtStr(v7.LinkName(), hdr.Linkname) + fmtNum(v7.Mode(), hdr.Mode) + fmtNum(v7.UID(), int64(hdr.Uid)) + fmtNum(v7.GID(), int64(hdr.Gid)) + fmtNum(v7.Size(), hdr.Size) + fmtNum(v7.ModTime(), modTime.Unix()) + + ustar := tw.blk.USTAR() + fmtStr(ustar.UserName(), hdr.Uname) + fmtStr(ustar.GroupName(), hdr.Gname) + fmtNum(ustar.DevMajor(), hdr.Devmajor) + fmtNum(ustar.DevMinor(), hdr.Devminor) + + return &tw.blk +} + +// writeRawFile writes a minimal file with the given name and flag type. +// It uses format to encode the header format and will write data as the body. +// It uses default values for all of the other fields (as BSD and GNU tar does). +func (tw *Writer) writeRawFile(name, data string, flag byte, format Format) error { + tw.blk.Reset() + + // Best effort for the filename. + name = toASCII(name) + if len(name) > nameSize { + name = name[:nameSize] + } + name = strings.TrimRight(name, "/") + + var f formatter + v7 := tw.blk.V7() + v7.TypeFlag()[0] = flag + f.formatString(v7.Name(), name) + f.formatOctal(v7.Mode(), 0) + f.formatOctal(v7.UID(), 0) + f.formatOctal(v7.GID(), 0) + f.formatOctal(v7.Size(), int64(len(data))) // Must be < 8GiB + f.formatOctal(v7.ModTime(), 0) + tw.blk.SetFormat(format) + if f.err != nil { + return f.err // Only occurs if size condition is violated + } + + // Write the header and data. + if err := tw.writeRawHeader(&tw.blk, int64(len(data)), flag); err != nil { + return err + } + _, err := io.WriteString(tw, data) + return err +} + +// writeRawHeader writes the value of blk, regardless of its value. +// It sets up the Writer such that it can accept a file of the given size. +// If the flag is a special header-only flag, then the size is treated as zero. +func (tw *Writer) writeRawHeader(blk *block, size int64, flag byte) error { + if err := tw.Flush(); err != nil { + return err + } + if _, err := tw.w.Write(blk[:]); err != nil { + return err + } + if isHeaderOnlyType(flag) { + size = 0 + } + tw.curr = ®FileWriter{tw.w, size} + tw.pad = blockPadding(size) + return nil } // splitUSTARPath splits a path according to USTAR prefix and suffix rules. // If the path is not splittable, then it will return ("", "", false). func splitUSTARPath(name string) (prefix, suffix string, ok bool) { length := len(name) - if length <= fileNameSize || !isASCII(name) { + if length <= nameSize || !isASCII(name) { return "", "", false - } else if length > fileNamePrefixSize+1 { - length = fileNamePrefixSize + 1 + } else if length > prefixSize+1 { + length = prefixSize + 1 } else if name[length-1] == '/' { length-- } @@ -300,117 +415,239 @@ func splitUSTARPath(name string) (prefix, suffix string, ok bool) { i := strings.LastIndex(name[:length], "/") nlen := len(name) - i - 1 // nlen is length of suffix plen := i // plen is length of prefix - if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize { + if i <= 0 || nlen > nameSize || nlen == 0 || plen > prefixSize { return "", "", false } return name[:i], name[i+1:], true } -// writePaxHeader writes an extended pax header to the -// archive. -func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) error { - // Prepare extended header - ext := new(Header) - ext.Typeflag = TypeXHeader - // Setting ModTime is required for reader parsing to - // succeed, and seems harmless enough. - ext.ModTime = hdr.ModTime - // The spec asks that we namespace our pseudo files - // with the current pid. However, this results in differing outputs - // for identical inputs. As such, the constant 0 is now used instead. - // golang.org/issue/12358 - dir, file := path.Split(hdr.Name) - fullName := path.Join(dir, "PaxHeaders.0", file) - - ascii := toASCII(fullName) - if len(ascii) > 100 { - ascii = ascii[:100] - } - ext.Name = ascii - // Construct the body - var buf bytes.Buffer - - // Keys are sorted before writing to body to allow deterministic output. - var keys []string - for k := range paxHeaders { - keys = append(keys, k) - } - sort.Strings(keys) - - for _, k := range keys { - fmt.Fprint(&buf, formatPAXRecord(k, paxHeaders[k])) - } - - ext.Size = int64(len(buf.Bytes())) - if err := tw.writeHeader(ext, false); err != nil { - return err - } - if _, err := tw.Write(buf.Bytes()); err != nil { - return err - } - if err := tw.Flush(); err != nil { - return err - } - return nil -} - -// formatPAXRecord formats a single PAX record, prefixing it with the -// appropriate length. -func formatPAXRecord(k, v string) string { - const padding = 3 // Extra padding for ' ', '=', and '\n' - size := len(k) + len(v) + padding - size += len(strconv.Itoa(size)) - record := fmt.Sprintf("%d %s=%s\n", size, k, v) - - // Final adjustment if adding size field increased the record size. - if len(record) != size { - size = len(record) - record = fmt.Sprintf("%d %s=%s\n", size, k, v) - } - return record -} - -// Write writes to the current entry in the tar archive. +// Write writes to the current file in the tar archive. // Write returns the error ErrWriteTooLong if more than -// hdr.Size bytes are written after WriteHeader. -func (tw *Writer) Write(b []byte) (n int, err error) { - if tw.closed { - err = ErrWriteAfterClose - return +// Header.Size bytes are written after WriteHeader. +// +// Calling Write on special types like TypeLink, TypeSymlink, TypeChar, +// TypeBlock, TypeDir, and TypeFifo returns (0, ErrWriteTooLong) regardless +// of what the Header.Size claims. +func (tw *Writer) Write(b []byte) (int, error) { + if tw.err != nil { + return 0, tw.err } - overwrite := false - if int64(len(b)) > tw.nb { - b = b[0:tw.nb] - overwrite = true + n, err := tw.curr.Write(b) + if err != nil && err != ErrWriteTooLong { + tw.err = err } - n, err = tw.w.Write(b) - tw.nb -= int64(n) - if err == nil && overwrite { - err = ErrWriteTooLong - return - } - tw.err = err - return + return n, err } -// Close closes the tar archive, flushing any unwritten -// data to the underlying writer. -func (tw *Writer) Close() error { - if tw.err != nil || tw.closed { - return tw.err +// readFrom populates the content of the current file by reading from r. +// The bytes read must match the number of remaining bytes in the current file. +// +// If the current file is sparse and r is an io.ReadSeeker, +// then readFrom uses Seek to skip past holes defined in Header.SparseHoles, +// assuming that skipped regions are all NULs. +// This always reads the last byte to ensure r is the right size. +// +// TODO(dsnet): Re-export this when adding sparse file support. +// See https://golang.org/issue/22735 +func (tw *Writer) readFrom(r io.Reader) (int64, error) { + if tw.err != nil { + return 0, tw.err + } + n, err := tw.curr.ReadFrom(r) + if err != nil && err != ErrWriteTooLong { + tw.err = err + } + return n, err +} + +// Close closes the tar archive by flushing the padding, and writing the footer. +// If the current file (from a prior call to WriteHeader) is not fully written, +// then this returns an error. +func (tw *Writer) Close() error { + if tw.err == ErrWriteAfterClose { + return nil } - tw.Flush() - tw.closed = true if tw.err != nil { return tw.err } - // trailer: two zero blocks - for i := 0; i < 2; i++ { - _, tw.err = tw.w.Write(zeroBlock) - if tw.err != nil { - break + // Trailer: two zero blocks. + err := tw.Flush() + for i := 0; i < 2 && err == nil; i++ { + _, err = tw.w.Write(zeroBlock[:]) + } + + // Ensure all future actions are invalid. + tw.err = ErrWriteAfterClose + return err // Report IO errors +} + +// regFileWriter is a fileWriter for writing data to a regular file entry. +type regFileWriter struct { + w io.Writer // Underlying Writer + nb int64 // Number of remaining bytes to write +} + +func (fw *regFileWriter) Write(b []byte) (n int, err error) { + overwrite := int64(len(b)) > fw.nb + if overwrite { + b = b[:fw.nb] + } + if len(b) > 0 { + n, err = fw.w.Write(b) + fw.nb -= int64(n) + } + switch { + case err != nil: + return n, err + case overwrite: + return n, ErrWriteTooLong + default: + return n, nil + } +} + +func (fw *regFileWriter) ReadFrom(r io.Reader) (int64, error) { + return io.Copy(struct{ io.Writer }{fw}, r) +} + +func (fw regFileWriter) LogicalRemaining() int64 { + return fw.nb +} +func (fw regFileWriter) PhysicalRemaining() int64 { + return fw.nb +} + +// sparseFileWriter is a fileWriter for writing data to a sparse file entry. +type sparseFileWriter struct { + fw fileWriter // Underlying fileWriter + sp sparseDatas // Normalized list of data fragments + pos int64 // Current position in sparse file +} + +func (sw *sparseFileWriter) Write(b []byte) (n int, err error) { + overwrite := int64(len(b)) > sw.LogicalRemaining() + if overwrite { + b = b[:sw.LogicalRemaining()] + } + + b0 := b + endPos := sw.pos + int64(len(b)) + for endPos > sw.pos && err == nil { + var nf int // Bytes written in fragment + dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset() + if sw.pos < dataStart { // In a hole fragment + bf := b[:min(int64(len(b)), dataStart-sw.pos)] + nf, err = zeroWriter{}.Write(bf) + } else { // In a data fragment + bf := b[:min(int64(len(b)), dataEnd-sw.pos)] + nf, err = sw.fw.Write(bf) + } + b = b[nf:] + sw.pos += int64(nf) + if sw.pos >= dataEnd && len(sw.sp) > 1 { + sw.sp = sw.sp[1:] // Ensure last fragment always remains } } - return tw.err + + n = len(b0) - len(b) + switch { + case err == ErrWriteTooLong: + return n, errMissData // Not possible; implies bug in validation logic + case err != nil: + return n, err + case sw.LogicalRemaining() == 0 && sw.PhysicalRemaining() > 0: + return n, errUnrefData // Not possible; implies bug in validation logic + case overwrite: + return n, ErrWriteTooLong + default: + return n, nil + } +} + +func (sw *sparseFileWriter) ReadFrom(r io.Reader) (n int64, err error) { + rs, ok := r.(io.ReadSeeker) + if ok { + if _, err := rs.Seek(0, io.SeekCurrent); err != nil { + ok = false // Not all io.Seeker can really seek + } + } + if !ok { + return io.Copy(struct{ io.Writer }{sw}, r) + } + + var readLastByte bool + pos0 := sw.pos + for sw.LogicalRemaining() > 0 && !readLastByte && err == nil { + var nf int64 // Size of fragment + dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset() + if sw.pos < dataStart { // In a hole fragment + nf = dataStart - sw.pos + if sw.PhysicalRemaining() == 0 { + readLastByte = true + nf-- + } + _, err = rs.Seek(nf, io.SeekCurrent) + } else { // In a data fragment + nf = dataEnd - sw.pos + nf, err = io.CopyN(sw.fw, rs, nf) + } + sw.pos += nf + if sw.pos >= dataEnd && len(sw.sp) > 1 { + sw.sp = sw.sp[1:] // Ensure last fragment always remains + } + } + + // If the last fragment is a hole, then seek to 1-byte before EOF, and + // read a single byte to ensure the file is the right size. + if readLastByte && err == nil { + _, err = mustReadFull(rs, []byte{0}) + sw.pos++ + } + + n = sw.pos - pos0 + switch { + case err == io.EOF: + return n, io.ErrUnexpectedEOF + case err == ErrWriteTooLong: + return n, errMissData // Not possible; implies bug in validation logic + case err != nil: + return n, err + case sw.LogicalRemaining() == 0 && sw.PhysicalRemaining() > 0: + return n, errUnrefData // Not possible; implies bug in validation logic + default: + return n, ensureEOF(rs) + } +} + +func (sw sparseFileWriter) LogicalRemaining() int64 { + return sw.sp[len(sw.sp)-1].endOffset() - sw.pos +} +func (sw sparseFileWriter) PhysicalRemaining() int64 { + return sw.fw.PhysicalRemaining() +} + +// zeroWriter may only be written with NULs, otherwise it returns errWriteHole. +type zeroWriter struct{} + +func (zeroWriter) Write(b []byte) (int, error) { + for i, c := range b { + if c != 0 { + return i, errWriteHole + } + } + return len(b), nil +} + +// ensureEOF checks whether r is at EOF, reporting ErrWriteTooLong if not so. +func ensureEOF(r io.Reader) error { + n, err := tryReadFull(r, []byte{0}) + switch { + case n > 0: + return ErrWriteTooLong + case err == io.EOF: + return nil + default: + return err + } } From 42fda5fe7ef5971ea6adec8b095270b351eb2dbf Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 29 Aug 2018 13:46:46 -0700 Subject: [PATCH 3/9] pkg/filenotify: poller.Add: fix fd leaks on err In case of errors, the file descriptor is never closed. Fix it. Signed-off-by: Kir Kolyshkin (cherry picked from commit 88bcf1573ca2eaffc15da346a1651a3749567554) Upstream-commit: 7be43586af6824c1e55cb502d9d2bab45c9b4505 Component: engine --- components/engine/pkg/filenotify/poller.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/components/engine/pkg/filenotify/poller.go b/components/engine/pkg/filenotify/poller.go index 22f1897034..7a9e1785c3 100644 --- a/components/engine/pkg/filenotify/poller.go +++ b/components/engine/pkg/filenotify/poller.go @@ -54,6 +54,7 @@ func (w *filePoller) Add(name string) error { } fi, err := os.Stat(name) if err != nil { + f.Close() return err } @@ -61,6 +62,7 @@ func (w *filePoller) Add(name string) error { w.watches = make(map[string]chan struct{}) } if _, exists := w.watches[name]; exists { + f.Close() return fmt.Errorf("watch exists") } chClose := make(chan struct{}) From abbd665e30d5a7581c95fecdeb07f6c8124773b8 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 29 Aug 2018 13:50:01 -0700 Subject: [PATCH 4/9] pkg/filenotify/poller: close file asap There is no need to wait for up to 200ms in order to close the file descriptor once the chClose is received. This commit might reduce the chances for occasional "The process cannot access the file because it is being used by another process" error on Windows, where an opened file can't be removed. Signed-off-by: Kir Kolyshkin (cherry picked from commit dfbb64ea7d042d5b2bb0c1c2b88e3682b7069b10) Upstream-commit: 3a3bfcbf47e98212abfc9cfed860d9e99fc41cdc Component: engine --- components/engine/pkg/filenotify/poller.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/components/engine/pkg/filenotify/poller.go b/components/engine/pkg/filenotify/poller.go index 7a9e1785c3..8f6c310e1e 100644 --- a/components/engine/pkg/filenotify/poller.go +++ b/components/engine/pkg/filenotify/poller.go @@ -148,12 +148,11 @@ func (w *filePoller) sendErr(e error, chClose <-chan struct{}) error { func (w *filePoller) watch(f *os.File, lastFi os.FileInfo, chClose chan struct{}) { defer f.Close() for { - time.Sleep(watchWaitTime) select { + case <-time.After(watchWaitTime): case <-chClose: logrus.Debugf("watch for %s closed", f.Name()) return - default: } fi, err := os.Stat(f.Name()) From 21c28e45663511d9c2f529f2a39bc0855a9c245e Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 29 Aug 2018 22:11:42 -0700 Subject: [PATCH 5/9] pkg/filenotify/poller: fix Close() The code in Close() that removes the watches was not working, because it first sets `w.closed = true` and then calls w.close(), which starts with ``` if w.closed { return errPollerClosed } ``` Fix by setting w.closed only after calling w.remove() for all the files being watched. While at it, remove the duplicated `delete(w.watches, name)` code. Signed-off-by: Kir Kolyshkin (cherry picked from commit fffa8958d00860b4e3563327a2cc6836a12d4ba9) Upstream-commit: 4e2dbfa1af48191126b0910b9463bf94d8371886 Component: engine --- components/engine/pkg/filenotify/poller.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/components/engine/pkg/filenotify/poller.go b/components/engine/pkg/filenotify/poller.go index 8f6c310e1e..6161d4ab73 100644 --- a/components/engine/pkg/filenotify/poller.go +++ b/components/engine/pkg/filenotify/poller.go @@ -115,11 +115,10 @@ func (w *filePoller) Close() error { return nil } - w.closed = true for name := range w.watches { w.remove(name) - delete(w.watches, name) } + w.closed = true return nil } From e5cc6e7dc4e923df02151156e6ef9a73757f4fd2 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 1 Aug 2018 06:50:45 +0300 Subject: [PATCH 6/9] daemon.ContainerLogs: minor debug logging cleanup This code has many return statements, for some of them the "end logs" or "end stream" message was not printed, giving the impression that this "for" loop never ended. Make sure that "begin logs" is to be followed by "end logs". Signed-off-by: Kir Kolyshkin (cherry picked from commit 2e4c2a6bf9cb47fd07e42f9c043024ed3dbcd04d) Upstream-commit: 2b8bc86679b7153bb4ace063a858637df0f16a2e Component: engine --- components/engine/daemon/logs.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/engine/daemon/logs.go b/components/engine/daemon/logs.go index 37ca4caf63..5e5c34992e 100644 --- a/components/engine/daemon/logs.go +++ b/components/engine/daemon/logs.go @@ -118,6 +118,8 @@ func (daemon *Daemon) ContainerLogs(ctx context.Context, containerName string, c defer close(messageChan) lg.Debug("begin logs") + defer lg.Debugf("end logs (%v)", ctx.Err()) + for { select { // i do not believe as the system is currently designed any error @@ -132,14 +134,12 @@ func (daemon *Daemon) ContainerLogs(ctx context.Context, containerName string, c } return case <-ctx.Done(): - lg.Debugf("logs: end stream, ctx is done: %v", ctx.Err()) return case msg, ok := <-logs.Msg: // there is some kind of pool or ring buffer in the logger that // produces these messages, and a possible future optimization // might be to use that pool and reuse message objects if !ok { - lg.Debug("end logs") return } m := msg.AsLogMessage() // just a pointer conversion, does not copy data From a76b67642d591253ba23ff89a796f28a09111db5 Mon Sep 17 00:00:00 2001 From: Brian Goff Date: Mon, 27 Aug 2018 13:53:23 -0700 Subject: [PATCH 7/9] daemon/logger/loggerutils: add TestFollowLogsClose This test case checks that followLogs() exits once the reader is gone. Currently it does not (i.e. this test is supposed to fail) due to #37391. [kolyshkin@: test case Brian Goff, changelog and all bugs are by me] Source: https://gist.github.com/cpuguy83/e538793de18c762608358ee0eaddc197 Signed-off-by: Kir Kolyshkin (cherry picked from commit d37a11bfbab83ab42b1160f116e863daac046192) Upstream-commit: 511741735e0aa2fe68a66d99384c00d187d1a157 Component: engine --- .../daemon/logger/loggerutils/logfile_test.go | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/components/engine/daemon/logger/loggerutils/logfile_test.go b/components/engine/daemon/logger/loggerutils/logfile_test.go index 0e359db3f8..c21aa258e6 100644 --- a/components/engine/daemon/logger/loggerutils/logfile_test.go +++ b/components/engine/daemon/logger/loggerutils/logfile_test.go @@ -4,6 +4,8 @@ import ( "bufio" "context" "io" + "io/ioutil" + "os" "strings" "testing" "time" @@ -74,3 +76,44 @@ func TestTailFiles(t *testing.T) { assert.Assert(t, string(msg.Line) == "Where we're going we don't need roads.", string(msg.Line)) } } + +func TestFollowLogsClose(t *testing.T) { + lw := logger.NewLogWatcher() + + f, err := ioutil.TempFile("", t.Name()) + assert.NilError(t, err) + defer func() { + f.Close() + os.Remove(f.Name()) + }() + + makeDecoder := func(rdr io.Reader) func() (*logger.Message, error) { + return func() (*logger.Message, error) { + return &logger.Message{}, nil + } + } + + followLogsDone := make(chan struct{}) + var since, until time.Time + go func() { + followLogs(f, lw, make(chan interface{}), makeDecoder, since, until) + close(followLogsDone) + }() + + select { + case <-lw.Msg: + case err := <-lw.Err: + assert.NilError(t, err) + case <-followLogsDone: + t.Fatal("follow logs finished unexpectedly") + case <-time.After(10 * time.Second): + t.Fatal("timeout waiting for log message") + } + + lw.Close() + select { + case <-followLogsDone: + case <-time.After(20 * time.Second): + t.Fatal("timeout waiting for followLogs() to finish") + } +} From 1a333bfe59625436fa4379085c121979f437a4bc Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 1 Aug 2018 07:03:55 +0300 Subject: [PATCH 8/9] daemon.ContainerLogs(): fix resource leak on follow When daemon.ContainerLogs() is called with options.follow=true (as in "docker logs --follow"), the "loggerutils.followLogs()" function never returns (even then the logs consumer is gone). As a result, all the resources associated with it (including an opened file descriptor for the log file being read, two FDs for a pipe, and two FDs for inotify watch) are never released. If this is repeated (such as by running "docker logs --follow" and pressing Ctrl-C a few times), this results in DoS caused by either hitting the limit of inotify watches, or the limit of opened files. The only cure is daemon restart. Apparently, what happens is: 1. logs producer (a container) is gone, calling (*LogWatcher).Close() for all its readers (daemon/logger/jsonfilelog/jsonfilelog.go:175). 2. WatchClose() is properly handled by a dedicated goroutine in followLogs(), cancelling the context. 3. Upon receiving the ctx.Done(), the code in followLogs() (daemon/logger/loggerutils/logfile.go#L626-L638) keeps to send messages _synchronously_ (which is OK for now). 4. Logs consumer is gone (Ctrl-C is pressed on a terminal running "docker logs --follow"). Method (*LogWatcher).Close() is properly called (see daemon/logs.go:114). Since it was called before and due to to once.Do(), nothing happens (which is kinda good, as otherwise it will panic on closing a closed channel). 5. A goroutine (see item 3 above) keeps sending log messages synchronously to the logWatcher.Msg channel. Since the channel reader is gone, the channel send operation blocks forever, and resource cleanup set up in defer statements at the beginning of followLogs() never happens. Alas, the fix is somewhat complicated: 1. Distinguish between close from logs producer and logs consumer. To that effect, - yet another channel is added to LogWatcher(); - {Watch,}Close() are renamed to {Watch,}ProducerGone(); - {Watch,}ConsumerGone() are added; *NOTE* that ProducerGone()/WatchProducerGone() pair is ONLY needed in order to stop ConsumerLogs(follow=true) when a container is stopped; otherwise we're not interested in it. In other words, we're only using it in followLogs(). 2. Code that was doing (logWatcher*).Close() is modified to either call ProducerGone() or ConsumerGone(), depending on the context. 3. Code that was waiting for WatchClose() is modified to wait for either ConsumerGone() or ProducerGone(), or both, depending on the context. 4. followLogs() are modified accordingly: - context cancellation is happening on WatchProducerGone(), and once it's received the FileWatcher is closed and waitRead() returns errDone on EOF (i.e. log rotation handling logic is disabled); - due to this, code that was writing synchronously to logWatcher.Msg can be and is removed as the code above it handles this case; - function returns once ConsumerGone is received, freeing all the resources -- this is the bugfix itself. While at it, 1. Let's also remove the ctx usage to simplify the code a bit. It was introduced by commit a69a59ffc7e3d ("Decouple removing the fileWatcher from reading") in order to fix a bug. The bug was actually a deadlock in fsnotify, and the fix was just a workaround. Since then the fsnofify bug has been fixed, and a new fsnotify was vendored in. For more details, please see https://github.com/moby/moby/pull/27782#issuecomment-416794490 2. Since `(*filePoller).Close()` is fixed to remove all the files being watched, there is no need to explicitly call fileWatcher.Remove(name) anymore, so get rid of the extra code. Should fix https://github.com/moby/moby/issues/37391 Signed-off-by: Kir Kolyshkin (cherry picked from commit 916eabd459fe707b5c4a86377d12e2ad1871b353) Upstream-commit: 84a5b528aede5579861201e869870d10fc98c07c Component: engine --- components/engine/daemon/attach.go | 2 +- components/engine/daemon/logger/adapter.go | 16 ++----- .../engine/daemon/logger/adapter_test.go | 2 +- .../engine/daemon/logger/journald/read.go | 4 +- .../daemon/logger/jsonfilelog/jsonfilelog.go | 5 +- .../daemon/logger/jsonfilelog/read_test.go | 3 +- .../engine/daemon/logger/local/local.go | 2 +- components/engine/daemon/logger/logger.go | 47 +++++++++++++------ .../daemon/logger/loggerutils/logfile.go | 41 ++++------------ .../daemon/logger/loggerutils/logfile_test.go | 4 +- components/engine/daemon/logs.go | 4 +- 11 files changed, 58 insertions(+), 72 deletions(-) diff --git a/components/engine/daemon/attach.go b/components/engine/daemon/attach.go index fb14691d24..0e12441ad1 100644 --- a/components/engine/daemon/attach.go +++ b/components/engine/daemon/attach.go @@ -123,7 +123,7 @@ func (daemon *Daemon) containerAttach(c *container.Container, cfg *stream.Attach return logger.ErrReadLogsNotSupported{} } logs := cLog.ReadLogs(logger.ReadConfig{Tail: -1}) - defer logs.Close() + defer logs.ConsumerGone() LogLoop: for { diff --git a/components/engine/daemon/logger/adapter.go b/components/engine/daemon/logger/adapter.go index 95aff9bf3b..d9370352c5 100644 --- a/components/engine/daemon/logger/adapter.go +++ b/components/engine/daemon/logger/adapter.go @@ -93,21 +93,12 @@ func (a *pluginAdapterWithRead) ReadLogs(config ReadConfig) *LogWatcher { dec := logdriver.NewLogEntryDecoder(stream) for { - select { - case <-watcher.WatchClose(): - return - default: - } - var buf logdriver.LogEntry if err := dec.Decode(&buf); err != nil { if err == io.EOF { return } - select { - case watcher.Err <- errors.Wrap(err, "error decoding log message"): - case <-watcher.WatchClose(): - } + watcher.Err <- errors.Wrap(err, "error decoding log message") return } @@ -125,11 +116,10 @@ func (a *pluginAdapterWithRead) ReadLogs(config ReadConfig) *LogWatcher { return } + // send the message unless the consumer is gone select { case watcher.Msg <- msg: - case <-watcher.WatchClose(): - // make sure the message we consumed is sent - watcher.Msg <- msg + case <-watcher.WatchConsumerGone(): return } } diff --git a/components/engine/daemon/logger/adapter_test.go b/components/engine/daemon/logger/adapter_test.go index f47e711c89..d14a48e477 100644 --- a/components/engine/daemon/logger/adapter_test.go +++ b/components/engine/daemon/logger/adapter_test.go @@ -174,7 +174,7 @@ func TestAdapterReadLogs(t *testing.T) { t.Fatal("timeout waiting for message channel to close") } - lw.Close() + lw.ProducerGone() lw = lr.ReadLogs(ReadConfig{Follow: true}) for _, x := range testMsg { diff --git a/components/engine/daemon/logger/journald/read.go b/components/engine/daemon/logger/journald/read.go index d4bcc62d9a..cadb97f4ca 100644 --- a/components/engine/daemon/logger/journald/read.go +++ b/components/engine/daemon/logger/journald/read.go @@ -165,7 +165,7 @@ func (s *journald) Close() error { s.mu.Lock() s.closed = true for reader := range s.readers.readers { - reader.Close() + reader.ProducerGone() } s.mu.Unlock() return nil @@ -299,7 +299,7 @@ func (s *journald) followJournal(logWatcher *logger.LogWatcher, j *C.sd_journal, // Wait until we're told to stop. select { case cursor = <-newCursor: - case <-logWatcher.WatchClose(): + case <-logWatcher.WatchConsumerGone(): // Notify the other goroutine that its work is done. C.close(pfd[1]) cursor = <-newCursor diff --git a/components/engine/daemon/logger/jsonfilelog/jsonfilelog.go b/components/engine/daemon/logger/jsonfilelog/jsonfilelog.go index 3649bdf91c..476e1d834b 100644 --- a/components/engine/daemon/logger/jsonfilelog/jsonfilelog.go +++ b/components/engine/daemon/logger/jsonfilelog/jsonfilelog.go @@ -166,13 +166,14 @@ func ValidateLogOpt(cfg map[string]string) error { return nil } -// Close closes underlying file and signals all readers to stop. +// Close closes underlying file and signals all the readers +// that the logs producer is gone. func (l *JSONFileLogger) Close() error { l.mu.Lock() l.closed = true err := l.writer.Close() for r := range l.readers { - r.Close() + r.ProducerGone() delete(l.readers, r) } l.mu.Unlock() diff --git a/components/engine/daemon/logger/jsonfilelog/read_test.go b/components/engine/daemon/logger/jsonfilelog/read_test.go index 6ce4936e0e..cfa8694b19 100644 --- a/components/engine/daemon/logger/jsonfilelog/read_test.go +++ b/components/engine/daemon/logger/jsonfilelog/read_test.go @@ -50,11 +50,10 @@ func BenchmarkJSONFileLoggerReadLogs(b *testing.B) { }() lw := jsonlogger.(*JSONFileLogger).ReadLogs(logger.ReadConfig{Follow: true}) - watchClose := lw.WatchClose() for { select { case <-lw.Msg: - case <-watchClose: + case <-lw.WatchProducerGone(): return case err := <-chError: if err != nil { diff --git a/components/engine/daemon/logger/local/local.go b/components/engine/daemon/logger/local/local.go index 86c55784d4..ba4aa096f7 100644 --- a/components/engine/daemon/logger/local/local.go +++ b/components/engine/daemon/logger/local/local.go @@ -166,7 +166,7 @@ func (d *driver) Close() error { d.closed = true err := d.logfile.Close() for r := range d.readers { - r.Close() + r.ProducerGone() delete(d.readers, r) } d.mu.Unlock() diff --git a/components/engine/daemon/logger/logger.go b/components/engine/daemon/logger/logger.go index 912e855c7f..12e8d0054e 100644 --- a/components/engine/daemon/logger/logger.go +++ b/components/engine/daemon/logger/logger.go @@ -104,33 +104,50 @@ type LogWatcher struct { // For sending log messages to a reader. Msg chan *Message // For sending error messages that occur while while reading logs. - Err chan error - closeOnce sync.Once - closeNotifier chan struct{} + Err chan error + producerOnce sync.Once + producerGone chan struct{} + consumerOnce sync.Once + consumerGone chan struct{} } // NewLogWatcher returns a new LogWatcher. func NewLogWatcher() *LogWatcher { return &LogWatcher{ - Msg: make(chan *Message, logWatcherBufferSize), - Err: make(chan error, 1), - closeNotifier: make(chan struct{}), + Msg: make(chan *Message, logWatcherBufferSize), + Err: make(chan error, 1), + producerGone: make(chan struct{}), + consumerGone: make(chan struct{}), } } -// Close notifies the underlying log reader to stop. -func (w *LogWatcher) Close() { +// ProducerGone notifies the underlying log reader that +// the logs producer (a container) is gone. +func (w *LogWatcher) ProducerGone() { // only close if not already closed - w.closeOnce.Do(func() { - close(w.closeNotifier) + w.producerOnce.Do(func() { + close(w.producerGone) }) } -// WatchClose returns a channel receiver that receives notification -// when the watcher has been closed. This should only be called from -// one goroutine. -func (w *LogWatcher) WatchClose() <-chan struct{} { - return w.closeNotifier +// WatchProducerGone returns a channel receiver that receives notification +// once the logs producer (a container) is gone. +func (w *LogWatcher) WatchProducerGone() <-chan struct{} { + return w.producerGone +} + +// ConsumerGone notifies that the logs consumer is gone. +func (w *LogWatcher) ConsumerGone() { + // only close if not already closed + w.consumerOnce.Do(func() { + close(w.consumerGone) + }) +} + +// WatchConsumerGone returns a channel receiver that receives notification +// when the log watcher consumer is gone. +func (w *LogWatcher) WatchConsumerGone() <-chan struct{} { + return w.consumerGone } // Capability defines the list of capabilities that a driver can implement diff --git a/components/engine/daemon/logger/loggerutils/logfile.go b/components/engine/daemon/logger/loggerutils/logfile.go index 25be44aa49..623f78f5e7 100644 --- a/components/engine/daemon/logger/loggerutils/logfile.go +++ b/components/engine/daemon/logger/loggerutils/logfile.go @@ -488,7 +488,7 @@ func tailFiles(files []SizeReaderAt, watcher *logger.LogWatcher, createDecoder m go func() { select { case <-ctx.Done(): - case <-watcher.WatchClose(): + case <-watcher.WatchConsumerGone(): cancel() } }() @@ -546,22 +546,9 @@ func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan int } defer func() { f.Close() - fileWatcher.Remove(name) fileWatcher.Close() }() - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - go func() { - select { - case <-logWatcher.WatchClose(): - fileWatcher.Remove(name) - cancel() - case <-ctx.Done(): - return - } - }() - var retries int handleRotate := func() error { f.Close() @@ -596,7 +583,9 @@ func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan int case fsnotify.Rename, fsnotify.Remove: select { case <-notifyRotate: - case <-ctx.Done(): + case <-logWatcher.WatchProducerGone(): + return errDone + case <-logWatcher.WatchConsumerGone(): return errDone } if err := handleRotate(); err != nil { @@ -618,7 +607,9 @@ func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan int return errRetry } return err - case <-ctx.Done(): + case <-logWatcher.WatchProducerGone(): + return errDone + case <-logWatcher.WatchConsumerGone(): return errDone } } @@ -664,23 +655,11 @@ func followLogs(f *os.File, logWatcher *logger.LogWatcher, notifyRotate chan int if !until.IsZero() && msg.Timestamp.After(until) { return } + // send the message, unless the consumer is gone select { case logWatcher.Msg <- msg: - case <-ctx.Done(): - logWatcher.Msg <- msg - for { - msg, err := decodeLogLine() - if err != nil { - return - } - if !since.IsZero() && msg.Timestamp.Before(since) { - continue - } - if !until.IsZero() && msg.Timestamp.After(until) { - return - } - logWatcher.Msg <- msg - } + case <-logWatcher.WatchConsumerGone(): + return } } } diff --git a/components/engine/daemon/logger/loggerutils/logfile_test.go b/components/engine/daemon/logger/loggerutils/logfile_test.go index c21aa258e6..037592a3f2 100644 --- a/components/engine/daemon/logger/loggerutils/logfile_test.go +++ b/components/engine/daemon/logger/loggerutils/logfile_test.go @@ -77,7 +77,7 @@ func TestTailFiles(t *testing.T) { } } -func TestFollowLogsClose(t *testing.T) { +func TestFollowLogsConsumerGone(t *testing.T) { lw := logger.NewLogWatcher() f, err := ioutil.TempFile("", t.Name()) @@ -110,7 +110,7 @@ func TestFollowLogsClose(t *testing.T) { t.Fatal("timeout waiting for log message") } - lw.Close() + lw.ConsumerGone() select { case <-followLogsDone: case <-time.After(20 * time.Second): diff --git a/components/engine/daemon/logs.go b/components/engine/daemon/logs.go index 5e5c34992e..668a75c778 100644 --- a/components/engine/daemon/logs.go +++ b/components/engine/daemon/logs.go @@ -110,8 +110,8 @@ func (daemon *Daemon) ContainerLogs(ctx context.Context, containerName string, c } }() } - // set up some defers - defer logs.Close() + // signal that the log reader is gone + defer logs.ConsumerGone() // close the messages channel. closing is the only way to signal above // that we're doing with logs (other than context cancel i guess). From a620951919ca95f0f5a67b49b7d8d284a5aadc45 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 28 Aug 2018 19:58:47 -0700 Subject: [PATCH 9/9] TestFollowLogsProducerGone: add This should test that - all the messages produced are delivered (i.e. not lost) - followLogs() exits Loosely based on the test having the same name by Brian Goff, see https://gist.github.com/cpuguy83/e538793de18c762608358ee0eaddc197 Signed-off-by: Kir Kolyshkin (cherry picked from commit f845d76d047760c91dc0c7076aea840291fbdbc5) Upstream-commit: 2a82480df9ad91593d59be4b5283917dbea2da39 Component: engine --- .../daemon/logger/loggerutils/logfile_test.go | 84 +++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/components/engine/daemon/logger/loggerutils/logfile_test.go b/components/engine/daemon/logger/loggerutils/logfile_test.go index 037592a3f2..e3e63210fc 100644 --- a/components/engine/daemon/logger/loggerutils/logfile_test.go +++ b/components/engine/daemon/logger/loggerutils/logfile_test.go @@ -117,3 +117,87 @@ func TestFollowLogsConsumerGone(t *testing.T) { t.Fatal("timeout waiting for followLogs() to finish") } } + +func TestFollowLogsProducerGone(t *testing.T) { + lw := logger.NewLogWatcher() + + f, err := ioutil.TempFile("", t.Name()) + assert.NilError(t, err) + defer os.Remove(f.Name()) + + var sent, received, closed int + makeDecoder := func(rdr io.Reader) func() (*logger.Message, error) { + return func() (*logger.Message, error) { + if closed == 1 { + closed++ + t.Logf("logDecode() closed after sending %d messages\n", sent) + return nil, io.EOF + } else if closed > 1 { + t.Fatal("logDecode() called after closing!") + return nil, io.EOF + } + sent++ + return &logger.Message{}, nil + } + } + var since, until time.Time + + followLogsDone := make(chan struct{}) + go func() { + followLogs(f, lw, make(chan interface{}), makeDecoder, since, until) + close(followLogsDone) + }() + + // read 1 message + select { + case <-lw.Msg: + received++ + case err := <-lw.Err: + assert.NilError(t, err) + case <-followLogsDone: + t.Fatal("followLogs() finished unexpectedly") + case <-time.After(10 * time.Second): + t.Fatal("timeout waiting for log message") + } + + // "stop" the "container" + closed = 1 + lw.ProducerGone() + + // should receive all the messages sent + readDone := make(chan struct{}) + go func() { + defer close(readDone) + for { + select { + case <-lw.Msg: + received++ + if received == sent { + return + } + case err := <-lw.Err: + assert.NilError(t, err) + } + } + }() + select { + case <-readDone: + case <-time.After(30 * time.Second): + t.Fatalf("timeout waiting for log messages to be read (sent: %d, received: %d", sent, received) + } + + t.Logf("messages sent: %d, received: %d", sent, received) + + // followLogs() should be done by now + select { + case <-followLogsDone: + case <-time.After(30 * time.Second): + t.Fatal("timeout waiting for followLogs() to finish") + } + + select { + case <-lw.WatchConsumerGone(): + t.Fatal("consumer should not have exited") + default: + } +}