forked from toolshed/abra
		
	
		
			
				
	
	
		
			475 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			475 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package patternmatcher
 | |
| 
 | |
| import (
 | |
| 	"errors"
 | |
| 	"os"
 | |
| 	"path/filepath"
 | |
| 	"regexp"
 | |
| 	"strings"
 | |
| 	"text/scanner"
 | |
| 	"unicode/utf8"
 | |
| )
 | |
| 
 | |
// escapeBytes is a bitmap used to check whether a character should be escaped when creating the regex.
//
// It uses the same layout as the specialBytes table in the standard regexp
// package: an ASCII byte b is recorded as bit b/16 of element b%16. Sixteen
// elements of eight bits each cover exactly the 128 ASCII code points. (A
// narrower [8]byte table indexed with b%8 and b/8 would need bit positions up
// to 15, which silently overflow a byte and drop '|', '{' and '}'.)
var escapeBytes [16]byte

// shouldEscape reports whether a rune should be escaped as part of the regex.
//
// This only includes characters that require escaping in regex but are also NOT valid filepath pattern characters.
// Additionally, '\' is not excluded because there is specific logic to properly handle this, as it's a path separator
// on Windows.
//
// Negative and non-ASCII runes are never escaped.
//
// Adapted from regexp::QuoteMeta in go stdlib.
// See https://cs.opensource.google/go/go/+/refs/tags/go1.17.2:src/regexp/regexp.go;l=703-715;drc=refs%2Ftags%2Fgo1.17.2
func shouldEscape(b rune) bool {
	// The range check must come first: it keeps the index and the shift
	// count below within bounds.
	if b < 0 || b >= utf8.RuneSelf {
		return false
	}
	return escapeBytes[b%16]&(1<<(b/16)) != 0
}

// init populates escapeBytes with the regex metacharacters that have no
// special meaning in a filepath pattern.
func init() {
	for _, b := range []byte(`.+()|{}$`) {
		escapeBytes[b%16] |= 1 << (b / 16)
	}
}
 | |
| 
 | |
// PatternMatcher allows checking paths against a list of patterns.
//
// patterns holds one entry per non-blank pattern handed to New, in the order
// given. exclusions is set by New as soon as any pattern starts with '!'.
type PatternMatcher struct {
	patterns   []*Pattern
	exclusions bool
}
 | |
| 
 | |
| // New creates a new matcher object for specific patterns that can
 | |
| // be used later to match against patterns against paths
 | |
| func New(patterns []string) (*PatternMatcher, error) {
 | |
| 	pm := &PatternMatcher{
 | |
| 		patterns: make([]*Pattern, 0, len(patterns)),
 | |
| 	}
 | |
| 	for _, p := range patterns {
 | |
| 		// Eliminate leading and trailing whitespace.
 | |
| 		p = strings.TrimSpace(p)
 | |
| 		if p == "" {
 | |
| 			continue
 | |
| 		}
 | |
| 		p = filepath.Clean(p)
 | |
| 		newp := &Pattern{}
 | |
| 		if p[0] == '!' {
 | |
| 			if len(p) == 1 {
 | |
| 				return nil, errors.New("illegal exclusion pattern: \"!\"")
 | |
| 			}
 | |
| 			newp.exclusion = true
 | |
| 			p = p[1:]
 | |
| 			pm.exclusions = true
 | |
| 		}
 | |
| 		// Do some syntax checking on the pattern.
 | |
| 		// filepath's Match() has some really weird rules that are inconsistent
 | |
| 		// so instead of trying to dup their logic, just call Match() for its
 | |
| 		// error state and if there is an error in the pattern return it.
 | |
| 		// If this becomes an issue we can remove this since its really only
 | |
| 		// needed in the error (syntax) case - which isn't really critical.
 | |
| 		if _, err := filepath.Match(p, "."); err != nil {
 | |
| 			return nil, err
 | |
| 		}
 | |
| 		newp.cleanedPattern = p
 | |
| 		newp.dirs = strings.Split(p, string(os.PathSeparator))
 | |
| 		pm.patterns = append(pm.patterns, newp)
 | |
| 	}
 | |
| 	return pm, nil
 | |
| }
 | |
| 
 | |
| // Matches returns true if "file" matches any of the patterns
 | |
| // and isn't excluded by any of the subsequent patterns.
 | |
| //
 | |
| // The "file" argument should be a slash-delimited path.
 | |
| //
 | |
| // Matches is not safe to call concurrently.
 | |
| //
 | |
| // Deprecated: This implementation is buggy (it only checks a single parent dir
 | |
| // against the pattern) and will be removed soon. Use either
 | |
| // MatchesOrParentMatches or MatchesUsingParentResults instead.
 | |
| func (pm *PatternMatcher) Matches(file string) (bool, error) {
 | |
| 	matched := false
 | |
| 	file = filepath.FromSlash(file)
 | |
| 	parentPath := filepath.Dir(file)
 | |
| 	parentPathDirs := strings.Split(parentPath, string(os.PathSeparator))
 | |
| 
 | |
| 	for _, pattern := range pm.patterns {
 | |
| 		// Skip evaluation if this is an inclusion and the filename
 | |
| 		// already matched the pattern, or it's an exclusion and it has
 | |
| 		// not matched the pattern yet.
 | |
| 		if pattern.exclusion != matched {
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		match, err := pattern.match(file)
 | |
| 		if err != nil {
 | |
| 			return false, err
 | |
| 		}
 | |
| 
 | |
| 		if !match && parentPath != "." {
 | |
| 			// Check to see if the pattern matches one of our parent dirs.
 | |
| 			if len(pattern.dirs) <= len(parentPathDirs) {
 | |
| 				match, _ = pattern.match(strings.Join(parentPathDirs[:len(pattern.dirs)], string(os.PathSeparator)))
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		if match {
 | |
| 			matched = !pattern.exclusion
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return matched, nil
 | |
| }
 | |
| 
 | |
| // MatchesOrParentMatches returns true if "file" matches any of the patterns
 | |
| // and isn't excluded by any of the subsequent patterns.
 | |
| //
 | |
| // The "file" argument should be a slash-delimited path.
 | |
| //
 | |
| // Matches is not safe to call concurrently.
 | |
| func (pm *PatternMatcher) MatchesOrParentMatches(file string) (bool, error) {
 | |
| 	matched := false
 | |
| 	file = filepath.FromSlash(file)
 | |
| 	parentPath := filepath.Dir(file)
 | |
| 	parentPathDirs := strings.Split(parentPath, string(os.PathSeparator))
 | |
| 
 | |
| 	for _, pattern := range pm.patterns {
 | |
| 		// Skip evaluation if this is an inclusion and the filename
 | |
| 		// already matched the pattern, or it's an exclusion and it has
 | |
| 		// not matched the pattern yet.
 | |
| 		if pattern.exclusion != matched {
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		match, err := pattern.match(file)
 | |
| 		if err != nil {
 | |
| 			return false, err
 | |
| 		}
 | |
| 
 | |
| 		if !match && parentPath != "." {
 | |
| 			// Check to see if the pattern matches one of our parent dirs.
 | |
| 			for i := range parentPathDirs {
 | |
| 				match, _ = pattern.match(strings.Join(parentPathDirs[:i+1], string(os.PathSeparator)))
 | |
| 				if match {
 | |
| 					break
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		if match {
 | |
| 			matched = !pattern.exclusion
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return matched, nil
 | |
| }
 | |
| 
 | |
| // MatchesUsingParentResult returns true if "file" matches any of the patterns
 | |
| // and isn't excluded by any of the subsequent patterns. The functionality is
 | |
| // the same as Matches, but as an optimization, the caller keeps track of
 | |
| // whether the parent directory matched.
 | |
| //
 | |
| // The "file" argument should be a slash-delimited path.
 | |
| //
 | |
| // MatchesUsingParentResult is not safe to call concurrently.
 | |
| //
 | |
| // Deprecated: this function does behave correctly in some cases (see
 | |
| // https://github.com/docker/buildx/issues/850).
 | |
| //
 | |
| // Use MatchesUsingParentResults instead.
 | |
| func (pm *PatternMatcher) MatchesUsingParentResult(file string, parentMatched bool) (bool, error) {
 | |
| 	matched := parentMatched
 | |
| 	file = filepath.FromSlash(file)
 | |
| 
 | |
| 	for _, pattern := range pm.patterns {
 | |
| 		// Skip evaluation if this is an inclusion and the filename
 | |
| 		// already matched the pattern, or it's an exclusion and it has
 | |
| 		// not matched the pattern yet.
 | |
| 		if pattern.exclusion != matched {
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		match, err := pattern.match(file)
 | |
| 		if err != nil {
 | |
| 			return false, err
 | |
| 		}
 | |
| 
 | |
| 		if match {
 | |
| 			matched = !pattern.exclusion
 | |
| 		}
 | |
| 	}
 | |
| 	return matched, nil
 | |
| }
 | |
| 
 | |
// MatchInfo tracks information about parent dir matches while traversing a
// filesystem.
//
// parentMatched, when non-empty, holds one entry per pattern of the
// PatternMatcher that produced it. The zero value carries no parent
// information.
type MatchInfo struct {
	parentMatched []bool
}
 | |
| 
 | |
| // MatchesUsingParentResults returns true if "file" matches any of the patterns
 | |
| // and isn't excluded by any of the subsequent patterns. The functionality is
 | |
| // the same as Matches, but as an optimization, the caller passes in
 | |
| // intermediate results from matching the parent directory.
 | |
| //
 | |
| // The "file" argument should be a slash-delimited path.
 | |
| //
 | |
| // MatchesUsingParentResults is not safe to call concurrently.
 | |
| func (pm *PatternMatcher) MatchesUsingParentResults(file string, parentMatchInfo MatchInfo) (bool, MatchInfo, error) {
 | |
| 	parentMatched := parentMatchInfo.parentMatched
 | |
| 	if len(parentMatched) != 0 && len(parentMatched) != len(pm.patterns) {
 | |
| 		return false, MatchInfo{}, errors.New("wrong number of values in parentMatched")
 | |
| 	}
 | |
| 
 | |
| 	file = filepath.FromSlash(file)
 | |
| 	matched := false
 | |
| 
 | |
| 	matchInfo := MatchInfo{
 | |
| 		parentMatched: make([]bool, len(pm.patterns)),
 | |
| 	}
 | |
| 	for i, pattern := range pm.patterns {
 | |
| 		match := false
 | |
| 		// If the parent matched this pattern, we don't need to recheck.
 | |
| 		if len(parentMatched) != 0 {
 | |
| 			match = parentMatched[i]
 | |
| 		}
 | |
| 
 | |
| 		if !match {
 | |
| 			// Skip evaluation if this is an inclusion and the filename
 | |
| 			// already matched the pattern, or it's an exclusion and it has
 | |
| 			// not matched the pattern yet.
 | |
| 			if pattern.exclusion != matched {
 | |
| 				continue
 | |
| 			}
 | |
| 
 | |
| 			var err error
 | |
| 			match, err = pattern.match(file)
 | |
| 			if err != nil {
 | |
| 				return false, matchInfo, err
 | |
| 			}
 | |
| 
 | |
| 			// If the zero value of MatchInfo was passed in, we don't have
 | |
| 			// any information about the parent dir's match results, and we
 | |
| 			// apply the same logic as MatchesOrParentMatches.
 | |
| 			if !match && len(parentMatched) == 0 {
 | |
| 				if parentPath := filepath.Dir(file); parentPath != "." {
 | |
| 					parentPathDirs := strings.Split(parentPath, string(os.PathSeparator))
 | |
| 					// Check to see if the pattern matches one of our parent dirs.
 | |
| 					for i := range parentPathDirs {
 | |
| 						match, _ = pattern.match(strings.Join(parentPathDirs[:i+1], string(os.PathSeparator)))
 | |
| 						if match {
 | |
| 							break
 | |
| 						}
 | |
| 					}
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 		matchInfo.parentMatched[i] = match
 | |
| 
 | |
| 		if match {
 | |
| 			matched = !pattern.exclusion
 | |
| 		}
 | |
| 	}
 | |
| 	return matched, matchInfo, nil
 | |
| }
 | |
| 
 | |
// Exclusions returns true if any of the patterns define exclusions, that is,
// if at least one pattern passed to New started with '!'.
func (pm *PatternMatcher) Exclusions() bool {
	return pm.exclusions
}
 | |
| 
 | |
// Patterns returns the active patterns, in the order they were given to New.
//
// The matcher's own slice is returned, not a copy.
func (pm *PatternMatcher) Patterns() []*Pattern {
	return pm.patterns
}
 | |
| 
 | |
// Pattern defines a single pattern used to filter file paths.
//
// matchType records how the pattern is evaluated; it stays unknownMatch until
// the pattern is compiled on first use. cleanedPattern is the pattern text
// after filepath.Clean with any leading '!' removed, and dirs is that text
// split on the OS path separator. regexp is assigned only when the pattern
// has to be evaluated as a regular expression. exclusion is true for patterns
// that were written with a leading '!'.
type Pattern struct {
	matchType      matchType
	cleanedPattern string
	dirs           []string
	regexp         *regexp.Regexp
	exclusion      bool
}
 | |
| 
 | |
// matchType identifies the strategy used to evaluate a Pattern.
type matchType int

const (
	// unknownMatch is the zero value: the pattern has not been compiled yet.
	unknownMatch matchType = iota
	// exactMatch compares the path with the pattern text for equality.
	exactMatch
	// prefixMatch is used for patterns ending in "**"; the path must start
	// with the text before the "**".
	prefixMatch
	// suffixMatch is used for patterns starting with "**"; the path must end
	// with the text after the "**".
	suffixMatch
	// regexpMatch evaluates the path against a compiled regular expression.
	regexpMatch
)
 | |
| 
 | |
// String returns the cleaned pattern text. For exclusion patterns the
// leading '!' is not part of the returned string.
func (p *Pattern) String() string {
	return p.cleanedPattern
}
 | |
| 
 | |
// Exclusion returns true if this pattern defines an exclusion, that is, if
// it was written with a leading '!'.
func (p *Pattern) Exclusion() bool {
	return p.exclusion
}
 | |
| 
 | |
| func (p *Pattern) match(path string) (bool, error) {
 | |
| 	if p.matchType == unknownMatch {
 | |
| 		if err := p.compile(string(os.PathSeparator)); err != nil {
 | |
| 			return false, filepath.ErrBadPattern
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	switch p.matchType {
 | |
| 	case exactMatch:
 | |
| 		return path == p.cleanedPattern, nil
 | |
| 	case prefixMatch:
 | |
| 		// strip trailing **
 | |
| 		return strings.HasPrefix(path, p.cleanedPattern[:len(p.cleanedPattern)-2]), nil
 | |
| 	case suffixMatch:
 | |
| 		// strip leading **
 | |
| 		suffix := p.cleanedPattern[2:]
 | |
| 		if strings.HasSuffix(path, suffix) {
 | |
| 			return true, nil
 | |
| 		}
 | |
| 		// **/foo matches "foo"
 | |
| 		return suffix[0] == os.PathSeparator && path == suffix[1:], nil
 | |
| 	case regexpMatch:
 | |
| 		return p.regexp.MatchString(path), nil
 | |
| 	}
 | |
| 
 | |
| 	return false, nil
 | |
| }
 | |
| 
 | |
| func (p *Pattern) compile(sl string) error {
 | |
| 	regStr := "^"
 | |
| 	pattern := p.cleanedPattern
 | |
| 	// Go through the pattern and convert it to a regexp.
 | |
| 	// We use a scanner so we can support utf-8 chars.
 | |
| 	var scan scanner.Scanner
 | |
| 	scan.Init(strings.NewReader(pattern))
 | |
| 
 | |
| 	escSL := sl
 | |
| 	if sl == `\` {
 | |
| 		escSL += `\`
 | |
| 	}
 | |
| 
 | |
| 	p.matchType = exactMatch
 | |
| 	for i := 0; scan.Peek() != scanner.EOF; i++ {
 | |
| 		ch := scan.Next()
 | |
| 
 | |
| 		if ch == '*' {
 | |
| 			if scan.Peek() == '*' {
 | |
| 				// is some flavor of "**"
 | |
| 				scan.Next()
 | |
| 
 | |
| 				// Treat **/ as ** so eat the "/"
 | |
| 				if string(scan.Peek()) == sl {
 | |
| 					scan.Next()
 | |
| 				}
 | |
| 
 | |
| 				if scan.Peek() == scanner.EOF {
 | |
| 					// is "**EOF" - to align with .gitignore just accept all
 | |
| 					if p.matchType == exactMatch {
 | |
| 						p.matchType = prefixMatch
 | |
| 					} else {
 | |
| 						regStr += ".*"
 | |
| 						p.matchType = regexpMatch
 | |
| 					}
 | |
| 				} else {
 | |
| 					// is "**"
 | |
| 					// Note that this allows for any # of /'s (even 0) because
 | |
| 					// the .* will eat everything, even /'s
 | |
| 					regStr += "(.*" + escSL + ")?"
 | |
| 					p.matchType = regexpMatch
 | |
| 				}
 | |
| 
 | |
| 				if i == 0 {
 | |
| 					p.matchType = suffixMatch
 | |
| 				}
 | |
| 			} else {
 | |
| 				// is "*" so map it to anything but "/"
 | |
| 				regStr += "[^" + escSL + "]*"
 | |
| 				p.matchType = regexpMatch
 | |
| 			}
 | |
| 		} else if ch == '?' {
 | |
| 			// "?" is any char except "/"
 | |
| 			regStr += "[^" + escSL + "]"
 | |
| 			p.matchType = regexpMatch
 | |
| 		} else if shouldEscape(ch) {
 | |
| 			// Escape some regexp special chars that have no meaning
 | |
| 			// in golang's filepath.Match
 | |
| 			regStr += `\` + string(ch)
 | |
| 		} else if ch == '\\' {
 | |
| 			// escape next char. Note that a trailing \ in the pattern
 | |
| 			// will be left alone (but need to escape it)
 | |
| 			if sl == `\` {
 | |
| 				// On windows map "\" to "\\", meaning an escaped backslash,
 | |
| 				// and then just continue because filepath.Match on
 | |
| 				// Windows doesn't allow escaping at all
 | |
| 				regStr += escSL
 | |
| 				continue
 | |
| 			}
 | |
| 			if scan.Peek() != scanner.EOF {
 | |
| 				regStr += `\` + string(scan.Next())
 | |
| 				p.matchType = regexpMatch
 | |
| 			} else {
 | |
| 				regStr += `\`
 | |
| 			}
 | |
| 		} else if ch == '[' || ch == ']' {
 | |
| 			regStr += string(ch)
 | |
| 			p.matchType = regexpMatch
 | |
| 		} else {
 | |
| 			regStr += string(ch)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if p.matchType != regexpMatch {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	regStr += "$"
 | |
| 
 | |
| 	re, err := regexp.Compile(regStr)
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	p.regexp = re
 | |
| 	p.matchType = regexpMatch
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // Matches returns true if file matches any of the patterns
 | |
| // and isn't excluded by any of the subsequent patterns.
 | |
| //
 | |
| // This implementation is buggy (it only checks a single parent dir against the
 | |
| // pattern) and will be removed soon. Use MatchesOrParentMatches instead.
 | |
| func Matches(file string, patterns []string) (bool, error) {
 | |
| 	pm, err := New(patterns)
 | |
| 	if err != nil {
 | |
| 		return false, err
 | |
| 	}
 | |
| 	file = filepath.Clean(file)
 | |
| 
 | |
| 	if file == "." {
 | |
| 		// Don't let them exclude everything, kind of silly.
 | |
| 		return false, nil
 | |
| 	}
 | |
| 
 | |
| 	return pm.Matches(file)
 | |
| }
 | |
| 
 | |
| // MatchesOrParentMatches returns true if file matches any of the patterns
 | |
| // and isn't excluded by any of the subsequent patterns.
 | |
| func MatchesOrParentMatches(file string, patterns []string) (bool, error) {
 | |
| 	pm, err := New(patterns)
 | |
| 	if err != nil {
 | |
| 		return false, err
 | |
| 	}
 | |
| 	file = filepath.Clean(file)
 | |
| 
 | |
| 	if file == "." {
 | |
| 		// Don't let them exclude everything, kind of silly.
 | |
| 		return false, nil
 | |
| 	}
 | |
| 
 | |
| 	return pm.MatchesOrParentMatches(file)
 | |
| }
 |