Fix bug where internal unquoted whitespace truncates values (#205)

* Add tests to cover the regression reported in #204 * Add a comment on regex for clarity * Remove some old code that wasn't doing anything * Push _all_ parse code into the parser and get tests calling live code * Add some newline specific tests * Add some YAML tests for the newline/space split bug * Fix incorrect terminating of lines on whitespace * Fix most of the parser regressions * Bring back FOO.BAR names * remove some commented out code
2025-06-25 20:34:02 +00:00 · 2023-02-06 08:47:38 +11:00
parent b311b2657d
commit 3fc4292b58
4 changed files with 135 additions and 143 deletions
--- a/parser.go
+++ b/parser.go
@ -4,6 +4,7 @@ import (
 	"bytes"
 	"errors"
 	"fmt"
+	"regexp"
 	"strings"
 	"unicode"
 )
@ -69,7 +70,13 @@ func getStatementStart(src []byte) []byte {
 // locateKeyName locates and parses key name and returns rest of slice
 func locateKeyName(src []byte) (key string, cutset []byte, err error) {
 	// trim "export" and space at beginning
-	src = bytes.TrimLeftFunc(bytes.TrimPrefix(src, []byte(exportPrefix)), isSpace)
+	src = bytes.TrimLeftFunc(src, isSpace)
+	if bytes.HasPrefix(src, []byte(exportPrefix)) {
+		trimmed := bytes.TrimPrefix(src, []byte(exportPrefix))
+		if bytes.IndexFunc(trimmed, isSpace) == 0 {
+			src = bytes.TrimLeftFunc(trimmed, isSpace)
+		}
+	}

 	// locate key name end and validate it in single loop
 	offset := 0
@ -88,8 +95,8 @@ loop:
 			break loop
 		case '_':
 		default:
-			// variable name should match [A-Za-z0-9_]
-			if unicode.IsLetter(rchar) || unicode.IsNumber(rchar) {
+			// variable name should match [A-Za-z0-9_.]
+			if unicode.IsLetter(rchar) || unicode.IsNumber(rchar) || rchar == '.' {
 				continue
 			}

@ -113,13 +120,41 @@ loop:
 func extractVarValue(src []byte, vars map[string]string) (value string, rest []byte, err error) {
 	quote, hasPrefix := hasQuotePrefix(src)
 	if !hasPrefix {
-		// unquoted value - read until whitespace
-		end := bytes.IndexFunc(src, unicode.IsSpace)
-		if end == -1 {
-			return expandVariables(string(src), vars), nil, nil
+		// unquoted value - read until end of line
+		endOfLine := bytes.IndexFunc(src, isLineEnd)
+
+		// Hit EOF without a trailing newline
+		if endOfLine == -1 {
+			endOfLine = len(src)
+
+			if endOfLine == 0 {
+				return "", nil, nil
+			}
 		}

-		return expandVariables(string(src[0:end]), vars), src[end:], nil
+		// Convert line to rune away to do accurate countback of runes
+		line := []rune(string(src[0:endOfLine]))
+
+		// Assume end of line is end of var
+		endOfVar := len(line)
+		if endOfVar == 0 {
+			return "", src[endOfLine:], nil
+		}
+
+		// Work backwards to check if the line ends in whitespace then
+		// a comment (ie asdasd # some comment)
+		for i := endOfVar - 1; i >= 0; i-- {
+			if line[i] == charComment && i > 0 {
+				if isSpace(line[i-1]) {
+					endOfVar = i
+					break
+				}
+			}
+		}
+
+		trimmed := strings.TrimFunc(string(line[0:endOfVar]), isSpace)
+
+		return expandVariables(trimmed, vars), src[endOfLine:], nil
 	}

 	// lookup quoted string terminator
@ -205,3 +240,32 @@ func isSpace(r rune) bool {
 	}
 	return false
 }
+
+func isLineEnd(r rune) bool {
+	if r == '\n' || r == '\r' {
+		return true
+	}
+	return false
+}
+
+var (
+	escapeRegex        = regexp.MustCompile(`\\.`)
+	expandVarRegex     = regexp.MustCompile(`(\\)?(\$)(\()?\{?([A-Z0-9_]+)?\}?`)
+	unescapeCharsRegex = regexp.MustCompile(`\\([^$])`)
+)
+
+func expandVariables(v string, m map[string]string) string {
+	return expandVarRegex.ReplaceAllStringFunc(v, func(s string) string {
+		submatch := expandVarRegex.FindStringSubmatch(s)
+
+		if submatch == nil {
+			return s
+		}
+		if submatch[1] == "\\" || submatch[2] == "(" {
+			return submatch[0][1:]
+		} else if submatch[4] != "" {
+			return m[submatch[4]]
+		}
+		return s
+	})
+}