diff --git a/components/engine/builder/dockerfile/shell_parser.go b/components/engine/builder/dockerfile/shell_parser.go index 7ef1773523..7425ba4e86 100644 --- a/components/engine/builder/dockerfile/shell_parser.go +++ b/components/engine/builder/dockerfile/shell_parser.go @@ -9,13 +9,15 @@ package dockerfile import ( "fmt" "strings" + "text/scanner" "unicode" ) type shellWord struct { - word string - envs []string - pos int + word string + scanner scanner.Scanner + envs []string + pos int } // ProcessWord will use the 'env' list of environment variables, @@ -26,11 +28,12 @@ func ProcessWord(word string, env []string) (string, error) { envs: env, pos: 0, } + sw.scanner.Init(strings.NewReader(word)) return sw.process() } func (sw *shellWord) process() (string, error) { - return sw.processStopOn('\000') + return sw.processStopOn(scanner.EOF) } // Process the word, starting at 'pos', and stop when we get to the @@ -43,10 +46,11 @@ func (sw *shellWord) processStopOn(stopChar rune) (string, error) { '$': sw.processDollar, } - for sw.pos < len(sw.word) { - ch := sw.peek() - if stopChar != '\000' && ch == stopChar { - sw.next() + for sw.scanner.Peek() != scanner.EOF { + ch := sw.scanner.Peek() + + if stopChar != scanner.EOF && ch == stopChar { + sw.scanner.Next() break } if fn, ok := charFuncMapping[ch]; ok { @@ -58,14 +62,19 @@ func (sw *shellWord) processStopOn(stopChar rune) (string, error) { result += tmp } else { // Not special, just add it to the result - ch = sw.next() + ch = sw.scanner.Next() + if ch == '\\' { // '\' escapes, except end of line - ch = sw.next() - if ch == '\000' { - continue + + ch = sw.scanner.Next() + + if ch == scanner.EOF { + break } + } + result += string(ch) } } @@ -73,36 +82,21 @@ func (sw *shellWord) processStopOn(stopChar rune) (string, error) { return result, nil } -func (sw *shellWord) peek() rune { - if sw.pos == len(sw.word) { - return '\000' - } - return rune(sw.word[sw.pos]) -} - -func (sw *shellWord) next() rune { - if sw.pos == len(sw.word) { - return '\000' - } - ch := rune(sw.word[sw.pos]) - sw.pos++ - return ch -} - func (sw *shellWord) processSingleQuote() (string, error) { // All chars between single quotes are taken as-is // Note, you can't escape ' var result string - sw.next() + sw.scanner.Next() for { - ch := sw.next() - if ch == '\000' || ch == '\'' { + ch := sw.scanner.Next() + if ch == '\'' || ch == scanner.EOF { break } result += string(ch) } + return result, nil } @@ -111,12 +105,12 @@ func (sw *shellWord) processDoubleQuote() (string, error) { // But you can escape " with a \ var result string - sw.next() + sw.scanner.Next() - for sw.pos < len(sw.word) { - ch := sw.peek() + for sw.scanner.Peek() != scanner.EOF { + ch := sw.scanner.Peek() if ch == '"' { - sw.next() + sw.scanner.Next() break } if ch == '$' { @@ -126,18 +120,18 @@ func (sw *shellWord) processDoubleQuote() (string, error) { } result += tmp } else { - ch = sw.next() + ch = sw.scanner.Next() if ch == '\\' { - chNext := sw.peek() + chNext := sw.scanner.Peek() - if chNext == '\000' { + if chNext == scanner.EOF { // Ignore \ at end of word continue } if chNext == '"' || chNext == '$' { // \" and \$ can be escaped, all other \'s are left as-is - ch = sw.next() + ch = sw.scanner.Next() } } result += string(ch) @@ -148,23 +142,23 @@ func (sw *shellWord) processDoubleQuote() (string, error) { } func (sw *shellWord) processDollar() (string, error) { - sw.next() - ch := sw.peek() + sw.scanner.Next() + ch := sw.scanner.Peek() if ch == '{' { - sw.next() + sw.scanner.Next() name := sw.processName() - ch = sw.peek() + ch = sw.scanner.Peek() if ch == '}' { // Normal ${xx} case - sw.next() + sw.scanner.Next() return sw.getEnv(name), nil } if ch == ':' { // Special ${xx:...} format processing // Yes it allows for recursive $'s in the ... spot - sw.next() // skip over : - modifier := sw.next() + sw.scanner.Next() // skip over : + modifier := sw.scanner.Next() word, err := sw.processStopOn('}') if err != nil { @@ -207,16 +201,16 @@ func (sw *shellWord) processName() string { // If it starts with a numeric then just return $# var name string - for sw.pos < len(sw.word) { - ch := sw.peek() + for sw.scanner.Peek() != scanner.EOF { + ch := sw.scanner.Peek() if len(name) == 0 && unicode.IsDigit(ch) { - ch = sw.next() + ch = sw.scanner.Next() return string(ch) } if !unicode.IsLetter(ch) && !unicode.IsDigit(ch) && ch != '_' { break } - ch = sw.next() + ch = sw.scanner.Next() name += string(ch) } diff --git a/components/engine/builder/dockerfile/shell_parser_test.go b/components/engine/builder/dockerfile/shell_parser_test.go index 6568e39418..ed6a1d109c 100644 --- a/components/engine/builder/dockerfile/shell_parser_test.go +++ b/components/engine/builder/dockerfile/shell_parser_test.go @@ -15,7 +15,7 @@ func TestShellParser(t *testing.T) { defer file.Close() scanner := bufio.NewScanner(file) - envs := []string{"PWD=/home", "SHELL=bash"} + envs := []string{"PWD=/home", "SHELL=bash", "KOREAN=한국어"} for scanner.Scan() { line := scanner.Text() diff --git a/components/engine/builder/dockerfile/words b/components/engine/builder/dockerfile/words index 1114a7e466..1a7fe975a7 100644 --- a/components/engine/builder/dockerfile/words +++ b/components/engine/builder/dockerfile/words @@ -56,3 +56,57 @@ he${PWD:=000}xx | error he${PWD:+${PWD}:}xx | he/home:xx he${XXX:-\$PWD:}xx | he$PWD:xx he${XXX:-\${PWD}z}xx | he${PWDz}xx +안녕하세요 | 안녕하세요 +안'녕'하세요 | 안녕하세요 +안'녕하세요 | 안녕하세요 +안녕\'하세요 | 안녕'하세요 +안\\'녕하세요 | 안\녕하세요 +안녕\t하세요 | 안녕t하세요 +"안녕\t하세요" | 안녕\t하세요 +'안녕\t하세요 | 안녕\t하세요 +안녕하세요\ | 안녕하세요 +안녕하세요\\ | 안녕하세요\ +"안녕하세요 | 안녕하세요 +"안녕하세요\" | 안녕하세요" +"안녕'하세요" | 안녕'하세요 +'안녕하세요 | 안녕하세요 +'안녕하세요\' | 안녕하세요\ +안녕$1x | 안녕x +안녕$.x | 안녕$.x +안녕$pwd. | 안녕. +안녕$PWD | 안녕/home +안녕\$PWD | 안녕$PWD +안녕\\$PWD | 안녕\/home +안녕\${} | 안녕${} +안녕\${}xx | 안녕${}xx +안녕${} | 안녕 +안녕${}xx | 안녕xx +안녕${hi} | 안녕 +안녕${hi}xx | 안녕xx +안녕${PWD} | 안녕/home +안녕${.} | error +안녕${XXX:-000}xx | 안녕000xx +안녕${PWD:-000}xx | 안녕/homexx +안녕${XXX:-$PWD}xx | 안녕/homexx +안녕${XXX:-${PWD:-yyy}}xx | 안녕/homexx +안녕${XXX:-${YYY:-yyy}}xx | 안녕yyyxx +안녕${XXX:YYY} | error +안녕${XXX:+${PWD}}xx | 안녕xx +안녕${PWD:+${XXX}}xx | 안녕xx +안녕${PWD:+${SHELL}}xx | 안녕bashxx +안녕${XXX:+000}xx | 안녕xx +안녕${PWD:+000}xx | 안녕000xx +'안녕${XX}' | 안녕${XX} +"안녕${PWD}" | 안녕/home +"안녕'$PWD'" | 안녕'/home' +'"안녕"' | "안녕" +안녕\$PWD | 안녕$PWD +"안녕\$PWD" | 안녕$PWD +'안녕\$PWD' | 안녕\$PWD +안녕${PWD | error +안녕${PWD:=000}xx | error +안녕${PWD:+${PWD}:}xx | 안녕/home:xx +안녕${XXX:-\$PWD:}xx | 안녕$PWD:xx +안녕${XXX:-\${PWD}z}xx | 안녕${PWDz}xx +$KOREAN | 한국어 +안녕$KOREAN | 안녕한국어