From 1a516054070c07fedf44a5aae17c15599ecc7e77 Mon Sep 17 00:00:00 2001 From: Jonathan Stoppani Date: Wed, 8 Jun 2016 13:55:26 +0200 Subject: [PATCH] Support unicode characters in parseWords Signed-off-by: Jonathan Stoppani Upstream-commit: 6284f04a6b3a9b8127dc166ff84b102c8ad411ce Component: engine --- .../builder/dockerfile/parser/line_parsers.go | 19 ++++++++++--------- .../builder/dockerfile/parser/parser_test.go | 12 ++++++++++++ 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/components/engine/builder/dockerfile/parser/line_parsers.go b/components/engine/builder/dockerfile/parser/line_parsers.go index ddd92dd416..5f484e4999 100644 --- a/components/engine/builder/dockerfile/parser/line_parsers.go +++ b/components/engine/builder/dockerfile/parser/line_parsers.go @@ -12,6 +12,7 @@ import ( "fmt" "strings" "unicode" + "unicode/utf8" ) var ( @@ -58,10 +59,11 @@ func parseWords(rest string) []string { quote := '\000' blankOK := false var ch rune + var chWidth int - for pos := 0; pos <= len(rest); pos++ { + for pos := 0; pos <= len(rest); pos += chWidth { if pos != len(rest) { - ch = rune(rest[pos]) + ch, chWidth = utf8.DecodeRuneInString(rest[pos:]) } if phase == inSpaces { // Looking for start of word @@ -95,15 +97,15 @@ func parseWords(rest string) []string { phase = inQuote } if ch == tokenEscape { - if pos+1 == len(rest) { + if pos+chWidth == len(rest) { continue // just skip an escape token at end of line } // If we're not quoted and we see an escape token, then always just // add the escape token plus the char to the word, even if the char // is a quote. word += string(ch) - pos++ - ch = rune(rest[pos]) + pos += chWidth + ch, chWidth = utf8.DecodeRuneInString(rest[pos:]) } word += string(ch) continue @@ -114,14 +116,13 @@ func parseWords(rest string) []string { } // The escape token is special except for ' quotes - can't escape anything for ' if ch == tokenEscape && quote != '\'' { - if pos+1 == len(rest) { + if pos+chWidth == len(rest) { phase = inWord continue // just skip the escape token at end } - pos++ - nextCh := rune(rest[pos]) + pos += chWidth word += string(ch) - ch = nextCh + ch, chWidth = utf8.DecodeRuneInString(rest[pos:]) } word += string(ch) } diff --git a/components/engine/builder/dockerfile/parser/parser_test.go b/components/engine/builder/dockerfile/parser/parser_test.go index 4025186ba6..1f5aaf5a6a 100644 --- a/components/engine/builder/dockerfile/parser/parser_test.go +++ b/components/engine/builder/dockerfile/parser/parser_test.go @@ -92,6 +92,10 @@ func TestParseWords(t *testing.T) { "input": {"foo bar"}, "expect": {"foo", "bar"}, }, + { + "input": {"foo\\ bar"}, + "expect": {"foo\\ bar"}, + }, { "input": {"foo=bar"}, "expect": {"foo=bar"}, @@ -104,6 +108,14 @@ func TestParseWords(t *testing.T) { "input": {`foo bar "abc xyz"`}, "expect": {"foo", "bar", `"abc xyz"`}, }, + { + "input": {"àöû"}, + "expect": {"àöû"}, + }, + { + "input": {`föo bàr "âbc xÿz"`}, + "expect": {"föo", "bàr", `"âbc xÿz"`}, + }, } for _, test := range tests {