From 1a516054070c07fedf44a5aae17c15599ecc7e77 Mon Sep 17 00:00:00 2001
From: Jonathan Stoppani <jonathan.stoppani@divio.com>
Date: Wed, 8 Jun 2016 13:55:26 +0200
Subject: [PATCH] Support unicode characters in parseWords

Signed-off-by: Jonathan Stoppani <jonathan.stoppani@divio.com>
Upstream-commit: 6284f04a6b3a9b8127dc166ff84b102c8ad411ce
Component: engine
---
 .../builder/dockerfile/parser/line_parsers.go | 19 ++++++++++---------
 .../builder/dockerfile/parser/parser_test.go  | 12 ++++++++++++
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/components/engine/builder/dockerfile/parser/line_parsers.go b/components/engine/builder/dockerfile/parser/line_parsers.go
index ddd92dd416..5f484e4999 100644
--- a/components/engine/builder/dockerfile/parser/line_parsers.go
+++ b/components/engine/builder/dockerfile/parser/line_parsers.go
@@ -12,6 +12,7 @@ import (
 	"fmt"
 	"strings"
 	"unicode"
+	"unicode/utf8"
 )
 
 var (
@@ -58,10 +59,11 @@ func parseWords(rest string) []string {
 	quote := '\000'
 	blankOK := false
 	var ch rune
+	var chWidth int
 
-	for pos := 0; pos <= len(rest); pos++ {
+	for pos := 0; pos <= len(rest); pos += chWidth {
 		if pos != len(rest) {
-			ch = rune(rest[pos])
+			ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
 		}
 
 		if phase == inSpaces { // Looking for start of word
@@ -95,15 +97,15 @@ func parseWords(rest string) []string {
 				phase = inQuote
 			}
 			if ch == tokenEscape {
-				if pos+1 == len(rest) {
+				if pos+chWidth == len(rest) {
 					continue // just skip an escape token at end of line
 				}
 				// If we're not quoted and we see an escape token, then always just
 				// add the escape token plus the char to the word, even if the char
 				// is a quote.
 				word += string(ch)
-				pos++
-				ch = rune(rest[pos])
+				pos += chWidth
+				ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
 			}
 			word += string(ch)
 			continue
@@ -114,14 +116,13 @@ func parseWords(rest string) []string {
 			}
 			// The escape token is special except for ' quotes - can't escape anything for '
 			if ch == tokenEscape && quote != '\'' {
-				if pos+1 == len(rest) {
+				if pos+chWidth == len(rest) {
 					phase = inWord
 					continue // just skip the escape token at end
 				}
-				pos++
-				nextCh := rune(rest[pos])
+				pos += chWidth
 				word += string(ch)
-				ch = nextCh
+				ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
 			}
 			word += string(ch)
 		}
diff --git a/components/engine/builder/dockerfile/parser/parser_test.go b/components/engine/builder/dockerfile/parser/parser_test.go
index 4025186ba6..1f5aaf5a6a 100644
--- a/components/engine/builder/dockerfile/parser/parser_test.go
+++ b/components/engine/builder/dockerfile/parser/parser_test.go
@@ -92,6 +92,10 @@ func TestParseWords(t *testing.T) {
 			"input":  {"foo bar"},
 			"expect": {"foo", "bar"},
 		},
+		{
+			"input":  {"foo\\ bar"},
+			"expect": {"foo\\ bar"},
+		},
 		{
 			"input":  {"foo=bar"},
 			"expect": {"foo=bar"},
@@ -104,6 +108,14 @@ func TestParseWords(t *testing.T) {
 			"input":  {`foo bar "abc xyz"`},
 			"expect": {"foo", "bar", `"abc xyz"`},
 		},
+		{
+			"input":  {"àöû"},
+			"expect": {"àöû"},
+		},
+		{
+			"input":  {`föo bàr "âbc xÿz"`},
+			"expect": {"föo", "bàr", `"âbc xÿz"`},
+		},
 	}
 
 	for _, test := range tests {