vendor/github.com/google/shlex/shlex.go (new file, generated, vendored, 415 lines)
@@ -0,0 +1,415 @@
/*
Copyright 2012 Google Inc. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

/*
Package shlex implements a simple lexer which splits input into tokens using
shell-style rules for quoting and commenting.

The basic use case uses the default ASCII lexer to split a string into sub-strings:

	shlex.Split("one \"two three\" four") -> []string{"one", "two three", "four"}

To process a stream of strings:

	l := NewLexer(os.Stdin)
	for {
		token, err := l.Next()
		if err != nil {
			break
		}
		// process token
	}

To access the raw token stream (which includes tokens for comments):

	t := NewTokenizer(os.Stdin)
	for {
		token, err := t.Next()
		if err != nil {
			break
		}
		// process token
	}
*/
package shlex

import (
	"bufio"
	"fmt"
	"io"
	"strings"
)

// TokenType is a top-level token classification: a word, space, comment, or unknown.
type TokenType int

// runeTokenClass is the type of a UTF-8 character classification: a quote, space, escape, etc.
type runeTokenClass int

// lexerState is the internal state used by the lexer state machine.
type lexerState int

// Token is a (type, value) pair representing a lexical token.
type Token struct {
	tokenType TokenType
	value     string
}

// Equal reports whether tokens a and b are equal.
// Two tokens are equal if both their types and values are equal. A nil token can
// never be equal to another token.
func (a *Token) Equal(b *Token) bool {
	if a == nil || b == nil {
		return false
	}
	if a.tokenType != b.tokenType {
		return false
	}
	return a.value == b.value
}

// Named classes of UTF-8 runes
const (
	spaceRunes            = " \t\r\n"
	escapingQuoteRunes    = `"`
	nonEscapingQuoteRunes = "'"
	escapeRunes           = `\`
	commentRunes          = "#"
)

// Classes of rune token
const (
	unknownRuneClass runeTokenClass = iota
	spaceRuneClass
	escapingQuoteRuneClass
	nonEscapingQuoteRuneClass
	escapeRuneClass
	commentRuneClass
	eofRuneClass
)

// Classes of lexical token
const (
	UnknownToken TokenType = iota
	WordToken
	SpaceToken
	CommentToken
)

// Lexer state machine states
const (
	startState           lexerState = iota // no runes have been seen
	inWordState                            // processing regular runes in a word
	escapingState                          // we have just consumed an escape rune; the next rune is literal
	escapingQuotedState                    // we have just consumed an escape rune within a quoted string
	quotingEscapingState                   // we are within a quoted string that supports escaping ("...")
	quotingState                           // we are within a string that does not support escaping ('...')
	commentState                           // we are within a comment (everything following an unquoted or unescaped #)
)

// tokenClassifier is used for classifying rune characters.
type tokenClassifier map[rune]runeTokenClass

// addRuneClass registers every rune in runes as belonging to the given token class.
func (typeMap tokenClassifier) addRuneClass(runes string, tokenType runeTokenClass) {
	for _, runeChar := range runes {
		typeMap[runeChar] = tokenType
	}
}

// newDefaultClassifier creates a new classifier for ASCII characters.
func newDefaultClassifier() tokenClassifier {
	t := tokenClassifier{}
	t.addRuneClass(spaceRunes, spaceRuneClass)
	t.addRuneClass(escapingQuoteRunes, escapingQuoteRuneClass)
	t.addRuneClass(nonEscapingQuoteRunes, nonEscapingQuoteRuneClass)
	t.addRuneClass(escapeRunes, escapeRuneClass)
	t.addRuneClass(commentRunes, commentRuneClass)
	return t
}

// ClassifyRune classifies a rune.
func (t tokenClassifier) ClassifyRune(runeVal rune) runeTokenClass {
	return t[runeVal]
}

// Lexer turns an input stream into a sequence of tokens. Whitespace and comments are skipped.
type Lexer Tokenizer

// NewLexer creates a new lexer from an input stream.
func NewLexer(r io.Reader) *Lexer {
	return (*Lexer)(NewTokenizer(r))
}

// Next returns the next word, or an error. If there are no more words,
// the error will be io.EOF.
func (l *Lexer) Next() (string, error) {
	for {
		token, err := (*Tokenizer)(l).Next()
		if err != nil {
			return "", err
		}
		switch token.tokenType {
		case WordToken:
			return token.value, nil
		case CommentToken:
			// skip comments
		default:
			return "", fmt.Errorf("Unknown token type: %v", token.tokenType)
		}
	}
}

// Tokenizer turns an input stream into a sequence of typed tokens.
type Tokenizer struct {
	input      bufio.Reader
	classifier tokenClassifier
}

// NewTokenizer creates a new tokenizer from an input stream.
func NewTokenizer(r io.Reader) *Tokenizer {
	input := bufio.NewReader(r)
	classifier := newDefaultClassifier()
	return &Tokenizer{
		input:      *input,
		classifier: classifier}
}

// scanStream scans the stream for the next token using the internal state machine.
// It returns an error if it encounters a state it does not know how to handle.
func (t *Tokenizer) scanStream() (*Token, error) {
	state := startState
	var tokenType TokenType
	var value []rune
	var nextRune rune
	var nextRuneType runeTokenClass
	var err error

	for {
		nextRune, _, err = t.input.ReadRune()
		nextRuneType = t.classifier.ClassifyRune(nextRune)

		if err == io.EOF {
			nextRuneType = eofRuneClass
			err = nil
		} else if err != nil {
			return nil, err
		}

		switch state {
		case startState: // no runes read yet
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						return nil, io.EOF
					}
				case spaceRuneClass:
					{
					}
				case escapingQuoteRuneClass:
					{
						tokenType = WordToken
						state = quotingEscapingState
					}
				case nonEscapingQuoteRuneClass:
					{
						tokenType = WordToken
						state = quotingState
					}
				case escapeRuneClass:
					{
						tokenType = WordToken
						state = escapingState
					}
				case commentRuneClass:
					{
						tokenType = CommentToken
						state = commentState
					}
				default:
					{
						tokenType = WordToken
						value = append(value, nextRune)
						state = inWordState
					}
				}
			}
		case inWordState: // in a regular word
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				case spaceRuneClass:
					{
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				case escapingQuoteRuneClass:
					{
						state = quotingEscapingState
					}
				case nonEscapingQuoteRuneClass:
					{
						state = quotingState
					}
				case escapeRuneClass:
					{
						state = escapingState
					}
				default:
					{
						value = append(value, nextRune)
					}
				}
			}
		case escapingState: // the rune after an escape character
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						err = fmt.Errorf("EOF found after escape character")
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				default:
					{
						state = inWordState
						value = append(value, nextRune)
					}
				}
			}
		case escapingQuotedState: // the next rune after an escape character, in double quotes
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						err = fmt.Errorf("EOF found after escape character")
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				default:
					{
						state = quotingEscapingState
						value = append(value, nextRune)
					}
				}
			}
		case quotingEscapingState: // in escaping double quotes
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						err = fmt.Errorf("EOF found when expecting closing quote")
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				case escapingQuoteRuneClass:
					{
						state = inWordState
					}
				case escapeRuneClass:
					{
						state = escapingQuotedState
					}
				default:
					{
						value = append(value, nextRune)
					}
				}
			}
		case quotingState: // in non-escaping single quotes
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						err = fmt.Errorf("EOF found when expecting closing quote")
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				case nonEscapingQuoteRuneClass:
					{
						state = inWordState
					}
				default:
					{
						value = append(value, nextRune)
					}
				}
			}
		case commentState: // in a comment
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				case spaceRuneClass:
					{
						if nextRune == '\n' {
							state = startState
							token := &Token{
								tokenType: tokenType,
								value:     string(value)}
							return token, err
						} else {
							value = append(value, nextRune)
						}
					}
				default:
					{
						value = append(value, nextRune)
					}
				}
			}
		default:
			{
				return nil, fmt.Errorf("Unexpected state: %v", state)
			}
		}
	}
}

// Next returns the next token in the stream.
func (t *Tokenizer) Next() (*Token, error) {
	return t.scanStream()
}

// Split partitions a string into a slice of strings.
func Split(s string) ([]string, error) {
	l := NewLexer(strings.NewReader(s))
	subStrings := make([]string, 0)
	for {
		word, err := l.Next()
		if err != nil {
			if err == io.EOF {
				return subStrings, nil
			}
			return subStrings, err
		}
		subStrings = append(subStrings, word)
	}
}
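
Since this file arrives as a vendored dependency, a minimal consumer-side sketch may help when verifying the vendoring. It assumes the package is imported by its upstream path github.com/google/shlex and only exercises the exported Split function defined above; the input string and expected output are illustrative.

```go
package main

import (
	"fmt"
	"log"

	"github.com/google/shlex"
)

func main() {
	// Split applies shell-style rules: the double-quoted group stays one
	// word, and the unquoted trailing #-comment is skipped by the lexer.
	words, err := shlex.Split(`one "two three" four # trailing comment`)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(words) // [one two three four]
}
```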