path: root/vendor/github.com/graphql-go/graphql/language/lexer/lexer.go



package lexer

import (
	"bytes"
	"fmt"
	"regexp"
	"strings"
	"unicode/utf8"

	"github.com/graphql-go/graphql/gqlerrors"
	"github.com/graphql-go/graphql/language/source"
)

// Token kinds produced by the lexer. The values start at 1 (iota + 1), so the
// zero value of Token.Kind does not match any declared kind. The printable
// label for each kind is registered in init and returned by GetTokenKindDesc.
const (
	EOF = iota + 1
	BANG
	DOLLAR
	PAREN_L
	PAREN_R
	SPREAD
	COLON
	EQUALS
	AT
	BRACKET_L
	BRACKET_R
	BRACE_L
	PIPE
	BRACE_R
	NAME
	INT
	FLOAT
	STRING
	BLOCK_STRING
)

// TokenKind maps every token kind constant to itself. It is filled in by init.
var TokenKind map[int]int

// tokenDescription holds the printable label of each token kind, keyed by the
// kind value. It is filled in by init and read by GetTokenKindDesc.
var tokenDescription map[int]string

// init registers every token kind in TokenKind and records its printable
// label in tokenDescription.
func init() {
	kinds := []struct {
		kind  int
		label string
	}{
		{EOF, "EOF"},
		{BANG, "!"},
		{DOLLAR, "$"},
		{PAREN_L, "("},
		{PAREN_R, ")"},
		{SPREAD, "..."},
		{COLON, ":"},
		{EQUALS, "="},
		{AT, "@"},
		{BRACKET_L, "["},
		{BRACKET_R, "]"},
		{BRACE_L, "{"},
		{PIPE, "|"},
		{BRACE_R, "}"},
		{NAME, "Name"},
		{INT, "Int"},
		{FLOAT, "Float"},
		{STRING, "String"},
		{BLOCK_STRING, "BlockString"},
	}

	TokenKind = make(map[int]int, len(kinds))
	tokenDescription = make(map[int]string, len(kinds))
	for _, entry := range kinds {
		TokenKind[entry.kind] = entry.kind
		tokenDescription[TokenKind[entry.kind]] = entry.label
	}
}

// Token is a representation of a lexed Token. Value is only populated for
// non-punctuation tokens: NAME, INT, FLOAT, STRING, and BLOCK_STRING.
//
// NOTE(review): within this file readName fills Start/End from its rune
// position arguments while the other readers use byte offsets into the source
// body; the two differ once multi-byte characters precede the token — confirm
// which unit callers expect.
type Token struct {
	// Kind is one of the token kind constants (EOF ... BLOCK_STRING).
	Kind  int
	// Start is the position in the source body where the token begins.
	Start int
	// End is the position just past the token; Lex resumes lexing from here.
	End   int
	// Value is the token text (names, numbers) or decoded content (strings);
	// it is empty for punctuation and EOF.
	Value string
}

// Lexer returns one Token per call. For lexers built by Lex, a resetPosition
// of 0 continues from the end of the previously returned token, while any
// other value restarts lexing at that position in the source body.
type Lexer func(resetPosition int) (Token, error)

// Lex builds a Lexer over s. The returned closure remembers where the last
// successfully read token ended; calling it with 0 resumes from that point,
// and calling it with any other position lexes from there instead. A failed
// read leaves the remembered position untouched.
func Lex(s *source.Source) Lexer {
	lastEnd := 0
	return func(resetPosition int) (Token, error) {
		from := resetPosition
		if from == 0 {
			from = lastEnd
		}
		tok, err := readToken(s, from)
		if err == nil {
			lastEnd = tok.End
		}
		return tok, err
	}
}

// readName lexes a name token matching [_A-Za-z][_0-9A-Za-z]*.
//
// position is the byte offset of the name's first character in the source
// body and runePosition is the corresponding rune offset. The caller has
// already validated the first character, so scanning starts one past it.
// The token's Start/End are expressed in rune offsets; its Value is the
// matched text.
func readName(source *source.Source, position, runePosition int) Token {
	body := source.Body
	byteEnd, runeEnd := position+1, runePosition+1
	for byteEnd != len(body) {
		c, _ := runeAt(body, byteEnd)
		isNameChar := c == '_' ||
			(c >= '0' && c <= '9') ||
			(c >= 'A' && c <= 'Z') ||
			(c >= 'a' && c <= 'z')
		if !isNameChar {
			break
		}
		// Name characters are all single-byte, so both offsets advance by one.
		byteEnd++
		runeEnd++
	}
	return makeToken(TokenKind[NAME], runePosition, runeEnd, string(body[position:byteEnd]))
}

// readNumber lexes a numeric token starting at byte offset start. firstCode
// and codeLength are the rune found at start and its width in bytes.
//
// The token is a FLOAT when a fractional part or an exponent is present and
// an INT otherwise:
//
//	Int:   -?(0|[1-9][0-9]*)
//	Float: -?(0|[1-9][0-9]*)(\.[0-9]+)?((E|e)(+|-)?[0-9]+)?
//
// A syntax error is returned when a digit follows a leading 0 or when a digit
// is expected but missing.
func readNumber(s *source.Source, start int, firstCode rune, codeLength int) (Token, error) {
	body := s.Body
	pos := start
	ch, width := firstCode, codeLength
	kind := TokenKind[INT]

	// step consumes the current rune and loads the one that follows it.
	step := func() {
		pos += width
		ch, width = runeAt(body, pos)
	}
	// digits consumes a mandatory run of digits starting at the current rune.
	digits := func() error {
		next, err := readDigits(s, pos, ch, width)
		if err != nil {
			return err
		}
		pos = next
		return nil
	}

	if ch == '-' {
		step()
	}

	// Integer part: either a lone 0 or a run of digits.
	if ch == '0' {
		step()
		if ch >= '0' && ch <= '9' {
			msg := fmt.Sprintf("Invalid number, unexpected digit after 0: %v.", printCharCode(ch))
			return Token{}, gqlerrors.NewSyntaxError(s, pos, msg)
		}
	} else {
		if err := digits(); err != nil {
			return Token{}, err
		}
		ch, width = runeAt(body, pos)
	}

	// Optional fractional part.
	if ch == '.' {
		kind = TokenKind[FLOAT]
		step()
		if err := digits(); err != nil {
			return Token{}, err
		}
		ch, width = runeAt(body, pos)
	}

	// Optional exponent with an optional sign.
	if ch == 'E' || ch == 'e' {
		kind = TokenKind[FLOAT]
		step()
		if ch == '+' || ch == '-' {
			step()
		}
		if err := digits(); err != nil {
			return Token{}, err
		}
	}

	return makeToken(kind, start, pos, string(body[start:pos])), nil
}

// readDigits consumes one or more ASCII digits beginning at byte offset start
// and returns the offset of the first byte after them. firstCode and
// codeLength describe the rune located at start. When that rune is not a
// digit, start is returned together with a syntax error.
func readDigits(s *source.Source, start int, firstCode rune, codeLength int) (int, error) {
	isDigit := func(r rune) bool { return r >= '0' && r <= '9' }

	if !isDigit(firstCode) {
		msg := fmt.Sprintf("Invalid number, expected digit but got: %v.", printCharCode(firstCode))
		return start, gqlerrors.NewSyntaxError(s, start, msg)
	}

	body := s.Body
	pos, ch, width := start, firstCode, codeLength
	for isDigit(ch) {
		pos += width
		ch, width = runeAt(body, pos)
	}
	return pos, nil
}

// readString lexes a single-line, double-quoted string whose opening quote is
// at offset start in the source body.
//
// It scans until a closing quote, a line terminator or the end of the body,
// decoding backslash escapes (\" \/ \\ \b \f \n \r \t and \uXXXX) into the
// token value. A syntax error is returned for a control character other than
// tab, for an unknown or malformed escape sequence, and for a string that is
// not closed by a quote on the same line ("Unterminated string.").
//
// On success the returned STRING token spans from start to just past the
// closing quote and its Value holds the unescaped content.
func readString(s *source.Source, start int) (Token, error) {
	body := s.Body
	// position is a byte offset; runePosition counts runes and is only used
	// when reporting errors.
	position := start + 1
	runePosition := start + 1
	// chunkStart marks the beginning of the literal run not yet copied into
	// valueBuffer.
	chunkStart := position
	var code rune
	var n int
	var valueBuffer bytes.Buffer
	for {
		code, n = runeAt(body, position)
		if position < len(body) &&
			// not LineTerminator
			code != 0x000A && code != 0x000D &&
			// not Quote (")
			code != '"' {

			// SourceCharacter
			if code < 0x0020 && code != 0x0009 {
				return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf(`Invalid character within String: %v.`, printCharCode(code)))
			}
			position += n
			runePosition++
			if code == '\\' { // \
				// Flush the literal text that precedes the backslash, then
				// decode the escape that follows it.
				valueBuffer.Write(body[chunkStart : position-1])
				code, n = runeAt(body, position)
				switch code {
				case '"':
					valueBuffer.WriteRune('"')
					break
				case '/':
					valueBuffer.WriteRune('/')
					break
				case '\\':
					valueBuffer.WriteRune('\\')
					break
				case 'b':
					valueBuffer.WriteRune('\b')
					break
				case 'f':
					valueBuffer.WriteRune('\f')
					break
				case 'n':
					valueBuffer.WriteRune('\n')
					break
				case 'r':
					valueBuffer.WriteRune('\r')
					break
				case 't':
					valueBuffer.WriteRune('\t')
					break
				case 'u':
					// Check if there are at least 4 bytes available
					if len(body) <= position+4 {
						return Token{}, gqlerrors.NewSyntaxError(s, runePosition,
							fmt.Sprintf("Invalid character escape sequence: "+
								"\\u%v", string(body[position+1:])))
					}
					// uniCharCode yields a negative value when any of the four
					// characters is not a hexadecimal digit.
					charCode := uniCharCode(
						rune(body[position+1]),
						rune(body[position+2]),
						rune(body[position+3]),
						rune(body[position+4]),
					)
					if charCode < 0 {
						return Token{}, gqlerrors.NewSyntaxError(s, runePosition,
							fmt.Sprintf("Invalid character escape sequence: "+
								"\\u%v", string(body[position+1:position+5])))
					}
					valueBuffer.WriteRune(charCode)
					// Skip the four hex digits; the 'u' itself is skipped below.
					position += 4
					runePosition += 4
					break
				default:
					return Token{}, gqlerrors.NewSyntaxError(s, runePosition,
						fmt.Sprintf(`Invalid character escape sequence: \\%c.`, code))
				}
				// Step over the escape character and start a new literal run.
				position += n
				runePosition++
				chunkStart = position
			}
			continue
		} else {
			break
		}
	}
	// The loop stops on a quote, a line terminator or the end of the body;
	// only a quote properly terminates the string.
	if code != '"' { // quote (")
		return Token{}, gqlerrors.NewSyntaxError(s, runePosition, "Unterminated string.")
	}
	stringContent := body[chunkStart:position]
	valueBuffer.Write(stringContent)
	value := valueBuffer.String()
	return makeToken(TokenKind[STRING], start, position+1, value), nil
}

// readBlockString reads a block string token from the source file.
//
//	"""("?"?(\\"""|\\(?!=""")|[^"\\]))*"""
//
// start is the offset of the opening triple quote in the source body. The
// raw content is collected up to the closing triple quote, with every escaped
// triple quote (\""") replaced by a plain """, and is then normalised by
// blockStringValue. On success the BLOCK_STRING token spans from start to
// just past the closing triple quote.
//
// A syntax error is returned for a control character other than tab, line
// feed or carriage return, and when the body ends before a closing triple
// quote is found ("Unterminated string.").
//
// The source body is treated as read-only: content is copied into a private
// buffer and never appended to in place.
func readBlockString(s *source.Source, start int) (Token, error) {
	body := s.Body
	// position is a byte offset; runePosition counts runes and is only used
	// when reporting errors.
	position := start + 3
	runePosition := start + 3
	// chunkStart marks the beginning of the raw run not yet copied into
	// valueBuffer.
	chunkStart := position
	var valueBuffer bytes.Buffer

	for position < len(body) {
		code, n := runeAt(body, position)

		// Closing Triple-Quote (""")
		if code == '"' {
			x, _ := runeAt(body, position+1)
			y, _ := runeAt(body, position+2)
			if x == '"' && y == '"' {
				valueBuffer.Write(body[chunkStart:position])
				value := blockStringValue(valueBuffer.String())
				return makeToken(TokenKind[BLOCK_STRING], start, position+3, value), nil
			}
		}

		// SourceCharacter
		if code < 0x0020 &&
			code != 0x0009 &&
			code != 0x000a &&
			code != 0x000d {
			return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf(`Invalid character within String: %v.`, printCharCode(code)))
		}

		// Escape Triple-Quote (\""")
		if code == '\\' { // \
			x, _ := runeAt(body, position+1)
			y, _ := runeAt(body, position+2)
			z, _ := runeAt(body, position+3)
			if x == '"' && y == '"' && z == '"' {
				// Write the pending chunk and the unescaped quotes separately.
				// Appending to body[chunkStart:position] would reuse the
				// spare capacity of the source slice and overwrite the bytes
				// that follow it in the source body.
				valueBuffer.Write(body[chunkStart:position])
				valueBuffer.WriteString(`"""`)
				position += 4     // account for `"""` characters
				runePosition += 4 // "       "   "     "
				chunkStart = position
				continue
			}
		}

		position += n
		runePosition++
	}

	return Token{}, gqlerrors.NewSyntaxError(s, runePosition, "Unterminated string.")
}

// splitLinesRegex matches a single line terminator: CRLF, LF or CR.
var splitLinesRegex = regexp.MustCompile("\r\n|[\n\r]")

// This implements the GraphQL spec's BlockStringValue() static algorithm.
//
// Produces the value of a block string from its parsed raw value, similar to
// Coffeescript's block string, Python's docstring trim or Ruby's strip_heredoc.
//
// The steps are:
//  1. split the raw value into lines;
//  2. find the smallest indentation among all non-blank lines except the
//     first, and remove that many leading characters from every line except
//     the first;
//  3. drop leading and trailing blank lines;
//  4. join the remaining lines with U+000A.
//
// A value made up only of blank lines (including the empty string) yields "".
//
// Spec: http://facebook.github.io/graphql/draft/#BlockStringValue()
// Heavily borrows from: https://github.com/graphql/graphql-js/blob/8e0c599ceccfa8c40d6edf3b72ee2a71490b10e0/src/language/blockStringValue.js
func blockStringValue(in string) string {
	// Expand a block string's raw value into independent lines.
	lines := splitLinesRegex.Split(in, -1)

	// Determine the common indentation of all non-blank lines but the first.
	commonIndent := -1
	for i := 1; i < len(lines); i++ {
		line := lines[i]
		indent := leadingWhitespaceLen(line)
		if indent < len(line) && (commonIndent == -1 || indent < commonIndent) {
			commonIndent = indent
			if commonIndent == 0 {
				break
			}
		}
	}

	// Remove the common indentation from all lines but the first. The first
	// line directly follows the opening quotes, so its leading characters are
	// content and must be kept.
	if commonIndent > 0 {
		for i := 1; i < len(lines); i++ {
			if commonIndent > len(lines[i]) {
				// Only whitespace-only lines can be shorter than the common
				// indentation; removing the indent leaves nothing.
				lines[i] = ""
				continue
			}
			lines[i] = lines[i][commonIndent:]
		}
	}

	// Remove leading blank lines. The length guard matters when every line
	// is blank.
	for len(lines) > 0 && lineIsBlank(lines[0]) {
		lines = lines[1:]
	}

	// Remove trailing blank lines.
	for len(lines) > 0 && lineIsBlank(lines[len(lines)-1]) {
		lines = lines[:len(lines)-1]
	}

	// Return a string of the lines joined with U+000A.
	return strings.Join(lines, "\n")
}

// leadingWhitespaceLen returns the number of space and tab characters at the
// start of the given line.
func leadingWhitespaceLen(in string) (n int) {
	for n < len(in) && (in[n] == ' ' || in[n] == '\t') {
		n++
	}
	return n
}

// lineIsBlank returns true when the given line is empty or consists solely of
// spaces and tabs.
func lineIsBlank(in string) bool {
	return leadingWhitespaceLen(in) == len(in)
}

// uniCharCode combines four hexadecimal characters, most significant first,
// into the integer they spell. For example, uniCharCode('0','0','0','f')
// is 15 and uniCharCode('0','0','f','f') is 255.
//
// If any character is not a hexadecimal digit the result is negative:
// char2hex reports -1 for it, and OR-ing that value in keeps the sign bit set.
func uniCharCode(a, b, c, d rune) rune {
	combined := 0
	for _, digit := range [...]rune{a, b, c, d} {
		combined = combined<<4 | char2hex(digit)
	}
	return rune(combined)
}

// char2hex returns the numeric value of a single hexadecimal character:
// '0'-'9' map to 0-9, and both 'A'-'F' and 'a'-'f' map to 10-15.
// Any other character yields -1.
func char2hex(a rune) int {
	switch {
	case a >= '0' && a <= '9':
		return int(a - '0')
	case a >= 'A' && a <= 'F':
		return int(a-'A') + 10
	case a >= 'a' && a <= 'f':
		return int(a-'a') + 10
	}
	return -1
}

// makeToken assembles a Token from its kind, start and end positions, and
// value.
func makeToken(kind int, start int, end int, value string) Token {
	var tok Token
	tok.Kind = kind
	tok.Start = start
	tok.End = end
	tok.Value = value
	return tok
}

// printCharCode renders a character code for use in error messages.
// Negative codes (reads past the end of the source) print as <EOF>,
// printable ASCII prints as the quoted character, and everything else prints
// as a quoted, escaped hexadecimal code such as "\\u0007".
func printCharCode(code rune) string {
	switch {
	case code < 0:
		return "<EOF>"
	case code >= 0x0020 && code < 0x007F:
		return fmt.Sprintf(`"%c"`, code)
	default:
		return fmt.Sprintf(`"\\u%04X"`, code)
	}
}

// readToken lexes the next token of s at or after fromPosition.
//
// Ignored characters and comments are skipped first. Reaching the end of the
// body yields an EOF token. Otherwise the first significant character selects
// the token: single-character punctuators and the "..." spread are returned
// directly, while names, numbers, strings and block strings are delegated to
// their dedicated readers. A syntax error is returned for a disallowed
// control character or for any character that cannot start a token.
func readToken(s *source.Source, fromPosition int) (Token, error) {
	body := s.Body
	position, runePosition := positionAfterWhitespace(body, fromPosition)
	if position >= len(body) {
		return makeToken(TokenKind[EOF], position, position, ""), nil
	}
	code, codeLength := runeAt(body, position)

	// SourceCharacter: control characters other than tab, line feed and
	// carriage return are rejected.
	isControl := code < 0x0020
	isAllowedControl := code == 0x0009 || code == 0x000A || code == 0x000D
	if isControl && !isAllowedControl {
		return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf(`Invalid character %v`, printCharCode(code)))
	}

	// Single-character punctuators all produce a one-byte, value-less token.
	punctuator, isPunctuator := 0, true
	switch code {
	case '!':
		punctuator = BANG
	case '$':
		punctuator = DOLLAR
	case '(':
		punctuator = PAREN_L
	case ')':
		punctuator = PAREN_R
	case ':':
		punctuator = COLON
	case '=':
		punctuator = EQUALS
	case '@':
		punctuator = AT
	case '[':
		punctuator = BRACKET_L
	case ']':
		punctuator = BRACKET_R
	case '{':
		punctuator = BRACE_L
	case '|':
		punctuator = PIPE
	case '}':
		punctuator = BRACE_R
	default:
		isPunctuator = false
	}
	if isPunctuator {
		return makeToken(TokenKind[punctuator], position, position+1, ""), nil
	}

	switch {
	case code == '.':
		// Only a full "..." is a token; a lone dot falls through to the
		// unexpected-character error below.
		second, _ := runeAt(body, position+1)
		third, _ := runeAt(body, position+2)
		if second == '.' && third == '.' {
			return makeToken(TokenKind[SPREAD], position, position+3, ""), nil
		}
	case code == '_' || (code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z'):
		return readName(s, position, runePosition), nil
	case code == '-' || (code >= '0' && code <= '9'):
		return readNumber(s, position, code, codeLength)
	case code == '"':
		// Three consecutive quotes open a block string.
		second, _ := runeAt(body, position+1)
		third, _ := runeAt(body, position+2)
		if second == '"' && third == '"' {
			return readBlockString(s, position)
		}
		return readString(s, position)
	}

	return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf("Unexpected character %v.", printCharCode(code)))
}

// runeAt decodes the rune that starts at the given byte position of body and
// returns it together with its width in bytes.
//
// When position is at or beyond the end of body it returns (-1, 0): -1 is the
// <EOF> marker understood by printCharCode, and the zero width means that
// advancing by it can never move past the end of the input. Invalid UTF-8
// decodes as (utf8.RuneError, 1), as reported by utf8.DecodeRune.
func runeAt(body []byte, position int) (code rune, charWidth int) {
	if len(body) <= position {
		// <EOF>: nothing was read, so the width is zero. (utf8.RuneError is a
		// rune value, not a width, and must not be returned here.)
		return -1, 0
	}

	// Fast path for single-byte (ASCII) characters.
	c := body[position]
	if c < utf8.RuneSelf {
		return rune(c), 1
	}

	return utf8.DecodeRune(body[position:])
}

// positionAfterWhitespace scans body from startPosition, skipping ignored
// characters (BOM, tab, space, line terminators, commas) and '#' comments,
// and stops at the first character that should be lexed or at the end of body.
//
// It returns that location twice: position is the byte offset and
// runePosition is advanced once per skipped rune. Both start out equal to
// startPosition.
func positionAfterWhitespace(body []byte, startPosition int) (position int, runePosition int) {
	position, runePosition = startPosition, startPosition
	end := len(body)

	for position < end {
		code, width := runeAt(body, position)
		switch code {
		case 0xFEFF, // BOM
			0x0009, // tab
			0x0020, // space
			0x000A, // new line
			0x000D, // carriage return
			0x002C: // comma
			position += width
			runePosition++
		case '#':
			position += width
			runePosition++
			// A comment runs until a line terminator, any other control
			// character except tab, or the end of body.
			for position < end {
				c, w := runeAt(body, position)
				if c <= 0x001F && c != 0x0009 {
					break
				}
				position += w
				runePosition++
			}
		default:
			return position, runePosition
		}
	}
	return position, runePosition
}

// GetTokenDesc returns a printable description of token: the description of
// its kind, followed by the token's value in double quotes when the value is
// not empty.
func GetTokenDesc(token Token) string {
	desc := GetTokenKindDesc(token.Kind)
	if token.Value != "" {
		desc = fmt.Sprintf("%s \"%s\"", desc, token.Value)
	}
	return desc
}

// GetTokenKindDesc returns the printable label registered for the given token
// kind, or the empty string when the kind is unknown.
func GetTokenKindDesc(kind int) string {
	if label, known := tokenDescription[kind]; known {
		return label
	}
	return ""
}