package lexer
import (
"bytes"
"unicode/utf8"
"github.com/vektah/gqlparser/ast"
"github.com/vektah/gqlparser/gqlerror"
)
// Lexer turns graphql request and schema strings into tokens.
//
// It embeds the *ast.Source being lexed and tracks the span of the token
// currently being read twice: as byte offsets, which are used to slice Input,
// and as rune offsets, which are what token positions report.
type Lexer struct {
*ast.Source
// start is the byte offset into Input at which the current token begins.
start int
// startRunes is the same position as start, counted in runes.
startRunes int
// end is the byte offset into Input just past the last byte consumed so far.
end int
// endRunes is the same position as end, counted in runes.
endRunes int
// line is the current line number; New initialises it to 1.
line int
// lineStartRunes is the rune offset at which the current line begins; columns
// are computed relative to it.
lineStartRunes int
}
// New returns a Lexer positioned at the beginning of src, with the line
// counter starting at 1.
func New(src *ast.Source) Lexer {
	var lex Lexer
	lex.Source = src
	lex.line = 1
	return lex
}
// peek decodes the rune located at the current end offset and returns it
// together with its width in bytes. It does not move end; callers that want
// to consume the rune advance end and endRunes themselves.
func (s *Lexer) peek() (rune, int) {
	remaining := s.Input[s.end:]
	r, width := utf8.DecodeRuneInString(remaining)
	return r, width
}
// makeToken builds a token of the given kind whose value is the raw input
// text between the start and end byte offsets.
func (s *Lexer) makeToken(kind Type) (Token, *gqlerror.Error) {
	text := s.Input[s.start:s.end]
	return s.makeValueToken(kind, text)
}
// makeValueToken builds a token of the given kind carrying an explicit value.
// The position is taken from the lexer's current rune offsets and line; the
// column is the 1-based rune distance of the token start from the line start.
// The returned error is always nil.
func (s *Lexer) makeValueToken(kind Type, value string) (Token, *gqlerror.Error) {
	pos := ast.Position{
		Start:  s.startRunes,
		End:    s.endRunes,
		Line:   s.line,
		Column: s.startRunes - s.lineStartRunes + 1,
		Src:    s.Source,
	}
	tok := Token{Kind: kind, Value: value, Pos: pos}
	return tok, nil
}
// makeError returns an Invalid token together with a located error built from
// format and args. Unlike regular tokens, the reported column is measured from
// the current end offset, i.e. the point where lexing stopped.
func (s *Lexer) makeError(format string, args ...interface{}) (Token, *gqlerror.Error) {
	col := s.endRunes - s.lineStartRunes + 1

	pos := ast.Position{
		Start:  s.startRunes,
		End:    s.endRunes,
		Line:   s.line,
		Column: col,
		Src:    s.Source,
	}
	invalid := Token{Kind: Invalid, Pos: pos}

	return invalid, gqlerror.ErrorLocf(s.Source.Name, s.line, col, format, args...)
}
// ReadToken gets the next token from the source, starting at the lexer's
// current end offset.
//
// Ignored characters are skipped first (see ws). Punctuators are lexed
// immediately; names, numbers and strings are delegated to the matching read
// helper. A comment is consumed and discarded, after which ReadToken calls
// itself to produce the token that follows it. At end of input an EOF token is
// returned.
//
// When the next character cannot start any token, an Invalid token and a
// non-nil error are returned and the offending character is not consumed.
func (s *Lexer) ReadToken() (token Token, err *gqlerror.Error) {
	s.ws()
	s.start = s.end
	s.startRunes = s.endRunes

	if s.end >= len(s.Input) {
		return s.makeToken(EOF)
	}

	r := s.Input[s.start]
	// Tentatively consume the first byte; every token kind handled by the
	// switch starts with a single-byte character.
	s.end++
	s.endRunes++
	switch r {
	case '!':
		return s.makeValueToken(Bang, "")
	case '$':
		return s.makeValueToken(Dollar, "")
	case '&':
		return s.makeValueToken(Amp, "")
	case '(':
		return s.makeValueToken(ParenL, "")
	case ')':
		return s.makeValueToken(ParenR, "")
	case '.':
		// Only "..." is a token; a lone '.' drops out of the switch and is
		// reported as an unexpected character below.
		if len(s.Input) > s.start+2 && s.Input[s.start:s.start+3] == "..." {
			s.end += 2
			s.endRunes += 2
			return s.makeValueToken(Spread, "")
		}
	case ':':
		return s.makeValueToken(Colon, "")
	case '=':
		return s.makeValueToken(Equals, "")
	case '@':
		return s.makeValueToken(At, "")
	case '[':
		return s.makeValueToken(BracketL, "")
	case ']':
		return s.makeValueToken(BracketR, "")
	case '{':
		return s.makeValueToken(BraceL, "")
	case '}':
		return s.makeValueToken(BraceR, "")
	case '|':
		return s.makeValueToken(Pipe, "")
	case '#':
		// Comments are not surfaced to the caller.
		s.readComment()
		return s.ReadToken()

	case '_',
		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
		'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
		'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
		'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z':
		return s.readName()

	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		return s.readNumber()

	case '"':
		if len(s.Input) > s.start+2 && s.Input[s.start:s.start+3] == `"""` {
			return s.readBlockString()
		}
		return s.readString()
	}

	// Nothing matched: un-consume the byte so the error is positioned at the
	// offending character.
	s.end--
	s.endRunes--

	if r < 0x0020 && r != 0x0009 && r != 0x000a && r != 0x000d {
		// The code point follows a \u prefix, so it must be printed in hex.
		return s.makeError(`Cannot contain the invalid character "\u%04X"`, r)
	}

	if r == '\'' {
		return s.makeError(`Unexpected single quote character ('), did you mean to use a double quote (")?`)
	}

	// r is only the first byte of the character; decode the whole rune so
	// that multi-byte UTF-8 characters are reported intact.
	ch, _ := utf8.DecodeRuneInString(s.Input[s.start:])
	return s.makeError(`Cannot parse the unexpected character "%s".`, string(ch))
}
// ws advances end past every ignored character at the current position:
// tabs, spaces, commas, line terminators and the UTF-8 byte order mark. It
// stops at the first byte that is none of those (comments are not skipped
// here).
//
// Each line terminator ("\n", "\r" or "\r\n") increments line and moves
// lineStartRunes to the first rune of the following line.
func (s *Lexer) ws() {
	for s.end < len(s.Input) {
		switch s.Input[s.end] {
		case '\t', ' ', ',':
			s.end++
			s.endRunes++
		case '\n':
			s.end++
			s.endRunes++
			s.line++
			s.lineStartRunes = s.endRunes
		case '\r':
			s.end++
			s.endRunes++
			// "\r\n" is a single line terminator: swallow the '\n' before
			// recording where the next line starts, otherwise columns on
			// that line would be off by one.
			if s.end < len(s.Input) && s.Input[s.end] == '\n' {
				s.end++
				s.endRunes++
			}
			s.line++
			s.lineStartRunes = s.endRunes
		// byte order mark, given ws is hot path we aren't relying on the unicode package here.
		case 0xef:
			if s.end+2 >= len(s.Input) || s.Input[s.end+1] != 0xBB || s.Input[s.end+2] != 0xBF {
				return
			}
			// three bytes, one rune
			s.end += 3
			s.endRunes++
		default:
			return
		}
	}
}
// readComment consumes the remainder of a comment, stopping at the first
// control character other than tab (in practice the line terminator) or at
// end of input, and returns it as a Comment token.
//
// #[\u0009\u0020-\uFFFF]*
func (s *Lexer) readComment() (Token, *gqlerror.Error) {
	for s.end < len(s.Input) {
		ch, width := s.peek()

		// SourceCharacter but not LineTerminator
		if ch <= 0x001f && ch != '\t' {
			break
		}

		s.end += width
		s.endRunes++
	}

	return s.makeToken(Comment)
}
// readNumber lexes a numeric literal. The result is a Float token when a
// fractional part or an exponent is present and an Int token otherwise.
//
// Int:   -?(0|[1-9][0-9]*)
// Float: -?(0|[1-9][0-9]*)(\.[0-9]+)?((E|e)(+|-)?[0-9]+)?
func (s *Lexer) readNumber() (Token, *gqlerror.Error) {
	// The caller already consumed the first byte; rewind so the whole
	// literal is scanned from here.
	s.end--
	s.endRunes--

	s.acceptByte('-')

	if s.acceptByte('0') {
		// A leading zero must stand alone; point the error at the first
		// digit that follows it.
		if extra := s.acceptDigits(); extra != 0 {
			s.end -= extra
			s.endRunes -= extra
			return s.makeError("Invalid number, unexpected digit after 0: %s.", s.describeNext())
		}
	} else if s.acceptDigits() == 0 {
		return s.makeError("Invalid number, expected digit but got: %s.", s.describeNext())
	}

	var kind Type = Int

	if s.acceptByte('.') {
		kind = Float
		if s.acceptDigits() == 0 {
			return s.makeError("Invalid number, expected digit but got: %s.", s.describeNext())
		}
	}

	if s.acceptByte('e', 'E') {
		kind = Float
		s.acceptByte('-', '+')
		if s.acceptDigits() == 0 {
			return s.makeError("Invalid number, expected digit but got: %s.", s.describeNext())
		}
	}

	return s.makeToken(kind)
}
// acceptByte consumes the next input byte when it equals any of the given
// bytes and reports whether it did. Nothing is consumed at end of input or
// when no candidate matches.
func (s *Lexer) acceptByte(bytes ...uint8) bool {
	if s.end < len(s.Input) {
		next := s.Input[s.end]
		for i := range bytes {
			if bytes[i] == next {
				s.end++
				s.endRunes++
				return true
			}
		}
	}
	return false
}
// acceptDigits consumes a run of ASCII digits at the current position and
// returns how many were consumed (zero when the next byte is not a digit).
func (s *Lexer) acceptDigits() int {
	begin := s.end
	for s.end < len(s.Input) {
		if c := s.Input[s.end]; c < '0' || c > '9' {
			break
		}
		s.end++
		s.endRunes++
	}
	// every digit is exactly one byte wide
	return s.end - begin
}
// describeNext peeks at the input and returns a human readable description of
// the next character: the character wrapped in double quotes, or "<EOF>" at
// end of input. It allocates and should only be used when building errors.
func (s *Lexer) describeNext() string {
	if s.end >= len(s.Input) {
		return "<EOF>"
	}
	// Decode the whole rune rather than converting a single byte, so that
	// multi-byte UTF-8 characters are shown intact.
	r, _ := utf8.DecodeRuneInString(s.Input[s.end:])
	return `"` + string(r) + `"`
}
// readString lexes a single-line string literal; the opening quote has
// already been consumed by the caller.
//
// The returned String token's value excludes the surrounding quotes and has
// escape sequences resolved, while Pos.Start and Pos.End cover the quotes. An
// error is returned for control characters other than tab, for malformed
// escape sequences, and when a line terminator or the end of input is reached
// before the closing quote.
//
// "([^"\\\u000A\u000D]|(\\(u[0-9a-fA-F]{4}|["\\/bfnrt])))*"
func (s *Lexer) readString() (Token, *gqlerror.Error) {
	inputLen := len(s.Input)

	// this buffer is lazily created only if there are escape characters.
	var buf *bytes.Buffer

	// skip the opening quote
	// NOTE(review): the token's Column is derived from this shifted start and
	// is not moved back the way Pos.Start is below - confirm that is intended.
	s.start++
	s.startRunes++

	for s.end < inputLen {
		r := s.Input[s.end]
		if r == '\n' || r == '\r' {
			// leaves the for loop: strings may not span lines
			break
		}
		if r < 0x0020 && r != '\t' {
			// The code point follows a \u prefix, so it must be printed in hex.
			return s.makeError(`Invalid character within String: "\u%04X".`, r)
		}
		switch r {
		default:
			var char = rune(r)
			var w = 1

			// skip unicode overhead if we are in the ascii range
			if r >= 127 {
				char, w = utf8.DecodeRuneInString(s.Input[s.end:])
			}
			s.end += w
			s.endRunes++

			if buf != nil {
				buf.WriteRune(char)
			}

		case '"':
			t, err := s.makeToken(String)
			// the token should not include the quotes in its value, but should cover them in its position
			t.Pos.Start--
			t.Pos.End++

			if buf != nil {
				t.Value = buf.String()
			}

			// skip the close quote
			s.end++
			s.endRunes++

			return t, err

		case '\\':
			if s.end+1 >= inputLen {
				s.end++
				s.endRunes++
				return s.makeError(`Invalid character escape sequence.`)
			}

			if buf == nil {
				// First escape seen: seed the buffer with the raw text so far.
				buf = bytes.NewBufferString(s.Input[s.start:s.end])
			}

			escape := s.Input[s.end+1]

			if escape == 'u' {
				// \uXXXX occupies six bytes, [s.end, s.end+6). It is only
				// truncated when that range runs past the end of the input;
				// a complete escape that ends exactly at the end of input is
				// valid and the string is then reported as unterminated.
				if s.end+6 > inputLen {
					s.end++
					s.endRunes++
					return s.makeError("Invalid character escape sequence: \\%s.", s.Input[s.end:])
				}

				r, ok := unhex(s.Input[s.end+2 : s.end+6])
				if !ok {
					s.end++
					s.endRunes++
					return s.makeError("Invalid character escape sequence: \\%s.", s.Input[s.end:s.end+5])
				}
				buf.WriteRune(r)
				s.end += 6
				s.endRunes += 6
			} else {
				switch escape {
				case '"', '/', '\\':
					buf.WriteByte(escape)
				case 'b':
					buf.WriteByte('\b')
				case 'f':
					buf.WriteByte('\f')
				case 'n':
					buf.WriteByte('\n')
				case 'r':
					buf.WriteByte('\r')
				case 't':
					buf.WriteByte('\t')
				default:
					s.end++
					s.endRunes++
					return s.makeError("Invalid character escape sequence: \\%s.", string(escape))
				}
				s.end += 2
				s.endRunes += 2
			}
		}
	}

	return s.makeError("Unterminated string.")
}
// readBlockString lexes a triple-quoted block string; the first of the three
// opening quotes has already been consumed by the caller.
//
// The returned BlockString token's value is the raw content (with \""" turned
// into """ and "\r" / "\r\n" normalised to "\n") passed through
// blockStringValue. Pos.Start and Pos.End cover the delimiters, and Line and
// Column refer to where the content begins. Line terminators inside the
// string advance the lexer's line tracking so that tokens and errors after
// the string are located correctly.
//
// """("?"?(\\"""|\\(?!=""")|[^"\\]))*"""
func (s *Lexer) readBlockString() (Token, *gqlerror.Error) {
	inputLen := len(s.Input)

	var buf bytes.Buffer

	// skip the opening quote
	s.start += 3
	s.startRunes += 3
	s.end += 2
	s.endRunes += 2

	// line and lineStartRunes move while the body is scanned, so remember
	// where the string began for the token's own position.
	line := s.line
	column := s.startRunes - s.lineStartRunes + 1

	for s.end < inputLen {
		r := s.Input[s.end]

		// Closing triple quote (""")
		if r == '"' && s.end+3 <= inputLen && s.Input[s.end:s.end+3] == `"""` {
			t, err := s.makeValueToken(BlockString, blockStringValue(buf.String()))

			t.Pos.Line = line
			t.Pos.Column = column

			// the token should not include the quotes in its value, but should cover them in its position
			t.Pos.Start -= 3
			t.Pos.End += 3

			// skip the close quote
			s.end += 3
			s.endRunes += 3

			return t, err
		}

		// SourceCharacter
		if r < 0x0020 && r != '\t' && r != '\n' && r != '\r' {
			// The code point follows a \u prefix, so it must be printed in hex.
			return s.makeError(`Invalid character within String: "\u%04X".`, r)
		}

		switch {
		case r == '\\' && s.end+4 <= inputLen && s.Input[s.end:s.end+4] == `\"""`:
			// escaped triple quote
			buf.WriteString(`"""`)
			s.end += 4
			s.endRunes += 4

		case r == '\r':
			// "\r" and "\r\n" both become a single "\n" in the value
			if s.end+1 < inputLen && s.Input[s.end+1] == '\n' {
				s.end++
				s.endRunes++
			}
			buf.WriteByte('\n')
			s.end++
			s.endRunes++
			s.line++
			s.lineStartRunes = s.endRunes

		default:
			var char = rune(r)
			var w = 1

			// skip unicode overhead if we are in the ascii range
			if r >= 127 {
				char, w = utf8.DecodeRuneInString(s.Input[s.end:])
			}
			s.end += w
			s.endRunes++
			buf.WriteRune(char)

			if r == '\n' {
				s.line++
				s.lineStartRunes = s.endRunes
			}
		}
	}

	return s.makeError("Unterminated string.")
}
// unhex parses b as a sequence of hexadecimal digits (either case) and
// returns the accumulated value. ok is false, and v is 0, as soon as a
// character that is not a hex digit is encountered. An empty string yields
// (0, true).
func unhex(b string) (v rune, ok bool) {
	for i := 0; i < len(b); i++ {
		var digit byte
		switch c := b[i]; {
		case c >= '0' && c <= '9':
			digit = c - '0'
		case c >= 'a' && c <= 'f':
			digit = c - 'a' + 10
		case c >= 'A' && c <= 'F':
			digit = c - 'A' + 10
		default:
			// includes every byte of a non-ASCII character
			return 0, false
		}
		v = v<<4 | rune(digit)
	}
	return v, true
}
// readName consumes the remaining characters of a name (the first one was
// already consumed by the caller) and returns it as a Name token.
//
// [_A-Za-z][_0-9A-Za-z]*
func (s *Lexer) readName() (Token, *gqlerror.Error) {
scan:
	for s.end < len(s.Input) {
		// Name characters are all ASCII, so looking at a single byte is
		// enough: any byte of a multi-byte character ends the name.
		switch c := s.Input[s.end]; {
		case c == '_',
			'0' <= c && c <= '9',
			'A' <= c && c <= 'Z',
			'a' <= c && c <= 'z':
			s.end++
			s.endRunes++
		default:
			break scan
		}
	}

	return s.makeToken(Name)
}