diff options
Diffstat (limited to 'query/lexer.go')
-rw-r--r-- | query/lexer.go | 71 |
1 files changed, 71 insertions, 0 deletions
// token is a single "qualifier:value" pair extracted from a query string.
type token struct {
	qualifier string
	value     string
}

// tokenize parses a query and breaks it into tokens, ready to be
// interpreted later by a parser to get the semantics.
//
// A query is a whitespace-separated list of "qualifier:value" fields. A value
// may be wrapped in a pair of identical quotation marks to include whitespace;
// the surrounding quotes are stripped from the resulting token. Only the first
// ':' of a field separates the qualifier from the value, so values may
// themselves contain colons.
//
// An error is returned when a quote is left unclosed, when a field has no
// ':' separator, or when the qualifier or the value is empty. An empty query
// yields no tokens and no error.
func tokenize(query string) ([]token, error) {
	fields, unmatched := scanFields(query)
	if unmatched {
		// An unclosed quote swallows everything up to the end of the query
		// into one field; report it instead of producing a bogus token.
		return nil, fmt.Errorf("can't tokenize \"%s\": unmatched quote", query)
	}

	var tokens []token
	for _, field := range fields {
		// Split on the first ':' only, so that values such as URLs or
		// quoted text containing ':' are kept intact.
		split := strings.SplitN(field, ":", 2)
		if len(split) != 2 {
			return nil, fmt.Errorf("can't tokenize \"%s\"", field)
		}

		if len(split[0]) == 0 {
			return nil, fmt.Errorf("can't tokenize \"%s\": empty qualifier", field)
		}
		if len(split[1]) == 0 {
			return nil, fmt.Errorf("empty value for qualifier \"%s\"", split[0])
		}

		tokens = append(tokens, token{
			qualifier: split[0],
			value:     removeQuote(split[1]),
		})
	}
	return tokens, nil
}

// splitQuery splits a query into whitespace-separated fields, keeping
// whitespace that appears inside a quoted section as part of the field.
// Quotes are left in place. If a quote is never closed, the remainder of the
// query ends up in the last field.
func splitQuery(query string) []string {
	fields, _ := scanFields(query)
	return fields
}

// scanFields is the hand-written scanner behind splitQuery. It walks the
// query once, rune by rune, and returns the fields together with a flag
// telling whether the query ended inside an unclosed quote.
//
// Any rune with the Unicode Quotation_Mark property opens a quoted section,
// which is closed by the next occurrence of that same rune.
func scanFields(query string) (fields []string, unmatched bool) {
	quote := rune(0) // currently open quote, 0 when outside of quotes
	start := -1      // byte offset of the field being read, -1 between fields

	for i, c := range query {
		switch {
		case quote != 0:
			// Inside quotes everything belongs to the field; only the
			// matching quote ends the quoted section.
			if c == quote {
				quote = 0
			}
		case unicode.In(c, unicode.Quotation_Mark):
			quote = c
		case unicode.IsSpace(c):
			// Unquoted whitespace terminates the current field, if any.
			if start >= 0 {
				fields = append(fields, query[start:i])
				start = -1
			}
			continue
		}
		if start < 0 {
			start = i
		}
	}

	if start >= 0 {
		fields = append(fields, query[start:])
	}
	return fields, quote != 0
}

// removeQuote strips one pair of surrounding quotes from field. The quotes
// are removed only when the first and last runes are the same Unicode
// quotation mark, mirroring how the splitter pairs quotes. Any other input is
// returned unchanged.
func removeQuote(field string) string {
	var first, last rune
	secondStart, lastStart, count := 0, 0, 0
	// Walk by rune while tracking byte offsets, so the result can be sliced
	// out of the original string without re-encoding it.
	for i, c := range field {
		switch count {
		case 0:
			first = c
		case 1:
			secondStart = i
		}
		last, lastStart = c, i
		count++
	}

	if count >= 2 && first == last && unicode.In(first, unicode.Quotation_Mark) {
		return field[secondStart:lastStart]
	}
	return field
}