diff options
author | Michael Muré <batolettre@gmail.com> | 2020-03-28 17:17:13 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-03-28 17:17:13 +0100 |
commit | 390e66eb2b3a7088cdb8a44eeaf384fe691daf4a (patch) | |
tree | 05386ddf08d7e1c2947a6fc6cf2fbd44efa19eaf /query/lexer.go | |
parent | 58abc6b0a35b679ac0c34579ff1cb53c8fa71af4 (diff) | |
parent | 314fcbb2293d869c33d6a76aedd148aedff6561d (diff) | |
download | git-bug-390e66eb2b3a7088cdb8a44eeaf384fe691daf4a.tar.gz |
Merge pull request #355 from MichaelMure/query-parser-ast
replace the all-in-one query parser by a complete one with AST/lexer/parser
Diffstat (limited to 'query/lexer.go')
-rw-r--r-- | query/lexer.go | 71 |
1 files changed, 71 insertions, 0 deletions
// token is a single "qualifier:value" pair extracted from a query string.
type token struct {
	qualifier string
	value     string
}

// tokenize parses and breaks an input into tokens ready to be
// interpreted later by a parser to get the semantic.
//
// The query is a whitespace-separated list of "qualifier:value" fields.
// A value may be wrapped in a pair of identical quotation marks to protect
// whitespace and colons; the surrounding quotes are removed from the
// resulting token value.
//
// An error is returned when a quote is left open, when a field has no
// colon, or when the qualifier or the value of a field is empty.
func tokenize(query string) ([]token, error) {
	fields, unmatched := scanFields(query)
	if unmatched {
		return nil, fmt.Errorf("can't tokenize \"%s\": unmatched quote", query)
	}

	var tokens []token
	for _, field := range fields {
		// Split on the first colon only, so that a value is allowed to
		// contain colons itself (e.g. title:"foo: bar").
		split := strings.SplitN(field, ":", 2)
		if len(split) != 2 {
			return nil, fmt.Errorf("can't tokenize \"%s\"", field)
		}

		if len(split[0]) == 0 {
			return nil, fmt.Errorf("can't tokenize \"%s\": empty qualifier", field)
		}
		if len(split[1]) == 0 {
			return nil, fmt.Errorf("empty value for qualifier \"%s\"", split[0])
		}

		tokens = append(tokens, token{
			qualifier: split[0],
			value:     removeQuote(split[1]),
		})
	}
	return tokens, nil
}

// splitQuery breaks a query into whitespace-separated fields, keeping
// whitespace that appears inside a quoted section. Quotes are left in place
// in the returned fields. A quote that is never closed extends its field to
// the end of the query.
func splitQuery(query string) []string {
	fields, _ := scanFields(query)
	return fields
}

// scanFields is the hand-written scanner behind splitQuery and tokenize.
// It returns the fields found in query and whether a quoted section was
// still open when the end of the query was reached.
//
// A quoted section starts at any rune of unicode.Quotation_Mark and ends at
// the next occurrence of that same rune.
func scanFields(query string) (fields []string, unmatched bool) {
	quote := rune(0) // currently open quote, 0 when outside of quotes
	start := -1      // byte offset of the current field, -1 between fields

	for i, c := range query {
		switch {
		case quote != 0:
			// Inside quotes everything is part of the field; only the
			// same quote rune closes the section.
			if c == quote {
				quote = 0
			}
		case unicode.In(c, unicode.Quotation_Mark):
			quote = c
		case unicode.IsSpace(c):
			// Unquoted whitespace terminates the current field, if any.
			if start >= 0 {
				fields = append(fields, query[start:i])
				start = -1
			}
			continue
		}
		if start < 0 {
			start = i
		}
	}

	if start >= 0 {
		fields = append(fields, query[start:])
	}
	return fields, quote != 0
}

// removeQuote strips one pair of surrounding quotes from field. The quotes
// are removed only when the first and last runes are the same rune and that
// rune belongs to unicode.Quotation_Mark; otherwise field is returned
// unchanged.
func removeQuote(field string) string {
	runes := []rune(field)
	if len(runes) < 2 {
		return field
	}

	first, last := runes[0], runes[len(runes)-1]
	if first != last || !unicode.In(first, unicode.Quotation_Mark) {
		return field
	}

	// Slice the original string by the quote's encoded width so that the
	// bytes between the quotes are returned untouched.
	width := len(string(first))
	return field[width : len(field)-width]
}