path: root/query/lexer.go
author     Michael Muré <batolettre@gmail.com>  2020-03-28 17:17:13 +0100
committer  GitHub <noreply@github.com>          2020-03-28 17:17:13 +0100
commit     390e66eb2b3a7088cdb8a44eeaf384fe691daf4a (patch)
tree       05386ddf08d7e1c2947a6fc6cf2fbd44efa19eaf /query/lexer.go
parent     58abc6b0a35b679ac0c34579ff1cb53c8fa71af4 (diff)
parent     314fcbb2293d869c33d6a76aedd148aedff6561d (diff)
Merge pull request #355 from MichaelMure/query-parser-ast
replace the all-in-one query parser by a complete one with AST/lexer/parser
Diffstat (limited to 'query/lexer.go')
-rw-r--r--  query/lexer.go  71
1 file changed, 71 insertions, 0 deletions
diff --git a/query/lexer.go b/query/lexer.go
new file mode 100644
index 00000000..ec05f44e
--- /dev/null
+++ b/query/lexer.go
@@ -0,0 +1,71 @@
+package query
+
+import (
+ "fmt"
+ "strings"
+ "unicode"
+)
+
+type token struct {
+ qualifier string
+ value string
+}
+
+// TODO: this lexer implementation behaves badly with unmatched quotes.
+// A hand-written lexer would be better than relying on strings.FieldsFunc().
+
+// tokenize parses and breaks an input into tokens, ready to be
+// interpreted later by a parser to extract the semantics.
+func tokenize(query string) ([]token, error) {
+ fields := splitQuery(query)
+
+ var tokens []token
+ for _, field := range fields {
+ split := strings.Split(field, ":")
+ if len(split) != 2 {
+ return nil, fmt.Errorf("can't tokenize \"%s\"", field)
+ }
+
+ if len(split[0]) == 0 {
+ return nil, fmt.Errorf("can't tokenize \"%s\": empty qualifier", field)
+ }
+ if len(split[1]) == 0 {
+ return nil, fmt.Errorf("empty value for qualifier \"%s\"", split[0])
+ }
+
+ tokens = append(tokens, token{
+ qualifier: split[0],
+ value: removeQuote(split[1]),
+ })
+ }
+ return tokens, nil
+}
+
+func splitQuery(query string) []string {
+ lastQuote := rune(0)
+ f := func(c rune) bool {
+ switch {
+ case c == lastQuote:
+ lastQuote = rune(0)
+ return false
+ case lastQuote != rune(0):
+ return false
+ case unicode.In(c, unicode.Quotation_Mark):
+ lastQuote = c
+ return false
+ default:
+ return unicode.IsSpace(c)
+ }
+ }
+
+ return strings.FieldsFunc(query, f)
+}
+
+func removeQuote(field string) string {
+ if len(field) >= 2 {
+ if field[0] == '"' && field[len(field)-1] == '"' {
+ return field[1 : len(field)-1]
+ }
+ }
+ return field
+}
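
To show how the new lexer is exercised, here is a minimal in-package sketch (a hypothetical query/lexer_test.go; the file name and test cases are illustrative, not part of this diff) that runs tokenize on a query mixing a plain value and a double-quoted value:

package query

import "testing"

// TestTokenizeSketch is an illustrative sketch, not part of this diff.
// It feeds tokenize one plain value and one quoted value containing a space.
func TestTokenizeSketch(t *testing.T) {
	tokens, err := tokenize(`status:open author:"René Descartes"`)
	if err != nil {
		t.Fatal(err)
	}

	// splitQuery keeps the quoted value in a single field,
	// and removeQuote strips the surrounding double quotes.
	want := []token{
		{qualifier: "status", value: "open"},
		{qualifier: "author", value: "René Descartes"},
	}
	if len(tokens) != len(want) {
		t.Fatalf("got %d tokens, want %d", len(tokens), len(want))
	}
	for i, tok := range tokens {
		if tok != want[i] {
			t.Errorf("token %d: got %+v, want %+v", i, tok, want[i])
		}
	}
}

As the TODO above notes, an unmatched quote (e.g. author:"René) is not rejected: the opening quote disables space splitting for the rest of the input, so the remainder is lumped into one field, which then either fails to tokenize or keeps a stray quote in its value.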