aboutsummaryrefslogtreecommitdiffstats
path: root/query/lexer.go
diff options
context:
space:
mode:
Diffstat (limited to 'query/lexer.go')
-rw-r--r--query/lexer.go71
1 files changed, 71 insertions, 0 deletions
diff --git a/query/lexer.go b/query/lexer.go
new file mode 100644
index 00000000..ec05f44e
--- /dev/null
+++ b/query/lexer.go
@@ -0,0 +1,71 @@
+package query
+
+import (
+ "fmt"
+ "strings"
+ "unicode"
+)
+
// token is a single lexed unit of a search query: a qualifier (the
// field being searched, e.g. "status") and its associated value.
type token struct {
	qualifier string
	value     string
}
+
// TODO: this lexer implementation behaves badly with unmatched quotes.
// A hand-written lexer would be better than relying on strings.FieldsFunc().
+
+// tokenize parse and break a input into tokens ready to be
+// interpreted later by a parser to get the semantic.
+func tokenize(query string) ([]token, error) {
+ fields := splitQuery(query)
+
+ var tokens []token
+ for _, field := range fields {
+ split := strings.Split(field, ":")
+ if len(split) != 2 {
+ return nil, fmt.Errorf("can't tokenize \"%s\"", field)
+ }
+
+ if len(split[0]) == 0 {
+ return nil, fmt.Errorf("can't tokenize \"%s\": empty qualifier", field)
+ }
+ if len(split[1]) == 0 {
+ return nil, fmt.Errorf("empty value for qualifier \"%s\"", split[0])
+ }
+
+ tokens = append(tokens, token{
+ qualifier: split[0],
+ value: removeQuote(split[1]),
+ })
+ }
+ return tokens, nil
+}
+
// splitQuery breaks query into whitespace-separated fields, except that
// anything enclosed between a pair of matching quotation marks is kept
// together inside a single field.
func splitQuery(query string) []string {
	var openQuote rune

	isSeparator := func(r rune) bool {
		if r == openQuote {
			// Matching closing quote: quoting ends here, and the
			// quote character itself is not a separator.
			openQuote = 0
			return false
		}
		if openQuote != 0 {
			// Inside a quoted section: nothing separates.
			return false
		}
		if unicode.In(r, unicode.Quotation_Mark) {
			// Opening quote: remember it until its match shows up.
			openQuote = r
			return false
		}
		return unicode.IsSpace(r)
	}

	return strings.FieldsFunc(query, isSeparator)
}
+
// removeQuote strips one pair of enclosing double quotes from field, if
// present. Any other input is returned untouched.
func removeQuote(field string) string {
	n := len(field)
	if n < 2 || field[0] != '"' || field[n-1] != '"' {
		return field
	}
	return field[1 : n-1]
}