about · summary · refs · log · tree · commit · diff · stats
path: root/query/lexer.go
diff options
context:
space:
mode:
author: Michael Muré <batolettre@gmail.com> 2021-02-27 20:31:10 +0100
committer: Michael Muré <batolettre@gmail.com> 2021-02-27 20:31:10 +0100
commit: fab626a7a663a8fa6ef27848bb63e91af812ab8c (patch)
tree: c0d7c4f0cfe23044af8eee72dd3d1eee34801bb5 /query/lexer.go
parent: cb61245078a0e8f14e359ed20e0582a695645a08 (diff)
download: git-bug-fab626a7a663a8fa6ef27848bb63e91af812ab8c.tar.gz
query: refactor to reuse the split function for both query and token
Diffstat (limited to 'query/lexer.go')
-rw-r--r--  query/lexer.go  120
1 files changed, 51 insertions, 69 deletions
diff --git a/query/lexer.go b/query/lexer.go
index 45f657df..77830a47 100644
--- a/query/lexer.go
+++ b/query/lexer.go
@@ -39,10 +39,10 @@ func newTokenKV(qualifier, value string) token {
func newTokenKVV(qualifier, subQualifier, value string) token {
return token{
- kind: tokenKindKVV,
- qualifier: qualifier,
+ kind: tokenKindKVV,
+ qualifier: qualifier,
subQualifier: subQualifier,
- value: value,
+ value: value,
}
}
@@ -56,73 +56,68 @@ func newTokenSearch(term string) token {
// tokenize parse and break a input into tokens ready to be
// interpreted later by a parser to get the semantic.
func tokenize(query string) ([]token, error) {
- fields, err := splitQuery(query)
+ fields, err := splitFunc(query, unicode.IsSpace)
if err != nil {
return nil, err
}
var tokens []token
for _, field := range fields {
- // Split using ':' as separator, but separators inside '"' don't count.
- quoted := false
- split := strings.FieldsFunc(field, func(r rune) bool {
- if r == '"' {
- quoted = !quoted
- }
- return !quoted && r == ':'
- })
- if (strings.HasPrefix(field, ":")) {
- split = append([]string{""}, split...)
- }
- if (strings.HasSuffix(field, ":")) {
- split = append(split, "")
- }
- if (quoted) {
- return nil, fmt.Errorf("can't tokenize \"%s\": unmatched quote", field)
+ chunks, err := splitFunc(field, func(r rune) bool { return r == ':' })
+ if err != nil {
+ return nil, err
}
- // full text search
- if len(split) == 1 {
- tokens = append(tokens, newTokenSearch(removeQuote(field)))
- continue
+ if strings.HasPrefix(field, ":") || strings.HasSuffix(field, ":") {
+ return nil, fmt.Errorf("empty qualifier or value")
}
- if len(split) > 3 {
- return nil, fmt.Errorf("can't tokenize \"%s\": too many separators", field)
+ // pre-process chunks
+ for i, chunk := range chunks {
+ if len(chunk) == 0 {
+ return nil, fmt.Errorf("empty qualifier or value")
+ }
+ chunks[i] = removeQuote(chunk)
}
- if len(split[0]) == 0 {
- return nil, fmt.Errorf("can't tokenize \"%s\": empty qualifier", field)
- }
+ switch len(chunks) {
+ case 1: // full text search
+ tokens = append(tokens, newTokenSearch(chunks[0]))
- if len(split) == 2 {
- if len(split[1]) == 0 {
- return nil, fmt.Errorf("empty value for qualifier \"%s\"", split[0])
- }
+ case 2: // KV
+ tokens = append(tokens, newTokenKV(chunks[0], chunks[1]))
- tokens = append(tokens, newTokenKV(split[0], removeQuote(split[1])))
- } else {
- if len(split[1]) == 0 {
- return nil, fmt.Errorf("empty sub-qualifier for qualifier \"%s\"", split[0])
- }
+ case 3: // KVV
+ tokens = append(tokens, newTokenKVV(chunks[0], chunks[1], chunks[2]))
- if len(split[2]) == 0 {
- return nil, fmt.Errorf("empty value for qualifier \"%s:%s\"", split[0], split[1])
- }
-
- tokens = append(tokens, newTokenKVV(split[0], removeQuote(split[1]), removeQuote(split[2])))
+ default:
+ return nil, fmt.Errorf("can't tokenize \"%s\": too many separators", field)
}
}
return tokens, nil
}
-// split the query into chunks by splitting on whitespaces but respecting
+func removeQuote(field string) string {
+ runes := []rune(field)
+ if len(runes) >= 2 {
+ r1 := runes[0]
+ r2 := runes[len(runes)-1]
+
+ if r1 == r2 && isQuote(r1) {
+ return string(runes[1 : len(runes)-1])
+ }
+ }
+ return field
+}
+
+// split the input into chunks by splitting according to separatorFunc but respecting
// quotes
-func splitQuery(query string) ([]string, error) {
+func splitFunc(input string, separatorFunc func(r rune) bool) ([]string, error) {
lastQuote := rune(0)
inQuote := false
- isToken := func(r rune) bool {
+ // return true if it's part of a chunk, or false if it's a rune that delimit one, as determined by the separatorFunc.
+ isChunk := func(r rune) bool {
switch {
case !inQuote && isQuote(r):
lastQuote = r
@@ -135,19 +130,19 @@ func splitQuery(query string) ([]string, error) {
case inQuote:
return true
default:
- return !unicode.IsSpace(r)
+ return !separatorFunc(r)
}
}
var result []string
- var token strings.Builder
- for _, r := range query {
- if isToken(r) {
- token.WriteRune(r)
+ var chunk strings.Builder
+ for _, r := range input {
+ if isChunk(r) {
+ chunk.WriteRune(r)
} else {
- if token.Len() > 0 {
- result = append(result, token.String())
- token.Reset()
+ if chunk.Len() > 0 {
+ result = append(result, chunk.String())
+ chunk.Reset()
}
}
}
@@ -156,8 +151,8 @@ func splitQuery(query string) ([]string, error) {
return nil, fmt.Errorf("unmatched quote")
}
- if token.Len() > 0 {
- result = append(result, token.String())
+ if chunk.Len() > 0 {
+ result = append(result, chunk.String())
}
return result, nil
@@ -166,16 +161,3 @@ func splitQuery(query string) ([]string, error) {
func isQuote(r rune) bool {
return r == '"' || r == '\''
}
-
-func removeQuote(field string) string {
- runes := []rune(field)
- if len(runes) >= 2 {
- r1 := runes[0]
- r2 := runes[len(runes)-1]
-
- if r1 == r2 && isQuote(r1) {
- return string(runes[1 : len(runes)-1])
- }
- }
- return field
-}