diff options
author | Michael Muré <batolettre@gmail.com> | 2021-02-27 20:31:10 +0100 |
---|---|---|
committer | Michael Muré <batolettre@gmail.com> | 2021-02-27 20:31:10 +0100 |
commit | fab626a7a663a8fa6ef27848bb63e91af812ab8c (patch) | |
tree | c0d7c4f0cfe23044af8eee72dd3d1eee34801bb5 /query/lexer.go | |
parent | cb61245078a0e8f14e359ed20e0582a695645a08 (diff) | |
download | git-bug-fab626a7a663a8fa6ef27848bb63e91af812ab8c.tar.gz |
query: refactor to reuse the split function for both query and token
Diffstat (limited to 'query/lexer.go')
-rw-r--r-- | query/lexer.go | 120 |
1 file changed, 51 insertions, 69 deletions
diff --git a/query/lexer.go b/query/lexer.go index 45f657df..77830a47 100644 --- a/query/lexer.go +++ b/query/lexer.go @@ -39,10 +39,10 @@ func newTokenKV(qualifier, value string) token { func newTokenKVV(qualifier, subQualifier, value string) token { return token{ - kind: tokenKindKVV, - qualifier: qualifier, + kind: tokenKindKVV, + qualifier: qualifier, subQualifier: subQualifier, - value: value, + value: value, } } @@ -56,73 +56,68 @@ func newTokenSearch(term string) token { // tokenize parse and break a input into tokens ready to be // interpreted later by a parser to get the semantic. func tokenize(query string) ([]token, error) { - fields, err := splitQuery(query) + fields, err := splitFunc(query, unicode.IsSpace) if err != nil { return nil, err } var tokens []token for _, field := range fields { - // Split using ':' as separator, but separators inside '"' don't count. - quoted := false - split := strings.FieldsFunc(field, func(r rune) bool { - if r == '"' { - quoted = !quoted - } - return !quoted && r == ':' - }) - if (strings.HasPrefix(field, ":")) { - split = append([]string{""}, split...) 
- } - if (strings.HasSuffix(field, ":")) { - split = append(split, "") - } - if (quoted) { - return nil, fmt.Errorf("can't tokenize \"%s\": unmatched quote", field) + chunks, err := splitFunc(field, func(r rune) bool { return r == ':' }) + if err != nil { + return nil, err } - // full text search - if len(split) == 1 { - tokens = append(tokens, newTokenSearch(removeQuote(field))) - continue + if strings.HasPrefix(field, ":") || strings.HasSuffix(field, ":") { + return nil, fmt.Errorf("empty qualifier or value") } - if len(split) > 3 { - return nil, fmt.Errorf("can't tokenize \"%s\": too many separators", field) + // pre-process chunks + for i, chunk := range chunks { + if len(chunk) == 0 { + return nil, fmt.Errorf("empty qualifier or value") + } + chunks[i] = removeQuote(chunk) } - if len(split[0]) == 0 { - return nil, fmt.Errorf("can't tokenize \"%s\": empty qualifier", field) - } + switch len(chunks) { + case 1: // full text search + tokens = append(tokens, newTokenSearch(chunks[0])) - if len(split) == 2 { - if len(split[1]) == 0 { - return nil, fmt.Errorf("empty value for qualifier \"%s\"", split[0]) - } + case 2: // KV + tokens = append(tokens, newTokenKV(chunks[0], chunks[1])) - tokens = append(tokens, newTokenKV(split[0], removeQuote(split[1]))) - } else { - if len(split[1]) == 0 { - return nil, fmt.Errorf("empty sub-qualifier for qualifier \"%s\"", split[0]) - } + case 3: // KVV + tokens = append(tokens, newTokenKVV(chunks[0], chunks[1], chunks[2])) - if len(split[2]) == 0 { - return nil, fmt.Errorf("empty value for qualifier \"%s:%s\"", split[0], split[1]) - } - - tokens = append(tokens, newTokenKVV(split[0], removeQuote(split[1]), removeQuote(split[2]))) + default: + return nil, fmt.Errorf("can't tokenize \"%s\": too many separators", field) } } return tokens, nil } -// split the query into chunks by splitting on whitespaces but respecting +func removeQuote(field string) string { + runes := []rune(field) + if len(runes) >= 2 { + r1 := runes[0] + r2 := 
runes[len(runes)-1] + + if r1 == r2 && isQuote(r1) { + return string(runes[1 : len(runes)-1]) + } + } + return field +} + +// split the input into chunks by splitting according to separatorFunc but respecting // quotes -func splitQuery(query string) ([]string, error) { +func splitFunc(input string, separatorFunc func(r rune) bool) ([]string, error) { lastQuote := rune(0) inQuote := false - isToken := func(r rune) bool { + // return true if it's part of a chunk, or false if it's a rune that delimit one, as determined by the separatorFunc. + isChunk := func(r rune) bool { switch { case !inQuote && isQuote(r): lastQuote = r @@ -135,19 +130,19 @@ func splitQuery(query string) ([]string, error) { case inQuote: return true default: - return !unicode.IsSpace(r) + return !separatorFunc(r) } } var result []string - var token strings.Builder - for _, r := range query { - if isToken(r) { - token.WriteRune(r) + var chunk strings.Builder + for _, r := range input { + if isChunk(r) { + chunk.WriteRune(r) } else { - if token.Len() > 0 { - result = append(result, token.String()) - token.Reset() + if chunk.Len() > 0 { + result = append(result, chunk.String()) + chunk.Reset() } } } @@ -156,8 +151,8 @@ func splitQuery(query string) ([]string, error) { return nil, fmt.Errorf("unmatched quote") } - if token.Len() > 0 { - result = append(result, token.String()) + if chunk.Len() > 0 { + result = append(result, chunk.String()) } return result, nil @@ -166,16 +161,3 @@ func splitQuery(query string) ([]string, error) { func isQuote(r rune) bool { return r == '"' || r == '\'' } - -func removeQuote(field string) string { - runes := []rune(field) - if len(runes) >= 2 { - r1 := runes[0] - r2 := runes[len(runes)-1] - - if r1 == r2 && isQuote(r1) { - return string(runes[1 : len(runes)-1]) - } - } - return field -} |