package query

import (
	"fmt"
	"strings"
	"unicode"
)

type tokenKind int

const (
	_ tokenKind = iota
	tokenKindKV
	tokenKindKVV
	tokenKindSearch
)

type token struct {
	kind tokenKind

	// KV and KVV
	qualifier string
	value     string

	// KVV only
	subQualifier string

	// Search
	term string
}

func newTokenKV(qualifier, value string) token {
	return token{
		kind:      tokenKindKV,
		qualifier: qualifier,
		value:     value,
	}
}

func newTokenKVV(qualifier, subQualifier, value string) token {
	return token{
		kind:         tokenKindKVV,
		qualifier:    qualifier,
		subQualifier: subQualifier,
		value:        value,
	}
}

func newTokenSearch(term string) token {
	return token{
		kind: tokenKindSearch,
		term: term,
	}
}

// tokenize parses and breaks an input into tokens, ready to be
// interpreted later by a parser to extract the semantics.
func tokenize(query string) ([]token, error) {
	fields, err := splitFunc(query, unicode.IsSpace)
	if err != nil {
		return nil, err
	}

	var tokens []token
	for _, field := range fields {
		chunks, err := splitFunc(field, func(r rune) bool { return r == ':' })
		if err != nil {
			return nil, err
		}

		if strings.HasPrefix(field, ":") || strings.HasSuffix(field, ":") {
			return nil, fmt.Errorf("empty qualifier or value")
		}

		// pre-process chunks
		for i, chunk := range chunks {
			if len(chunk) == 0 {
				return nil, fmt.Errorf("empty qualifier or value")
			}

			chunks[i] = removeQuote(chunk)
		}

		switch len(chunks) {
		case 1: // full text search
			tokens = append(tokens, newTokenSearch(chunks[0]))

		case 2: // KV
			tokens = append(tokens, newTokenKV(chunks[0], chunks[1]))

		case 3: // KVV
			tokens = append(tokens, newTokenKVV(chunks[0], chunks[1], chunks[2]))

		default:
			return nil, fmt.Errorf("can't tokenize \"%s\": too many separators", field)
		}
	}
	return tokens, nil
}

// removeQuote strips a single pair of matching surrounding quotes, if any.
func removeQuote(field string) string {
	runes := []rune(field)
	if len(runes) >= 2 {
		r1 := runes[0]
		r2 := runes[len(runes)-1]
		if r1 == r2 && isQuote(r1) {
			return string(runes[1 : len(runes)-1])
		}
	}
	return field
}

// splitFunc splits the input into chunks according to separatorFunc while
// respecting quotes.
func splitFunc(input string, separatorFunc func(r rune) bool) ([]string, error) {
	lastQuote := rune(0)
	inQuote := false

	// isChunk returns true if the rune is part of a chunk, or false if it's a rune
	// that delimits one, as determined by the separatorFunc.
	isChunk := func(r rune) bool {
		switch {
		case !inQuote && isQuote(r):
			lastQuote = r
			inQuote = true
			return true
		case inQuote && r == lastQuote:
			lastQuote = rune(0)
			inQuote = false
			return true
		case inQuote:
			return true
		default:
			return !separatorFunc(r)
		}
	}

	var result []string
	var chunk strings.Builder
	for _, r := range input {
		if isChunk(r) {
			chunk.WriteRune(r)
		} else {
			if chunk.Len() > 0 {
				result = append(result, chunk.String())
				chunk.Reset()
			}
		}
	}

	if inQuote {
		return nil, fmt.Errorf("unmatched quote")
	}

	if chunk.Len() > 0 {
		result = append(result, chunk.String())
	}

	return result, nil
}

// isQuote reports whether the rune is a single or double quote.
func isQuote(r rune) bool {
	return r == '"' || r == '\''
}
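
// Usage sketch (illustrative only; the sample query and expected tokens below are
// assumptions, not part of this package):
//
//	tokens, err := tokenize(`status:open label:"needs triage" freeform`)
//	// err == nil
//	// tokens[0]: kind=tokenKindKV,     qualifier="status", value="open"
//	// tokens[1]: kind=tokenKindKV,     qualifier="label",  value="needs triage"
//	// tokens[2]: kind=tokenKindSearch, term="freeform"
//
// The quoted value survives the whitespace split intact, and removeQuote strips the
// surrounding quotes before the token is built.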