package repository

import (
	"fmt"
	"os"
	"strings"
	"sync"
	"unicode/utf8"

	"github.com/blevesearch/bleve"
)

// Compile-time check that *bleveIndex satisfies the Index interface.
var _ Index = (*bleveIndex)(nil)

// bleveIndex is an Index implementation backed by a bleve index stored on
// disk at path.
//
// mu guards the index field: Clear closes the current index and replaces it
// with a fresh one, so every method that dereferences index must hold mu
// (read lock for queries, write lock for mutations).
type bleveIndex struct {
	path  string
	mu    sync.RWMutex
	index bleve.Index
}

// openBleveIndex opens the bleve index located at path. If opening fails for
// any reason, it attempts to create a new index at that path instead; only
// the error from that creation attempt is reported to the caller.
func openBleveIndex(path string) (*bleveIndex, error) {
	if index, err := bleve.Open(path); err == nil {
		return &bleveIndex{path: path, index: index}, nil
	}

	b := &bleveIndex{path: path}
	if err := b.makeIndex(); err != nil {
		return nil, err
	}
	return b, nil
}

// makeIndex creates the directory b.path (and any missing parents), creates a
// new bleve index there using the "en" analyzer as the default, and stores it
// in b.index. It does no locking itself; callers that may run concurrently
// with other methods must already hold b.mu.
func (b *bleveIndex) makeIndex() error {
	if err := os.MkdirAll(b.path, os.ModePerm); err != nil {
		return err
	}

	// TODO: follow https://github.com/blevesearch/bleve/issues/1576 recommendations
	mapping := bleve.NewIndexMapping()
	mapping.DefaultAnalyzer = "en"

	index, err := bleve.New(b.path, mapping)
	if err != nil {
		return err
	}
	b.index = index
	return nil
}

// IndexOne indexes texts under the document identifier id, writing directly
// to the underlying index.
func (b *bleveIndex) IndexOne(id string, texts []string) error {
	b.mu.Lock()
	defer b.mu.Unlock()

	return b._index(b.index.Index, id, texts)
}

// IndexBatch starts a batch of index operations. The returned indexer queues
// one document into the batch; the returned closer submits the whole batch to
// the index and must be called for the queued documents to be written.
//
// The closer acquires b.mu itself, so it is safe to call concurrently with
// Clear and Close. The indexer only touches the batch and does no locking.
func (b *bleveIndex) IndexBatch() (indexer func(id string, texts []string) error, closer func() error) {
	b.mu.Lock()
	defer b.mu.Unlock()

	batch := b.index.NewBatch()
	indexer = func(id string, texts []string) error {
		return b._index(batch.Index, id, texts)
	}
	closer = func() error {
		// The lock taken by IndexBatch was released when it returned, so the
		// commit needs its own lock to avoid racing with Clear swapping
		// b.index.
		b.mu.Lock()
		defer b.mu.Unlock()
		return b.index.Batch(batch)
	}
	return indexer, closer
}

// _index normalizes texts and passes them to indexer under id, wrapped in a
// struct with a single Text field.
//
// Normalization splits each text on whitespace, drops every token that is 100
// runes or longer, and joins the remaining tokens with a single space (each
// kept token is followed by one space, so a non-empty result ends with a
// space). The caller's texts slice is left unmodified.
func (b *bleveIndex) _index(indexer func(string, interface{}) error, id string, texts []string) error {
	// See https://github.com/blevesearch/bleve/issues/1576
	const maxTokenRunes = 100

	var sb strings.Builder
	// Write into a fresh slice rather than back into texts, which belongs to
	// the caller.
	normalized := make([]string, len(texts))
	for i, text := range texts {
		sb.Reset()
		for _, field := range strings.Fields(text) {
			if utf8.RuneCountInString(field) < maxTokenRunes {
				sb.WriteString(field)
				sb.WriteRune(' ')
			}
		}
		normalized[i] = sb.String()
	}

	searchable := struct{ Text []string }{Text: normalized}
	return indexer(id, searchable)
}

// Search runs a query-string search built from terms and returns the IDs of
// the hits in the order bleve reports them. Terms containing a space are
// wrapped in double quotes before all terms are joined with spaces. The
// caller's terms slice is left unmodified.
//
// NOTE(review): the request is built with bleve.NewSearchRequest and no
// explicit size, so the number of hits is presumably limited by bleve's
// default page size - confirm this is intended.
func (b *bleveIndex) Search(terms []string) ([]string, error) {
	b.mu.RLock()
	defer b.mu.RUnlock()

	// Quote into a copy so the caller's slice is not rewritten.
	parts := make([]string, len(terms))
	for i, term := range terms {
		if strings.Contains(term, " ") {
			term = fmt.Sprintf("\"%s\"", term)
		}
		parts[i] = term
	}

	query := bleve.NewQueryStringQuery(strings.Join(parts, " "))
	res, err := b.index.Search(bleve.NewSearchRequest(query))
	if err != nil {
		return nil, err
	}

	ids := make([]string, len(res.Hits))
	for i, hit := range res.Hits {
		ids[i] = hit.ID
	}
	return ids, nil
}

// DocCount returns the document count reported by the underlying index.
func (b *bleveIndex) DocCount() (uint64, error) {
	// Hold the read lock: Clear replaces b.index under the write lock.
	b.mu.RLock()
	defer b.mu.RUnlock()

	return b.index.DocCount()
}

// Remove deletes the document identified by id from the index.
func (b *bleveIndex) Remove(id string) error {
	b.mu.Lock()
	defer b.mu.Unlock()

	return b.index.Delete(id)
}

// Clear discards all indexed data: it closes the current index, removes the
// directory at b.path, and creates a fresh empty index in its place. If any
// step fails, the error is returned and the remaining steps are skipped.
func (b *bleveIndex) Clear() error {
	b.mu.Lock()
	defer b.mu.Unlock()

	if err := b.index.Close(); err != nil {
		return err
	}
	if err := os.RemoveAll(b.path); err != nil {
		return err
	}
	return b.makeIndex()
}

// Close closes the underlying index.
func (b *bleveIndex) Close() error {
	b.mu.Lock()
	defer b.mu.Unlock()

	return b.index.Close()
}