diff options
author | Michael Muré <batolettre@gmail.com> | 2022-12-23 01:48:14 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-12-23 01:48:14 +0100 |
commit | 0a5a0ec1ef4ad98bc2116a953e201f96474941ab (patch) | |
tree | 660a9b17b5247fe2f954bfa814cce3193c5afa23 /repository | |
parent | 108518530e822e3bdf59c8bfc333ad0bbe2d5fc8 (diff) | |
parent | 95911100823b5c809225d664de74ad2d64e91972 (diff) | |
download | git-bug-0a5a0ec1ef4ad98bc2116a953e201f96474941ab.tar.gz |
Merge pull request #938 from MichaelMure/cache-reorg
Generic cache layer
Diffstat (limited to 'repository')
-rw-r--r-- | repository/gogit.go | 156 | ||||
-rw-r--r-- | repository/gogit_test.go | 11 | ||||
-rw-r--r-- | repository/hash.go | 2 | ||||
-rw-r--r-- | repository/index_bleve.go | 154 | ||||
-rw-r--r-- | repository/mock_repo.go | 142 | ||||
-rw-r--r-- | repository/repo.go | 54 | ||||
-rw-r--r-- | repository/repo_testing.go | 84 |
7 files changed, 384 insertions, 219 deletions
diff --git a/repository/gogit.go b/repository/gogit.go index c1f1fe37..b14efbe5 100644 --- a/repository/gogit.go +++ b/repository/gogit.go @@ -12,7 +12,6 @@ import ( "time" "github.com/ProtonMail/go-crypto/openpgp" - "github.com/blevesearch/bleve" "github.com/go-git/go-billy/v5" "github.com/go-git/go-billy/v5/osfs" gogit "github.com/go-git/go-git/v5" @@ -45,7 +44,7 @@ type GoGitRepo struct { clocks map[string]lamport.Clock indexesMutex sync.Mutex - indexes map[string]bleve.Index + indexes map[string]Index keyring Keyring localStorage billy.Filesystem @@ -75,7 +74,7 @@ func OpenGoGitRepo(path, namespace string, clockLoaders []ClockLoader) (*GoGitRe r: r, path: path, clocks: make(map[string]lamport.Clock), - indexes: make(map[string]bleve.Index), + indexes: make(map[string]Index), keyring: k, localStorage: osfs.New(filepath.Join(path, namespace)), } @@ -129,7 +128,7 @@ func InitGoGitRepo(path, namespace string) (*GoGitRepo, error) { r: r, path: filepath.Join(path, ".git"), clocks: make(map[string]lamport.Clock), - indexes: make(map[string]bleve.Index), + indexes: make(map[string]Index), keyring: k, localStorage: osfs.New(filepath.Join(path, ".git", namespace)), }, nil @@ -154,7 +153,7 @@ func InitBareGoGitRepo(path, namespace string) (*GoGitRepo, error) { r: r, path: path, clocks: make(map[string]lamport.Clock), - indexes: make(map[string]bleve.Index), + indexes: make(map[string]Index), keyring: k, localStorage: osfs.New(filepath.Join(path, namespace)), }, nil @@ -218,11 +217,12 @@ func isGitDir(path string) (bool, error) { func (repo *GoGitRepo) Close() error { var firstErr error - for _, index := range repo.indexes { + for name, index := range repo.indexes { err := index.Close() if err != nil && firstErr == nil { firstErr = err } + delete(repo.indexes, name) } return firstErr } @@ -323,8 +323,7 @@ func (repo *GoGitRepo) LocalStorage() billy.Filesystem { return repo.localStorage } -// GetBleveIndex return a bleve.Index that can be used to index documents -func (repo *GoGitRepo) GetBleveIndex(name string) (bleve.Index, error) { +func (repo *GoGitRepo) GetIndex(name string) (Index, error) { repo.indexesMutex.Lock() defer repo.indexesMutex.Unlock() @@ -334,63 +333,28 @@ func (repo *GoGitRepo) GetBleveIndex(name string) (bleve.Index, error) { path := filepath.Join(repo.localStorage.Root(), indexPath, name) - index, err := bleve.Open(path) + index, err := openBleveIndex(path) if err == nil { repo.indexes[name] = index - return index, nil - } - - err = os.MkdirAll(path, os.ModePerm) - if err != nil { - return nil, err - } - - mapping := bleve.NewIndexMapping() - mapping.DefaultAnalyzer = "en" - - index, err = bleve.New(path, mapping) - if err != nil { - return nil, err } - - repo.indexes[name] = index - - return index, nil -} - -// ClearBleveIndex will wipe the given index -func (repo *GoGitRepo) ClearBleveIndex(name string) error { - repo.indexesMutex.Lock() - defer repo.indexesMutex.Unlock() - - if index, ok := repo.indexes[name]; ok { - err := index.Close() - if err != nil { - return err - } - delete(repo.indexes, name) - } - - path := filepath.Join(repo.localStorage.Root(), indexPath, name) - err := os.RemoveAll(path) - if err != nil { - return err - } - - return nil + return index, err } // FetchRefs fetch git refs matching a directory prefix to a remote // Ex: prefix="foo" will fetch any remote refs matching "refs/foo/*" locally. // The equivalent git refspec would be "refs/foo/*:refs/remotes/<remote>/foo/*" -func (repo *GoGitRepo) FetchRefs(remote string, prefix string) (string, error) { - refspec := fmt.Sprintf("refs/%s/*:refs/remotes/%s/%s/*", prefix, remote, prefix) +func (repo *GoGitRepo) FetchRefs(remote string, prefixes ...string) (string, error) { + refSpecs := make([]config.RefSpec, len(prefixes)) + + for i, prefix := range prefixes { + refSpecs[i] = config.RefSpec(fmt.Sprintf("refs/%s/*:refs/remotes/%s/%s/*", prefix, remote, prefix)) + } buf := bytes.NewBuffer(nil) err := repo.r.Fetch(&gogit.FetchOptions{ RemoteName: remote, - RefSpecs: []config.RefSpec{config.RefSpec(refspec)}, + RefSpecs: refSpecs, Progress: buf, }) if err == gogit.NoErrAlreadyUpToDate { @@ -409,35 +373,41 @@ func (repo *GoGitRepo) FetchRefs(remote string, prefix string) (string, error) { // // Additionally, PushRefs will update the local references in refs/remotes/<remote>/foo to match // the remote state. -func (repo *GoGitRepo) PushRefs(remote string, prefix string) (string, error) { - refspec := fmt.Sprintf("refs/%s/*:refs/%s/*", prefix, prefix) - +func (repo *GoGitRepo) PushRefs(remote string, prefixes ...string) (string, error) { remo, err := repo.r.Remote(remote) if err != nil { return "", err } - // to make sure that the push also create the corresponding refs/remotes/<remote>/... references, - // we need to have a default fetch refspec configured on the remote, to make our refs "track" the remote ones. - // This does not change the config on disk, only on memory. - hasCustomFetch := false - fetchRefspec := fmt.Sprintf("refs/%s/*:refs/remotes/%s/%s/*", prefix, remote, prefix) - for _, r := range remo.Config().Fetch { - if string(r) == fetchRefspec { - hasCustomFetch = true - break + refSpecs := make([]config.RefSpec, len(prefixes)) + + for i, prefix := range prefixes { + refspec := fmt.Sprintf("refs/%s/*:refs/%s/*", prefix, prefix) + + // to make sure that the push also create the corresponding refs/remotes/<remote>/... references, + // we need to have a default fetch refspec configured on the remote, to make our refs "track" the remote ones. + // This does not change the config on disk, only on memory. + hasCustomFetch := false + fetchRefspec := fmt.Sprintf("refs/%s/*:refs/remotes/%s/%s/*", prefix, remote, prefix) + for _, r := range remo.Config().Fetch { + if string(r) == fetchRefspec { + hasCustomFetch = true + break + } } - } - if !hasCustomFetch { - remo.Config().Fetch = append(remo.Config().Fetch, config.RefSpec(fetchRefspec)) + if !hasCustomFetch { + remo.Config().Fetch = append(remo.Config().Fetch, config.RefSpec(fetchRefspec)) + } + + refSpecs[i] = config.RefSpec(refspec) } buf := bytes.NewBuffer(nil) err = remo.Push(&gogit.PushOptions{ RemoteName: remote, - RefSpecs: []config.RefSpec{config.RefSpec(refspec)}, + RefSpecs: refSpecs, Progress: buf, }) if err == gogit.NoErrAlreadyUpToDate { @@ -479,6 +449,9 @@ func (repo *GoGitRepo) ReadData(hash Hash) ([]byte, error) { defer repo.rMutex.Unlock() obj, err := repo.r.BlobObject(plumbing.NewHash(hash.String())) + if err == plumbing.ErrObjectNotFound { + return nil, ErrNotFound + } if err != nil { return nil, err } @@ -548,6 +521,9 @@ func (repo *GoGitRepo) ReadTree(hash Hash) ([]TreeEntry, error) { // the given hash could be a tree or a commit obj, err := repo.r.Storer.EncodedObject(plumbing.AnyObject, h) + if err == plumbing.ErrObjectNotFound { + return nil, ErrNotFound + } if err != nil { return nil, err } @@ -654,43 +630,11 @@ func (repo *GoGitRepo) StoreSignedCommit(treeHash Hash, signKey *openpgp.Entity, return Hash(hash.String()), nil } -// GetTreeHash return the git tree hash referenced in a commit -func (repo *GoGitRepo) GetTreeHash(commit Hash) (Hash, error) { - repo.rMutex.Lock() - defer repo.rMutex.Unlock() - - obj, err := repo.r.CommitObject(plumbing.NewHash(commit.String())) - if err != nil { - return "", err - } - - return Hash(obj.TreeHash.String()), nil -} - -// FindCommonAncestor will return the last common ancestor of two chain of commit -func (repo *GoGitRepo) FindCommonAncestor(commit1 Hash, commit2 Hash) (Hash, error) { - repo.rMutex.Lock() - defer repo.rMutex.Unlock() - - obj1, err := repo.r.CommitObject(plumbing.NewHash(commit1.String())) - if err != nil { - return "", err - } - obj2, err := repo.r.CommitObject(plumbing.NewHash(commit2.String())) - if err != nil { - return "", err - } - - commits, err := obj1.MergeBase(obj2) - if err != nil { - return "", err - } - - return Hash(commits[0].Hash.String()), nil -} - func (repo *GoGitRepo) ResolveRef(ref string) (Hash, error) { r, err := repo.r.Reference(plumbing.ReferenceName(ref), false) + if err == plumbing.ErrReferenceNotFound { + return "", ErrNotFound + } if err != nil { return "", err } @@ -743,6 +687,9 @@ func (repo *GoGitRepo) RefExist(ref string) (bool, error) { // CopyRef will create a new reference with the same value as another one func (repo *GoGitRepo) CopyRef(source string, dest string) error { r, err := repo.r.Reference(plumbing.ReferenceName(source), false) + if err == plumbing.ErrReferenceNotFound { + return ErrNotFound + } if err != nil { return err } @@ -759,6 +706,9 @@ func (repo *GoGitRepo) ReadCommit(hash Hash) (Commit, error) { defer repo.rMutex.Unlock() commit, err := repo.r.CommitObject(plumbing.NewHash(hash.String())) + if err == plumbing.ErrObjectNotFound { + return Commit{}, ErrNotFound + } if err != nil { return Commit{}, err } diff --git a/repository/gogit_test.go b/repository/gogit_test.go index a3de0a03..02bd42fd 100644 --- a/repository/gogit_test.go +++ b/repository/gogit_test.go @@ -65,24 +65,19 @@ func TestGoGitRepo_Indexes(t *testing.T) { plainRoot := goGitRepoDir(t, repo) // Can create indices - indexA, err := repo.GetBleveIndex("a") + indexA, err := repo.GetIndex("a") require.NoError(t, err) require.NotZero(t, indexA) require.FileExists(t, filepath.Join(plainRoot, ".git", namespace, "indexes", "a", "index_meta.json")) require.FileExists(t, filepath.Join(plainRoot, ".git", namespace, "indexes", "a", "store")) - indexB, err := repo.GetBleveIndex("b") + indexB, err := repo.GetIndex("b") require.NoError(t, err) require.NotZero(t, indexB) require.DirExists(t, filepath.Join(plainRoot, ".git", namespace, "indexes", "b")) // Can get an existing index - indexA, err = repo.GetBleveIndex("a") + indexA, err = repo.GetIndex("a") require.NoError(t, err) require.NotZero(t, indexA) - - // Can delete an index - err = repo.ClearBleveIndex("a") - require.NoError(t, err) - require.NoDirExists(t, filepath.Join(plainRoot, ".git", namespace, "indexes", "a")) } diff --git a/repository/hash.go b/repository/hash.go index 6a11558f..ad0206c3 100644 --- a/repository/hash.go +++ b/repository/hash.go @@ -43,7 +43,7 @@ func (h *Hash) IsValid() bool { return false } for _, r := range *h { - if (r < 'a' || r > 'z') && (r < '0' || r > '9') { + if (r < 'a' || r > 'f') && (r < '0' || r > '9') { return false } } diff --git a/repository/index_bleve.go b/repository/index_bleve.go new file mode 100644 index 00000000..aae41d5f --- /dev/null +++ b/repository/index_bleve.go @@ -0,0 +1,154 @@ +package repository + +import ( + "fmt" + "os" + "strings" + "sync" + "unicode/utf8" + + "github.com/blevesearch/bleve" +) + +var _ Index = &bleveIndex{} + +type bleveIndex struct { + path string + + mu sync.RWMutex + index bleve.Index +} + +func openBleveIndex(path string) (*bleveIndex, error) { + index, err := bleve.Open(path) + if err == nil { + return &bleveIndex{path: path, index: index}, nil + } + + b := &bleveIndex{path: path} + err = b.makeIndex() + if err != nil { + return nil, err + } + + return b, nil +} + +func (b *bleveIndex) makeIndex() error { + err := os.MkdirAll(b.path, os.ModePerm) + if err != nil { + return err + } + + // TODO: follow https://github.com/blevesearch/bleve/issues/1576 recommendations + + mapping := bleve.NewIndexMapping() + mapping.DefaultAnalyzer = "en" + + index, err := bleve.New(b.path, mapping) + if err != nil { + return err + } + b.index = index + return nil +} + +func (b *bleveIndex) IndexOne(id string, texts []string) error { + b.mu.Lock() + defer b.mu.Unlock() + return b._index(b.index.Index, id, texts) +} + +func (b *bleveIndex) IndexBatch() (indexer func(id string, texts []string) error, closer func() error) { + b.mu.Lock() + defer b.mu.Unlock() + + batch := b.index.NewBatch() + + indexer = func(id string, texts []string) error { + return b._index(batch.Index, id, texts) + } + + closer = func() error { + return b.index.Batch(batch) + } + + return indexer, closer +} + +func (b *bleveIndex) _index(indexer func(string, interface{}) error, id string, texts []string) error { + searchable := struct{ Text []string }{Text: texts} + + // See https://github.com/blevesearch/bleve/issues/1576 + var sb strings.Builder + normalize := func(text string) string { + sb.Reset() + for _, field := range strings.Fields(text) { + if utf8.RuneCountInString(field) < 100 { + sb.WriteString(field) + sb.WriteRune(' ') + } + } + return sb.String() + } + + for i, s := range searchable.Text { + searchable.Text[i] = normalize(s) + } + + return indexer(id, searchable) +} + +func (b *bleveIndex) Search(terms []string) ([]string, error) { + b.mu.RLock() + defer b.mu.RUnlock() + + for i, term := range terms { + if strings.Contains(term, " ") { + terms[i] = fmt.Sprintf("\"%s\"", term) + } + } + + query := bleve.NewQueryStringQuery(strings.Join(terms, " ")) + search := bleve.NewSearchRequest(query) + + res, err := b.index.Search(search) + if err != nil { + return nil, err + } + + ids := make([]string, len(res.Hits)) + for i, hit := range res.Hits { + ids[i] = hit.ID + } + + return ids, nil +} + +func (b *bleveIndex) DocCount() (uint64, error) { + return b.index.DocCount() +} + +func (b *bleveIndex) Clear() error { + b.mu.Lock() + defer b.mu.Unlock() + + err := b.index.Close() + if err != nil { + return err + } + + err = os.RemoveAll(b.path) + if err != nil { + return err + } + + return b.makeIndex() +} + +func (b *bleveIndex) Close() error { + b.mu.Lock() + defer b.mu.Unlock() + + return b.index.Close() +} diff --git a/repository/mock_repo.go b/repository/mock_repo.go index 3d7f0e73..c2cef8ef 100644 --- a/repository/mock_repo.go +++ b/repository/mock_repo.go @@ -9,7 +9,6 @@ import ( "github.com/99designs/keyring" "github.com/ProtonMail/go-crypto/openpgp" - "github.com/blevesearch/bleve" "github.com/go-git/go-billy/v5" "github.com/go-git/go-billy/v5/memfs" @@ -25,7 +24,7 @@ type mockRepo struct { *mockRepoKeyring *mockRepoCommon *mockRepoStorage - *mockRepoBleve + *mockRepoIndex *mockRepoData *mockRepoClock *mockRepoTest @@ -39,7 +38,7 @@ func NewMockRepo() *mockRepo { mockRepoKeyring: NewMockRepoKeyring(), mockRepoCommon: NewMockRepoCommon(), mockRepoStorage: NewMockRepoStorage(), - mockRepoBleve: newMockRepoBleve(), + mockRepoIndex: newMockRepoIndex(), mockRepoData: NewMockRepoData(), mockRepoClock: NewMockRepoClock(), mockRepoTest: NewMockRepoTest(), @@ -135,20 +134,20 @@ func (m *mockRepoStorage) LocalStorage() billy.Filesystem { return m.localFs } -var _ RepoBleve = &mockRepoBleve{} +var _ RepoIndex = &mockRepoIndex{} -type mockRepoBleve struct { +type mockRepoIndex struct { indexesMutex sync.Mutex - indexes map[string]bleve.Index + indexes map[string]Index } -func newMockRepoBleve() *mockRepoBleve { - return &mockRepoBleve{ - indexes: make(map[string]bleve.Index), +func newMockRepoIndex() *mockRepoIndex { + return &mockRepoIndex{ + indexes: make(map[string]Index), } } -func (m *mockRepoBleve) GetBleveIndex(name string) (bleve.Index, error) { +func (m *mockRepoIndex) GetIndex(name string) (Index, error) { m.indexesMutex.Lock() defer m.indexesMutex.Unlock() @@ -156,24 +155,63 @@ func (m *mockRepoBleve) GetBleveIndex(name string) (bleve.Index, error) { return index, nil } - mapping := bleve.NewIndexMapping() - mapping.DefaultAnalyzer = "en" + index := newIndex() + m.indexes[name] = index + return index, nil +} - index, err := bleve.NewMemOnly(mapping) - if err != nil { - return nil, err - } +var _ Index = &mockIndex{} - m.indexes[name] = index +type mockIndex map[string][]string - return index, nil +func newIndex() *mockIndex { + m := make(map[string][]string) + return (*mockIndex)(&m) } -func (m *mockRepoBleve) ClearBleveIndex(name string) error { - m.indexesMutex.Lock() - defer m.indexesMutex.Unlock() +func (m *mockIndex) IndexOne(id string, texts []string) error { + (*m)[id] = texts + return nil +} + +func (m *mockIndex) IndexBatch() (indexer func(id string, texts []string) error, closer func() error) { + indexer = func(id string, texts []string) error { + (*m)[id] = texts + return nil + } + closer = func() error { return nil } + return indexer, closer +} + +func (m *mockIndex) Search(terms []string) (ids []string, err error) { +loop: + for id, texts := range *m { + for _, text := range texts { + for _, s := range strings.Fields(text) { + for _, term := range terms { + if s == term { + ids = append(ids, id) + continue loop + } + } + } + } + } + return ids, nil +} + +func (m *mockIndex) DocCount() (uint64, error) { + return uint64(len(*m)), nil +} + +func (m *mockIndex) Clear() error { + for k, _ := range *m { + delete(*m, k) + } + return nil +} - delete(m.indexes, name) +func (m *mockIndex) Close() error { return nil } @@ -201,12 +239,12 @@ func NewMockRepoData() *mockRepoData { } } -func (r *mockRepoData) FetchRefs(remote string, prefix string) (string, error) { +func (r *mockRepoData) FetchRefs(remote string, prefixes ...string) (string, error) { panic("implement me") } // PushRefs push git refs to a remote -func (r *mockRepoData) PushRefs(remote string, prefix string) (string, error) { +func (r *mockRepoData) PushRefs(remote string, prefixes ...string) (string, error) { panic("implement me") } @@ -220,7 +258,7 @@ func (r *mockRepoData) StoreData(data []byte) (Hash, error) { func (r *mockRepoData) ReadData(hash Hash) ([]byte, error) { data, ok := r.blobs[hash] if !ok { - return nil, fmt.Errorf("unknown hash") + return nil, ErrNotFound } return data, nil @@ -245,13 +283,13 @@ func (r *mockRepoData) ReadTree(hash Hash) ([]TreeEntry, error) { commit, ok := r.commits[hash] if !ok { - return nil, fmt.Errorf("unknown hash") + return nil, ErrNotFound } data, ok = r.trees[commit.treeHash] if !ok { - return nil, fmt.Errorf("unknown hash") + return nil, ErrNotFound } } @@ -289,7 +327,7 @@ func (r *mockRepoData) StoreSignedCommit(treeHash Hash, signKey *openpgp.Entity, func (r *mockRepoData) ReadCommit(hash Hash) (Commit, error) { c, ok := r.commits[hash] if !ok { - return Commit{}, fmt.Errorf("unknown commit") + return Commit{}, ErrNotFound } result := Commit{ @@ -308,19 +346,10 @@ func (r *mockRepoData) ReadCommit(hash Hash) (Commit, error) { return result, nil } -func (r *mockRepoData) GetTreeHash(commit Hash) (Hash, error) { - c, ok := r.commits[commit] - if !ok { - return "", fmt.Errorf("unknown commit") - } - - return c.treeHash, nil -} - func (r *mockRepoData) ResolveRef(ref string) (Hash, error) { h, ok := r.refs[ref] if !ok { - return "", fmt.Errorf("unknown ref") + return "", ErrNotFound } return h, nil } @@ -356,48 +385,13 @@ func (r *mockRepoData) CopyRef(source string, dest string) error { hash, exist := r.refs[source] if !exist { - return fmt.Errorf("Unknown ref") + return ErrNotFound } r.refs[dest] = hash return nil } -func (r *mockRepoData) FindCommonAncestor(hash1 Hash, hash2 Hash) (Hash, error) { - ancestor1 := []Hash{hash1} - - for hash1 != "" { - c, ok := r.commits[hash1] - if !ok { - return "", fmt.Errorf("unknown commit %v", hash1) - } - if len(c.parents) == 0 { - break - } - ancestor1 = append(ancestor1, c.parents[0]) - hash1 = c.parents[0] - } - - for { - for _, ancestor := range ancestor1 { - if ancestor == hash2 { - return ancestor, nil - } - } - - c, ok := r.commits[hash2] - if !ok { - return "", fmt.Errorf("unknown commit %v", hash1) - } - - if c.parents[0] == "" { - return "", fmt.Errorf("no ancestor found") - } - - hash2 = c.parents[0] - } -} - func (r *mockRepoData) ListCommits(ref string) ([]Hash, error) { return nonNativeListCommits(r, ref) } diff --git a/repository/repo.go b/repository/repo.go index 2f90b437..66baec65 100644 --- a/repository/repo.go +++ b/repository/repo.go @@ -6,7 +6,6 @@ import ( "io" "github.com/ProtonMail/go-crypto/openpgp" - "github.com/blevesearch/bleve" "github.com/go-git/go-billy/v5" "github.com/MichaelMure/git-bug/util/lamport" @@ -17,6 +16,8 @@ var ( ErrNotARepo = errors.New("not a git repository") // ErrClockNotExist is the error returned when a clock can't be found ErrClockNotExist = errors.New("clock doesn't exist") + // ErrNotFound is the error returned when a git object can't be found + ErrNotFound = errors.New("ref not found") ) // Repo represents a source code repository. @@ -25,7 +26,7 @@ type Repo interface { RepoKeyring RepoCommon RepoStorage - RepoBleve + RepoIndex RepoData Close() error @@ -81,13 +82,33 @@ type RepoStorage interface { LocalStorage() billy.Filesystem } -// RepoBleve give access to Bleve to implement full-text search indexes. -type RepoBleve interface { - // GetBleveIndex return a bleve.Index that can be used to index documents - GetBleveIndex(name string) (bleve.Index, error) +// RepoIndex gives access to full-text search indexes +type RepoIndex interface { + GetIndex(name string) (Index, error) +} + +// Index is a full-text search index +type Index interface { + // IndexOne indexes one document, for the given ID. If the document already exist, + // it replaces it. + IndexOne(id string, texts []string) error + + // IndexBatch start a batch indexing. The returned indexer function is used the same + // way as IndexOne, and the closer function complete the batch insertion. + IndexBatch() (indexer func(id string, texts []string) error, closer func() error) + + // Search returns the list of IDs matching the given terms. + Search(terms []string) (ids []string, err error) + + // DocCount returns the number of document in the index. + DocCount() (uint64, error) - // ClearBleveIndex will wipe the given index - ClearBleveIndex(name string) error + // Clear empty the index. + Clear() error + + // Close closes the index and make sure everything is safely written. After this call + // the index can't be used anymore. + Close() error } type Commit struct { @@ -103,7 +124,7 @@ type RepoData interface { // FetchRefs fetch git refs matching a directory prefix to a remote // Ex: prefix="foo" will fetch any remote refs matching "refs/foo/*" locally. // The equivalent git refspec would be "refs/foo/*:refs/remotes/<remote>/foo/*" - FetchRefs(remote string, prefix string) (string, error) + FetchRefs(remote string, prefixes ...string) (string, error) // PushRefs push git refs matching a directory prefix to a remote // Ex: prefix="foo" will push any local refs matching "refs/foo/*" to the remote. @@ -111,12 +132,13 @@ type RepoData interface { // // Additionally, PushRefs will update the local references in refs/remotes/<remote>/foo to match // the remote state. - PushRefs(remote string, prefix string) (string, error) + PushRefs(remote string, prefixes ...string) (string, error) // StoreData will store arbitrary data and return the corresponding hash StoreData(data []byte) (Hash, error) // ReadData will attempt to read arbitrary data from the given hash + // Returns ErrNotFound if not found. ReadData(hash Hash) ([]byte, error) // StoreTree will store a mapping key-->Hash as a Git tree @@ -124,6 +146,7 @@ type RepoData interface { // ReadTree will return the list of entries in a Git tree // The given hash could be from either a commit or a tree + // Returns ErrNotFound if not found. ReadTree(hash Hash) ([]TreeEntry, error) // StoreCommit will store a Git commit with the given Git tree @@ -134,13 +157,11 @@ type RepoData interface { StoreSignedCommit(treeHash Hash, signKey *openpgp.Entity, parents ...Hash) (Hash, error) // ReadCommit read a Git commit and returns some of its characteristic + // Returns ErrNotFound if not found. ReadCommit(hash Hash) (Commit, error) - // GetTreeHash return the git tree hash referenced in a commit - // Deprecated - GetTreeHash(commit Hash) (Hash, error) - // ResolveRef returns the hash of the target commit of the given ref + // Returns ErrNotFound if not found. ResolveRef(ref string) (Hash, error) // UpdateRef will create or update a Git reference @@ -157,12 +178,9 @@ type RepoData interface { RefExist(ref string) (bool, error) // CopyRef will create a new reference with the same value as another one + // Returns ErrNotFound if not found. CopyRef(source string, dest string) error - // FindCommonAncestor will return the last common ancestor of two chain of commit - // Deprecated - FindCommonAncestor(commit1 Hash, commit2 Hash) (Hash, error) - // ListCommits will return the list of tree hashes of a ref, in chronological order ListCommits(ref string) ([]Hash, error) } diff --git a/repository/repo_testing.go b/repository/repo_testing.go index 5d51d23f..821eb762 100644 --- a/repository/repo_testing.go +++ b/repository/repo_testing.go @@ -10,7 +10,6 @@ import ( "github.com/MichaelMure/git-bug/util/lamport" ) -// TODO: add tests for RepoBleve // TODO: add tests for RepoStorage type RepoCreator func(t testing.TB, bare bool) TestedRepo @@ -33,6 +32,10 @@ func RepoTest(t *testing.T, creator RepoCreator) { RepoConfigTest(t, repo) }) + t.Run("Index", func(t *testing.T) { + RepoIndexTest(t, repo) + }) + t.Run("Clocks", func(t *testing.T) { RepoClockTest(t, repo) }) @@ -45,6 +48,15 @@ func RepoConfigTest(t *testing.T, repo RepoConfig) { testConfig(t, repo.LocalConfig()) } +func randomHash() Hash { + var letterRunes = "abcdef0123456789" + b := make([]byte, idLengthSHA256) + for i := range b { + b[i] = letterRunes[rand.Intn(len(letterRunes))] + } + return Hash(b) +} + // helper to test a RepoData func RepoDataTest(t *testing.T, repo RepoData) { // Blob @@ -59,6 +71,9 @@ func RepoDataTest(t *testing.T, repo RepoData) { require.NoError(t, err) require.Equal(t, data, blob1Read) + _, err = repo.ReadData(randomHash()) + require.ErrorIs(t, err, ErrNotFound) + // Tree blobHash2, err := repo.StoreData(randomData()) @@ -108,25 +123,20 @@ func RepoDataTest(t *testing.T, repo RepoData) { require.NoError(t, err) require.ElementsMatch(t, tree2, tree2Read) + _, err = repo.ReadTree(randomHash()) + require.ErrorIs(t, err, ErrNotFound) + // Commit commit1, err := repo.StoreCommit(treeHash1) require.NoError(t, err) require.True(t, commit1.IsValid()) - treeHash1Read, err := repo.GetTreeHash(commit1) - require.NoError(t, err) - require.Equal(t, treeHash1, treeHash1Read) - // commit with a parent commit2, err := repo.StoreCommit(treeHash2, commit1) require.NoError(t, err) require.True(t, commit2.IsValid()) - treeHash2Read, err := repo.GetTreeHash(commit2) - require.NoError(t, err) - require.Equal(t, treeHash2, treeHash2Read) - // ReadTree should accept tree and commit hashes tree1read, err := repo.ReadTree(commit1) require.NoError(t, err) @@ -137,6 +147,9 @@ func RepoDataTest(t *testing.T, repo RepoData) { c2expected := Commit{Hash: commit2, Parents: []Hash{commit1}, TreeHash: treeHash2} require.Equal(t, c2expected, c2) + _, err = repo.ReadCommit(randomHash()) + require.ErrorIs(t, err, ErrNotFound) + // Ref exist1, err := repo.RefExist("refs/bugs/ref1") @@ -169,14 +182,13 @@ func RepoDataTest(t *testing.T, repo RepoData) { require.NoError(t, err) require.Equal(t, []Hash{commit1, commit2}, commits) - // Graph + _, err = repo.ResolveRef("/refs/bugs/refnotexist") + require.ErrorIs(t, err, ErrNotFound) - commit3, err := repo.StoreCommit(treeHash1, commit1) - require.NoError(t, err) + err = repo.CopyRef("/refs/bugs/refnotexist", "refs/foo") + require.ErrorIs(t, err, ErrNotFound) - ancestorHash, err := repo.FindCommonAncestor(commit2, commit3) - require.NoError(t, err) - require.Equal(t, commit1, ancestorHash) + // Cleanup err = repo.RemoveRef("refs/bugs/ref1") require.NoError(t, err) @@ -234,6 +246,48 @@ func RepoDataSignatureTest(t *testing.T, repo RepoData) { require.Error(t, err) } +func RepoIndexTest(t *testing.T, repo RepoIndex) { + idx, err := repo.GetIndex("a") + require.NoError(t, err) + + // simple indexing + err = idx.IndexOne("id1", []string{"foo", "bar", "foobar barfoo"}) + require.NoError(t, err) + + // batched indexing + indexer, closer := idx.IndexBatch() + err = indexer("id2", []string{"hello", "foo bar"}) + require.NoError(t, err) + err = indexer("id3", []string{"Hola", "Esta bien"}) + require.NoError(t, err) + err = closer() + require.NoError(t, err) + + // search + res, err := idx.Search([]string{"foobar"}) + require.NoError(t, err) + require.ElementsMatch(t, []string{"id1"}, res) + + res, err = idx.Search([]string{"foo"}) + require.NoError(t, err) + require.ElementsMatch(t, []string{"id1", "id2"}, res) + + // re-indexing an item replace previous versions + err = idx.IndexOne("id2", []string{"hello"}) + require.NoError(t, err) + + res, err = idx.Search([]string{"foo"}) + require.NoError(t, err) + require.ElementsMatch(t, []string{"id1"}, res) + + err = idx.Clear() + require.NoError(t, err) + + res, err = idx.Search([]string{"foo"}) + require.NoError(t, err) + require.Empty(t, res) +} + // helper to test a RepoClock func RepoClockTest(t *testing.T, repo RepoClock) { allClocks, err := repo.AllClocks() |