diff options
author | David Symonds <dsymonds@golang.org> | 2020-07-15 13:52:04 +1000 |
---|---|---|
committer | David Symonds <dsymonds@golang.org> | 2020-07-16 12:53:37 +1000 |
commit | e28d9c90aad624596d20d1a59c8371d81b69190b (patch) | |
tree | 5785565db48069ff8f173b9dddcbb1b4c1f7e8ff | |
parent | 41758ec4b81c557092d7566c3eed46f89c1ec3cc (diff) | |
download | go-git-e28d9c90aad624596d20d1a59c8371d81b69190b.tar.gz |
Support partial hashes in Repository.ResolveRevision.
Like `git rev-parse <prefix>`, this enumerates the hashes of objects
with the given prefix and adds them to the list of candidates for
resolution.
This has an exhaustive slow path, which requires enumerating all objects
and filtering each one, but also a couple of fast paths for common
cases. There's room for future work to make this faster; TODOs have been
left for that.
Fixes #135.
-rw-r--r-- | internal/revision/parser.go | 2 | ||||
-rw-r--r-- | internal/revision/parser_test.go | 1 | ||||
-rw-r--r-- | repository.go | 80 | ||||
-rw-r--r-- | repository_test.go | 1 | ||||
-rw-r--r-- | storage/filesystem/dotgit/dotgit.go | 77 | ||||
-rw-r--r-- | storage/filesystem/dotgit/dotgit_test.go | 35 | ||||
-rw-r--r-- | storage/filesystem/object.go | 31 | ||||
-rw-r--r-- | storage/filesystem/object_test.go | 17 |
8 files changed, 234 insertions, 10 deletions
diff --git a/internal/revision/parser.go b/internal/revision/parser.go index 61de386..3efe5d7 100644 --- a/internal/revision/parser.go +++ b/internal/revision/parser.go @@ -28,7 +28,7 @@ func (e *ErrInvalidRevision) Error() string { type Revisioner interface { } -// Ref represents a reference name : HEAD, master +// Ref represents a reference name : HEAD, master, <hash> type Ref string // TildePath represents ~, ~{n} diff --git a/internal/revision/parser_test.go b/internal/revision/parser_test.go index 4bb007f..98403cc 100644 --- a/internal/revision/parser_test.go +++ b/internal/revision/parser_test.go @@ -354,6 +354,7 @@ func (s *ParserSuite) TestParseRefWithValidName(c *C) { "refs/remotes/test", "refs/remotes/origin/HEAD", "refs/remotes/origin/master", + "0123abcd", // short hash } for _, d := range datas { diff --git a/repository.go b/repository.go index 172029c..66c0918 100644 --- a/repository.go +++ b/repository.go @@ -3,6 +3,7 @@ package git import ( "bytes" "context" + "encoding/hex" "errors" "fmt" "io" @@ -1426,7 +1427,7 @@ func (r *Repository) Worktree() (*Worktree, error) { // resolve to a commit hash, not a tree or annotated tag. // // Implemented resolvers : HEAD, branch, tag, heads/branch, refs/heads/branch, -// refs/tags/tag, refs/remotes/origin/branch, refs/remotes/origin/HEAD, tilde and caret (HEAD~1, master~^, tag~2, ref/heads/master~1, ...), selection by text (HEAD^{/fix nasty bug}) +// refs/tags/tag, refs/remotes/origin/branch, refs/remotes/origin/HEAD, tilde and caret (HEAD~1, master~^, tag~2, ref/heads/master~1, ...), selection by text (HEAD^{/fix nasty bug}), hash (prefix and full) func (r *Repository) ResolveRevision(rev plumbing.Revision) (*plumbing.Hash, error) { p := revision.NewParserFromString(string(rev)) @@ -1445,11 +1446,7 @@ func (r *Repository) ResolveRevision(rev plumbing.Revision) (*plumbing.Hash, err var tryHashes []plumbing.Hash - maybeHash := plumbing.NewHash(string(revisionRef)) - - if !maybeHash.IsZero() { - tryHashes = append(tryHashes, maybeHash) - } + tryHashes = append(tryHashes, r.resolveHashPrefix(string(revisionRef))...) for _, rule := range append([]string{"%s"}, plumbing.RefRevParseRules...) { ref, err := storer.ResolveReference(r.Storer, plumbing.ReferenceName(fmt.Sprintf(rule, revisionRef))) @@ -1567,6 +1564,49 @@ func (r *Repository) ResolveRevision(rev plumbing.Revision) (*plumbing.Hash, err return &commit.Hash, nil } +// resolveHashPrefix returns a list of potential hashes that the given string +// is a prefix of. It quietly swallows errors, returning nil. +func (r *Repository) resolveHashPrefix(hashStr string) []plumbing.Hash { + // Handle complete and partial hashes. + // plumbing.NewHash forces args into a full 20 byte hash, which isn't suitable + // for partial hashes since they will become zero-filled. + + if hashStr == "" { + return nil + } + if len(hashStr) == len(plumbing.ZeroHash)*2 { + // Only a full hash is possible. + hexb, err := hex.DecodeString(hashStr) + if err != nil { + return nil + } + var h plumbing.Hash + copy(h[:], hexb) + return []plumbing.Hash{h} + } + + // Partial hash. + // hex.DecodeString only decodes to complete bytes, so only works with pairs of hex digits. + evenHex := hashStr[:len(hashStr)&^1] + hexb, err := hex.DecodeString(evenHex) + if err != nil { + return nil + } + candidates := expandPartialHash(r.Storer, hexb) + if len(evenHex) == len(hashStr) { + // The prefix was an exact number of bytes. + return candidates + } + // Do another prefix check to ensure the dangling nybble is correct. + var hashes []plumbing.Hash + for _, h := range candidates { + if strings.HasPrefix(h.String(), hashStr) { + hashes = append(hashes, h) + } + } + return hashes +} + type RepackConfig struct { // UseRefDeltas configures whether packfile encoder will use reference deltas. // By default OFSDeltaObject is used. @@ -1659,3 +1699,31 @@ func (r *Repository) createNewObjectPack(cfg *RepackConfig) (h plumbing.Hash, er return h, err } + +func expandPartialHash(st storer.EncodedObjectStorer, prefix []byte) (hashes []plumbing.Hash) { + // The fast version is implemented by storage/filesystem.ObjectStorage. + type fastIter interface { + HashesWithPrefix(prefix []byte) ([]plumbing.Hash, error) + } + if fi, ok := st.(fastIter); ok { + h, err := fi.HashesWithPrefix(prefix) + if err != nil { + return nil + } + return h + } + + // Slow path. + iter, err := st.IterEncodedObjects(plumbing.AnyObject) + if err != nil { + return nil + } + iter.ForEach(func(obj plumbing.EncodedObject) error { + h := obj.Hash() + if bytes.HasPrefix(h[:], prefix) { + hashes = append(hashes, h) + } + return nil + }) + return +} diff --git a/repository_test.go b/repository_test.go index bbed604..112561e 100644 --- a/repository_test.go +++ b/repository_test.go @@ -2642,6 +2642,7 @@ func (s *RepositorySuite) TestResolveRevision(c *C) { "v1.0.0~1": "918c48b83bd081e863dbe1b80f8998f058cd8294", "master~1": "918c48b83bd081e863dbe1b80f8998f058cd8294", "918c48b83bd081e863dbe1b80f8998f058cd8294": "918c48b83bd081e863dbe1b80f8998f058cd8294", + "918c48b": "918c48b83bd081e863dbe1b80f8998f058cd8294", // odd number of hex digits } for rev, hash := range datas { diff --git a/storage/filesystem/dotgit/dotgit.go b/storage/filesystem/dotgit/dotgit.go index 3840ea7..6c386f7 100644 --- a/storage/filesystem/dotgit/dotgit.go +++ b/storage/filesystem/dotgit/dotgit.go @@ -3,12 +3,14 @@ package dotgit import ( "bufio" + "bytes" "errors" "fmt" "io" stdioutil "io/ioutil" "os" "path/filepath" + "sort" "strings" "time" @@ -88,7 +90,7 @@ type DotGit struct { incomingChecked bool incomingDirName string - objectList []plumbing.Hash + objectList []plumbing.Hash // sorted objectMap map[plumbing.Hash]struct{} packList []plumbing.Hash packMap map[plumbing.Hash]struct{} @@ -336,6 +338,53 @@ func (d *DotGit) NewObject() (*ObjectWriter, error) { return newObjectWriter(d.fs) } +// ObjectsWithPrefix returns the hashes of objects that have the given prefix. +func (d *DotGit) ObjectsWithPrefix(prefix []byte) ([]plumbing.Hash, error) { + // Handle edge cases. + if len(prefix) < 1 { + return d.Objects() + } else if len(prefix) > len(plumbing.ZeroHash) { + return nil, nil + } + + if d.options.ExclusiveAccess { + err := d.genObjectList() + if err != nil { + return nil, err + } + + // Rely on d.objectList being sorted. + // Figure out the half-open interval defined by the prefix. + first := sort.Search(len(d.objectList), func(i int) bool { + // Same as plumbing.HashSlice.Less. + return bytes.Compare(d.objectList[i][:], prefix) >= 0 + }) + lim := len(d.objectList) + if limPrefix, overflow := incBytes(prefix); !overflow { + lim = sort.Search(len(d.objectList), func(i int) bool { + // Same as plumbing.HashSlice.Less. + return bytes.Compare(d.objectList[i][:], limPrefix) >= 0 + }) + } + return d.objectList[first:lim], nil + } + + // This is the slow path. + var objects []plumbing.Hash + var n int + err := d.ForEachObjectHash(func(hash plumbing.Hash) error { + n++ + if bytes.HasPrefix(hash[:], prefix) { + objects = append(objects, hash) + } + return nil + }) + if err != nil { + return nil, err + } + return objects, nil +} + // Objects returns a slice with the hashes of objects found under the // .git/objects/ directory. func (d *DotGit) Objects() ([]plumbing.Hash, error) { @@ -427,12 +476,17 @@ func (d *DotGit) genObjectList() error { } d.objectMap = make(map[plumbing.Hash]struct{}) - return d.forEachObjectHash(func(h plumbing.Hash) error { + populate := func(h plumbing.Hash) error { d.objectList = append(d.objectList, h) d.objectMap[h] = struct{}{} return nil - }) + } + if err := d.forEachObjectHash(populate); err != nil { + return err + } + plumbing.HashesSort(d.objectList) + return nil } func (d *DotGit) hasObject(h plumbing.Hash) error { @@ -1115,3 +1169,20 @@ func isNum(b byte) bool { func isHexAlpha(b byte) bool { return b >= 'a' && b <= 'f' || b >= 'A' && b <= 'F' } + +// incBytes increments a byte slice, which involves incrementing the +// right-most byte, and following carry leftward. +// It makes a copy so that the provided slice's underlying array is not modified. +// If the overall operation overflows (e.g. incBytes(0xff, 0xff)), the second return parameter indicates that. +func incBytes(in []byte) (out []byte, overflow bool) { + out = make([]byte, len(in)) + copy(out, in) + for i := len(out) - 1; i >= 0; i-- { + out[i]++ + if out[i] != 0 { + return // Didn't overflow. + } + } + overflow = true + return +} diff --git a/storage/filesystem/dotgit/dotgit_test.go b/storage/filesystem/dotgit/dotgit_test.go index 0a72aa6..237605f 100644 --- a/storage/filesystem/dotgit/dotgit_test.go +++ b/storage/filesystem/dotgit/dotgit_test.go @@ -2,6 +2,7 @@ package dotgit import ( "bufio" + "encoding/hex" "io/ioutil" "os" "path/filepath" @@ -591,6 +592,7 @@ func (s *SuiteDotGit) TestObjects(c *C) { dir := New(fs) testObjects(c, fs, dir) + testObjectsWithPrefix(c, fs, dir) } func (s *SuiteDotGit) TestObjectsExclusive(c *C) { @@ -598,6 +600,7 @@ func (s *SuiteDotGit) TestObjectsExclusive(c *C) { dir := NewWithOptions(fs, Options{ExclusiveAccess: true}) testObjects(c, fs, dir) + testObjectsWithPrefix(c, fs, dir) } func testObjects(c *C, fs billy.Filesystem, dir *DotGit) { @@ -609,6 +612,20 @@ func testObjects(c *C, fs billy.Filesystem, dir *DotGit) { c.Assert(hashes[2].String(), Equals, "03db8e1fbe133a480f2867aac478fd866686d69e") } +func testObjectsWithPrefix(c *C, fs billy.Filesystem, dir *DotGit) { + prefix, _ := hex.DecodeString("01d5") + hashes, err := dir.ObjectsWithPrefix(prefix) + c.Assert(err, IsNil) + c.Assert(hashes, HasLen, 1) + c.Assert(hashes[0].String(), Equals, "01d5fa556c33743006de7e76e67a2dfcd994ca04") + + // Empty prefix should yield all objects. + // (subset of testObjects) + hashes, err = dir.ObjectsWithPrefix(nil) + c.Assert(err, IsNil) + c.Assert(hashes, HasLen, 187) +} + func (s *SuiteDotGit) TestObjectsNoFolder(c *C) { tmp, err := ioutil.TempDir("", "dot-git") c.Assert(err, IsNil) @@ -835,3 +852,21 @@ type norwfs struct { func (f *norwfs) Capabilities() billy.Capability { return billy.Capabilities(f.Filesystem) &^ billy.ReadAndWriteCapability } + +func (s *SuiteDotGit) TestIncBytes(c *C) { + tests := []struct { + in []byte + out []byte + overflow bool + }{ + {[]byte{0}, []byte{1}, false}, + {[]byte{0xff}, []byte{0}, true}, + {[]byte{7, 0xff}, []byte{8, 0}, false}, + {[]byte{0xff, 0xff}, []byte{0, 0}, true}, + } + for _, test := range tests { + out, overflow := incBytes(test.in) + c.Assert(out, DeepEquals, test.out) + c.Assert(overflow, Equals, test.overflow) + } +} diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index 7437174..0c25dad 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -1,6 +1,7 @@ package filesystem import ( + "bytes" "io" "os" "time" @@ -518,6 +519,36 @@ func (s *ObjectStorage) findObjectInPackfile(h plumbing.Hash) (plumbing.Hash, pl return plumbing.ZeroHash, plumbing.ZeroHash, -1 } +func (s *ObjectStorage) HashesWithPrefix(prefix []byte) ([]plumbing.Hash, error) { + hashes, err := s.dir.ObjectsWithPrefix(prefix) + if err != nil { + return nil, err + } + + // TODO: This could be faster with some idxfile changes, + // or diving into the packfile. + for _, index := range s.index { + ei, err := index.Entries() + if err != nil { + return nil, err + } + for { + e, err := ei.Next() + if err == io.EOF { + break + } else if err != nil { + return nil, err + } + if bytes.HasPrefix(e.Hash[:], prefix) { + hashes = append(hashes, e.Hash) + } + } + ei.Close() + } + + return hashes, nil +} + // IterEncodedObjects returns an iterator for all the objects in the packfile // with the given type. func (s *ObjectStorage) IterEncodedObjects(t plumbing.ObjectType) (storer.EncodedObjectIter, error) { diff --git a/storage/filesystem/object_test.go b/storage/filesystem/object_test.go index 2d6f568..036420f 100644 --- a/storage/filesystem/object_test.go +++ b/storage/filesystem/object_test.go @@ -1,6 +1,7 @@ package filesystem import ( + "encoding/hex" "fmt" "io" "io/ioutil" @@ -332,6 +333,22 @@ func (s *FsSuite) TestGetFromObjectFileSharedCache(c *C) { c.Assert(err, Equals, plumbing.ErrObjectNotFound) } +func (s *FsSuite) TestHashesWithPrefix(c *C) { + // Same setup as TestGetFromObjectFile. + fs := fixtures.ByTag(".git").ByTag("unpacked").One().DotGit() + o := NewObjectStorage(dotgit.New(fs), cache.NewObjectLRUDefault()) + expected := plumbing.NewHash("f3dfe29d268303fc6e1bbce268605fc99573406e") + obj, err := o.EncodedObject(plumbing.AnyObject, expected) + c.Assert(err, IsNil) + c.Assert(obj.Hash(), Equals, expected) + + prefix, _ := hex.DecodeString("f3dfe2") + hashes, err := o.HashesWithPrefix(prefix) + c.Assert(err, IsNil) + c.Assert(hashes, HasLen, 1) + c.Assert(hashes[0].String(), Equals, "f3dfe29d268303fc6e1bbce268605fc99573406e") +} + func BenchmarkPackfileIter(b *testing.B) { defer fixtures.Clean() |