From e28d9c90aad624596d20d1a59c8371d81b69190b Mon Sep 17 00:00:00 2001 From: David Symonds Date: Wed, 15 Jul 2020 13:52:04 +1000 Subject: Support partial hashes in Repository.ResolveRevision. Like `git rev-parse `, this enumerates the hashes of objects with the given prefix and adds them to the list of candidates for resolution. This has an exhaustive slow path, which requires enumerating all objects and filtering each one, but also a couple of fast paths for common cases. There's room for future work to make this faster; TODOs have been left for that. Fixes #135. --- storage/filesystem/dotgit/dotgit.go | 77 ++++++++++++++++++++++++++++++-- storage/filesystem/dotgit/dotgit_test.go | 35 +++++++++++++++ storage/filesystem/object.go | 31 +++++++++++++ storage/filesystem/object_test.go | 17 +++++++ 4 files changed, 157 insertions(+), 3 deletions(-) (limited to 'storage/filesystem') diff --git a/storage/filesystem/dotgit/dotgit.go b/storage/filesystem/dotgit/dotgit.go index 3840ea7..6c386f7 100644 --- a/storage/filesystem/dotgit/dotgit.go +++ b/storage/filesystem/dotgit/dotgit.go @@ -3,12 +3,14 @@ package dotgit import ( "bufio" + "bytes" "errors" "fmt" "io" stdioutil "io/ioutil" "os" "path/filepath" + "sort" "strings" "time" @@ -88,7 +90,7 @@ type DotGit struct { incomingChecked bool incomingDirName string - objectList []plumbing.Hash + objectList []plumbing.Hash // sorted objectMap map[plumbing.Hash]struct{} packList []plumbing.Hash packMap map[plumbing.Hash]struct{} @@ -336,6 +338,53 @@ func (d *DotGit) NewObject() (*ObjectWriter, error) { return newObjectWriter(d.fs) } +// ObjectsWithPrefix returns the hashes of objects that have the given prefix. +func (d *DotGit) ObjectsWithPrefix(prefix []byte) ([]plumbing.Hash, error) { + // Handle edge cases. + if len(prefix) < 1 { + return d.Objects() + } else if len(prefix) > len(plumbing.ZeroHash) { + return nil, nil + } + + if d.options.ExclusiveAccess { + err := d.genObjectList() + if err != nil { + return nil, err + } + + // Rely on d.objectList being sorted. + // Figure out the half-open interval defined by the prefix. + first := sort.Search(len(d.objectList), func(i int) bool { + // Same as plumbing.HashSlice.Less. + return bytes.Compare(d.objectList[i][:], prefix) >= 0 + }) + lim := len(d.objectList) + if limPrefix, overflow := incBytes(prefix); !overflow { + lim = sort.Search(len(d.objectList), func(i int) bool { + // Same as plumbing.HashSlice.Less. + return bytes.Compare(d.objectList[i][:], limPrefix) >= 0 + }) + } + return d.objectList[first:lim], nil + } + + // This is the slow path. + var objects []plumbing.Hash + var n int + err := d.ForEachObjectHash(func(hash plumbing.Hash) error { + n++ + if bytes.HasPrefix(hash[:], prefix) { + objects = append(objects, hash) + } + return nil + }) + if err != nil { + return nil, err + } + return objects, nil +} + // Objects returns a slice with the hashes of objects found under the // .git/objects/ directory. func (d *DotGit) Objects() ([]plumbing.Hash, error) { @@ -427,12 +476,17 @@ func (d *DotGit) genObjectList() error { } d.objectMap = make(map[plumbing.Hash]struct{}) - return d.forEachObjectHash(func(h plumbing.Hash) error { + populate := func(h plumbing.Hash) error { d.objectList = append(d.objectList, h) d.objectMap[h] = struct{}{} return nil - }) + } + if err := d.forEachObjectHash(populate); err != nil { + return err + } + plumbing.HashesSort(d.objectList) + return nil } func (d *DotGit) hasObject(h plumbing.Hash) error { @@ -1115,3 +1169,20 @@ func isNum(b byte) bool { func isHexAlpha(b byte) bool { return b >= 'a' && b <= 'f' || b >= 'A' && b <= 'F' } + +// incBytes increments a byte slice, which involves incrementing the +// right-most byte, and following carry leftward. +// It makes a copy so that the provided slice's underlying array is not modified. +// If the overall operation overflows (e.g. incBytes(0xff, 0xff)), the second return parameter indicates that. +func incBytes(in []byte) (out []byte, overflow bool) { + out = make([]byte, len(in)) + copy(out, in) + for i := len(out) - 1; i >= 0; i-- { + out[i]++ + if out[i] != 0 { + return // Didn't overflow. + } + } + overflow = true + return +} diff --git a/storage/filesystem/dotgit/dotgit_test.go b/storage/filesystem/dotgit/dotgit_test.go index 0a72aa6..237605f 100644 --- a/storage/filesystem/dotgit/dotgit_test.go +++ b/storage/filesystem/dotgit/dotgit_test.go @@ -2,6 +2,7 @@ package dotgit import ( "bufio" + "encoding/hex" "io/ioutil" "os" "path/filepath" @@ -591,6 +592,7 @@ func (s *SuiteDotGit) TestObjects(c *C) { dir := New(fs) testObjects(c, fs, dir) + testObjectsWithPrefix(c, fs, dir) } func (s *SuiteDotGit) TestObjectsExclusive(c *C) { @@ -598,6 +600,7 @@ func (s *SuiteDotGit) TestObjectsExclusive(c *C) { dir := NewWithOptions(fs, Options{ExclusiveAccess: true}) testObjects(c, fs, dir) + testObjectsWithPrefix(c, fs, dir) } func testObjects(c *C, fs billy.Filesystem, dir *DotGit) { @@ -609,6 +612,20 @@ func testObjects(c *C, fs billy.Filesystem, dir *DotGit) { c.Assert(hashes[2].String(), Equals, "03db8e1fbe133a480f2867aac478fd866686d69e") } +func testObjectsWithPrefix(c *C, fs billy.Filesystem, dir *DotGit) { + prefix, _ := hex.DecodeString("01d5") + hashes, err := dir.ObjectsWithPrefix(prefix) + c.Assert(err, IsNil) + c.Assert(hashes, HasLen, 1) + c.Assert(hashes[0].String(), Equals, "01d5fa556c33743006de7e76e67a2dfcd994ca04") + + // Empty prefix should yield all objects. + // (subset of testObjects) + hashes, err = dir.ObjectsWithPrefix(nil) + c.Assert(err, IsNil) + c.Assert(hashes, HasLen, 187) +} + func (s *SuiteDotGit) TestObjectsNoFolder(c *C) { tmp, err := ioutil.TempDir("", "dot-git") c.Assert(err, IsNil) @@ -835,3 +852,21 @@ type norwfs struct { func (f *norwfs) Capabilities() billy.Capability { return billy.Capabilities(f.Filesystem) &^ billy.ReadAndWriteCapability } + +func (s *SuiteDotGit) TestIncBytes(c *C) { + tests := []struct { + in []byte + out []byte + overflow bool + }{ + {[]byte{0}, []byte{1}, false}, + {[]byte{0xff}, []byte{0}, true}, + {[]byte{7, 0xff}, []byte{8, 0}, false}, + {[]byte{0xff, 0xff}, []byte{0, 0}, true}, + } + for _, test := range tests { + out, overflow := incBytes(test.in) + c.Assert(out, DeepEquals, test.out) + c.Assert(overflow, Equals, test.overflow) + } +} diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index 7437174..0c25dad 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -1,6 +1,7 @@ package filesystem import ( + "bytes" "io" "os" "time" @@ -518,6 +519,36 @@ func (s *ObjectStorage) findObjectInPackfile(h plumbing.Hash) (plumbing.Hash, pl return plumbing.ZeroHash, plumbing.ZeroHash, -1 } +func (s *ObjectStorage) HashesWithPrefix(prefix []byte) ([]plumbing.Hash, error) { + hashes, err := s.dir.ObjectsWithPrefix(prefix) + if err != nil { + return nil, err + } + + // TODO: This could be faster with some idxfile changes, + // or diving into the packfile. + for _, index := range s.index { + ei, err := index.Entries() + if err != nil { + return nil, err + } + for { + e, err := ei.Next() + if err == io.EOF { + break + } else if err != nil { + return nil, err + } + if bytes.HasPrefix(e.Hash[:], prefix) { + hashes = append(hashes, e.Hash) + } + } + ei.Close() + } + + return hashes, nil +} + // IterEncodedObjects returns an iterator for all the objects in the packfile // with the given type. func (s *ObjectStorage) IterEncodedObjects(t plumbing.ObjectType) (storer.EncodedObjectIter, error) { diff --git a/storage/filesystem/object_test.go b/storage/filesystem/object_test.go index 2d6f568..036420f 100644 --- a/storage/filesystem/object_test.go +++ b/storage/filesystem/object_test.go @@ -1,6 +1,7 @@ package filesystem import ( + "encoding/hex" "fmt" "io" "io/ioutil" @@ -332,6 +333,22 @@ func (s *FsSuite) TestGetFromObjectFileSharedCache(c *C) { c.Assert(err, Equals, plumbing.ErrObjectNotFound) } +func (s *FsSuite) TestHashesWithPrefix(c *C) { + // Same setup as TestGetFromObjectFile. + fs := fixtures.ByTag(".git").ByTag("unpacked").One().DotGit() + o := NewObjectStorage(dotgit.New(fs), cache.NewObjectLRUDefault()) + expected := plumbing.NewHash("f3dfe29d268303fc6e1bbce268605fc99573406e") + obj, err := o.EncodedObject(plumbing.AnyObject, expected) + c.Assert(err, IsNil) + c.Assert(obj.Hash(), Equals, expected) + + prefix, _ := hex.DecodeString("f3dfe2") + hashes, err := o.HashesWithPrefix(prefix) + c.Assert(err, IsNil) + c.Assert(hashes, HasLen, 1) + c.Assert(hashes[0].String(), Equals, "f3dfe29d268303fc6e1bbce268605fc99573406e") +} + func BenchmarkPackfileIter(b *testing.B) { defer fixtures.Clean() -- cgit