aboutsummaryrefslogtreecommitdiffstats
path: root/storage/filesystem
diff options
context:
space:
mode:
authorDavid Symonds <dsymonds@golang.org>2020-07-15 13:52:04 +1000
committerDavid Symonds <dsymonds@golang.org>2020-07-16 12:53:37 +1000
commite28d9c90aad624596d20d1a59c8371d81b69190b (patch)
tree5785565db48069ff8f173b9dddcbb1b4c1f7e8ff /storage/filesystem
parent41758ec4b81c557092d7566c3eed46f89c1ec3cc (diff)
downloadgo-git-e28d9c90aad624596d20d1a59c8371d81b69190b.tar.gz
Support partial hashes in Repository.ResolveRevision.
Like `git rev-parse <prefix>`, this enumerates the hashes of objects with the given prefix and adds them to the list of candidates for resolution. This has an exhaustive slow path, which requires enumerating all objects and filtering each one, but also a couple of fast paths for common cases. There's room for future work to make this faster; TODOs have been left for that. Fixes #135.
Diffstat (limited to 'storage/filesystem')
-rw-r--r--storage/filesystem/dotgit/dotgit.go77
-rw-r--r--storage/filesystem/dotgit/dotgit_test.go35
-rw-r--r--storage/filesystem/object.go31
-rw-r--r--storage/filesystem/object_test.go17
4 files changed, 157 insertions, 3 deletions
diff --git a/storage/filesystem/dotgit/dotgit.go b/storage/filesystem/dotgit/dotgit.go
index 3840ea7..6c386f7 100644
--- a/storage/filesystem/dotgit/dotgit.go
+++ b/storage/filesystem/dotgit/dotgit.go
@@ -3,12 +3,14 @@ package dotgit
import (
"bufio"
+ "bytes"
"errors"
"fmt"
"io"
stdioutil "io/ioutil"
"os"
"path/filepath"
+ "sort"
"strings"
"time"
@@ -88,7 +90,7 @@ type DotGit struct {
incomingChecked bool
incomingDirName string
- objectList []plumbing.Hash
+ objectList []plumbing.Hash // sorted
objectMap map[plumbing.Hash]struct{}
packList []plumbing.Hash
packMap map[plumbing.Hash]struct{}
@@ -336,6 +338,53 @@ func (d *DotGit) NewObject() (*ObjectWriter, error) {
return newObjectWriter(d.fs)
}
+// ObjectsWithPrefix returns the hashes of objects that have the given prefix.
+func (d *DotGit) ObjectsWithPrefix(prefix []byte) ([]plumbing.Hash, error) {
+ // Handle edge cases.
+ if len(prefix) < 1 {
+ return d.Objects()
+ } else if len(prefix) > len(plumbing.ZeroHash) {
+ return nil, nil
+ }
+
+ if d.options.ExclusiveAccess {
+ err := d.genObjectList()
+ if err != nil {
+ return nil, err
+ }
+
+ // Rely on d.objectList being sorted.
+ // Figure out the half-open interval defined by the prefix.
+ first := sort.Search(len(d.objectList), func(i int) bool {
+ // Same as plumbing.HashSlice.Less.
+ return bytes.Compare(d.objectList[i][:], prefix) >= 0
+ })
+ lim := len(d.objectList)
+ if limPrefix, overflow := incBytes(prefix); !overflow {
+ lim = sort.Search(len(d.objectList), func(i int) bool {
+ // Same as plumbing.HashSlice.Less.
+ return bytes.Compare(d.objectList[i][:], limPrefix) >= 0
+ })
+ }
+ return d.objectList[first:lim], nil
+ }
+
+ // This is the slow path.
+ var objects []plumbing.Hash
+ var n int
+ err := d.ForEachObjectHash(func(hash plumbing.Hash) error {
+ n++
+ if bytes.HasPrefix(hash[:], prefix) {
+ objects = append(objects, hash)
+ }
+ return nil
+ })
+ if err != nil {
+ return nil, err
+ }
+ return objects, nil
+}
+
// Objects returns a slice with the hashes of objects found under the
// .git/objects/ directory.
func (d *DotGit) Objects() ([]plumbing.Hash, error) {
@@ -427,12 +476,17 @@ func (d *DotGit) genObjectList() error {
}
d.objectMap = make(map[plumbing.Hash]struct{})
- return d.forEachObjectHash(func(h plumbing.Hash) error {
+ populate := func(h plumbing.Hash) error {
d.objectList = append(d.objectList, h)
d.objectMap[h] = struct{}{}
return nil
- })
+ }
+ if err := d.forEachObjectHash(populate); err != nil {
+ return err
+ }
+ plumbing.HashesSort(d.objectList)
+ return nil
}
func (d *DotGit) hasObject(h plumbing.Hash) error {
@@ -1115,3 +1169,20 @@ func isNum(b byte) bool {
func isHexAlpha(b byte) bool {
return b >= 'a' && b <= 'f' || b >= 'A' && b <= 'F'
}
+
+// incBytes increments a byte slice, which involves incrementing the
+// right-most byte, and following carry leftward.
+// It makes a copy so that the provided slice's underlying array is not modified.
+// If the overall operation overflows (e.g. incBytes(0xff, 0xff)), the second return parameter indicates that.
+func incBytes(in []byte) (out []byte, overflow bool) {
+ out = make([]byte, len(in))
+ copy(out, in)
+ for i := len(out) - 1; i >= 0; i-- {
+ out[i]++
+ if out[i] != 0 {
+ return // Didn't overflow.
+ }
+ }
+ overflow = true
+ return
+}
diff --git a/storage/filesystem/dotgit/dotgit_test.go b/storage/filesystem/dotgit/dotgit_test.go
index 0a72aa6..237605f 100644
--- a/storage/filesystem/dotgit/dotgit_test.go
+++ b/storage/filesystem/dotgit/dotgit_test.go
@@ -2,6 +2,7 @@ package dotgit
import (
"bufio"
+ "encoding/hex"
"io/ioutil"
"os"
"path/filepath"
@@ -591,6 +592,7 @@ func (s *SuiteDotGit) TestObjects(c *C) {
dir := New(fs)
testObjects(c, fs, dir)
+ testObjectsWithPrefix(c, fs, dir)
}
func (s *SuiteDotGit) TestObjectsExclusive(c *C) {
@@ -598,6 +600,7 @@ func (s *SuiteDotGit) TestObjectsExclusive(c *C) {
dir := NewWithOptions(fs, Options{ExclusiveAccess: true})
testObjects(c, fs, dir)
+ testObjectsWithPrefix(c, fs, dir)
}
func testObjects(c *C, fs billy.Filesystem, dir *DotGit) {
@@ -609,6 +612,20 @@ func testObjects(c *C, fs billy.Filesystem, dir *DotGit) {
c.Assert(hashes[2].String(), Equals, "03db8e1fbe133a480f2867aac478fd866686d69e")
}
+func testObjectsWithPrefix(c *C, fs billy.Filesystem, dir *DotGit) {
+ prefix, _ := hex.DecodeString("01d5")
+ hashes, err := dir.ObjectsWithPrefix(prefix)
+ c.Assert(err, IsNil)
+ c.Assert(hashes, HasLen, 1)
+ c.Assert(hashes[0].String(), Equals, "01d5fa556c33743006de7e76e67a2dfcd994ca04")
+
+ // Empty prefix should yield all objects.
+ // (subset of testObjects)
+ hashes, err = dir.ObjectsWithPrefix(nil)
+ c.Assert(err, IsNil)
+ c.Assert(hashes, HasLen, 187)
+}
+
func (s *SuiteDotGit) TestObjectsNoFolder(c *C) {
tmp, err := ioutil.TempDir("", "dot-git")
c.Assert(err, IsNil)
@@ -835,3 +852,21 @@ type norwfs struct {
func (f *norwfs) Capabilities() billy.Capability {
return billy.Capabilities(f.Filesystem) &^ billy.ReadAndWriteCapability
}
+
+func (s *SuiteDotGit) TestIncBytes(c *C) {
+ tests := []struct {
+ in []byte
+ out []byte
+ overflow bool
+ }{
+ {[]byte{0}, []byte{1}, false},
+ {[]byte{0xff}, []byte{0}, true},
+ {[]byte{7, 0xff}, []byte{8, 0}, false},
+ {[]byte{0xff, 0xff}, []byte{0, 0}, true},
+ }
+ for _, test := range tests {
+ out, overflow := incBytes(test.in)
+ c.Assert(out, DeepEquals, test.out)
+ c.Assert(overflow, Equals, test.overflow)
+ }
+}
diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go
index 7437174..0c25dad 100644
--- a/storage/filesystem/object.go
+++ b/storage/filesystem/object.go
@@ -1,6 +1,7 @@
package filesystem
import (
+ "bytes"
"io"
"os"
"time"
@@ -518,6 +519,36 @@ func (s *ObjectStorage) findObjectInPackfile(h plumbing.Hash) (plumbing.Hash, pl
return plumbing.ZeroHash, plumbing.ZeroHash, -1
}
+func (s *ObjectStorage) HashesWithPrefix(prefix []byte) ([]plumbing.Hash, error) {
+ hashes, err := s.dir.ObjectsWithPrefix(prefix)
+ if err != nil {
+ return nil, err
+ }
+
+ // TODO: This could be faster with some idxfile changes,
+ // or diving into the packfile.
+ for _, index := range s.index {
+ ei, err := index.Entries()
+ if err != nil {
+ return nil, err
+ }
+ for {
+ e, err := ei.Next()
+ if err == io.EOF {
+ break
+ } else if err != nil {
+ return nil, err
+ }
+ if bytes.HasPrefix(e.Hash[:], prefix) {
+ hashes = append(hashes, e.Hash)
+ }
+ }
+ ei.Close()
+ }
+
+ return hashes, nil
+}
+
// IterEncodedObjects returns an iterator for all the objects in the packfile
// with the given type.
func (s *ObjectStorage) IterEncodedObjects(t plumbing.ObjectType) (storer.EncodedObjectIter, error) {
diff --git a/storage/filesystem/object_test.go b/storage/filesystem/object_test.go
index 2d6f568..036420f 100644
--- a/storage/filesystem/object_test.go
+++ b/storage/filesystem/object_test.go
@@ -1,6 +1,7 @@
package filesystem
import (
+ "encoding/hex"
"fmt"
"io"
"io/ioutil"
@@ -332,6 +333,22 @@ func (s *FsSuite) TestGetFromObjectFileSharedCache(c *C) {
c.Assert(err, Equals, plumbing.ErrObjectNotFound)
}
+func (s *FsSuite) TestHashesWithPrefix(c *C) {
+ // Same setup as TestGetFromObjectFile.
+ fs := fixtures.ByTag(".git").ByTag("unpacked").One().DotGit()
+ o := NewObjectStorage(dotgit.New(fs), cache.NewObjectLRUDefault())
+ expected := plumbing.NewHash("f3dfe29d268303fc6e1bbce268605fc99573406e")
+ obj, err := o.EncodedObject(plumbing.AnyObject, expected)
+ c.Assert(err, IsNil)
+ c.Assert(obj.Hash(), Equals, expected)
+
+ prefix, _ := hex.DecodeString("f3dfe2")
+ hashes, err := o.HashesWithPrefix(prefix)
+ c.Assert(err, IsNil)
+ c.Assert(hashes, HasLen, 1)
+ c.Assert(hashes[0].String(), Equals, "f3dfe29d268303fc6e1bbce268605fc99573406e")
+}
+
func BenchmarkPackfileIter(b *testing.B) {
defer fixtures.Clean()