Merge pull request #136 from dsymonds/revparse-partial-hash

Repository.ResolveRevision, support partial hashes
author: Máximo Cuadros <mcuadros@gmail.com> 2020-07-16 21:28:40 +0200
committer: GitHub <noreply@github.com> 2020-07-16 21:28:40 +0200
commit: 7906922c319b6077e8e66b70711073fb59a08a20 (patch)
tree: 5785565db48069ff8f173b9dddcbb1b4c1f7e8ff
parent: 41758ec4b81c557092d7566c3eed46f89c1ec3cc (diff)
parent: e28d9c90aad624596d20d1a59c8371d81b69190b (diff)
download: go-git-7906922c319b6077e8e66b70711073fb59a08a20.tar.gz
8 files changed, 234 insertions, 10 deletions
diff --git a/internal/revision/parser.go b/internal/revision/parser.go
index 61de386..3efe5d7 100644
--- a/internal/revision/parser.go
+++ b/internal/revision/parser.go
@@ -28,7 +28,7 @@ func (e *ErrInvalidRevision) Error() string {
 type Revisioner interface {
 }
 
-// Ref represents a reference name : HEAD, master
+// Ref represents a reference name : HEAD, master, <hash>
 type Ref string
 
 // TildePath represents ~, ~{n}
diff --git a/internal/revision/parser_test.go b/internal/revision/parser_test.go
index 4bb007f..98403cc 100644
--- a/internal/revision/parser_test.go
+++ b/internal/revision/parser_test.go
@@ -354,6 +354,7 @@ func (s *ParserSuite) TestParseRefWithValidName(c *C) {
 		"refs/remotes/test",
 		"refs/remotes/origin/HEAD",
 		"refs/remotes/origin/master",
+		"0123abcd", // short hash
 	}
 
 	for _, d := range datas {
diff --git a/repository.go b/repository.go
index 172029c..66c0918 100644
--- a/repository.go
+++ b/repository.go
@@ -3,6 +3,7 @@ package git
 import (
 	"bytes"
 	"context"
+	"encoding/hex"
 	"errors"
 	"fmt"
 	"io"
@@ -1426,7 +1427,7 @@ func (r *Repository) Worktree() (*Worktree, error) {
 // resolve to a commit hash, not a tree or annotated tag.
 //
 // Implemented resolvers : HEAD, branch, tag, heads/branch, refs/heads/branch,
-// refs/tags/tag, refs/remotes/origin/branch, refs/remotes/origin/HEAD, tilde and caret (HEAD~1, master~^, tag~2, ref/heads/master~1, ...), selection by text (HEAD^{/fix nasty bug})
+// refs/tags/tag, refs/remotes/origin/branch, refs/remotes/origin/HEAD, tilde and caret (HEAD~1, master~^, tag~2, ref/heads/master~1, ...), selection by text (HEAD^{/fix nasty bug}), hash (prefix and full)
 func (r *Repository) ResolveRevision(rev plumbing.Revision) (*plumbing.Hash, error) {
 	p := revision.NewParserFromString(string(rev))
 
@@ -1445,11 +1446,7 @@ func (r *Repository) ResolveRevision(rev plumbing.Revision) (*plumbing.Hash, err
 
 			var tryHashes []plumbing.Hash
 
-			maybeHash := plumbing.NewHash(string(revisionRef))
-
-			if !maybeHash.IsZero() {
-				tryHashes = append(tryHashes, maybeHash)
-			}
+			tryHashes = append(tryHashes, r.resolveHashPrefix(string(revisionRef))...)
 
 			for _, rule := range append([]string{"%s"}, plumbing.RefRevParseRules...) {
 				ref, err := storer.ResolveReference(r.Storer, plumbing.ReferenceName(fmt.Sprintf(rule, revisionRef)))
@@ -1567,6 +1564,49 @@ func (r *Repository) ResolveRevision(rev plumbing.Revision) (*plumbing.Hash, err
 	return &commit.Hash, nil
 }
 
+// resolveHashPrefix returns a list of potential hashes that the given string
+// is a prefix of. It quietly swallows errors, returning nil.
+func (r *Repository) resolveHashPrefix(hashStr string) []plumbing.Hash {
+	// Handle complete and partial hashes.
+	// plumbing.NewHash forces args into a full 20 byte hash, which isn't suitable
+	// for partial hashes since they will become zero-filled.
+
+	if hashStr == "" {
+		return nil
+	}
+	if len(hashStr) == len(plumbing.ZeroHash)*2 {
+		// Only a full hash is possible.
+		hexb, err := hex.DecodeString(hashStr)
+		if err != nil {
+			return nil
+		}
+		var h plumbing.Hash
+		copy(h[:], hexb)
+		return []plumbing.Hash{h}
+	}
+
+	// Partial hash.
+	// hex.DecodeString only decodes to complete bytes, so only works with pairs of hex digits.
+	evenHex := hashStr[:len(hashStr)&^1]
+	hexb, err := hex.DecodeString(evenHex)
+	if err != nil {
+		return nil
+	}
+	candidates := expandPartialHash(r.Storer, hexb)
+	if len(evenHex) == len(hashStr) {
+		// The prefix was an exact number of bytes.
+		return candidates
+	}
+	// Do another prefix check to ensure the dangling nybble is correct.
+	var hashes []plumbing.Hash
+	for _, h := range candidates {
+		if strings.HasPrefix(h.String(), hashStr) {
+			hashes = append(hashes, h)
+		}
+	}
+	return hashes
+}
+
 type RepackConfig struct {
 	// UseRefDeltas configures whether packfile encoder will use reference deltas.
 	// By default OFSDeltaObject is used.
@@ -1659,3 +1699,31 @@ func (r *Repository) createNewObjectPack(cfg *RepackConfig) (h plumbing.Hash, er
 
 	return h, err
 }
+
+func expandPartialHash(st storer.EncodedObjectStorer, prefix []byte) (hashes []plumbing.Hash) {
+	// The fast version is implemented by storage/filesystem.ObjectStorage.
+	type fastIter interface {
+		HashesWithPrefix(prefix []byte) ([]plumbing.Hash, error)
+	}
+	if fi, ok := st.(fastIter); ok {
+		h, err := fi.HashesWithPrefix(prefix)
+		if err != nil {
+			return nil
+		}
+		return h
+	}
+
+	// Slow path.
+	iter, err := st.IterEncodedObjects(plumbing.AnyObject)
+	if err != nil {
+		return nil
+	}
+	iter.ForEach(func(obj plumbing.EncodedObject) error {
+		h := obj.Hash()
+		if bytes.HasPrefix(h[:], prefix) {
+			hashes = append(hashes, h)
+		}
+		return nil
+	})
+	return
+}
diff --git a/repository_test.go b/repository_test.go
index bbed604..112561e 100644
--- a/repository_test.go
+++ b/repository_test.go
@@ -2642,6 +2642,7 @@ func (s *RepositorySuite) TestResolveRevision(c *C) {
 		"v1.0.0~1":                   "918c48b83bd081e863dbe1b80f8998f058cd8294",
 		"master~1":                   "918c48b83bd081e863dbe1b80f8998f058cd8294",
 		"918c48b83bd081e863dbe1b80f8998f058cd8294": "918c48b83bd081e863dbe1b80f8998f058cd8294",
+		"918c48b": "918c48b83bd081e863dbe1b80f8998f058cd8294", // odd number of hex digits
 	}
 
 	for rev, hash := range datas {
diff --git a/storage/filesystem/dotgit/dotgit.go b/storage/filesystem/dotgit/dotgit.go
index 3840ea7..6c386f7 100644
--- a/storage/filesystem/dotgit/dotgit.go
+++ b/storage/filesystem/dotgit/dotgit.go
@@ -3,12 +3,14 @@ package dotgit
 
 import (
 	"bufio"
+	"bytes"
 	"errors"
 	"fmt"
 	"io"
 	stdioutil "io/ioutil"
 	"os"
 	"path/filepath"
+	"sort"
 	"strings"
 	"time"
 
@@ -88,7 +90,7 @@ type DotGit struct {
 	incomingChecked bool
 	incomingDirName string
 
-	objectList []plumbing.Hash
+	objectList []plumbing.Hash // sorted
 	objectMap  map[plumbing.Hash]struct{}
 	packList   []plumbing.Hash
 	packMap    map[plumbing.Hash]struct{}
@@ -336,6 +338,53 @@ func (d *DotGit) NewObject() (*ObjectWriter, error) {
 	return newObjectWriter(d.fs)
 }
 
+// ObjectsWithPrefix returns the hashes of objects that have the given prefix.
+func (d *DotGit) ObjectsWithPrefix(prefix []byte) ([]plumbing.Hash, error) {
+	// Handle edge cases.
+	if len(prefix) < 1 {
+		return d.Objects()
+	} else if len(prefix) > len(plumbing.ZeroHash) {
+		return nil, nil
+	}
+
+	if d.options.ExclusiveAccess {
+		err := d.genObjectList()
+		if err != nil {
+			return nil, err
+		}
+
+		// Rely on d.objectList being sorted.
+		// Figure out the half-open interval defined by the prefix.
+		first := sort.Search(len(d.objectList), func(i int) bool {
+			// Same as plumbing.HashSlice.Less.
+			return bytes.Compare(d.objectList[i][:], prefix) >= 0
+		})
+		lim := len(d.objectList)
+		if limPrefix, overflow := incBytes(prefix); !overflow {
+			lim = sort.Search(len(d.objectList), func(i int) bool {
+				// Same as plumbing.HashSlice.Less.
+				return bytes.Compare(d.objectList[i][:], limPrefix) >= 0
+			})
+		}
+		return d.objectList[first:lim], nil
+	}
+
+	// This is the slow path.
+	var objects []plumbing.Hash
+	var n int
+	err := d.ForEachObjectHash(func(hash plumbing.Hash) error {
+		n++
+		if bytes.HasPrefix(hash[:], prefix) {
+			objects = append(objects, hash)
+		}
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+	return objects, nil
+}
+
 // Objects returns a slice with the hashes of objects found under the
 // .git/objects/ directory.
 func (d *DotGit) Objects() ([]plumbing.Hash, error) {
@@ -427,12 +476,17 @@ func (d *DotGit) genObjectList() error {
 	}
 
 	d.objectMap = make(map[plumbing.Hash]struct{})
-	return d.forEachObjectHash(func(h plumbing.Hash) error {
+	populate := func(h plumbing.Hash) error {
 		d.objectList = append(d.objectList, h)
 		d.objectMap[h] = struct{}{}
 
 		return nil
-	})
+	}
+	if err := d.forEachObjectHash(populate); err != nil {
+		return err
+	}
+	plumbing.HashesSort(d.objectList)
+	return nil
 }
 
 func (d *DotGit) hasObject(h plumbing.Hash) error {
@@ -1115,3 +1169,20 @@ func isNum(b byte) bool {
 func isHexAlpha(b byte) bool {
 	return b >= 'a' && b <= 'f' || b >= 'A' && b <= 'F'
 }
+
+// incBytes increments a byte slice, which involves incrementing the
+// right-most byte, and following carry leftward.
+// It makes a copy so that the provided slice's underlying array is not modified.
+// If the overall operation overflows (e.g. incBytes(0xff, 0xff)), the second return parameter indicates that.
+func incBytes(in []byte) (out []byte, overflow bool) {
+	out = make([]byte, len(in))
+	copy(out, in)
+	for i := len(out) - 1; i >= 0; i-- {
+		out[i]++
+		if out[i] != 0 {
+			return // Didn't overflow.
+		}
+	}
+	overflow = true
+	return
+}
diff --git a/storage/filesystem/dotgit/dotgit_test.go b/storage/filesystem/dotgit/dotgit_test.go
index 0a72aa6..237605f 100644
--- a/storage/filesystem/dotgit/dotgit_test.go
+++ b/storage/filesystem/dotgit/dotgit_test.go
@@ -2,6 +2,7 @@ package dotgit
 
 import (
 	"bufio"
+	"encoding/hex"
 	"io/ioutil"
 	"os"
 	"path/filepath"
@@ -591,6 +592,7 @@ func (s *SuiteDotGit) TestObjects(c *C) {
 	dir := New(fs)
 
 	testObjects(c, fs, dir)
+	testObjectsWithPrefix(c, fs, dir)
 }
 
 func (s *SuiteDotGit) TestObjectsExclusive(c *C) {
@@ -598,6 +600,7 @@ func (s *SuiteDotGit) TestObjectsExclusive(c *C) {
 	dir := NewWithOptions(fs, Options{ExclusiveAccess: true})
 
 	testObjects(c, fs, dir)
+	testObjectsWithPrefix(c, fs, dir)
 }
 
 func testObjects(c *C, fs billy.Filesystem, dir *DotGit) {
@@ -609,6 +612,20 @@ func testObjects(c *C, fs billy.Filesystem, dir *DotGit) {
 	c.Assert(hashes[2].String(), Equals, "03db8e1fbe133a480f2867aac478fd866686d69e")
 }
 
+func testObjectsWithPrefix(c *C, fs billy.Filesystem, dir *DotGit) {
+	prefix, _ := hex.DecodeString("01d5")
+	hashes, err := dir.ObjectsWithPrefix(prefix)
+	c.Assert(err, IsNil)
+	c.Assert(hashes, HasLen, 1)
+	c.Assert(hashes[0].String(), Equals, "01d5fa556c33743006de7e76e67a2dfcd994ca04")
+
+	// Empty prefix should yield all objects.
+	// (subset of testObjects)
+	hashes, err = dir.ObjectsWithPrefix(nil)
+	c.Assert(err, IsNil)
+	c.Assert(hashes, HasLen, 187)
+}
+
 func (s *SuiteDotGit) TestObjectsNoFolder(c *C) {
 	tmp, err := ioutil.TempDir("", "dot-git")
 	c.Assert(err, IsNil)
@@ -835,3 +852,21 @@ type norwfs struct {
 func (f *norwfs) Capabilities() billy.Capability {
 	return billy.Capabilities(f.Filesystem) &^ billy.ReadAndWriteCapability
 }
+
+func (s *SuiteDotGit) TestIncBytes(c *C) {
+	tests := []struct {
+		in       []byte
+		out      []byte
+		overflow bool
+	}{
+		{[]byte{0}, []byte{1}, false},
+		{[]byte{0xff}, []byte{0}, true},
+		{[]byte{7, 0xff}, []byte{8, 0}, false},
+		{[]byte{0xff, 0xff}, []byte{0, 0}, true},
+	}
+	for _, test := range tests {
+		out, overflow := incBytes(test.in)
+		c.Assert(out, DeepEquals, test.out)
+		c.Assert(overflow, Equals, test.overflow)
+	}
+}
diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go
index 7437174..0c25dad 100644
--- a/storage/filesystem/object.go
+++ b/storage/filesystem/object.go
@@ -1,6 +1,7 @@
 package filesystem
 
 import (
+	"bytes"
 	"io"
 	"os"
 	"time"
@@ -518,6 +519,36 @@ func (s *ObjectStorage) findObjectInPackfile(h plumbing.Hash) (plumbing.Hash, pl
 	return plumbing.ZeroHash, plumbing.ZeroHash, -1
 }
 
+func (s *ObjectStorage) HashesWithPrefix(prefix []byte) ([]plumbing.Hash, error) {
+	hashes, err := s.dir.ObjectsWithPrefix(prefix)
+	if err != nil {
+		return nil, err
+	}
+
+	// TODO: This could be faster with some idxfile changes,
+	// or diving into the packfile.
+	for _, index := range s.index {
+		ei, err := index.Entries()
+		if err != nil {
+			return nil, err
+		}
+		for {
+			e, err := ei.Next()
+			if err == io.EOF {
+				break
+			} else if err != nil {
+				return nil, err
+			}
+			if bytes.HasPrefix(e.Hash[:], prefix) {
+				hashes = append(hashes, e.Hash)
+			}
+		}
+		ei.Close()
+	}
+
+	return hashes, nil
+}
+
 // IterEncodedObjects returns an iterator for all the objects in the packfile
 // with the given type.
 func (s *ObjectStorage) IterEncodedObjects(t plumbing.ObjectType) (storer.EncodedObjectIter, error) {
diff --git a/storage/filesystem/object_test.go b/storage/filesystem/object_test.go
index 2d6f568..036420f 100644
--- a/storage/filesystem/object_test.go
+++ b/storage/filesystem/object_test.go
@@ -1,6 +1,7 @@
 package filesystem
 
 import (
+	"encoding/hex"
 	"fmt"
 	"io"
 	"io/ioutil"
@@ -332,6 +333,22 @@ func (s *FsSuite) TestGetFromObjectFileSharedCache(c *C) {
 	c.Assert(err, Equals, plumbing.ErrObjectNotFound)
 }
 
+func (s *FsSuite) TestHashesWithPrefix(c *C) {
+	// Same setup as TestGetFromObjectFile.
+	fs := fixtures.ByTag(".git").ByTag("unpacked").One().DotGit()
+	o := NewObjectStorage(dotgit.New(fs), cache.NewObjectLRUDefault())
+	expected := plumbing.NewHash("f3dfe29d268303fc6e1bbce268605fc99573406e")
+	obj, err := o.EncodedObject(plumbing.AnyObject, expected)
+	c.Assert(err, IsNil)
+	c.Assert(obj.Hash(), Equals, expected)
+
+	prefix, _ := hex.DecodeString("f3dfe2")
+	hashes, err := o.HashesWithPrefix(prefix)
+	c.Assert(err, IsNil)
+	c.Assert(hashes, HasLen, 1)
+	c.Assert(hashes[0].String(), Equals, "f3dfe29d268303fc6e1bbce268605fc99573406e")
+}
+
 func BenchmarkPackfileIter(b *testing.B) {
 	defer fixtures.Clean()
author	Máximo Cuadros <mcuadros@gmail.com>	2020-07-16 21:28:40 +0200
committer	GitHub <noreply@github.com>	2020-07-16 21:28:40 +0200
commit	7906922c319b6077e8e66b70711073fb59a08a20 (patch)
tree	5785565db48069ff8f173b9dddcbb1b4c1f7e8ff
parent	41758ec4b81c557092d7566c3eed46f89c1ec3cc (diff)
parent	e28d9c90aad624596d20d1a59c8371d81b69190b (diff)
download	go-git-7906922c319b6077e8e66b70711073fb59a08a20.tar.gz