aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAntonio Navarro Perez <antnavper@gmail.com>2017-01-12 08:46:32 +0100
committerMáximo Cuadros <mcuadros@gmail.com>2017-01-12 08:46:32 +0100
commitf9c7c8c2158140d75d4d5a2fa925fc35ad77be9b (patch)
treef0878e352cfa3f48cea2981b0202dbb494baada4
parent65d23b4620a76418dc4aeca83ce7a991a1945ef0 (diff)
downloadgo-git-f9c7c8c2158140d75d4d5a2fa925fc35ad77be9b.tar.gz
packfile/decoder: speed up packfile iterator when specific type (#200)
-rw-r--r--plumbing/format/packfile/decoder.go89
-rw-r--r--plumbing/format/packfile/decoder_test.go46
-rw-r--r--storage/filesystem/object.go38
-rw-r--r--storage/filesystem/object_test.go59
4 files changed, 204 insertions, 28 deletions
diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go
index 59a2f8d..70d41ef 100644
--- a/plumbing/format/packfile/decoder.go
+++ b/plumbing/format/packfile/decoder.go
@@ -59,6 +59,9 @@ type Decoder struct {
offsetToHash map[int64]plumbing.Hash
hashToOffset map[plumbing.Hash]int64
crcs map[plumbing.Hash]uint32
+
+ offsetToType map[int64]plumbing.ObjectType
+ decoderType plumbing.ObjectType
}
// NewDecoder returns a new Decoder that decodes a Packfile using the given
@@ -72,6 +75,22 @@ type Decoder struct {
// If the ObjectStorer implements storer.Transactioner, a transaction is created
// during the Decode execution, if something fails the Rollback is called
func NewDecoder(s *Scanner, o storer.EncodedObjectStorer) (*Decoder, error) {
+ return NewDecoderForType(s, o, plumbing.AnyObject)
+}
+
+// NewDecoderForType returns a new Decoder but in this case for a specific object type.
+// When an object is read using this Decoder instance and it is not of the same type of
+// the specified one, nil will be returned. This is intended to avoid the content
+// deserialization of all the objects
+func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer,
+ t plumbing.ObjectType) (*Decoder, error) {
+
+ if t == plumbing.OFSDeltaObject ||
+ t == plumbing.REFDeltaObject ||
+ t == plumbing.InvalidObject {
+ return nil, plumbing.ErrInvalidType
+ }
+
if !canResolveDeltas(s, o) {
return nil, ErrResolveDeltasNotSupported
}
@@ -83,6 +102,9 @@ func NewDecoder(s *Scanner, o storer.EncodedObjectStorer) (*Decoder, error) {
offsetToHash: make(map[int64]plumbing.Hash, 0),
hashToOffset: make(map[plumbing.Hash]int64, 0),
crcs: make(map[plumbing.Hash]uint32, 0),
+
+ offsetToType: make(map[int64]plumbing.ObjectType, 0),
+ decoderType: t,
}, nil
}
@@ -174,17 +196,82 @@ func (d *Decoder) decodeObjectsWithObjectStorerTx(count int) error {
// DecodeObject reads the next object from the scanner and returns it. This
// method can be used in replacement of the Decode method, to work in a
-// interative way
+// interactive way. If you created a new decoder instance using NewDecoderForType
+// constructor, if the object decoded is not equals to the specified one, nil will
+// be returned
func (d *Decoder) DecodeObject() (plumbing.EncodedObject, error) {
h, err := d.s.NextObjectHeader()
if err != nil {
return nil, err
}
+ if d.decoderType == plumbing.AnyObject {
+ return d.decodeByHeader(h)
+ }
+
+ return d.decodeIfSpecificType(h)
+}
+
+func (d *Decoder) decodeIfSpecificType(h *ObjectHeader) (plumbing.EncodedObject, error) {
+ var realType plumbing.ObjectType
+ var err error
+ switch h.Type {
+ case plumbing.OFSDeltaObject:
+ realType, err = d.ofsDeltaType(h.OffsetReference)
+ case plumbing.REFDeltaObject:
+ realType, err = d.refDeltaType(h.Reference)
+
+ // If a reference delta is not found, it means that it isn't of
+ // the type we are looking for, because we don't have any reference
+ // and it is not present into the object storer
+ if err == plumbing.ErrObjectNotFound {
+ return nil, nil
+ }
+ default:
+ realType = h.Type
+ }
+
+ if err != nil {
+ return nil, err
+ }
+
+ d.offsetToType[h.Offset] = realType
+
+ if d.decoderType == realType {
+ return d.decodeByHeader(h)
+ }
+
+ return nil, nil
+}
+
+func (d *Decoder) ofsDeltaType(offset int64) (plumbing.ObjectType, error) {
+ t, ok := d.offsetToType[offset]
+ if !ok {
+ return plumbing.InvalidObject, plumbing.ErrObjectNotFound
+ }
+
+ return t, nil
+}
+
+func (d *Decoder) refDeltaType(ref plumbing.Hash) (plumbing.ObjectType, error) {
+ if o, ok := d.hashToOffset[ref]; ok {
+ return d.ofsDeltaType(o)
+ }
+
+ obj, err := d.o.EncodedObject(plumbing.AnyObject, ref)
+ if err != nil {
+ return plumbing.InvalidObject, err
+ }
+
+ return obj.Type(), nil
+}
+
+func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error) {
obj := d.newObject()
obj.SetSize(h.Length)
obj.SetType(h.Type)
var crc uint32
+ var err error
switch h.Type {
case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
crc, err = d.fillRegularObjectContent(obj)
diff --git a/plumbing/format/packfile/decoder_test.go b/plumbing/format/packfile/decoder_test.go
index 10d3bc2..0ab5bc8 100644
--- a/plumbing/format/packfile/decoder_test.go
+++ b/plumbing/format/packfile/decoder_test.go
@@ -47,6 +47,52 @@ func (s *ReaderSuite) TestDecode(c *C) {
})
}
+func (s *ReaderSuite) TestDecodeByType(c *C) {
+ ts := []plumbing.ObjectType{
+ plumbing.CommitObject,
+ plumbing.TagObject,
+ plumbing.TreeObject,
+ plumbing.BlobObject,
+ }
+
+ fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) {
+ for _, t := range ts {
+ storage := memory.NewStorage()
+ scanner := packfile.NewScanner(f.Packfile())
+ d, err := packfile.NewDecoderForType(scanner, storage, t)
+ c.Assert(err, IsNil)
+ defer d.Close()
+
+ _, count, err := scanner.Header()
+ c.Assert(err, IsNil)
+
+ var i uint32
+ for i = 0; i < count; i++ {
+ obj, err := d.DecodeObject()
+ c.Assert(err, IsNil)
+
+ if obj != nil {
+ c.Assert(obj.Type(), Equals, t)
+ }
+ }
+ }
+ })
+}
+func (s *ReaderSuite) TestDecodeByTypeConstructor(c *C) {
+ f := fixtures.Basic().ByTag("packfile").One()
+ storage := memory.NewStorage()
+ scanner := packfile.NewScanner(f.Packfile())
+
+ _, err := packfile.NewDecoderForType(scanner, storage, plumbing.OFSDeltaObject)
+ c.Assert(err, Equals, plumbing.ErrInvalidType)
+
+ _, err = packfile.NewDecoderForType(scanner, storage, plumbing.REFDeltaObject)
+ c.Assert(err, Equals, plumbing.ErrInvalidType)
+
+ _, err = packfile.NewDecoderForType(scanner, storage, plumbing.InvalidObject)
+ c.Assert(err, Equals, plumbing.ErrInvalidType)
+}
+
func (s *ReaderSuite) TestDecodeMultipleTimes(c *C) {
f := fixtures.Basic().ByTag("packfile").One()
scanner := packfile.NewScanner(f.Packfile())
diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go
index e9b5bb7..1001032 100644
--- a/storage/filesystem/object.go
+++ b/storage/filesystem/object.go
@@ -271,6 +271,10 @@ type packfileIter struct {
total uint32
}
+func NewPackfileIter(f billy.File, t plumbing.ObjectType) (storer.EncodedObjectIter, error) {
+ return newPackfileIter(f, t, make(map[plumbing.Hash]bool))
+}
+
func newPackfileIter(f billy.File, t plumbing.ObjectType, seen map[plumbing.Hash]bool) (storer.EncodedObjectIter, error) {
s := packfile.NewScanner(f)
_, total, err := s.Header()
@@ -278,7 +282,7 @@ func newPackfileIter(f billy.File, t plumbing.ObjectType, seen map[plumbing.Hash
return nil, err
}
- d, err := packfile.NewDecoder(s, memory.NewStorage())
+ d, err := packfile.NewDecoderForType(s, memory.NewStorage(), t)
if err != nil {
return nil, err
}
@@ -294,25 +298,27 @@ func newPackfileIter(f billy.File, t plumbing.ObjectType, seen map[plumbing.Hash
}
func (iter *packfileIter) Next() (plumbing.EncodedObject, error) {
- if iter.position >= iter.total {
- return nil, io.EOF
- }
+ for {
+ if iter.position >= iter.total {
+ return nil, io.EOF
+ }
- obj, err := iter.d.DecodeObject()
- if err != nil {
- return nil, err
- }
+ obj, err := iter.d.DecodeObject()
+ if err != nil {
+ return nil, err
+ }
- iter.position++
- if iter.seen[obj.Hash()] {
- return iter.Next()
- }
+ iter.position++
+ if obj == nil {
+ continue
+ }
- if iter.t != plumbing.AnyObject && iter.t != obj.Type() {
- return iter.Next()
- }
+ if iter.seen[obj.Hash()] {
+ return iter.Next()
+ }
- return obj, nil
+ return obj, nil
+ }
}
// ForEach is never called since is used inside of a MultiObjectIterator
diff --git a/storage/filesystem/object_test.go b/storage/filesystem/object_test.go
index a3a5e68..6841ac4 100644
--- a/storage/filesystem/object_test.go
+++ b/storage/filesystem/object_test.go
@@ -10,9 +10,17 @@ import (
type FsSuite struct {
fixtures.Suite
+ Types []plumbing.ObjectType
}
-var _ = Suite(&FsSuite{})
+var _ = Suite(&FsSuite{
+ Types: []plumbing.ObjectType{
+ plumbing.CommitObject,
+ plumbing.TagObject,
+ plumbing.TreeObject,
+ plumbing.BlobObject,
+ },
+})
func (s *FsSuite) TestGetFromObjectFile(c *C) {
fs := fixtures.ByTag(".git").ByTag("unpacked").One().DotGit()
@@ -76,18 +84,47 @@ func (s *FsSuite) TestIter(c *C) {
func (s *FsSuite) TestIterWithType(c *C) {
fixtures.ByTag(".git").Test(c, func(f *fixtures.Fixture) {
- fs := f.DotGit()
- o, err := newObjectStorage(dotgit.New(fs))
- c.Assert(err, IsNil)
+ for _, t := range s.Types {
+ fs := f.DotGit()
+ o, err := newObjectStorage(dotgit.New(fs))
+ c.Assert(err, IsNil)
- iter, err := o.IterEncodedObjects(plumbing.CommitObject)
- c.Assert(err, IsNil)
+ iter, err := o.IterEncodedObjects(t)
+ c.Assert(err, IsNil)
- err = iter.ForEach(func(o plumbing.EncodedObject) error {
- c.Assert(o.Type(), Equals, plumbing.CommitObject)
- return nil
- })
+ err = iter.ForEach(func(o plumbing.EncodedObject) error {
+ c.Assert(o.Type(), Equals, t)
+ return nil
+ })
+
+ c.Assert(err, IsNil)
+ }
- c.Assert(err, IsNil)
})
}
+
+func (s *FsSuite) TestPackfileIter(c *C) {
+ fixtures.ByTag(".git").Test(c, func(f *fixtures.Fixture) {
+ fs := f.DotGit()
+ dg := dotgit.New(fs)
+
+ for _, t := range s.Types {
+ ph, err := dg.ObjectPacks()
+ c.Assert(err, IsNil)
+
+ for _, h := range ph {
+ f, err := dg.ObjectPack(h)
+ c.Assert(err, IsNil)
+ iter, err := NewPackfileIter(f, t)
+ c.Assert(err, IsNil)
+ err = iter.ForEach(func(o plumbing.EncodedObject) error {
+ c.Assert(o.Type(), Equals, t)
+ return nil
+ })
+
+ c.Assert(err, IsNil)
+ }
+ }
+ })
+
+}