From 87413ced43b02a41359ce7a1a07ab41aec6ee313 Mon Sep 17 00:00:00 2001 From: "Santiago M. Mola" Date: Tue, 25 Jul 2017 15:00:01 +0200 Subject: storage: reuse deltas from packfiles * plumbing: add DeltaObject interface for EncodedObjects that are deltas and hold additional information about them, such as the hash of the base object. * plumbing/storer: add DeltaObjectStorer interface for object storers that can return DeltaObject. Note that calls to EncodedObject will never return instances of DeltaObject. That requires explicit calls to DeltaObject. * storage/filesystem: implement DeltaObjectStorer interface. * plumbing/packfile: packfile encoder now supports reusing deltas that are already computed (e.g. from an existing packfile) if the storage implements DeltaObjectStorer. Reusing deltas boosts performance of packfile generation (e.g. on push). --- storage/filesystem/deltaobject.go | 37 +++++++++++++ storage/filesystem/object.go | 104 ++++++++++++++++++++++++++++++++++--- storage/filesystem/object_test.go | 4 +- storage/filesystem/storage_test.go | 9 ++++ 4 files changed, 145 insertions(+), 9 deletions(-) create mode 100644 storage/filesystem/deltaobject.go (limited to 'storage/filesystem') diff --git a/storage/filesystem/deltaobject.go b/storage/filesystem/deltaobject.go new file mode 100644 index 0000000..66cfb71 --- /dev/null +++ b/storage/filesystem/deltaobject.go @@ -0,0 +1,37 @@ +package filesystem + +import ( + "gopkg.in/src-d/go-git.v4/plumbing" +) + +type deltaObject struct { + plumbing.EncodedObject + base plumbing.Hash + hash plumbing.Hash + size int64 +} + +func newDeltaObject( + obj plumbing.EncodedObject, + hash plumbing.Hash, + base plumbing.Hash, + size int64) plumbing.DeltaObject { + return &deltaObject{ + EncodedObject: obj, + hash: hash, + base: base, + size: size, + } +} + +func (o *deltaObject) BaseHash() plumbing.Hash { + return o.base +} + +func (o *deltaObject) ActualSize() int64 { + return o.size +} + +func (o *deltaObject) ActualHash() plumbing.Hash { + return o.hash +} diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index 6dd910b..bc2b121 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -130,7 +130,27 @@ func (s *ObjectStorage) SetEncodedObject(o plumbing.EncodedObject) (plumbing.Has func (s *ObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (plumbing.EncodedObject, error) { obj, err := s.getFromUnpacked(h) if err == plumbing.ErrObjectNotFound { - obj, err = s.getFromPackfile(h) + obj, err = s.getFromPackfile(h, false) + } + + if err != nil { + return nil, err + } + + if plumbing.AnyObject != t && obj.Type() != t { + return nil, plumbing.ErrObjectNotFound + } + + return obj, nil +} + +// DeltaObject returns the object with the given hash, by searching for +// it in the packfile and the git object directories. +func (s *ObjectStorage) DeltaObject(t plumbing.ObjectType, + h plumbing.Hash) (plumbing.EncodedObject, error) { + obj, err := s.getFromUnpacked(h) + if err == plumbing.ErrObjectNotFound { + obj, err = s.getFromPackfile(h, true) } if err != nil { @@ -182,12 +202,14 @@ func (s *ObjectStorage) getFromUnpacked(h plumbing.Hash) (obj plumbing.EncodedOb // Get returns the object with the given hash, by searching for it in // the packfile. -func (s *ObjectStorage) getFromPackfile(h plumbing.Hash) (plumbing.EncodedObject, error) { +func (s *ObjectStorage) getFromPackfile(h plumbing.Hash, canBeDelta bool) ( + plumbing.EncodedObject, error) { + if err := s.requireIndex(); err != nil { return nil, err } - pack, offset := s.findObjectInPackfile(h) + pack, hash, offset := s.findObjectInPackfile(h) if offset == -1 { return nil, plumbing.ErrObjectNotFound } @@ -199,26 +221,94 @@ func (s *ObjectStorage) getFromPackfile(h plumbing.Hash) (plumbing.EncodedObject defer ioutil.CheckClose(f, &err) + idx := s.index[pack] + if canBeDelta { + return s.decodeDeltaObjectAt(f, idx, offset, hash) + } + + return s.decodeObjectAt(f, idx, offset) +} + +func (s *ObjectStorage) decodeObjectAt( + f billy.File, + idx *packfile.Index, + offset int64) (plumbing.EncodedObject, error) { + if _, err := f.Seek(0, io.SeekStart); err != nil { + return nil, err + } + p := packfile.NewScanner(f) + d, err := packfile.NewDecoder(p, memory.NewStorage()) if err != nil { return nil, err } + d.SetIndex(idx) d.DeltaBaseCache = s.DeltaBaseCache - d.SetIndex(s.index[pack]) obj, err := d.DecodeObjectAt(offset) return obj, err } -func (s *ObjectStorage) findObjectInPackfile(h plumbing.Hash) (plumbing.Hash, int64) { +func (s *ObjectStorage) decodeDeltaObjectAt( + f billy.File, + idx *packfile.Index, + offset int64, + hash plumbing.Hash) (plumbing.EncodedObject, error) { + if _, err := f.Seek(0, io.SeekStart); err != nil { + return nil, err + } + + p := packfile.NewScanner(f) + if _, err := p.SeekFromStart(offset); err != nil { + return nil, err + } + + header, err := p.NextObjectHeader() + if err != nil { + return nil, err + } + + var ( + base plumbing.Hash + ) + + switch header.Type { + case plumbing.REFDeltaObject: + base = header.Reference + case plumbing.OFSDeltaObject: + e, ok := idx.LookupOffset(uint64(header.OffsetReference)) + if !ok { + return nil, plumbing.ErrObjectNotFound + } + + base = e.Hash + default: + return s.decodeObjectAt(f, idx, offset) + } + + obj := &plumbing.MemoryObject{} + obj.SetType(header.Type) + w, err := obj.Writer() + if err != nil { + return nil, err + } + + if _, _, err := p.NextObject(w); err != nil { + return nil, err + } + + return newDeltaObject(obj, hash, base, header.Length), nil +} + +func (s *ObjectStorage) findObjectInPackfile(h plumbing.Hash) (plumbing.Hash, plumbing.Hash, int64) { for packfile, index := range s.index { if e, ok := index.LookupHash(h); ok { - return packfile, int64(e.Offset) + return packfile, e.Hash, int64(e.Offset) } } - return plumbing.ZeroHash, -1 + return plumbing.ZeroHash, plumbing.ZeroHash, -1 } // IterEncodedObjects returns an iterator for all the objects in the packfile diff --git a/storage/filesystem/object_test.go b/storage/filesystem/object_test.go index d741fa2..504bd45 100644 --- a/storage/filesystem/object_test.go +++ b/storage/filesystem/object_test.go @@ -52,12 +52,12 @@ func (s *FsSuite) TestGetFromPackfileMultiplePackfiles(c *C) { c.Assert(err, IsNil) expected := plumbing.NewHash("8d45a34641d73851e01d3754320b33bb5be3c4d3") - obj, err := o.getFromPackfile(expected) + obj, err := o.getFromPackfile(expected, false) c.Assert(err, IsNil) c.Assert(obj.Hash(), Equals, expected) expected = plumbing.NewHash("e9cfa4c9ca160546efd7e8582ec77952a27b17db") - obj, err = o.getFromPackfile(expected) + obj, err = o.getFromPackfile(expected, false) c.Assert(err, IsNil) c.Assert(obj.Hash(), Equals, expected) } diff --git a/storage/filesystem/storage_test.go b/storage/filesystem/storage_test.go index 22709f5..b165c5e 100644 --- a/storage/filesystem/storage_test.go +++ b/storage/filesystem/storage_test.go @@ -4,6 +4,7 @@ import ( "io/ioutil" "testing" + "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/storage/test" . "gopkg.in/check.v1" @@ -25,6 +26,14 @@ func (s *StorageSuite) SetUpTest(c *C) { storage, err := NewStorage(osfs.New(s.dir)) c.Assert(err, IsNil) + // ensure that right interfaces are implemented + var _ storer.EncodedObjectStorer = storage + var _ storer.IndexStorer = storage + var _ storer.ReferenceStorer = storage + var _ storer.ShallowStorer = storage + var _ storer.DeltaObjectStorer = storage + var _ storer.PackfileWriter = storage + s.BaseStorageSuite = test.NewBaseStorageSuite(storage) s.BaseStorageSuite.SetUpTest(c) } -- cgit