diff options
author | Máximo Cuadros <mcuadros@gmail.com> | 2018-10-16 10:11:39 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-10-16 10:11:39 +0200 |
commit | 41d6f2c31e68a9fdcbff4a3da8c40247f1293cc9 (patch) | |
tree | 655fed8324e58a6ba44ef45f1bb2e090abe26451 | |
parent | 8153e040f68da6002096ef177a11510f4fb06769 (diff) | |
parent | 6faf286b97ff2e13fbdaf2c6179f8aef36b4498c (diff) | |
download | go-git-41d6f2c31e68a9fdcbff4a3da8c40247f1293cc9.tar.gz |
Merge pull request #982 from keybase/strib/gh-KBFS-3474-object-sizes
tree: add a Size() method for getting plaintext size
mode | file | lines changed |
---|---|---|
-rw-r--r-- | plumbing/format/packfile/packfile.go | 18 |
-rw-r--r-- | plumbing/object/tree.go | 11 |
-rw-r--r-- | plumbing/object/tree_test.go | 6 |
-rw-r--r-- | plumbing/storer/object.go | 2 |
-rw-r--r-- | plumbing/storer/object_test.go | 10 |
-rw-r--r-- | storage/filesystem/object.go | 73 |
-rw-r--r-- | storage/filesystem/object_test.go | 38 |
-rw-r--r-- | storage/memory/storage.go | 10 |
8 files changed, 168 insertions, 0 deletions
diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 852a834..0d13066 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -90,6 +90,24 @@ func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) { return p.nextObject() } +// GetSizeByOffset retrieves the size of the encoded object from the +// packfile with the given offset. +func (p *Packfile) GetSizeByOffset(o int64) (size int64, err error) { + if _, err := p.s.SeekFromStart(o); err != nil { + if err == io.EOF || isInvalid(err) { + return 0, plumbing.ErrObjectNotFound + } + + return 0, err + } + + h, err := p.nextObjectHeader() + if err != nil { + return 0, err + } + return h.Length, nil +} + func (p *Packfile) nextObjectHeader() (*ObjectHeader, error) { h, err := p.s.NextObjectHeader() p.s.pendingObject = nil diff --git a/plumbing/object/tree.go b/plumbing/object/tree.go index c36a137..78d61a1 100644 --- a/plumbing/object/tree.go +++ b/plumbing/object/tree.go @@ -87,6 +87,17 @@ func (t *Tree) File(path string) (*File, error) { return NewFile(path, e.Mode, blob), nil } +// Size returns the plaintext size of an object, without reading it +// into memory. +func (t *Tree) Size(path string) (int64, error) { + e, err := t.FindEntry(path) + if err != nil { + return 0, ErrEntryNotFound + } + + return t.s.EncodedObjectSize(e.Hash) +} + // Tree returns the tree identified by the `path` argument. // The path is interpreted as relative to the tree receiver. 
func (t *Tree) Tree(path string) (*Tree, error) { diff --git a/plumbing/object/tree_test.go b/plumbing/object/tree_test.go index 7366421..889c63a 100644 --- a/plumbing/object/tree_test.go +++ b/plumbing/object/tree_test.go @@ -98,6 +98,12 @@ func (s *TreeSuite) TestFileFailsWithExistingTrees(c *C) { c.Assert(err, Equals, ErrFileNotFound) } +func (s *TreeSuite) TestSize(c *C) { + size, err := s.Tree.Size("LICENSE") + c.Assert(err, IsNil) + c.Assert(size, Equals, int64(1072)) +} + func (s *TreeSuite) TestFiles(c *C) { var count int err := s.Tree.Files().ForEach(func(f *File) error { diff --git a/plumbing/storer/object.go b/plumbing/storer/object.go index 92aa629..2ac9b09 100644 --- a/plumbing/storer/object.go +++ b/plumbing/storer/object.go @@ -40,6 +40,8 @@ type EncodedObjectStorer interface { // HasEncodedObject returns ErrObjNotFound if the object doesn't // exist. If the object does exist, it returns nil. HasEncodedObject(plumbing.Hash) error + // EncodedObjectSize returns the plaintext size of the encoded object. 
+ EncodedObjectSize(plumbing.Hash) (int64, error) } // DeltaObjectStorer is an EncodedObjectStorer that can return delta diff --git a/plumbing/storer/object_test.go b/plumbing/storer/object_test.go index 6b4fe0f..bc22f7b 100644 --- a/plumbing/storer/object_test.go +++ b/plumbing/storer/object_test.go @@ -141,6 +141,16 @@ func (o *MockObjectStorage) HasEncodedObject(h plumbing.Hash) error { return plumbing.ErrObjectNotFound } +func (o *MockObjectStorage) EncodedObjectSize(h plumbing.Hash) ( + size int64, err error) { + for _, o := range o.db { + if o.Hash() == h { + return o.Size(), nil + } + } + return 0, plumbing.ErrObjectNotFound +} + func (o *MockObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (plumbing.EncodedObject, error) { for _, o := range o.db { if o.Hash() == h { diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index 68bd140..6cd2d4c 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -160,6 +160,79 @@ func (s *ObjectStorage) HasEncodedObject(h plumbing.Hash) (err error) { return nil } +func (s *ObjectStorage) encodedObjectSizeFromUnpacked(h plumbing.Hash) ( + size int64, err error) { + f, err := s.dir.Object(h) + if err != nil { + if os.IsNotExist(err) { + return 0, plumbing.ErrObjectNotFound + } + + return 0, err + } + + r, err := objfile.NewReader(f) + if err != nil { + return 0, err + } + defer ioutil.CheckClose(r, &err) + + _, size, err = r.Header() + return size, err +} + +func (s *ObjectStorage) encodedObjectSizeFromPackfile(h plumbing.Hash) ( + size int64, err error) { + if err := s.requireIndex(); err != nil { + return 0, err + } + + pack, _, offset := s.findObjectInPackfile(h) + if offset == -1 { + return 0, plumbing.ErrObjectNotFound + } + + f, err := s.dir.ObjectPack(pack) + if err != nil { + return 0, err + } + defer ioutil.CheckClose(f, &err) + + idx := s.index[pack] + hash, err := idx.FindHash(offset) + if err == nil { + obj, ok := s.deltaBaseCache.Get(hash) + if ok { 
+ return obj.Size(), nil + } + } else if err != nil && err != plumbing.ErrObjectNotFound { + return 0, err + } + + var p *packfile.Packfile + if s.deltaBaseCache != nil { + p = packfile.NewPackfileWithCache(idx, s.dir.Fs(), f, s.deltaBaseCache) + } else { + p = packfile.NewPackfile(idx, s.dir.Fs(), f) + } + + return p.GetSizeByOffset(offset) +} + +// EncodedObjectSize returns the plaintext size of the given object, +// without actually reading the full object data from storage. +func (s *ObjectStorage) EncodedObjectSize(h plumbing.Hash) ( + size int64, err error) { + size, err = s.encodedObjectSizeFromUnpacked(h) + if err != nil && err != plumbing.ErrObjectNotFound { + return 0, err + } else if err == nil { + return size, nil + } + + return s.encodedObjectSizeFromPackfile(h) +} + // EncodedObject returns the object with the given hash, by searching for it in // the packfile and the git object directories. func (s *ObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (plumbing.EncodedObject, error) { diff --git a/storage/filesystem/object_test.go b/storage/filesystem/object_test.go index 407abf2..4e6bbfb 100644 --- a/storage/filesystem/object_test.go +++ b/storage/filesystem/object_test.go @@ -83,6 +83,44 @@ func (s *FsSuite) TestGetFromPackfileKeepDescriptors(c *C) { }) } +func (s *FsSuite) TestGetSizeOfObjectFile(c *C) { + fs := fixtures.ByTag(".git").ByTag("unpacked").One().DotGit() + o := NewObjectStorage(dotgit.New(fs), cache.NewObjectLRUDefault()) + + // Get the size of `tree_walker.go`. + expected := plumbing.NewHash("cbd81c47be12341eb1185b379d1c82675aeded6a") + size, err := o.EncodedObjectSize(expected) + c.Assert(err, IsNil) + c.Assert(size, Equals, int64(2412)) +} + +func (s *FsSuite) TestGetSizeFromPackfile(c *C) { + fixtures.Basic().ByTag(".git").Test(c, func(f *fixtures.Fixture) { + fs := f.DotGit() + o := NewObjectStorage(dotgit.New(fs), cache.NewObjectLRUDefault()) + + // Get the size of `binary.jpg`. 
+ expected := plumbing.NewHash("d5c0f4ab811897cadf03aec358ae60d21f91c50d") + size, err := o.EncodedObjectSize(expected) + c.Assert(err, IsNil) + c.Assert(size, Equals, int64(76110)) + }) +} + +func (s *FsSuite) TestGetSizeOfAllObjectFiles(c *C) { + fs := fixtures.ByTag(".git").One().DotGit() + o := NewObjectStorage(dotgit.New(fs), cache.NewObjectLRUDefault()) + + // Get the size of `tree_walker.go`. + err := o.ForEachObjectHash(func(h plumbing.Hash) error { + size, err := o.EncodedObjectSize(h) + c.Assert(err, IsNil) + c.Assert(size, Not(Equals), int64(0)) + return nil + }) + c.Assert(err, IsNil) +} + func (s *FsSuite) TestGetFromPackfileMultiplePackfiles(c *C) { fs := fixtures.ByTag(".git").ByTag("multi-packfile").One().DotGit() o := NewObjectStorage(dotgit.New(fs), cache.NewObjectLRUDefault()) diff --git a/storage/memory/storage.go b/storage/memory/storage.go index 2e32509..6e11742 100644 --- a/storage/memory/storage.go +++ b/storage/memory/storage.go @@ -122,6 +122,16 @@ func (o *ObjectStorage) HasEncodedObject(h plumbing.Hash) (err error) { return nil } +func (o *ObjectStorage) EncodedObjectSize(h plumbing.Hash) ( + size int64, err error) { + obj, ok := o.Objects[h] + if !ok { + return 0, plumbing.ErrObjectNotFound + } + + return obj.Size(), nil +} + func (o *ObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (plumbing.EncodedObject, error) { obj, ok := o.Objects[h] if !ok || (plumbing.AnyObject != t && obj.Type() != t) { |