diff options
author | zeripath <art27@cantab.net> | 2021-06-30 09:25:19 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-06-30 10:25:19 +0200 |
commit | b4368b2a2ca4103b1ff4e37c34a963127342747e (patch) | |
tree | 5a3616045c4be8e7d64706017cf4380f6937ad32 /storage | |
parent | da810275bf682d29a530ed819aff175f47bd7634 (diff) | |
download | go-git-b4368b2a2ca4103b1ff4e37c34a963127342747e.tar.gz |
plumbing: format/packfile, prevent large objects from being read into memory completely (#330)
This PR adds code to prevent large objects from being read into memory
from packfiles or the filesystem.
Objects greater than 1Mb are now no longer directly stored in the cache
or read completely into memory.
This PR differs and improves the previous broken #323 by fixing several
bugs in the reader and transparently wrapping ReaderAt as a Reader.
Signed-off-by: Andrew Thornton <art27@cantab.net>
Diffstat (limited to 'storage')
-rw-r--r-- | storage/filesystem/dotgit/reader.go | 79 | ||||
-rw-r--r-- | storage/filesystem/object.go | 21 | ||||
-rw-r--r-- | storage/filesystem/object_test.go | 63 | ||||
-rw-r--r-- | storage/filesystem/storage.go | 3 |
4 files changed, 156 insertions, 10 deletions
diff --git a/storage/filesystem/dotgit/reader.go b/storage/filesystem/dotgit/reader.go new file mode 100644 index 0000000..a82ac94 --- /dev/null +++ b/storage/filesystem/dotgit/reader.go @@ -0,0 +1,79 @@ +package dotgit + +import ( + "fmt" + "io" + "os" + + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/format/objfile" + "github.com/go-git/go-git/v5/utils/ioutil" +) + +var _ (plumbing.EncodedObject) = &EncodedObject{} + +type EncodedObject struct { + dir *DotGit + h plumbing.Hash + t plumbing.ObjectType + sz int64 +} + +func (e *EncodedObject) Hash() plumbing.Hash { + return e.h +} + +func (e *EncodedObject) Reader() (io.ReadCloser, error) { + f, err := e.dir.Object(e.h) + if err != nil { + if os.IsNotExist(err) { + return nil, plumbing.ErrObjectNotFound + } + + return nil, err + } + r, err := objfile.NewReader(f) + if err != nil { + return nil, err + } + + t, size, err := r.Header() + if err != nil { + _ = r.Close() + return nil, err + } + if t != e.t { + _ = r.Close() + return nil, objfile.ErrHeader + } + if size != e.sz { + _ = r.Close() + return nil, objfile.ErrHeader + } + return ioutil.NewReadCloserWithCloser(r, f.Close), nil +} + +func (e *EncodedObject) SetType(plumbing.ObjectType) {} + +func (e *EncodedObject) Type() plumbing.ObjectType { + return e.t +} + +func (e *EncodedObject) Size() int64 { + return e.sz +} + +func (e *EncodedObject) SetSize(int64) {} + +func (e *EncodedObject) Writer() (io.WriteCloser, error) { + return nil, fmt.Errorf("Not supported") +} + +func NewEncodedObject(dir *DotGit, h plumbing.Hash, t plumbing.ObjectType, size int64) *EncodedObject { + return &EncodedObject{ + dir: dir, + h: h, + t: t, + sz: size, + } +} diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index 0c25dad..5c91bcd 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -204,9 +204,9 @@ func (s *ObjectStorage) packfile(idx idxfile.Index, pack plumbing.Hash) (*packfi var p *packfile.Packfile if s.objectCache != nil { - p = packfile.NewPackfileWithCache(idx, s.dir.Fs(), f, s.objectCache) + p = packfile.NewPackfileWithCache(idx, s.dir.Fs(), f, s.objectCache, s.options.LargeObjectThreshold) } else { - p = packfile.NewPackfile(idx, s.dir.Fs(), f) + p = packfile.NewPackfile(idx, s.dir.Fs(), f, s.options.LargeObjectThreshold) } return p, s.storePackfileInCache(pack, p) @@ -389,7 +389,6 @@ func (s *ObjectStorage) getFromUnpacked(h plumbing.Hash) (obj plumbing.EncodedOb return cacheObj, nil } - obj = s.NewEncodedObject() r, err := objfile.NewReader(f) if err != nil { return nil, err @@ -402,6 +401,13 @@ func (s *ObjectStorage) getFromUnpacked(h plumbing.Hash) (obj plumbing.EncodedOb return nil, err } + if s.options.LargeObjectThreshold > 0 && size > s.options.LargeObjectThreshold { + obj = dotgit.NewEncodedObject(s.dir, h, t, size) + return obj, nil + } + + obj = s.NewEncodedObject() + obj.SetType(t) obj.SetSize(size) w, err := obj.Writer() @@ -595,6 +601,7 @@ func (s *ObjectStorage) buildPackfileIters( return newPackfileIter( s.dir.Fs(), pack, t, seen, s.index[h], s.objectCache, s.options.KeepDescriptors, + s.options.LargeObjectThreshold, ) }, }, nil @@ -684,6 +691,7 @@ func NewPackfileIter( idxFile billy.File, t plumbing.ObjectType, keepPack bool, + largeObjectThreshold int64, ) (storer.EncodedObjectIter, error) { idx := idxfile.NewMemoryIndex() if err := idxfile.NewDecoder(idxFile).Decode(idx); err != nil { @@ -695,7 +703,7 @@ func NewPackfileIter( } seen := make(map[plumbing.Hash]struct{}) - return newPackfileIter(fs, f, t, seen, idx, nil, keepPack) + return newPackfileIter(fs, f, t, seen, idx, nil, keepPack, largeObjectThreshold) } func newPackfileIter( @@ -706,12 +714,13 @@ func newPackfileIter( index idxfile.Index, cache cache.Object, keepPack bool, + largeObjectThreshold int64, ) (storer.EncodedObjectIter, error) { var p *packfile.Packfile if cache != nil { - p = packfile.NewPackfileWithCache(index, fs, f, cache) + p = packfile.NewPackfileWithCache(index, fs, f, cache, largeObjectThreshold) } else { - p = packfile.NewPackfile(index, fs, f) + p = packfile.NewPackfile(index, fs, f, largeObjectThreshold) } iter, err := p.GetByType(t) diff --git a/storage/filesystem/object_test.go b/storage/filesystem/object_test.go index 22f5b0c..59b40d3 100644 --- a/storage/filesystem/object_test.go +++ b/storage/filesystem/object_test.go @@ -107,6 +107,27 @@ func (s *FsSuite) TestGetFromPackfileMaxOpenDescriptors(c *C) { c.Assert(err, IsNil) } +func (s *FsSuite) TestGetFromPackfileMaxOpenDescriptorsLargeObjectThreshold(c *C) { + fs := fixtures.ByTag(".git").ByTag("multi-packfile").One().DotGit() + o := NewObjectStorageWithOptions(dotgit.New(fs), cache.NewObjectLRUDefault(), Options{ + MaxOpenDescriptors: 1, + LargeObjectThreshold: 1, + }) + + expected := plumbing.NewHash("8d45a34641d73851e01d3754320b33bb5be3c4d3") + obj, err := o.getFromPackfile(expected, false) + c.Assert(err, IsNil) + c.Assert(obj.Hash(), Equals, expected) + + expected = plumbing.NewHash("e9cfa4c9ca160546efd7e8582ec77952a27b17db") + obj, err = o.getFromPackfile(expected, false) + c.Assert(err, IsNil) + c.Assert(obj.Hash(), Equals, expected) + + err = o.Close() + c.Assert(err, IsNil) +} + func (s *FsSuite) TestGetSizeOfObjectFile(c *C) { fs := fixtures.ByTag(".git").ByTag("unpacked").One().DotGit() o := NewObjectStorage(dotgit.New(fs), cache.NewObjectLRUDefault()) @@ -160,6 +181,21 @@ func (s *FsSuite) TestGetFromPackfileMultiplePackfiles(c *C) { c.Assert(obj.Hash(), Equals, expected) } +func (s *FsSuite) TestGetFromPackfileMultiplePackfilesLargeObjectThreshold(c *C) { + fs := fixtures.ByTag(".git").ByTag("multi-packfile").One().DotGit() + o := NewObjectStorageWithOptions(dotgit.New(fs), cache.NewObjectLRUDefault(), Options{LargeObjectThreshold: 1}) + + expected := plumbing.NewHash("8d45a34641d73851e01d3754320b33bb5be3c4d3") + obj, err := o.getFromPackfile(expected, false) + c.Assert(err, IsNil) + c.Assert(obj.Hash(), Equals, expected) + + expected = plumbing.NewHash("e9cfa4c9ca160546efd7e8582ec77952a27b17db") + obj, err = o.getFromPackfile(expected, false) + c.Assert(err, IsNil) + c.Assert(obj.Hash(), Equals, expected) +} + func (s *FsSuite) TestIter(c *C) { fixtures.ByTag(".git").ByTag("packfile").Test(c, func(f *fixtures.Fixture) { fs := f.DotGit() @@ -179,6 +215,25 @@ func (s *FsSuite) TestIter(c *C) { }) } +func (s *FsSuite) TestIterLargeObjectThreshold(c *C) { + fixtures.ByTag(".git").ByTag("packfile").Test(c, func(f *fixtures.Fixture) { + fs := f.DotGit() + o := NewObjectStorageWithOptions(dotgit.New(fs), cache.NewObjectLRUDefault(), Options{LargeObjectThreshold: 1}) + + iter, err := o.IterEncodedObjects(plumbing.AnyObject) + c.Assert(err, IsNil) + + var count int32 + err = iter.ForEach(func(o plumbing.EncodedObject) error { + count++ + return nil + }) + + c.Assert(err, IsNil) + c.Assert(count, Equals, f.ObjectsCount) + }) +} + func (s *FsSuite) TestIterWithType(c *C) { fixtures.ByTag(".git").Test(c, func(f *fixtures.Fixture) { for _, t := range objectTypes { @@ -215,7 +270,7 @@ func (s *FsSuite) TestPackfileIter(c *C) { idxf, err := dg.ObjectPackIdx(h) c.Assert(err, IsNil) - iter, err := NewPackfileIter(fs, f, idxf, t, false) + iter, err := NewPackfileIter(fs, f, idxf, t, false, 0) c.Assert(err, IsNil) err = iter.ForEach(func(o plumbing.EncodedObject) error { @@ -298,7 +353,7 @@ func (s *FsSuite) TestPackfileIterKeepDescriptors(c *C) { idxf, err := dg.ObjectPackIdx(h) c.Assert(err, IsNil) - iter, err := NewPackfileIter(fs, f, idxf, t, true) + iter, err := NewPackfileIter(fs, f, idxf, t, true, 0) c.Assert(err, IsNil) err = iter.ForEach(func(o plumbing.EncodedObject) error { @@ -377,7 +432,7 @@ func BenchmarkPackfileIter(b *testing.B) { b.Fatal(err) } - iter, err := NewPackfileIter(fs, f, idxf, t, false) + iter, err := NewPackfileIter(fs, f, idxf, t, false, 0) if err != nil { b.Fatal(err) } @@ -425,7 +480,7 @@ func BenchmarkPackfileIterReadContent(b *testing.B) { b.Fatal(err) } - iter, err := NewPackfileIter(fs, f, idxf, t, false) + iter, err := NewPackfileIter(fs, f, idxf, t, false, 0) if err != nil { b.Fatal(err) } diff --git a/storage/filesystem/storage.go b/storage/filesystem/storage.go index 8b69b27..7e7a2c5 100644 --- a/storage/filesystem/storage.go +++ b/storage/filesystem/storage.go @@ -34,6 +34,9 @@ type Options struct { // MaxOpenDescriptors is the max number of file descriptors to keep // open. If KeepDescriptors is true, all file descriptors will remain open. MaxOpenDescriptors int + // LargeObjectThreshold maximum object size (in bytes) that will be read in to memory. + // If left unset or set to 0 there is no limit + LargeObjectThreshold int64 } // NewStorage returns a new Storage backed by a given `fs.Filesystem` and cache. |