From b4368b2a2ca4103b1ff4e37c34a963127342747e Mon Sep 17 00:00:00 2001 From: zeripath Date: Wed, 30 Jun 2021 09:25:19 +0100 Subject: plumbing: format/packfile, prevent large objects from being read into memory completely (#330) This PR adds code to prevent large objects from being read into memory from packfiles or the filesystem. Objects greater than 1 MB are now no longer directly stored in the cache or read completely into memory. This PR differs from and improves on the previous, broken #323 by fixing several bugs in the reader and transparently wrapping ReaderAt as a Reader. Signed-off-by: Andrew Thornton --- storage/filesystem/object_test.go | 63 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 4 deletions(-) (limited to 'storage/filesystem/object_test.go') diff --git a/storage/filesystem/object_test.go b/storage/filesystem/object_test.go index 22f5b0c..59b40d3 100644 --- a/storage/filesystem/object_test.go +++ b/storage/filesystem/object_test.go @@ -107,6 +107,27 @@ func (s *FsSuite) TestGetFromPackfileMaxOpenDescriptors(c *C) { c.Assert(err, IsNil) } +func (s *FsSuite) TestGetFromPackfileMaxOpenDescriptorsLargeObjectThreshold(c *C) { + fs := fixtures.ByTag(".git").ByTag("multi-packfile").One().DotGit() + o := NewObjectStorageWithOptions(dotgit.New(fs), cache.NewObjectLRUDefault(), Options{ + MaxOpenDescriptors: 1, + LargeObjectThreshold: 1, + }) + + expected := plumbing.NewHash("8d45a34641d73851e01d3754320b33bb5be3c4d3") + obj, err := o.getFromPackfile(expected, false) + c.Assert(err, IsNil) + c.Assert(obj.Hash(), Equals, expected) + + expected = plumbing.NewHash("e9cfa4c9ca160546efd7e8582ec77952a27b17db") + obj, err = o.getFromPackfile(expected, false) + c.Assert(err, IsNil) + c.Assert(obj.Hash(), Equals, expected) + + err = o.Close() + c.Assert(err, IsNil) +} + func (s *FsSuite) TestGetSizeOfObjectFile(c *C) { fs := fixtures.ByTag(".git").ByTag("unpacked").One().DotGit() o := NewObjectStorage(dotgit.New(fs), cache.NewObjectLRUDefault()) @@ 
-160,6 +181,21 @@ func (s *FsSuite) TestGetFromPackfileMultiplePackfiles(c *C) { c.Assert(obj.Hash(), Equals, expected) } +func (s *FsSuite) TestGetFromPackfileMultiplePackfilesLargeObjectThreshold(c *C) { + fs := fixtures.ByTag(".git").ByTag("multi-packfile").One().DotGit() + o := NewObjectStorageWithOptions(dotgit.New(fs), cache.NewObjectLRUDefault(), Options{LargeObjectThreshold: 1}) + + expected := plumbing.NewHash("8d45a34641d73851e01d3754320b33bb5be3c4d3") + obj, err := o.getFromPackfile(expected, false) + c.Assert(err, IsNil) + c.Assert(obj.Hash(), Equals, expected) + + expected = plumbing.NewHash("e9cfa4c9ca160546efd7e8582ec77952a27b17db") + obj, err = o.getFromPackfile(expected, false) + c.Assert(err, IsNil) + c.Assert(obj.Hash(), Equals, expected) +} + func (s *FsSuite) TestIter(c *C) { fixtures.ByTag(".git").ByTag("packfile").Test(c, func(f *fixtures.Fixture) { fs := f.DotGit() @@ -179,6 +215,25 @@ func (s *FsSuite) TestIter(c *C) { }) } +func (s *FsSuite) TestIterLargeObjectThreshold(c *C) { + fixtures.ByTag(".git").ByTag("packfile").Test(c, func(f *fixtures.Fixture) { + fs := f.DotGit() + o := NewObjectStorageWithOptions(dotgit.New(fs), cache.NewObjectLRUDefault(), Options{LargeObjectThreshold: 1}) + + iter, err := o.IterEncodedObjects(plumbing.AnyObject) + c.Assert(err, IsNil) + + var count int32 + err = iter.ForEach(func(o plumbing.EncodedObject) error { + count++ + return nil + }) + + c.Assert(err, IsNil) + c.Assert(count, Equals, f.ObjectsCount) + }) +} + func (s *FsSuite) TestIterWithType(c *C) { fixtures.ByTag(".git").Test(c, func(f *fixtures.Fixture) { for _, t := range objectTypes { @@ -215,7 +270,7 @@ func (s *FsSuite) TestPackfileIter(c *C) { idxf, err := dg.ObjectPackIdx(h) c.Assert(err, IsNil) - iter, err := NewPackfileIter(fs, f, idxf, t, false) + iter, err := NewPackfileIter(fs, f, idxf, t, false, 0) c.Assert(err, IsNil) err = iter.ForEach(func(o plumbing.EncodedObject) error { @@ -298,7 +353,7 @@ func (s *FsSuite) 
TestPackfileIterKeepDescriptors(c *C) { idxf, err := dg.ObjectPackIdx(h) c.Assert(err, IsNil) - iter, err := NewPackfileIter(fs, f, idxf, t, true) + iter, err := NewPackfileIter(fs, f, idxf, t, true, 0) c.Assert(err, IsNil) err = iter.ForEach(func(o plumbing.EncodedObject) error { @@ -377,7 +432,7 @@ func BenchmarkPackfileIter(b *testing.B) { b.Fatal(err) } - iter, err := NewPackfileIter(fs, f, idxf, t, false) + iter, err := NewPackfileIter(fs, f, idxf, t, false, 0) if err != nil { b.Fatal(err) } @@ -425,7 +480,7 @@ func BenchmarkPackfileIterReadContent(b *testing.B) { b.Fatal(err) } - iter, err := NewPackfileIter(fs, f, idxf, t, false) + iter, err := NewPackfileIter(fs, f, idxf, t, false, 0) if err != nil { b.Fatal(err) } -- cgit