aboutsummaryrefslogtreecommitdiffstats
path: root/storage
diff options
context:
space:
mode:
authorzeripath <art27@cantab.net>2021-06-30 09:25:19 +0100
committerGitHub <noreply@github.com>2021-06-30 10:25:19 +0200
commitb4368b2a2ca4103b1ff4e37c34a963127342747e (patch)
tree5a3616045c4be8e7d64706017cf4380f6937ad32 /storage
parentda810275bf682d29a530ed819aff175f47bd7634 (diff)
downloadgo-git-b4368b2a2ca4103b1ff4e37c34a963127342747e.tar.gz
plumbing: format/packfile, prevent large objects from being read into memory completely (#330)
This PR adds code to prevent large objects from being read into memory from packfiles or the filesystem. Objects greater than 1Mb are now no longer directly stored in the cache or read completely into memory. This PR differs and improves the previous broken #323 by fixing several bugs in the reader and transparently wrapping ReaderAt as a Reader. Signed-off-by: Andrew Thornton <art27@cantab.net>
Diffstat (limited to 'storage')
-rw-r--r--storage/filesystem/dotgit/reader.go79
-rw-r--r--storage/filesystem/object.go21
-rw-r--r--storage/filesystem/object_test.go63
-rw-r--r--storage/filesystem/storage.go3
4 files changed, 156 insertions, 10 deletions
diff --git a/storage/filesystem/dotgit/reader.go b/storage/filesystem/dotgit/reader.go
new file mode 100644
index 0000000..a82ac94
--- /dev/null
+++ b/storage/filesystem/dotgit/reader.go
@@ -0,0 +1,79 @@
+package dotgit
+
+import (
+ "fmt"
+ "io"
+ "os"
+
+ "github.com/go-git/go-git/v5/plumbing"
+ "github.com/go-git/go-git/v5/plumbing/format/objfile"
+ "github.com/go-git/go-git/v5/utils/ioutil"
+)
+
+var _ (plumbing.EncodedObject) = &EncodedObject{}
+
+type EncodedObject struct {
+ dir *DotGit
+ h plumbing.Hash
+ t plumbing.ObjectType
+ sz int64
+}
+
+func (e *EncodedObject) Hash() plumbing.Hash {
+ return e.h
+}
+
+func (e *EncodedObject) Reader() (io.ReadCloser, error) {
+ f, err := e.dir.Object(e.h)
+ if err != nil {
+ if os.IsNotExist(err) {
+ return nil, plumbing.ErrObjectNotFound
+ }
+
+ return nil, err
+ }
+ r, err := objfile.NewReader(f)
+ if err != nil {
+ return nil, err
+ }
+
+ t, size, err := r.Header()
+ if err != nil {
+ _ = r.Close()
+ return nil, err
+ }
+ if t != e.t {
+ _ = r.Close()
+ return nil, objfile.ErrHeader
+ }
+ if size != e.sz {
+ _ = r.Close()
+ return nil, objfile.ErrHeader
+ }
+ return ioutil.NewReadCloserWithCloser(r, f.Close), nil
+}
+
+func (e *EncodedObject) SetType(plumbing.ObjectType) {}
+
+func (e *EncodedObject) Type() plumbing.ObjectType {
+ return e.t
+}
+
+func (e *EncodedObject) Size() int64 {
+ return e.sz
+}
+
+func (e *EncodedObject) SetSize(int64) {}
+
+func (e *EncodedObject) Writer() (io.WriteCloser, error) {
+ return nil, fmt.Errorf("Not supported")
+}
+
+func NewEncodedObject(dir *DotGit, h plumbing.Hash, t plumbing.ObjectType, size int64) *EncodedObject {
+ return &EncodedObject{
+ dir: dir,
+ h: h,
+ t: t,
+ sz: size,
+ }
+}
diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go
index 0c25dad..5c91bcd 100644
--- a/storage/filesystem/object.go
+++ b/storage/filesystem/object.go
@@ -204,9 +204,9 @@ func (s *ObjectStorage) packfile(idx idxfile.Index, pack plumbing.Hash) (*packfi
var p *packfile.Packfile
if s.objectCache != nil {
- p = packfile.NewPackfileWithCache(idx, s.dir.Fs(), f, s.objectCache)
+ p = packfile.NewPackfileWithCache(idx, s.dir.Fs(), f, s.objectCache, s.options.LargeObjectThreshold)
} else {
- p = packfile.NewPackfile(idx, s.dir.Fs(), f)
+ p = packfile.NewPackfile(idx, s.dir.Fs(), f, s.options.LargeObjectThreshold)
}
return p, s.storePackfileInCache(pack, p)
@@ -389,7 +389,6 @@ func (s *ObjectStorage) getFromUnpacked(h plumbing.Hash) (obj plumbing.EncodedOb
return cacheObj, nil
}
- obj = s.NewEncodedObject()
r, err := objfile.NewReader(f)
if err != nil {
return nil, err
@@ -402,6 +401,13 @@ func (s *ObjectStorage) getFromUnpacked(h plumbing.Hash) (obj plumbing.EncodedOb
return nil, err
}
+ if s.options.LargeObjectThreshold > 0 && size > s.options.LargeObjectThreshold {
+ obj = dotgit.NewEncodedObject(s.dir, h, t, size)
+ return obj, nil
+ }
+
+ obj = s.NewEncodedObject()
+
obj.SetType(t)
obj.SetSize(size)
w, err := obj.Writer()
@@ -595,6 +601,7 @@ func (s *ObjectStorage) buildPackfileIters(
return newPackfileIter(
s.dir.Fs(), pack, t, seen, s.index[h],
s.objectCache, s.options.KeepDescriptors,
+ s.options.LargeObjectThreshold,
)
},
}, nil
@@ -684,6 +691,7 @@ func NewPackfileIter(
idxFile billy.File,
t plumbing.ObjectType,
keepPack bool,
+ largeObjectThreshold int64,
) (storer.EncodedObjectIter, error) {
idx := idxfile.NewMemoryIndex()
if err := idxfile.NewDecoder(idxFile).Decode(idx); err != nil {
@@ -695,7 +703,7 @@ func NewPackfileIter(
}
seen := make(map[plumbing.Hash]struct{})
- return newPackfileIter(fs, f, t, seen, idx, nil, keepPack)
+ return newPackfileIter(fs, f, t, seen, idx, nil, keepPack, largeObjectThreshold)
}
func newPackfileIter(
@@ -706,12 +714,13 @@ func newPackfileIter(
index idxfile.Index,
cache cache.Object,
keepPack bool,
+ largeObjectThreshold int64,
) (storer.EncodedObjectIter, error) {
var p *packfile.Packfile
if cache != nil {
- p = packfile.NewPackfileWithCache(index, fs, f, cache)
+ p = packfile.NewPackfileWithCache(index, fs, f, cache, largeObjectThreshold)
} else {
- p = packfile.NewPackfile(index, fs, f)
+ p = packfile.NewPackfile(index, fs, f, largeObjectThreshold)
}
iter, err := p.GetByType(t)
diff --git a/storage/filesystem/object_test.go b/storage/filesystem/object_test.go
index 22f5b0c..59b40d3 100644
--- a/storage/filesystem/object_test.go
+++ b/storage/filesystem/object_test.go
@@ -107,6 +107,27 @@ func (s *FsSuite) TestGetFromPackfileMaxOpenDescriptors(c *C) {
c.Assert(err, IsNil)
}
+func (s *FsSuite) TestGetFromPackfileMaxOpenDescriptorsLargeObjectThreshold(c *C) {
+ fs := fixtures.ByTag(".git").ByTag("multi-packfile").One().DotGit()
+ o := NewObjectStorageWithOptions(dotgit.New(fs), cache.NewObjectLRUDefault(), Options{
+ MaxOpenDescriptors: 1,
+ LargeObjectThreshold: 1,
+ })
+
+ expected := plumbing.NewHash("8d45a34641d73851e01d3754320b33bb5be3c4d3")
+ obj, err := o.getFromPackfile(expected, false)
+ c.Assert(err, IsNil)
+ c.Assert(obj.Hash(), Equals, expected)
+
+ expected = plumbing.NewHash("e9cfa4c9ca160546efd7e8582ec77952a27b17db")
+ obj, err = o.getFromPackfile(expected, false)
+ c.Assert(err, IsNil)
+ c.Assert(obj.Hash(), Equals, expected)
+
+ err = o.Close()
+ c.Assert(err, IsNil)
+}
+
func (s *FsSuite) TestGetSizeOfObjectFile(c *C) {
fs := fixtures.ByTag(".git").ByTag("unpacked").One().DotGit()
o := NewObjectStorage(dotgit.New(fs), cache.NewObjectLRUDefault())
@@ -160,6 +181,21 @@ func (s *FsSuite) TestGetFromPackfileMultiplePackfiles(c *C) {
c.Assert(obj.Hash(), Equals, expected)
}
+func (s *FsSuite) TestGetFromPackfileMultiplePackfilesLargeObjectThreshold(c *C) {
+ fs := fixtures.ByTag(".git").ByTag("multi-packfile").One().DotGit()
+ o := NewObjectStorageWithOptions(dotgit.New(fs), cache.NewObjectLRUDefault(), Options{LargeObjectThreshold: 1})
+
+ expected := plumbing.NewHash("8d45a34641d73851e01d3754320b33bb5be3c4d3")
+ obj, err := o.getFromPackfile(expected, false)
+ c.Assert(err, IsNil)
+ c.Assert(obj.Hash(), Equals, expected)
+
+ expected = plumbing.NewHash("e9cfa4c9ca160546efd7e8582ec77952a27b17db")
+ obj, err = o.getFromPackfile(expected, false)
+ c.Assert(err, IsNil)
+ c.Assert(obj.Hash(), Equals, expected)
+}
+
func (s *FsSuite) TestIter(c *C) {
fixtures.ByTag(".git").ByTag("packfile").Test(c, func(f *fixtures.Fixture) {
fs := f.DotGit()
@@ -179,6 +215,25 @@ func (s *FsSuite) TestIter(c *C) {
})
}
+func (s *FsSuite) TestIterLargeObjectThreshold(c *C) {
+ fixtures.ByTag(".git").ByTag("packfile").Test(c, func(f *fixtures.Fixture) {
+ fs := f.DotGit()
+ o := NewObjectStorageWithOptions(dotgit.New(fs), cache.NewObjectLRUDefault(), Options{LargeObjectThreshold: 1})
+
+ iter, err := o.IterEncodedObjects(plumbing.AnyObject)
+ c.Assert(err, IsNil)
+
+ var count int32
+ err = iter.ForEach(func(o plumbing.EncodedObject) error {
+ count++
+ return nil
+ })
+
+ c.Assert(err, IsNil)
+ c.Assert(count, Equals, f.ObjectsCount)
+ })
+}
+
func (s *FsSuite) TestIterWithType(c *C) {
fixtures.ByTag(".git").Test(c, func(f *fixtures.Fixture) {
for _, t := range objectTypes {
@@ -215,7 +270,7 @@ func (s *FsSuite) TestPackfileIter(c *C) {
idxf, err := dg.ObjectPackIdx(h)
c.Assert(err, IsNil)
- iter, err := NewPackfileIter(fs, f, idxf, t, false)
+ iter, err := NewPackfileIter(fs, f, idxf, t, false, 0)
c.Assert(err, IsNil)
err = iter.ForEach(func(o plumbing.EncodedObject) error {
@@ -298,7 +353,7 @@ func (s *FsSuite) TestPackfileIterKeepDescriptors(c *C) {
idxf, err := dg.ObjectPackIdx(h)
c.Assert(err, IsNil)
- iter, err := NewPackfileIter(fs, f, idxf, t, true)
+ iter, err := NewPackfileIter(fs, f, idxf, t, true, 0)
c.Assert(err, IsNil)
err = iter.ForEach(func(o plumbing.EncodedObject) error {
@@ -377,7 +432,7 @@ func BenchmarkPackfileIter(b *testing.B) {
b.Fatal(err)
}
- iter, err := NewPackfileIter(fs, f, idxf, t, false)
+ iter, err := NewPackfileIter(fs, f, idxf, t, false, 0)
if err != nil {
b.Fatal(err)
}
@@ -425,7 +480,7 @@ func BenchmarkPackfileIterReadContent(b *testing.B) {
b.Fatal(err)
}
- iter, err := NewPackfileIter(fs, f, idxf, t, false)
+ iter, err := NewPackfileIter(fs, f, idxf, t, false, 0)
if err != nil {
b.Fatal(err)
}
diff --git a/storage/filesystem/storage.go b/storage/filesystem/storage.go
index 8b69b27..7e7a2c5 100644
--- a/storage/filesystem/storage.go
+++ b/storage/filesystem/storage.go
@@ -34,6 +34,9 @@ type Options struct {
// MaxOpenDescriptors is the max number of file descriptors to keep
// open. If KeepDescriptors is true, all file descriptors will remain open.
MaxOpenDescriptors int
+ // LargeObjectThreshold maximum object size (in bytes) that will be read in to memory.
+ // If left unset or set to 0 there is no limit
+ LargeObjectThreshold int64
}
// NewStorage returns a new Storage backed by a given `fs.Filesystem` and cache.