diff options
author | zeripath <art27@cantab.net> | 2021-06-30 09:25:19 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-06-30 10:25:19 +0200 |
commit | b4368b2a2ca4103b1ff4e37c34a963127342747e (patch) | |
tree | 5a3616045c4be8e7d64706017cf4380f6937ad32 /plumbing/format/packfile/packfile.go | |
parent | da810275bf682d29a530ed819aff175f47bd7634 (diff) | |
download | go-git-b4368b2a2ca4103b1ff4e37c34a963127342747e.tar.gz |
plumbing: format/packfile, prevent large objects from being read into memory completely (#330)
This PR adds code to prevent large objects from being read into memory
from packfiles or the filesystem.
Objects greater than 1Mb are now no longer directly stored in the cache
or read completely into memory.
This PR differs and improves the previous broken #323 by fixing several
bugs in the reader and transparently wrapping ReaderAt as a Reader.
Signed-off-by: Andrew Thornton <art27@cantab.net>
Diffstat (limited to 'plumbing/format/packfile/packfile.go')
-rw-r--r-- | plumbing/format/packfile/packfile.go | 92 |
1 files changed, 85 insertions, 7 deletions
diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index ddd7f62..8dd6041 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -2,6 +2,8 @@ package packfile import ( "bytes" + "compress/zlib" + "fmt" "io" "os" @@ -35,11 +37,12 @@ const smallObjectThreshold = 16 * 1024 // Packfile allows retrieving information from inside a packfile. type Packfile struct { idxfile.Index - fs billy.Filesystem - file billy.File - s *Scanner - deltaBaseCache cache.Object - offsetToType map[int64]plumbing.ObjectType + fs billy.Filesystem + file billy.File + s *Scanner + deltaBaseCache cache.Object + offsetToType map[int64]plumbing.ObjectType + largeObjectThreshold int64 } // NewPackfileWithCache creates a new Packfile with the given object cache. @@ -50,6 +53,7 @@ func NewPackfileWithCache( fs billy.Filesystem, file billy.File, cache cache.Object, + largeObjectThreshold int64, ) *Packfile { s := NewScanner(file) return &Packfile{ @@ -59,6 +63,7 @@ func NewPackfileWithCache( s, cache, make(map[int64]plumbing.ObjectType), + largeObjectThreshold, } } @@ -66,8 +71,8 @@ func NewPackfileWithCache( // and packfile idx. // If the filesystem is provided, the packfile will return FSObjects, otherwise // it will return MemoryObjects. -func NewPackfile(index idxfile.Index, fs billy.Filesystem, file billy.File) *Packfile { - return NewPackfileWithCache(index, fs, file, cache.NewObjectLRUDefault()) +func NewPackfile(index idxfile.Index, fs billy.Filesystem, file billy.File, largeObjectThreshold int64) *Packfile { + return NewPackfileWithCache(index, fs, file, cache.NewObjectLRUDefault(), largeObjectThreshold) } // Get retrieves the encoded object in the packfile with the given hash. @@ -263,6 +268,7 @@ func (p *Packfile) getNextObject(h *ObjectHeader, hash plumbing.Hash) (plumbing. p.fs, p.file.Name(), p.deltaBaseCache, + p.largeObjectThreshold, ), nil } @@ -282,6 +288,50 @@ func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { return obj.Reader() } +func asyncReader(p *Packfile) (io.ReadCloser, error) { + reader := ioutil.NewReaderUsingReaderAt(p.file, p.s.r.offset) + zr := zlibReaderPool.Get().(io.ReadCloser) + + if err := zr.(zlib.Resetter).Reset(reader, nil); err != nil { + return nil, fmt.Errorf("zlib reset error: %s", err) + } + + return ioutil.NewReadCloserWithCloser(zr, func() error { + zlibReaderPool.Put(zr) + return nil + }), nil + +} + +func (p *Packfile) getReaderDirect(h *ObjectHeader) (io.ReadCloser, error) { + switch h.Type { + case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: + return asyncReader(p) + case plumbing.REFDeltaObject: + deltaRc, err := asyncReader(p) + if err != nil { + return nil, err + } + r, err := p.readREFDeltaObjectContent(h, deltaRc) + if err != nil { + return nil, err + } + return r, nil + case plumbing.OFSDeltaObject: + deltaRc, err := asyncReader(p) + if err != nil { + return nil, err + } + r, err := p.readOFSDeltaObjectContent(h, deltaRc) + if err != nil { + return nil, err + } + return r, nil + default: + return nil, ErrInvalidObject.AddDetails("type %q", h.Type) + } +} + func (p *Packfile) getNextMemoryObject(h *ObjectHeader) (plumbing.EncodedObject, error) { var obj = new(plumbing.MemoryObject) obj.SetSize(h.Length) @@ -334,6 +384,20 @@ func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plu return p.fillREFDeltaObjectContentWithBuffer(obj, ref, buf) } +func (p *Packfile) readREFDeltaObjectContent(h *ObjectHeader, deltaRC io.Reader) (io.ReadCloser, error) { + var err error + + base, ok := p.cacheGet(h.Reference) + if !ok { + base, err = p.Get(h.Reference) + if err != nil { + return nil, err + } + } + + return ReaderFromDelta(base, deltaRC) +} + func (p *Packfile) fillREFDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, ref plumbing.Hash, buf *bytes.Buffer) error { var err error @@ -364,6 +428,20 @@ func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset return p.fillOFSDeltaObjectContentWithBuffer(obj, offset, buf) } +func (p *Packfile) readOFSDeltaObjectContent(h *ObjectHeader, deltaRC io.Reader) (io.ReadCloser, error) { + hash, err := p.FindHash(h.OffsetReference) + if err != nil { + return nil, err + } + + base, err := p.objectAtOffset(h.OffsetReference, hash) + if err != nil { + return nil, err + } + + return ReaderFromDelta(base, deltaRC) +} + func (p *Packfile) fillOFSDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, offset int64, buf *bytes.Buffer) error { hash, err := p.FindHash(offset) if err != nil { |