aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorzeripath <art27@cantab.net>2021-05-12 21:42:07 +0100
committerGitHub <noreply@github.com>2021-05-12 22:42:07 +0200
commit720c192831a890d0a36b4c6720b60411fa4a0159 (patch)
tree9f3020c074fe9b113e22d6f2c601e8a36bf0ac49
parente6e23391e4d044cc85e09b4420a2533715e7312d (diff)
downloadgo-git-720c192831a890d0a36b4c6720b60411fa4a0159.tar.gz
plumbing: format/packfile, prevent large objects from being read into memory completely (#303)v5.4.0
This PR adds code to prevent large objects from being read into memory from packfiles or the filesystem. Objects greater than 1Mb are now no longer directly stored in the cache or read completely into memory. Signed-off-by: Andrew Thornton <art27@cantab.net>
-rw-r--r--plumbing/format/packfile/fsobject.go15
-rw-r--r--plumbing/format/packfile/packfile.go73
-rw-r--r--plumbing/format/packfile/patch_delta.go210
-rw-r--r--plumbing/format/packfile/scanner.go15
-rw-r--r--storage/filesystem/dotgit/reader.go79
-rw-r--r--storage/filesystem/object.go9
-rw-r--r--utils/ioutil/common.go22
7 files changed, 422 insertions, 1 deletions
diff --git a/plumbing/format/packfile/fsobject.go b/plumbing/format/packfile/fsobject.go
index c5edaf5..4aa3c8e 100644
--- a/plumbing/format/packfile/fsobject.go
+++ b/plumbing/format/packfile/fsobject.go
@@ -7,6 +7,7 @@ import (
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/plumbing/cache"
"github.com/go-git/go-git/v5/plumbing/format/idxfile"
+ "github.com/go-git/go-git/v5/utils/ioutil"
)
// FSObject is an object from the packfile on the filesystem.
@@ -63,6 +64,20 @@ func (o *FSObject) Reader() (io.ReadCloser, error) {
}
p := NewPackfileWithCache(o.index, nil, f, o.cache)
+ if o.size > LargeObjectThreshold {
+ // We have a big object
+ h, err := p.objectHeaderAtOffset(o.offset)
+ if err != nil {
+ return nil, err
+ }
+
+ r, err := p.getReaderDirect(h)
+ if err != nil {
+ _ = f.Close()
+ return nil, err
+ }
+ return ioutil.NewReadCloserWithCloser(r, f.Close), nil
+ }
r, err := p.getObjectContent(o.offset)
if err != nil {
_ = f.Close()
diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go
index ddd7f62..77861d2 100644
--- a/plumbing/format/packfile/packfile.go
+++ b/plumbing/format/packfile/packfile.go
@@ -32,6 +32,12 @@ var (
// wrapped in FSObject.
const smallObjectThreshold = 16 * 1024
+// Conversely there are large objects that should not be cached and kept
+// in memory as they're too large to be reasonably cached. Objects larger
+// than this threshold are now always never read into memory to be stored
+// in the cache
+const LargeObjectThreshold = 1024 * 1024
+
// Packfile allows retrieving information from inside a packfile.
type Packfile struct {
idxfile.Index
@@ -282,6 +288,37 @@ func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) {
return obj.Reader()
}
+func (p *Packfile) getReaderDirect(h *ObjectHeader) (io.ReadCloser, error) {
+ switch h.Type {
+ case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
+ return p.s.ReadObject()
+ case plumbing.REFDeltaObject:
+ deltaRC, err := p.s.ReadObject()
+ if err != nil {
+ return nil, err
+ }
+ r, err := p.readREFDeltaObjectContent(h, deltaRC)
+ if err != nil {
+ _ = deltaRC.Close()
+ return nil, err
+ }
+ return r, nil
+ case plumbing.OFSDeltaObject:
+ deltaRC, err := p.s.ReadObject()
+ if err != nil {
+ return nil, err
+ }
+ r, err := p.readOFSDeltaObjectContent(h, deltaRC)
+ if err != nil {
+ _ = deltaRC.Close()
+ return nil, err
+ }
+ return r, nil
+ default:
+ return nil, ErrInvalidObject.AddDetails("type %q", h.Type)
+ }
+}
+
func (p *Packfile) getNextMemoryObject(h *ObjectHeader) (plumbing.EncodedObject, error) {
var obj = new(plumbing.MemoryObject)
obj.SetSize(h.Length)
@@ -334,6 +371,20 @@ func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plu
return p.fillREFDeltaObjectContentWithBuffer(obj, ref, buf)
}
+func (p *Packfile) readREFDeltaObjectContent(h *ObjectHeader, deltaRC io.ReadCloser) (io.ReadCloser, error) {
+ var err error
+
+ base, ok := p.cacheGet(h.Reference)
+ if !ok {
+ base, err = p.Get(h.Reference)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ return ReaderFromDelta(h, base, deltaRC)
+}
+
func (p *Packfile) fillREFDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, ref plumbing.Hash, buf *bytes.Buffer) error {
var err error
@@ -364,6 +415,28 @@ func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset
return p.fillOFSDeltaObjectContentWithBuffer(obj, offset, buf)
}
+func (p *Packfile) readOFSDeltaObjectContent(h *ObjectHeader, deltaRC io.ReadCloser) (io.ReadCloser, error) {
+ hash, err := p.FindHash(h.OffsetReference)
+ if err != nil {
+ return nil, err
+ }
+
+ base, err := p.objectAtOffset(h.OffsetReference, hash)
+ if err != nil {
+ return nil, err
+ }
+
+ base, ok := p.cacheGet(h.Reference)
+ if !ok {
+ base, err = p.Get(h.Reference)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ return ReaderFromDelta(h, base, deltaRC)
+}
+
func (p *Packfile) fillOFSDeltaObjectContentWithBuffer(obj plumbing.EncodedObject, offset int64, buf *bytes.Buffer) error {
hash, err := p.FindHash(offset)
if err != nil {
diff --git a/plumbing/format/packfile/patch_delta.go b/plumbing/format/packfile/patch_delta.go
index 9e90f30..76fef4a 100644
--- a/plumbing/format/packfile/patch_delta.go
+++ b/plumbing/format/packfile/patch_delta.go
@@ -1,9 +1,11 @@
package packfile
import (
+ "bufio"
"bytes"
"errors"
"io"
+ "math"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/utils/ioutil"
@@ -73,6 +75,131 @@ func PatchDelta(src, delta []byte) ([]byte, error) {
return b.Bytes(), nil
}
+func ReaderFromDelta(h *ObjectHeader, base plumbing.EncodedObject, deltaRC io.ReadCloser) (io.ReadCloser, error) {
+ deltaBuf := bufio.NewReaderSize(deltaRC, 1024)
+ srcSz, err := decodeLEB128ByteReader(deltaBuf)
+ if err != nil {
+ if err == io.EOF {
+ return nil, ErrInvalidDelta
+ }
+ return nil, err
+ }
+ if srcSz != uint(base.Size()) {
+ return nil, ErrInvalidDelta
+ }
+
+ targetSz, err := decodeLEB128ByteReader(deltaBuf)
+ if err != nil {
+ if err == io.EOF {
+ return nil, ErrInvalidDelta
+ }
+ return nil, err
+ }
+ remainingTargetSz := targetSz
+
+ dstRd, dstWr := io.Pipe()
+
+ go func() {
+ baseRd, err := base.Reader()
+ if err != nil {
+ _ = dstWr.CloseWithError(ErrInvalidDelta)
+ return
+ }
+ defer baseRd.Close()
+
+ baseBuf := bufio.NewReader(baseRd)
+ basePos := uint(0)
+
+ for {
+ cmd, err := deltaBuf.ReadByte()
+ if err == io.EOF {
+ _ = dstWr.CloseWithError(ErrInvalidDelta)
+ return
+ }
+ if err != nil {
+ _ = dstWr.CloseWithError(err)
+ return
+ }
+
+ if isCopyFromSrc(cmd) {
+ offset, err := decodeOffsetByteReader(cmd, deltaBuf)
+ if err != nil {
+ _ = dstWr.CloseWithError(err)
+ return
+ }
+ sz, err := decodeSizeByteReader(cmd, deltaBuf)
+ if err != nil {
+ _ = dstWr.CloseWithError(err)
+ return
+ }
+
+ if invalidSize(sz, targetSz) ||
+ invalidOffsetSize(offset, sz, srcSz) {
+ _ = dstWr.Close()
+ return
+ }
+
+ discard := offset - basePos
+ if discard < 0 {
+ _ = baseRd.Close()
+ baseRd, err = base.Reader()
+ if err != nil {
+ _ = dstWr.CloseWithError(ErrInvalidDelta)
+ return
+ }
+ baseBuf.Reset(baseRd)
+ discard = offset
+ }
+ for discard > math.MaxInt32 {
+ n, err := baseBuf.Discard(math.MaxInt32)
+ if err != nil {
+ _ = dstWr.CloseWithError(err)
+ return
+ }
+ basePos += uint(n)
+ discard -= uint(n)
+ }
+ for discard > 0 {
+ n, err := baseBuf.Discard(int(discard))
+ if err != nil {
+ _ = dstWr.CloseWithError(err)
+ return
+ }
+ basePos += uint(n)
+ discard -= uint(n)
+ }
+ if _, err := io.Copy(dstWr, io.LimitReader(baseBuf, int64(sz))); err != nil {
+ _ = dstWr.CloseWithError(err)
+ return
+ }
+ remainingTargetSz -= sz
+ basePos += sz
+ } else if isCopyFromDelta(cmd) {
+ sz := uint(cmd) // cmd is the size itself
+ if invalidSize(sz, targetSz) {
+ _ = dstWr.CloseWithError(ErrInvalidDelta)
+ return
+ }
+ if _, err := io.Copy(dstWr, io.LimitReader(deltaBuf, int64(sz))); err != nil {
+ _ = dstWr.CloseWithError(err)
+ return
+ }
+
+ remainingTargetSz -= sz
+ } else {
+ _ = dstWr.CloseWithError(ErrDeltaCmd)
+ return
+ }
+ if remainingTargetSz <= 0 {
+ _ = dstWr.Close()
+ return
+ }
+ }
+ }()
+
+ return dstRd, nil
+}
+
func patchDelta(dst *bytes.Buffer, src, delta []byte) error {
if len(delta) < deltaSizeMin {
return ErrInvalidDelta
@@ -161,6 +288,25 @@ func decodeLEB128(input []byte) (uint, []byte) {
return num, input[sz:]
}
+func decodeLEB128ByteReader(input io.ByteReader) (uint, error) {
+ var num, sz uint
+ for {
+ b, err := input.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+
+ num |= (uint(b) & payload) << (sz * 7) // concats 7 bits chunks
+ sz++
+
+ if uint(b)&continuation == 0 {
+ break
+ }
+ }
+
+ return num, nil
+}
+
const (
payload = 0x7f // 0111 1111
continuation = 0x80 // 1000 0000
@@ -174,6 +320,40 @@ func isCopyFromDelta(cmd byte) bool {
return (cmd&0x80) == 0 && cmd != 0
}
+func decodeOffsetByteReader(cmd byte, delta io.ByteReader) (uint, error) {
+ var offset uint
+ if (cmd & 0x01) != 0 {
+ next, err := delta.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ offset = uint(next)
+ }
+ if (cmd & 0x02) != 0 {
+ next, err := delta.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ offset |= uint(next) << 8
+ }
+ if (cmd & 0x04) != 0 {
+ next, err := delta.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ offset |= uint(next) << 16
+ }
+ if (cmd & 0x08) != 0 {
+ next, err := delta.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ offset |= uint(next) << 24
+ }
+
+ return offset, nil
+}
+
func decodeOffset(cmd byte, delta []byte) (uint, []byte, error) {
var offset uint
if (cmd & 0x01) != 0 {
@@ -208,6 +388,36 @@ func decodeOffset(cmd byte, delta []byte) (uint, []byte, error) {
return offset, delta, nil
}
+func decodeSizeByteReader(cmd byte, delta io.ByteReader) (uint, error) {
+ var sz uint
+ if (cmd & 0x10) != 0 {
+ next, err := delta.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ sz = uint(next)
+ }
+ if (cmd & 0x20) != 0 {
+ next, err := delta.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ sz |= uint(next) << 8
+ }
+ if (cmd & 0x40) != 0 {
+ next, err := delta.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ sz |= uint(next) << 16
+ }
+ if sz == 0 {
+ sz = 0x10000
+ }
+
+ return sz, nil
+}
+
func decodeSize(cmd byte, delta []byte) (uint, []byte, error) {
var sz uint
if (cmd & 0x10) != 0 {
diff --git a/plumbing/format/packfile/scanner.go b/plumbing/format/packfile/scanner.go
index 6e6a687..5d9e8fb 100644
--- a/plumbing/format/packfile/scanner.go
+++ b/plumbing/format/packfile/scanner.go
@@ -320,6 +320,21 @@ func (s *Scanner) NextObject(w io.Writer) (written int64, crc32 uint32, err erro
return
}
+// ReadObject returns a reader for the object content and an error
+func (s *Scanner) ReadObject() (io.ReadCloser, error) {
+ s.pendingObject = nil
+ zr := zlibReaderPool.Get().(io.ReadCloser)
+
+ if err := zr.(zlib.Resetter).Reset(s.r, nil); err != nil {
+ return nil, fmt.Errorf("zlib reset error: %s", err)
+ }
+
+ return ioutil.NewReadCloserWithCloser(zr, func() error {
+ zlibReaderPool.Put(zr)
+ return nil
+ }), nil
+}
+
// ReadRegularObject reads and write a non-deltified object
// from it zlib stream in an object entry in the packfile.
func (s *Scanner) copyObject(w io.Writer) (n int64, err error) {
diff --git a/storage/filesystem/dotgit/reader.go b/storage/filesystem/dotgit/reader.go
new file mode 100644
index 0000000..a82ac94
--- /dev/null
+++ b/storage/filesystem/dotgit/reader.go
@@ -0,0 +1,79 @@
+package dotgit
+
+import (
+ "fmt"
+ "io"
+ "os"
+
+ "github.com/go-git/go-git/v5/plumbing"
+ "github.com/go-git/go-git/v5/plumbing/format/objfile"
+ "github.com/go-git/go-git/v5/utils/ioutil"
+)
+
+var _ (plumbing.EncodedObject) = &EncodedObject{}
+
+type EncodedObject struct {
+ dir *DotGit
+ h plumbing.Hash
+ t plumbing.ObjectType
+ sz int64
+}
+
+func (e *EncodedObject) Hash() plumbing.Hash {
+ return e.h
+}
+
+func (e *EncodedObject) Reader() (io.ReadCloser, error) {
+ f, err := e.dir.Object(e.h)
+ if err != nil {
+ if os.IsNotExist(err) {
+ return nil, plumbing.ErrObjectNotFound
+ }
+
+ return nil, err
+ }
+ r, err := objfile.NewReader(f)
+ if err != nil {
+ return nil, err
+ }
+
+ t, size, err := r.Header()
+ if err != nil {
+ _ = r.Close()
+ return nil, err
+ }
+ if t != e.t {
+ _ = r.Close()
+ return nil, objfile.ErrHeader
+ }
+ if size != e.sz {
+ _ = r.Close()
+ return nil, objfile.ErrHeader
+ }
+ return ioutil.NewReadCloserWithCloser(r, f.Close), nil
+}
+
+func (e *EncodedObject) SetType(plumbing.ObjectType) {}
+
+func (e *EncodedObject) Type() plumbing.ObjectType {
+ return e.t
+}
+
+func (e *EncodedObject) Size() int64 {
+ return e.sz
+}
+
+func (e *EncodedObject) SetSize(int64) {}
+
+func (e *EncodedObject) Writer() (io.WriteCloser, error) {
+ return nil, fmt.Errorf("Not supported")
+}
+
+func NewEncodedObject(dir *DotGit, h plumbing.Hash, t plumbing.ObjectType, size int64) *EncodedObject {
+ return &EncodedObject{
+ dir: dir,
+ h: h,
+ t: t,
+ sz: size,
+ }
+}
diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go
index 0c25dad..4862362 100644
--- a/storage/filesystem/object.go
+++ b/storage/filesystem/object.go
@@ -389,7 +389,6 @@ func (s *ObjectStorage) getFromUnpacked(h plumbing.Hash) (obj plumbing.EncodedOb
return cacheObj, nil
}
- obj = s.NewEncodedObject()
r, err := objfile.NewReader(f)
if err != nil {
return nil, err
@@ -402,6 +401,14 @@ func (s *ObjectStorage) getFromUnpacked(h plumbing.Hash) (obj plumbing.EncodedOb
return nil, err
}
+ if size > packfile.LargeObjectThreshold {
+ obj = dotgit.NewEncodedObject(s.dir, h, t, size)
+ s.objectCache.Put(obj)
+ return obj, nil
+ }
+
+ obj = s.NewEncodedObject()
+
obj.SetType(t)
obj.SetSize(size)
w, err := obj.Writer()
diff --git a/utils/ioutil/common.go b/utils/ioutil/common.go
index b52e85a..a0e79a2 100644
--- a/utils/ioutil/common.go
+++ b/utils/ioutil/common.go
@@ -55,6 +55,28 @@ func NewReadCloser(r io.Reader, c io.Closer) io.ReadCloser {
return &readCloser{Reader: r, closer: c}
}
+type readCloserCloser struct {
+ io.ReadCloser
+ closer func() error
+}
+
+func (r *readCloserCloser) Close() (err error) {
+ defer func() {
+ if err == nil {
+ err = r.closer()
+ return
+ }
+ _ = r.closer()
+ }()
+ return r.ReadCloser.Close()
+}
+
+// NewReadCloserWithCloser creates an `io.ReadCloser` with the given `io.ReaderCloser` and
+// `io.Closer` that ensures that the closer is closed on close
+func NewReadCloserWithCloser(r io.ReadCloser, c func() error) io.ReadCloser {
+ return &readCloserCloser{ReadCloser: r, closer: c}
+}
+
type writeCloser struct {
io.Writer
closer io.Closer