aboutsummaryrefslogtreecommitdiffstats
path: root/plumbing/format/packfile/patch_delta.go
diff options
context:
space:
mode:
authorzeripath <art27@cantab.net>2021-06-30 09:25:19 +0100
committerGitHub <noreply@github.com>2021-06-30 10:25:19 +0200
commitb4368b2a2ca4103b1ff4e37c34a963127342747e (patch)
tree5a3616045c4be8e7d64706017cf4380f6937ad32 /plumbing/format/packfile/patch_delta.go
parentda810275bf682d29a530ed819aff175f47bd7634 (diff)
downloadgo-git-b4368b2a2ca4103b1ff4e37c34a963127342747e.tar.gz
plumbing: format/packfile, prevent large objects from being read into memory completely (#330)
This PR adds code to prevent large objects from being read into memory from packfiles or the filesystem. Objects greater than 1Mb are now no longer directly stored in the cache or read completely into memory. This PR differs and improves the previous broken #323 by fixing several bugs in the reader and transparently wrapping ReaderAt as a Reader. Signed-off-by: Andrew Thornton <art27@cantab.net>
Diffstat (limited to 'plumbing/format/packfile/patch_delta.go')
-rw-r--r--plumbing/format/packfile/patch_delta.go210
1 files changed, 210 insertions, 0 deletions
diff --git a/plumbing/format/packfile/patch_delta.go b/plumbing/format/packfile/patch_delta.go
index 9e90f30..17da11e 100644
--- a/plumbing/format/packfile/patch_delta.go
+++ b/plumbing/format/packfile/patch_delta.go
@@ -1,9 +1,11 @@
package packfile
import (
+ "bufio"
"bytes"
"errors"
"io"
+ "math"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/utils/ioutil"
@@ -73,6 +75,131 @@ func PatchDelta(src, delta []byte) ([]byte, error) {
return b.Bytes(), nil
}
+func ReaderFromDelta(base plumbing.EncodedObject, deltaRC io.Reader) (io.ReadCloser, error) {
+ deltaBuf := bufio.NewReaderSize(deltaRC, 1024)
+ srcSz, err := decodeLEB128ByteReader(deltaBuf)
+ if err != nil {
+ if err == io.EOF {
+ return nil, ErrInvalidDelta
+ }
+ return nil, err
+ }
+ if srcSz != uint(base.Size()) {
+ return nil, ErrInvalidDelta
+ }
+
+ targetSz, err := decodeLEB128ByteReader(deltaBuf)
+ if err != nil {
+ if err == io.EOF {
+ return nil, ErrInvalidDelta
+ }
+ return nil, err
+ }
+ remainingTargetSz := targetSz
+
+ dstRd, dstWr := io.Pipe()
+
+ go func() {
+ baseRd, err := base.Reader()
+ if err != nil {
+ _ = dstWr.CloseWithError(ErrInvalidDelta)
+ return
+ }
+ defer baseRd.Close()
+
+ baseBuf := bufio.NewReader(baseRd)
+ basePos := uint(0)
+
+ for {
+ cmd, err := deltaBuf.ReadByte()
+ if err == io.EOF {
+ _ = dstWr.CloseWithError(ErrInvalidDelta)
+ return
+ }
+ if err != nil {
+ _ = dstWr.CloseWithError(err)
+ return
+ }
+
+ if isCopyFromSrc(cmd) {
+ offset, err := decodeOffsetByteReader(cmd, deltaBuf)
+ if err != nil {
+ _ = dstWr.CloseWithError(err)
+ return
+ }
+ sz, err := decodeSizeByteReader(cmd, deltaBuf)
+ if err != nil {
+ _ = dstWr.CloseWithError(err)
+ return
+ }
+
+ if invalidSize(sz, targetSz) ||
+ invalidOffsetSize(offset, sz, srcSz) {
+ _ = dstWr.Close()
+ return
+ }
+
+ discard := offset - basePos
+ if basePos > offset {
+ _ = baseRd.Close()
+ baseRd, err = base.Reader()
+ if err != nil {
+ _ = dstWr.CloseWithError(ErrInvalidDelta)
+ return
+ }
+ baseBuf.Reset(baseRd)
+ discard = offset
+ }
+ for discard > math.MaxInt32 {
+ n, err := baseBuf.Discard(math.MaxInt32)
+ if err != nil {
+ _ = dstWr.CloseWithError(err)
+ return
+ }
+ basePos += uint(n)
+ discard -= uint(n)
+ }
+ for discard > 0 {
+ n, err := baseBuf.Discard(int(discard))
+ if err != nil {
+ _ = dstWr.CloseWithError(err)
+ return
+ }
+ basePos += uint(n)
+ discard -= uint(n)
+ }
+ if _, err := io.Copy(dstWr, io.LimitReader(baseBuf, int64(sz))); err != nil {
+ _ = dstWr.CloseWithError(err)
+ return
+ }
+ remainingTargetSz -= sz
+ basePos += sz
+ } else if isCopyFromDelta(cmd) {
+ sz := uint(cmd) // cmd is the size itself
+ if invalidSize(sz, targetSz) {
+ _ = dstWr.CloseWithError(ErrInvalidDelta)
+ return
+ }
+ if _, err := io.Copy(dstWr, io.LimitReader(deltaBuf, int64(sz))); err != nil {
+ _ = dstWr.CloseWithError(err)
+ return
+ }
+
+ remainingTargetSz -= sz
+ } else {
+ _ = dstWr.CloseWithError(ErrDeltaCmd)
+ return
+ }
+ if remainingTargetSz <= 0 {
+ _ = dstWr.Close()
+ return
+ }
+ }
+ }()
+
+ return dstRd, nil
+}
+
func patchDelta(dst *bytes.Buffer, src, delta []byte) error {
if len(delta) < deltaSizeMin {
return ErrInvalidDelta
@@ -161,6 +288,25 @@ func decodeLEB128(input []byte) (uint, []byte) {
return num, input[sz:]
}
+func decodeLEB128ByteReader(input io.ByteReader) (uint, error) {
+ var num, sz uint
+ for {
+ b, err := input.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+
+ num |= (uint(b) & payload) << (sz * 7) // concats 7 bits chunks
+ sz++
+
+ if uint(b)&continuation == 0 {
+ break
+ }
+ }
+
+ return num, nil
+}
+
const (
payload = 0x7f // 0111 1111
continuation = 0x80 // 1000 0000
@@ -174,6 +320,40 @@ func isCopyFromDelta(cmd byte) bool {
return (cmd&0x80) == 0 && cmd != 0
}
+func decodeOffsetByteReader(cmd byte, delta io.ByteReader) (uint, error) {
+ var offset uint
+ if (cmd & 0x01) != 0 {
+ next, err := delta.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ offset = uint(next)
+ }
+ if (cmd & 0x02) != 0 {
+ next, err := delta.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ offset |= uint(next) << 8
+ }
+ if (cmd & 0x04) != 0 {
+ next, err := delta.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ offset |= uint(next) << 16
+ }
+ if (cmd & 0x08) != 0 {
+ next, err := delta.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ offset |= uint(next) << 24
+ }
+
+ return offset, nil
+}
+
func decodeOffset(cmd byte, delta []byte) (uint, []byte, error) {
var offset uint
if (cmd & 0x01) != 0 {
@@ -208,6 +388,36 @@ func decodeOffset(cmd byte, delta []byte) (uint, []byte, error) {
return offset, delta, nil
}
+func decodeSizeByteReader(cmd byte, delta io.ByteReader) (uint, error) {
+ var sz uint
+ if (cmd & 0x10) != 0 {
+ next, err := delta.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ sz = uint(next)
+ }
+ if (cmd & 0x20) != 0 {
+ next, err := delta.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ sz |= uint(next) << 8
+ }
+ if (cmd & 0x40) != 0 {
+ next, err := delta.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ sz |= uint(next) << 16
+ }
+ if sz == 0 {
+ sz = 0x10000
+ }
+
+ return sz, nil
+}
+
func decodeSize(cmd byte, delta []byte) (uint, []byte, error) {
var sz uint
if (cmd & 0x10) != 0 {