aboutsummaryrefslogtreecommitdiffstats
path: root/packfile/reader.go
diff options
context:
space:
mode:
Diffstat (limited to 'packfile/reader.go')
-rw-r--r--packfile/reader.go255
1 files changed, 103 insertions, 152 deletions
diff --git a/packfile/reader.go b/packfile/reader.go
index 4c110b4..564d2e2 100644
--- a/packfile/reader.go
+++ b/packfile/reader.go
@@ -4,7 +4,6 @@ import (
"bytes"
"compress/zlib"
"encoding/binary"
- "errors"
"fmt"
"io"
"io/ioutil"
@@ -13,34 +12,14 @@ import (
const MaxObjectsLimit = 1000000
type PackfileReader struct {
- r io.Reader
+ buf *bytes.Reader
+ size int
objects map[string]packfileObject
offsets map[int]string
deltas []packfileDelta
- // The give back logic is explained in the giveBack method.
- startedGivingBack bool
- givebackBuffer []byte
- givenBack io.Reader
- contentCallback ContentCallback
-}
-
-// Sometimes, after reading an object from a packfile, there will be
-// a few bytes with garbage data before the next object comes by.
-// There is no way of reliably noticing this until when trying to read the
-// next object and failing because zlib parses an invalid header. We can't
-// notice before, because parsing the object's header (size, type, etc.)
-// doesn't fail.
-//
-// At that point, we want to give back to the reader the bytes we've read
-// since the last object, shift the input by one byte, and try again. That's
-// why we save the bytes we read on each object and, if it fails in the middle
-// of parsing it, those bytes will be read the next times you call Read() on
-// a objectReader derived from a PackfileReader.readObject, until they run out.
-func (pr *PackfileReader) giveBack() {
- pr.givenBack = bytes.NewReader(pr.givebackBuffer)
- pr.givebackBuffer = nil
+ contentCallback ContentCallback
}
type packfileObject struct {
@@ -53,19 +32,28 @@ type packfileDelta struct {
delta []byte
}
-func NewPackfileReader(r io.Reader, contentCallback ContentCallback) (*PackfileReader, error) {
+func NewPackfileReader(r io.Reader, fn ContentCallback) (*PackfileReader, error) {
+ buf, err := ioutil.ReadAll(r)
+ if err != nil {
+ return nil, err
+ }
+
return &PackfileReader{
- r: r,
- objects: map[string]packfileObject{},
- offsets: map[int]string{},
- contentCallback: contentCallback,
+ buf: bytes.NewReader(buf),
+ size: len(buf),
+ objects: make(map[string]packfileObject, 0),
+ offsets: make(map[int]string, 0),
+ contentCallback: fn,
}, nil
}
-func (p *PackfileReader) Read() (*Packfile, error) {
+func (pr *PackfileReader) Pos() int { return pr.size - pr.buf.Len() }
+func (pr *PackfileReader) Size() int { return pr.size }
+
+func (pr *PackfileReader) Read() (*Packfile, error) {
packfile := NewPackfile()
- if err := p.validateSignature(); err != nil {
+ if err := pr.validateHeader(); err != nil {
if err == io.EOF {
// This is an empty repo. It's OK.
return packfile, nil
@@ -73,13 +61,12 @@ func (p *PackfileReader) Read() (*Packfile, error) {
return nil, err
}
- var err error
- ver, err := p.readInt32()
+ ver, err := pr.readInt32()
if err != nil {
return nil, err
}
- count, err := p.readInt32()
+ count, err := pr.readInt32()
if err != nil {
return nil, err
}
@@ -91,57 +78,50 @@ func (p *PackfileReader) Read() (*Packfile, error) {
return nil, NewError("too many objects (%d)", packfile.ObjectCount)
}
- if err := p.readObjects(packfile); err != nil {
+ if err := pr.readObjects(packfile); err != nil {
return nil, err
}
return packfile, nil
}
-func (p *PackfileReader) validateSignature() error {
- var signature = make([]byte, 4)
- if _, err := p.r.Read(signature); err != nil {
+func (pr *PackfileReader) validateHeader() error {
+ var header = make([]byte, 4)
+ if _, err := pr.buf.Read(header); err != nil {
return err
}
- if !bytes.Equal(signature, []byte{'P', 'A', 'C', 'K'}) {
+ if !bytes.Equal(header, []byte{'P', 'A', 'C', 'K'}) {
return NewError("Pack file does not start with 'PACK'")
}
return nil
}
-func (p *PackfileReader) readInt32() (uint32, error) {
+func (pr *PackfileReader) readInt32() (uint32, error) {
var value uint32
- if err := binary.Read(p.r, binary.BigEndian, &value); err != nil {
- fmt.Println(err)
-
+ if err := binary.Read(pr.buf, binary.BigEndian, &value); err != nil {
return 0, err
}
return value, nil
}
-func (p *PackfileReader) readObjects(packfile *Packfile) error {
+func (pr *PackfileReader) readObjects(packfile *Packfile) error {
// This code has 50-80 µs of overhead per object not counting zlib inflation.
// Together with zlib inflation, it's 400-410 µs for small objects.
// That's 1 sec for ~2450 objects, ~4.20 MB, or ~250 ms per MB,
// of which 12-20 % is _not_ zlib inflation (ie. is our code).
- p.startedGivingBack = true
var unknownForBytes [4]byte
-
- offset := 12
for i := 0; i < packfile.ObjectCount; i++ {
- r, err := p.readObject(packfile, offset)
+ var pos = pr.Pos()
+ obj, err := pr.readObject(packfile)
if err != nil && err != io.EOF {
return err
}
- p.offsets[offset] = r.hash
- offset += r.counter + 4
-
- p.r.Read(unknownForBytes[:])
+ pr.offsets[pos] = obj.hash
if err == io.EOF {
break
@@ -151,30 +131,8 @@ func (p *PackfileReader) readObjects(packfile *Packfile) error {
return nil
}
-const (
- OBJ_COMMIT = 1
- OBJ_TREE = 2
- OBJ_BLOB = 3
- OBJ_TAG = 4
- OBJ_OFS_DELTA = 6
- OBJ_REF_DELTA = 7
-)
-
-const SIZE_LIMIT uint64 = 1 << 32 //4GB
-
-type objectReader struct {
- pr *PackfileReader
- pf *Packfile
- offset int
- hash string
-
- typ int8
- size uint64
- counter int
-}
-
-func (p *PackfileReader) readObject(packfile *Packfile, offset int) (*objectReader, error) {
- o, err := newObjectReader(p, packfile, offset)
+func (pr *PackfileReader) readObject(packfile *Packfile) (*objectReader, error) {
+ o, err := newObjectReader(pr, packfile)
if err != nil {
return nil, err
}
@@ -189,17 +147,38 @@ func (p *PackfileReader) readObject(packfile *Packfile, offset int) (*objectRead
default:
err = NewError("Invalid git object tag %q", o.typ)
}
- if err == ErrZlibHeader {
- p.giveBack()
- io.CopyN(ioutil.Discard, p.r, 1)
- return p.readObject(packfile, offset)
+
+ if err != nil {
+ return nil, err
}
return o, err
}
-func newObjectReader(pr *PackfileReader, pf *Packfile, offset int) (*objectReader, error) {
- o := &objectReader{pr: pr, pf: pf, offset: offset}
+const (
+ OBJ_COMMIT = 1
+ OBJ_TREE = 2
+ OBJ_BLOB = 3
+ OBJ_TAG = 4
+ OBJ_OFS_DELTA = 6
+ OBJ_REF_DELTA = 7
+)
+
+const SIZE_LIMIT uint64 = 1 << 32 // 4GB
+
+type objectReader struct {
+ pr *PackfileReader
+ pf *Packfile
+ hash string
+ steps int
+
+ typ int8
+ size uint64
+}
+
+func newObjectReader(pr *PackfileReader, pf *Packfile) (*objectReader, error) {
+ o := &objectReader{pr: pr, pf: pf}
+
var buf [1]byte
if _, err := o.Read(buf[:]); err != nil {
return nil, err
@@ -207,6 +186,7 @@ func newObjectReader(pr *PackfileReader, pf *Packfile, offset int) (*objectReade
o.typ = int8((buf[0] >> 4) & 7)
o.size = uint64(buf[0] & 15)
+ o.steps++ // byte we just read to get `o.typ` and `o.size`
var shift uint = 4
for buf[0]&0x80 == 0x80 {
@@ -215,6 +195,7 @@ func newObjectReader(pr *PackfileReader, pf *Packfile, offset int) (*objectReade
}
o.size += uint64(buf[0]&0x7f) << shift
+ o.steps++ // byte we just read to update `o.size`
shift += 7
}
@@ -223,7 +204,9 @@ func newObjectReader(pr *PackfileReader, pf *Packfile, offset int) (*objectReade
func (o *objectReader) readREFDelta() error {
var ref [20]byte
- o.Read(ref[:])
+ if _, err := o.Read(ref[:]); err != nil {
+ return err
+ }
buf, err := o.inflate()
if err != nil {
@@ -249,15 +232,32 @@ func (o *objectReader) readREFDelta() error {
return nil
}
+func decodeOffset(src io.ByteReader, steps int) (int, error) {
+ b, err := src.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ var offset = int(b & 0x7f)
+ for (b & 0x80) != 0 {
+ offset += 1 // WHY?
+ b, err = src.ReadByte()
+ if err != nil {
+ return 0, err
+ }
+ offset = (offset << 7) + int(b&0x7f)
+ }
+ // offset needs to be aware of the bytes we read for `o.typ` and `o.size`
+ offset += steps
+ return -offset, nil
+}
+
func (o *objectReader) readOFSDelta() error {
+ var pos = o.pr.Pos()
+
// read negative offset
- var b uint8
- binary.Read(o, binary.BigEndian, &b)
- var noffset int = int(b & 0x7f)
- for (b & 0x80) != 0 {
- noffset += 1
- binary.Read(o, binary.BigEndian, &b)
- noffset = (noffset << 7) + int(b&0x7f)
+ offset, err := decodeOffset(o.pr.buf, o.steps)
+ if err != nil {
+ return err
}
buf, err := o.inflate()
@@ -265,10 +265,10 @@ func (o *objectReader) readOFSDelta() error {
return err
}
- refhash := o.pr.offsets[o.offset-noffset]
+ refhash := o.pr.offsets[pos+offset]
referenced, ok := o.pr.objects[refhash]
if !ok {
- return NewError("can't find a pack entry at %d", o.offset-noffset)
+ return NewError("can't find a pack entry at %d", pos+offset)
} else {
patched := PatchDelta(referenced.bytes, buf)
if patched == nil {
@@ -328,87 +328,40 @@ func (o *objectReader) addObject(bytes []byte) error {
o.hash = hash
return nil
-
}
func (o *objectReader) inflate() ([]byte, error) {
- //Quick fix "Invalid git object tag '\x00'" when the length of a object is 0
- if o.size == 0 {
- var buf [4]byte
- if _, err := o.Read(buf[:]); err != nil {
- return nil, err
- }
-
- return nil, nil
- }
-
- zr, err := zlib.NewReader(o)
+ zr, err := zlib.NewReader(o.pr.buf)
if err != nil {
- if err.Error() == "zlib: invalid header" {
- return nil, ErrZlibHeader
+ if err == zlib.ErrHeader {
+ return nil, zlib.ErrHeader
} else {
return nil, NewError("error opening packfile's object zlib: %v", err)
}
}
-
defer zr.Close()
if o.size > SIZE_LIMIT {
return nil, NewError("the object size exceeed the allowed limit: %d", o.size)
}
- var arrbuf [4096]byte // Stack-allocated for <4 KB objects.
- var buf []byte
- if uint64(len(arrbuf)) >= o.size {
- buf = arrbuf[:o.size]
- } else {
- buf = make([]byte, o.size)
- }
-
- read := 0
- for read < int(o.size) {
- n, err := zr.Read(buf[read:])
- if err != nil {
- return nil, err
- }
-
- read += n
- }
+ var buf bytes.Buffer
+ io.Copy(&buf, zr) // also: io.CopyN(&buf, zr, int64(o.size))
- if read != int(o.size) {
- return nil, NewError("inflated size mismatch, expected %d, got %d", o.size, read)
+ var bufLen = buf.Len()
+ if bufLen != int(o.size) {
+ return nil, NewError("inflated size mismatch, expected %d, got %d", o.size, bufLen)
}
- return buf, nil
+ return buf.Bytes(), nil
}
func (o *objectReader) Read(p []byte) (int, error) {
- i := 0
- if o.pr.givenBack != nil {
- i1, err := o.pr.givenBack.Read(p)
- if err == nil {
- i += i1
- } else {
- o.pr.givenBack = nil
- }
- }
-
- i2, err := o.pr.r.Read(p[i:])
- i += i2
- o.counter += i
- if err == nil && o.pr.startedGivingBack {
- o.pr.givebackBuffer = append(o.pr.givebackBuffer, p[:i]...)
- }
- return i, err
+ return o.pr.buf.Read(p)
}
func (o *objectReader) ReadByte() (byte, error) {
- var c byte
- if err := binary.Read(o, binary.BigEndian, &c); err != nil {
- return 0, err
- }
-
- return c, nil
+ return o.pr.buf.ReadByte()
}
type ReaderError struct {
@@ -420,5 +373,3 @@ func NewError(format string, args ...interface{}) error {
}
func (e *ReaderError) Error() string { return e.Msg }
-
-var ErrZlibHeader = errors.New("zlib: invalid header")