about summary refs log tree commit diff stats
path: root/formats/packfile/reader.go
diff options
context:
space:
mode:
author    Máximo Cuadros <mcuadros@gmail.com>  2015-10-25 20:30:36 +0100
committer Máximo Cuadros <mcuadros@gmail.com>  2015-10-25 20:30:36 +0100
commit    9a44cd8ccff143a112436c38bfe5581e74b68f07 (patch)
tree      f4d2f38cc61647bf159a7c870913e6f6b60828b2 /formats/packfile/reader.go
parent    be69a505926451bf10450ac68d40265a6f43e150 (diff)
download  go-git-9a44cd8ccff143a112436c38bfe5581e74b68f07.tar.gz
formats/packfile: new reader API
Diffstat (limited to 'formats/packfile/reader.go')
-rw-r--r--  formats/packfile/reader.go  256
1 files changed, 123 insertions, 133 deletions
diff --git a/formats/packfile/reader.go b/formats/packfile/reader.go
index c355e12..6ccf384 100644
--- a/formats/packfile/reader.go
+++ b/formats/packfile/reader.go
@@ -5,15 +5,29 @@ import (
"encoding/binary"
"fmt"
"io"
+ "io/ioutil"
+
+ "gopkg.in/src-d/go-git.v2/common"
"github.com/klauspost/compress/zlib"
)
type Format int
+var (
+ EmptyRepositoryErr = newError("empty repository")
+ UnsupportedVersionErr = newError("unsupported packfile version")
+ MaxObjectsLimitReachedErr = newError("max. objects limit reached")
+ MalformedPackfileErr = newError("malformed pack file, does not start with 'PACK'")
+ InvalidObjectErr = newError("invalid git object")
+ PatchingErr = newError("patching error")
+ PackEntryNotFoundErr = newError("can't find a pack entry")
+ ObjectNotFoundErr = newError("can't find a object")
+ ZLibErr = newError("zlib reading error")
+)
+
const (
DefaultMaxObjectsLimit = 1 << 20
- DefaultMaxObjectSize = 1 << 32 // 4GB
VersionSupported = 2
UnknownFormat Format = 0
@@ -21,7 +35,8 @@ const (
REFDeltaFormat Format = 2
)
-type PackfileReader struct {
+// Reader reads a packfile from a binary string splitting it on objects
+type Reader struct {
// MaxObjectsLimit is the limit of objects to be load in the packfile, if
// a packfile excess this number an error is throw, the default value
// is defined by DefaultMaxObjectsLimit, usually the default limit is more
@@ -29,116 +44,98 @@ type PackfileReader struct {
// where the number of object is bigger the memory can be exhausted.
MaxObjectsLimit uint32
- // MaxObjectSize is the maximum size in bytes, reading objects with a bigger
- // size cause a error. The default value is defined by DefaultMaxObjectSize
- MaxObjectSize uint64
-
// Format specifies if we are using ref-delta's or ofs-delta's, choosing the
// correct format the memory usage is optimized
// https://github.com/git/git/blob/8d530c4d64ffcc853889f7b385f554d53db375ed/Documentation/technical/protocol-capabilities.txt#L154
Format Format
r *trackingReader
- objects map[Hash]*RAWObject
- offsets map[int]*RAWObject
+ s common.ObjectStorage
+ offsets map[int64]common.Hash
}
-func NewPackfileReader(r io.Reader, fn ContentCallback) (*PackfileReader, error) {
- return &PackfileReader{
+// NewReader returns a new Reader that reads from a io.Reader
+func NewReader(r io.Reader) *Reader {
+ return &Reader{
MaxObjectsLimit: DefaultMaxObjectsLimit,
- MaxObjectSize: DefaultMaxObjectSize,
r: &trackingReader{r: r},
- objects: make(map[Hash]*RAWObject, 0),
- offsets: make(map[int]*RAWObject, 0),
- }, nil
+ offsets: make(map[int64]common.Hash, 0),
+ }
}
-func (pr *PackfileReader) Read() (chan *RAWObject, error) {
- if err := pr.validateHeader(); err != nil {
+// Read reads the objects and stores it at the ObjectStorage
+func (r *Reader) Read(s common.ObjectStorage) (int64, error) {
+ r.s = s
+ if err := r.validateHeader(); err != nil {
if err == io.EOF {
- // This is an empty repo. It's OK.
- return nil, nil
+ return -1, EmptyRepositoryErr
}
- return nil, err
+ return -1, err
}
- version, err := pr.readInt32()
+ version, err := r.readInt32()
if err != nil {
- return nil, err
+ return -1, err
}
if version > VersionSupported {
- return nil, NewError("unsupported packfile version %d", version)
+ return -1, UnsupportedVersionErr
}
- count, err := pr.readInt32()
+ count, err := r.readInt32()
if err != nil {
- return nil, err
+ return -1, err
}
- if count > pr.MaxObjectsLimit {
- return nil, NewError("too many objects %d, limit is %d", count, pr.MaxObjectsLimit)
+ if count > r.MaxObjectsLimit {
+ return -1, MaxObjectsLimitReachedErr
}
- ch := make(chan *RAWObject, 1)
- go pr.readObjects(ch, count)
-
- // packfile.Size = int64(pr.r.Pos())
-
- return ch, nil
+ return r.r.position, r.readObjects(count)
}
-func (pr *PackfileReader) validateHeader() error {
+func (r *Reader) validateHeader() error {
var header = make([]byte, 4)
- if _, err := pr.r.Read(header); err != nil {
+ if _, err := r.r.Read(header); err != nil {
return err
}
if !bytes.Equal(header, []byte{'P', 'A', 'C', 'K'}) {
- return NewError("Pack file does not start with 'PACK'")
+ return MalformedPackfileErr
}
return nil
}
-func (pr *PackfileReader) readInt32() (uint32, error) {
+func (r *Reader) readInt32() (uint32, error) {
var value uint32
- if err := binary.Read(pr.r, binary.BigEndian, &value); err != nil {
+ if err := binary.Read(r.r, binary.BigEndian, &value); err != nil {
return 0, err
}
return value, nil
}
-func (pr *PackfileReader) readObjects(ch chan *RAWObject, count uint32) error {
+func (r *Reader) readObjects(count uint32) error {
// This code has 50-80 µs of overhead per object not counting zlib inflation.
// Together with zlib inflation, it's 400-410 µs for small objects.
// That's 1 sec for ~2450 objects, ~4.20 MB, or ~250 ms per MB,
// of which 12-20 % is _not_ zlib inflation (ie. is our code).
- defer func() {
- close(ch)
- }()
-
for i := 0; i < int(count); i++ {
- var pos = pr.Pos()
- obj, err := pr.newRAWObject()
+ start := r.r.position
+ obj, err := r.newRAWObject()
if err != nil && err != io.EOF {
fmt.Println(err)
return err
}
- if pr.Format == UnknownFormat || pr.Format == OFSDeltaFormat {
- pr.offsets[pos] = obj
- }
-
- if pr.Format == UnknownFormat || pr.Format == REFDeltaFormat {
- pr.objects[obj.Hash] = obj
+ if r.Format == UnknownFormat || r.Format == OFSDeltaFormat {
+ r.offsets[start] = obj.Hash()
}
- ch <- obj
-
+ r.s.Set(obj)
if err == io.EOF {
break
}
@@ -147,154 +144,147 @@ func (pr *PackfileReader) readObjects(ch chan *RAWObject, count uint32) error {
return nil
}
-func (pr *PackfileReader) Pos() int { return pr.r.Pos() }
-
-func (pr *PackfileReader) newRAWObject() (*RAWObject, error) {
- raw := &RAWObject{}
- steps := 0
+func (r *Reader) newRAWObject() (common.Object, error) {
+ raw := r.s.New()
+ var steps int64
var buf [1]byte
- if _, err := pr.r.Read(buf[:]); err != nil {
+ if _, err := r.r.Read(buf[:]); err != nil {
return nil, err
}
- raw.Type = ObjectType((buf[0] >> 4) & 7)
- raw.Size = uint64(buf[0] & 15)
+ typ := common.ObjectType((buf[0] >> 4) & 7)
+ size := int64(buf[0] & 15)
steps++ // byte we just read to get `o.typ` and `o.size`
var shift uint = 4
for buf[0]&0x80 == 0x80 {
- if _, err := pr.r.Read(buf[:]); err != nil {
+ if _, err := r.r.Read(buf[:]); err != nil {
return nil, err
}
- raw.Size += uint64(buf[0]&0x7f) << shift
+ size += int64(buf[0]&0x7f) << shift
steps++ // byte we just read to update `o.size`
shift += 7
}
+ raw.SetType(typ)
+ raw.SetSize(size)
+
var err error
- switch raw.Type {
- case REFDeltaObject:
- err = pr.readREFDelta(raw)
- case OFSDeltaObject:
- err = pr.readOFSDelta(raw, steps)
- case CommitObject, TreeObject, BlobObject, TagObject:
- err = pr.readObject(raw)
+ switch raw.Type() {
+ case common.REFDeltaObject:
+ err = r.readREFDelta(raw)
+ case common.OFSDeltaObject:
+ err = r.readOFSDelta(raw, steps)
+ case common.CommitObject, common.TreeObject, common.BlobObject, common.TagObject:
+ err = r.readObject(raw)
default:
- err = NewError("Invalid git object tag %q", raw.Type)
+ err = InvalidObjectErr.n("tag %q", raw.Type)
}
return raw, err
}
-func (pr *PackfileReader) readREFDelta(raw *RAWObject) error {
- var ref Hash
- if _, err := pr.r.Read(ref[:]); err != nil {
+func (r *Reader) readREFDelta(raw common.Object) error {
+ var ref common.Hash
+ if _, err := r.r.Read(ref[:]); err != nil {
return err
}
- buf, err := pr.inflate(raw.Size)
- if err != nil {
+ buf := bytes.NewBuffer(nil)
+ if err := r.inflate(buf); err != nil {
return err
}
- referenced, ok := pr.objects[ref]
+ referenced, ok := r.s.Get(ref)
if !ok {
- fmt.Println("not found", ref)
- } else {
- patched := PatchDelta(referenced.Bytes, buf[:])
- if patched == nil {
- return NewError("error while patching %x", ref)
- }
+ return ObjectNotFoundErr.n("%s", ref)
+ }
- raw.Type = referenced.Type
- raw.Bytes = patched
- raw.Size = uint64(len(patched))
- raw.Hash = ComputeHash(raw.Type, raw.Bytes)
+ d, _ := ioutil.ReadAll(referenced.Reader())
+ patched := patchDelta(d, buf.Bytes())
+ if patched == nil {
+ return PatchingErr.n("hash %q", ref)
}
+ raw.SetType(referenced.Type())
+ raw.SetSize(int64(len(patched)))
+ raw.Writer().Write(patched)
+
return nil
}
-func (pr *PackfileReader) readOFSDelta(raw *RAWObject, steps int) error {
- var pos = pr.Pos()
-
- // read negative offset
- offset, err := decodeOffset(pr.r, steps)
+func (r *Reader) readOFSDelta(raw common.Object, steps int64) error {
+ start := r.r.position
+ offset, err := decodeOffset(r.r, steps)
if err != nil {
return err
}
- buf, err := pr.inflate(raw.Size)
- if err != nil {
+ buf := bytes.NewBuffer(nil)
+ if err := r.inflate(buf); err != nil {
return err
}
- ref, ok := pr.offsets[pos+offset]
+ ref, ok := r.offsets[start+offset]
if !ok {
- return NewError("can't find a pack entry at %d", pos+offset)
+ return PackEntryNotFoundErr.n("offset %d", start+offset)
}
- patched := PatchDelta(ref.Bytes, buf)
+ referenced, _ := r.s.Get(ref)
+ d, _ := ioutil.ReadAll(referenced.Reader())
+ patched := patchDelta(d, buf.Bytes())
if patched == nil {
- return NewError("error while patching %q", ref)
+ return PatchingErr.n("hash %q", ref)
}
- raw.Type = ref.Type
- raw.Bytes = patched
- raw.Size = uint64(len(patched))
- raw.Hash = ComputeHash(raw.Type, raw.Bytes)
+ raw.SetType(referenced.Type())
+ raw.SetSize(int64(len(patched)))
+ raw.Writer().Write(patched)
return nil
}
-func (pr *PackfileReader) readObject(raw *RAWObject) error {
- buf, err := pr.inflate(raw.Size)
- if err != nil {
- return err
- }
-
- raw.Bytes = buf
- raw.Hash = ComputeHash(raw.Type, raw.Bytes)
-
- return nil
+func (r *Reader) readObject(raw common.Object) error {
+ return r.inflate(raw.Writer())
}
-func (pr *PackfileReader) inflate(size uint64) ([]byte, error) {
- zr, err := zlib.NewReader(pr.r)
+func (r *Reader) inflate(w io.Writer) error {
+ zr, err := zlib.NewReader(r.r)
if err != nil {
if err == zlib.ErrHeader {
- return nil, zlib.ErrHeader
+ return zlib.ErrHeader
}
- return nil, NewError("error opening packfile's object zlib: %v", err)
+ return ZLibErr.n("%s", err)
}
defer zr.Close()
- if size > pr.MaxObjectSize {
- return nil, NewError("the object size %q exceeed the allowed limit: %q",
- size, pr.MaxObjectSize)
- }
-
- var buf bytes.Buffer
- io.Copy(&buf, zr) // also: io.CopyN(&buf, zr, int64(o.size))
-
- if buf.Len() != int(size) {
- return nil, NewError(
- "inflated size mismatch, expected %d, got %d", size, buf.Len())
- }
-
- return buf.Bytes(), nil
+ _, err = io.Copy(w, zr)
+ return err
}
type ReaderError struct {
- Msg string // description of error
+ reason, additional string
+}
+
+func newError(reason string) *ReaderError {
+ return &ReaderError{reason: reason}
}
-func NewError(format string, args ...interface{}) error {
- return &ReaderError{Msg: fmt.Sprintf(format, args...)}
+func (e *ReaderError) Error() string {
+ if e.additional == "" {
+ return e.reason
+ }
+
+ return fmt.Sprintf("%s: %s", e.reason, e.additional)
}
-func (e *ReaderError) Error() string { return e.Msg }
+func (e *ReaderError) n(format string, args ...interface{}) *ReaderError {
+ return &ReaderError{
+ reason: e.reason,
+ additional: fmt.Sprintf(format, args...),
+ }
+}