Diffstat (limited to 'formats/packfile/reader.go')
-rw-r--r--  formats/packfile/reader.go  338
1 file changed, 0 insertions(+), 338 deletions(-)
diff --git a/formats/packfile/reader.go b/formats/packfile/reader.go
deleted file mode 100644
index 3f7081b..0000000
--- a/formats/packfile/reader.go
+++ /dev/null
@@ -1,338 +0,0 @@
-package packfile
-
-import (
- "bytes"
- "encoding/binary"
- "fmt"
- "io"
- "io/ioutil"
-
- "gopkg.in/src-d/go-git.v3/core"
-
- "github.com/klauspost/compress/zlib"
-)
-
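-// Format specifies the packfile delta format (ref-delta or ofs-delta).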
-type Format int
-
-var (
- EmptyRepositoryErr = newError("empty repository")
- UnsupportedVersionErr = newError("unsupported packfile version")
- MaxObjectsLimitReachedErr = newError("max. objects limit reached")
- MalformedPackfileErr = newError("malformed pack file, does not start with 'PACK'")
- InvalidObjectErr = newError("invalid git object")
- PatchingErr = newError("patching error")
- PackEntryNotFoundErr = newError("can't find a pack entry")
- ErrObjectNotFound = newError("can't find an object")
- ZLibErr = newError("zlib reading error")
-)
-
-const (
- DefaultMaxObjectsLimit = 1 << 20
-
- VersionSupported = 2
- UnknownFormat Format = 0
- OFSDeltaFormat Format = 1
- REFDeltaFormat Format = 2
-)
-
-// Reader reads a packfile from a binary stream, splitting it into objects.
-type Reader struct {
- // MaxObjectsLimit is the maximum number of objects allowed in the
- // packfile; if a packfile exceeds this number an error is returned. The
- // default value is DefaultMaxObjectsLimit, which is usually more than
- // enough for any repository. With extremely big repositories, where the
- // number of objects is much larger, memory can be exhausted.
- MaxObjectsLimit uint32
-
- // Format specifies whether the packfile uses ref-deltas or ofs-deltas;
- // choosing the correct format optimizes memory usage.
- // https://github.com/git/git/blob/8d530c4d64ffcc853889f7b385f554d53db375ed/Documentation/technical/protocol-capabilities.txt#L154
- Format Format
-
- r *trackingReader
- s core.ObjectStorage
- offsets map[int64]core.Hash
-}
-
-// NewReader returns a new Reader that reads from an io.Reader.
-func NewReader(r io.Reader) *Reader {
- return &Reader{
- MaxObjectsLimit: DefaultMaxObjectsLimit,
-
- r: NewTrackingReader(r),
- offsets: make(map[int64]core.Hash, 0),
- }
-}
-
-// Read reads the objects and stores them in the given ObjectStorage,
-// returning the number of bytes read from the underlying stream.
-func (r *Reader) Read(s core.ObjectStorage) (int64, error) {
- r.s = s
- if err := r.validateHeader(); err != nil {
- if err == io.EOF {
- return -1, EmptyRepositoryErr
- }
-
- return -1, err
- }
-
- version, err := r.readInt32()
- if err != nil {
- return -1, err
- }
-
- if version > VersionSupported {
- return -1, UnsupportedVersionErr
- }
-
- count, err := r.readInt32()
- if err != nil {
- return -1, err
- }
-
- if count > r.MaxObjectsLimit {
- return -1, MaxObjectsLimitReachedErr
- }
-
- return r.r.position, r.readObjects(count)
-}
-
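-// validateHeader checks that the stream starts with the "PACK" signature.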
-func (r *Reader) validateHeader() error {
- var header = make([]byte, 4)
- if _, err := io.ReadFull(r.r, header); err != nil {
- return err
- }
-
- if !bytes.Equal(header, []byte{'P', 'A', 'C', 'K'}) {
- return MalformedPackfileErr
- }
-
- return nil
-}
-
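-// readInt32 reads a big-endian uint32 from the stream.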
-func (r *Reader) readInt32() (uint32, error) {
- var value uint32
- if err := binary.Read(r.r, binary.BigEndian, &value); err != nil {
- return 0, err
- }
-
- return value, nil
-}
-
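-// readObjects reads count objects from the packfile and stores them in the
-// storage, recording each object's offset when it may be needed to resolve
-// later ofs-deltas.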
-func (r *Reader) readObjects(count uint32) error {
- // This code has 50-80 µs of overhead per object not counting zlib inflation.
- // Together with zlib inflation, it's 400-410 µs for small objects.
- // That's 1 sec for ~2450 objects, ~4.20 MB, or ~250 ms per MB,
- // of which 12-20 % is _not_ zlib inflation (ie. is our code).
- for i := 0; i < int(count); i++ {
- start := r.r.position
- obj, err := r.newObject()
- if err != nil && err != io.EOF {
- return err
- }
-
- if r.Format == UnknownFormat || r.Format == OFSDeltaFormat {
- r.offsets[start] = obj.Hash()
- }
-
- r.s.Set(obj)
- if err == io.EOF {
- break
- }
- }
-
- return nil
-}
-
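-// newObject reads the next entry's header (type and size) and its content,
-// resolving ref-delta and ofs-delta entries against their base objects.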
-func (r *Reader) newObject() (core.Object, error) {
- raw, err := r.s.New()
- if err != nil {
- return nil, err
- }
- var steps int64
-
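- // The first byte encodes the object type in bits 4-6 and the low 4 bits
- // of the size; while the MSB is set, each following byte adds 7 more
- // size bits.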
- var buf [1]byte
- if _, err := r.r.Read(buf[:]); err != nil {
- return nil, err
- }
-
- typ := core.ObjectType((buf[0] >> 4) & 7)
- size := int64(buf[0] & 15)
- steps++ // byte we just read to get `typ` and the low bits of `size`
-
- var shift uint = 4
- for buf[0]&0x80 == 0x80 {
- if _, err := r.r.Read(buf[:]); err != nil {
- return nil, err
- }
-
- size += int64(buf[0]&0x7f) << shift
- steps++ // byte we just read to update `size`
- shift += 7
- }
-
- raw.SetType(typ)
- raw.SetSize(size)
-
- switch raw.Type() {
- case core.REFDeltaObject:
- err = r.readREFDelta(raw)
- case core.OFSDeltaObject:
- err = r.readOFSDelta(raw, steps)
- case core.CommitObject, core.TreeObject, core.BlobObject, core.TagObject:
- err = r.readObject(raw)
- default:
- err = InvalidObjectErr.n("type %q", raw.Type())
- }
-
- return raw, err
-}
-
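-// readREFDelta reads a ref-delta entry: a 20-byte base hash followed by a
-// zlib-compressed delta, which is applied to the base object from storage.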
-func (r *Reader) readREFDelta(raw core.Object) (err error) {
- var ref core.Hash
- if _, err := io.ReadFull(r.r, ref[:]); err != nil {
- return err
- }
-
- buf := bytes.NewBuffer(nil)
- if err := r.inflate(buf); err != nil {
- return err
- }
-
- referenced, err := r.s.Get(ref)
- if err != nil {
- if err == core.ErrObjectNotFound {
- return ErrObjectNotFound.n("%s", ref)
- }
- return err
- }
-
- reader, err := referenced.Reader()
- if err != nil {
- return err
- }
- defer checkClose(reader, &err)
-
- d, err := ioutil.ReadAll(reader)
- if err != nil {
- return err
- }
-
- patched := patchDelta(d, buf.Bytes())
- if patched == nil {
- return PatchingErr.n("hash %q", ref)
- }
-
- raw.SetType(referenced.Type())
- raw.SetSize(int64(len(patched)))
-
- writer, err := raw.Writer()
- if err != nil {
- return err
- }
- defer checkClose(writer, &err)
-
- writer.Write(patched)
-
- return nil
-}
-
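-// readOFSDelta reads an ofs-delta entry: a negative offset to a base object
-// earlier in the packfile, followed by a zlib-compressed delta applied to it.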
-func (r *Reader) readOFSDelta(raw core.Object, steps int64) (err error) {
- start := r.r.position
- offset, err := decodeOffset(r.r, steps)
- if err != nil {
- return err
- }
-
- buf := bytes.NewBuffer(nil)
- if err = r.inflate(buf); err != nil {
- return err
- }
-
- ref, ok := r.offsets[start+offset]
- if !ok {
- return PackEntryNotFoundErr.n("offset %d", start+offset)
- }
-
- referenced, err := r.s.Get(ref)
- if err != nil {
- return err
- }
-
- reader, err := referenced.Reader()
- if err != nil {
- return err
- }
- defer checkClose(reader, &err)
-
- d, err := ioutil.ReadAll(reader)
- if err != nil {
- return err
- }
-
- patched := patchDelta(d, buf.Bytes())
- if patched == nil {
- return PatchingErr.n("hash %q", ref)
- }
-
- raw.SetType(referenced.Type())
- raw.SetSize(int64(len(patched)))
-
- writer, err := raw.Writer()
- if err != nil {
- return err
- }
- defer checkClose(writer, &err)
-
- writer.Write(patched)
-
- return nil
-}
-
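-// readObject inflates a non-delta object's content straight into its writer.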
-func (r *Reader) readObject(raw core.Object) (err error) {
- writer, err := raw.Writer()
- if err != nil {
- return err
- }
- defer checkClose(writer, &err)
-
- return r.inflate(writer)
-}
-
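-// inflate zlib-decompresses the next packfile entry into w.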
-func (r *Reader) inflate(w io.Writer) error {
- zr, err := zlib.NewReader(r.r)
- if err != nil {
- if err == zlib.ErrHeader {
- return zlib.ErrHeader
- }
-
- return ZLibErr.n("%s", err)
- }
-
- defer zr.Close()
-
- _, err = io.Copy(w, zr)
- return err
-}
-
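-// ReaderError is an error produced by the Reader, made of a fixed reason and
-// optional additional context.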
-type ReaderError struct {
- reason, additional string
-}
-
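-// newError creates a ReaderError with the given reason.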
-func newError(reason string) *ReaderError {
- return &ReaderError{reason: reason}
-}
-
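-// Error implements the error interface.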
-func (e *ReaderError) Error() string {
- if e.additional == "" {
- return e.reason
- }
-
- return fmt.Sprintf("%s: %s", e.reason, e.additional)
-}
-
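-// n returns a copy of the error annotated with extra formatted context.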
-func (e *ReaderError) n(format string, args ...interface{}) *ReaderError {
- return &ReaderError{
- reason: e.reason,
- additional: fmt.Sprintf(format, args...),
- }
-}
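
For reference, a minimal sketch of how this Reader was typically driven from outside the package. The package import path is inferred from the file's location; the readPack helper, the pack path, and the concrete core.ObjectStorage implementation passed in are assumptions for illustration, not part of this file.

package example

import (
	"os"

	"gopkg.in/src-d/go-git.v3/core"
	"gopkg.in/src-d/go-git.v3/formats/packfile"
)

// readPack is a hypothetical helper: it opens a pack file and loads every
// object it contains into the given storage, returning the number of bytes
// consumed from the stream.
func readPack(path string, storage core.ObjectStorage) (int64, error) {
	f, err := os.Open(path)
	if err != nil {
		return -1, err
	}
	defer f.Close()

	r := packfile.NewReader(f)
	r.MaxObjectsLimit = packfile.DefaultMaxObjectsLimit // raise only for unusually large packs
	r.Format = packfile.OFSDeltaFormat                  // set when the pack's delta format is known

	return r.Read(storage)
}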