diff options
Diffstat (limited to 'formats/packfile/reader.go')
-rw-r--r-- | formats/packfile/reader.go | 256 |
1 files changed, 123 insertions, 133 deletions
diff --git a/formats/packfile/reader.go b/formats/packfile/reader.go index c355e12..6ccf384 100644 --- a/formats/packfile/reader.go +++ b/formats/packfile/reader.go @@ -5,15 +5,29 @@ import ( "encoding/binary" "fmt" "io" + "io/ioutil" + + "gopkg.in/src-d/go-git.v2/common" "github.com/klauspost/compress/zlib" ) type Format int +var ( + EmptyRepositoryErr = newError("empty repository") + UnsupportedVersionErr = newError("unsupported packfile version") + MaxObjectsLimitReachedErr = newError("max. objects limit reached") + MalformedPackfileErr = newError("malformed pack file, does not start with 'PACK'") + InvalidObjectErr = newError("invalid git object") + PatchingErr = newError("patching error") + PackEntryNotFoundErr = newError("can't find a pack entry") + ObjectNotFoundErr = newError("can't find a object") + ZLibErr = newError("zlib reading error") +) + const ( DefaultMaxObjectsLimit = 1 << 20 - DefaultMaxObjectSize = 1 << 32 // 4GB VersionSupported = 2 UnknownFormat Format = 0 @@ -21,7 +35,8 @@ const ( REFDeltaFormat Format = 2 ) -type PackfileReader struct { +// Reader reads a packfile from a binary string splitting it on objects +type Reader struct { // MaxObjectsLimit is the limit of objects to be load in the packfile, if // a packfile excess this number an error is throw, the default value // is defined by DefaultMaxObjectsLimit, usually the default limit is more @@ -29,116 +44,98 @@ type PackfileReader struct { // where the number of object is bigger the memory can be exhausted. MaxObjectsLimit uint32 - // MaxObjectSize is the maximum size in bytes, reading objects with a bigger - // size cause a error. The default value is defined by DefaultMaxObjectSize - MaxObjectSize uint64 - // Format specifies if we are using ref-delta's or ofs-delta's, choosing the // correct format the memory usage is optimized // https://github.com/git/git/blob/8d530c4d64ffcc853889f7b385f554d53db375ed/Documentation/technical/protocol-capabilities.txt#L154 Format Format r *trackingReader - objects map[Hash]*RAWObject - offsets map[int]*RAWObject + s common.ObjectStorage + offsets map[int64]common.Hash } -func NewPackfileReader(r io.Reader, fn ContentCallback) (*PackfileReader, error) { - return &PackfileReader{ +// NewReader returns a new Reader that reads from a io.Reader +func NewReader(r io.Reader) *Reader { + return &Reader{ MaxObjectsLimit: DefaultMaxObjectsLimit, - MaxObjectSize: DefaultMaxObjectSize, r: &trackingReader{r: r}, - objects: make(map[Hash]*RAWObject, 0), - offsets: make(map[int]*RAWObject, 0), - }, nil + offsets: make(map[int64]common.Hash, 0), + } } -func (pr *PackfileReader) Read() (chan *RAWObject, error) { - if err := pr.validateHeader(); err != nil { +// Read reads the objects and stores it at the ObjectStorage +func (r *Reader) Read(s common.ObjectStorage) (int64, error) { + r.s = s + if err := r.validateHeader(); err != nil { if err == io.EOF { - // This is an empty repo. It's OK. - return nil, nil + return -1, EmptyRepositoryErr } - return nil, err + return -1, err } - version, err := pr.readInt32() + version, err := r.readInt32() if err != nil { - return nil, err + return -1, err } if version > VersionSupported { - return nil, NewError("unsupported packfile version %d", version) + return -1, UnsupportedVersionErr } - count, err := pr.readInt32() + count, err := r.readInt32() if err != nil { - return nil, err + return -1, err } - if count > pr.MaxObjectsLimit { - return nil, NewError("too many objects %d, limit is %d", count, pr.MaxObjectsLimit) + if count > r.MaxObjectsLimit { + return -1, MaxObjectsLimitReachedErr } - ch := make(chan *RAWObject, 1) - go pr.readObjects(ch, count) - - // packfile.Size = int64(pr.r.Pos()) - - return ch, nil + return r.r.position, r.readObjects(count) } -func (pr *PackfileReader) validateHeader() error { +func (r *Reader) validateHeader() error { var header = make([]byte, 4) - if _, err := pr.r.Read(header); err != nil { + if _, err := r.r.Read(header); err != nil { return err } if !bytes.Equal(header, []byte{'P', 'A', 'C', 'K'}) { - return NewError("Pack file does not start with 'PACK'") + return MalformedPackfileErr } return nil } -func (pr *PackfileReader) readInt32() (uint32, error) { +func (r *Reader) readInt32() (uint32, error) { var value uint32 - if err := binary.Read(pr.r, binary.BigEndian, &value); err != nil { + if err := binary.Read(r.r, binary.BigEndian, &value); err != nil { return 0, err } return value, nil } -func (pr *PackfileReader) readObjects(ch chan *RAWObject, count uint32) error { +func (r *Reader) readObjects(count uint32) error { // This code has 50-80 µs of overhead per object not counting zlib inflation. // Together with zlib inflation, it's 400-410 µs for small objects. // That's 1 sec for ~2450 objects, ~4.20 MB, or ~250 ms per MB, // of which 12-20 % is _not_ zlib inflation (ie. is our code). - defer func() { - close(ch) - }() - for i := 0; i < int(count); i++ { - var pos = pr.Pos() - obj, err := pr.newRAWObject() + start := r.r.position + obj, err := r.newRAWObject() if err != nil && err != io.EOF { fmt.Println(err) return err } - if pr.Format == UnknownFormat || pr.Format == OFSDeltaFormat { - pr.offsets[pos] = obj - } - - if pr.Format == UnknownFormat || pr.Format == REFDeltaFormat { - pr.objects[obj.Hash] = obj + if r.Format == UnknownFormat || r.Format == OFSDeltaFormat { + r.offsets[start] = obj.Hash() } - ch <- obj - + r.s.Set(obj) if err == io.EOF { break } @@ -147,154 +144,147 @@ func (pr *PackfileReader) readObjects(ch chan *RAWObject, count uint32) error { return nil } -func (pr *PackfileReader) Pos() int { return pr.r.Pos() } - -func (pr *PackfileReader) newRAWObject() (*RAWObject, error) { - raw := &RAWObject{} - steps := 0 +func (r *Reader) newRAWObject() (common.Object, error) { + raw := r.s.New() + var steps int64 var buf [1]byte - if _, err := pr.r.Read(buf[:]); err != nil { + if _, err := r.r.Read(buf[:]); err != nil { return nil, err } - raw.Type = ObjectType((buf[0] >> 4) & 7) - raw.Size = uint64(buf[0] & 15) + typ := common.ObjectType((buf[0] >> 4) & 7) + size := int64(buf[0] & 15) steps++ // byte we just read to get `o.typ` and `o.size` var shift uint = 4 for buf[0]&0x80 == 0x80 { - if _, err := pr.r.Read(buf[:]); err != nil { + if _, err := r.r.Read(buf[:]); err != nil { return nil, err } - raw.Size += uint64(buf[0]&0x7f) << shift + size += int64(buf[0]&0x7f) << shift steps++ // byte we just read to update `o.size` shift += 7 } + raw.SetType(typ) + raw.SetSize(size) + var err error - switch raw.Type { - case REFDeltaObject: - err = pr.readREFDelta(raw) - case OFSDeltaObject: - err = pr.readOFSDelta(raw, steps) - case CommitObject, TreeObject, BlobObject, TagObject: - err = pr.readObject(raw) + switch raw.Type() { + case common.REFDeltaObject: + err = r.readREFDelta(raw) + case common.OFSDeltaObject: + err = r.readOFSDelta(raw, steps) + case common.CommitObject, common.TreeObject, common.BlobObject, common.TagObject: + err = r.readObject(raw) default: - err = NewError("Invalid git object tag %q", raw.Type) + err = InvalidObjectErr.n("tag %q", raw.Type) } return raw, err } -func (pr *PackfileReader) readREFDelta(raw *RAWObject) error { - var ref Hash - if _, err := pr.r.Read(ref[:]); err != nil { +func (r *Reader) readREFDelta(raw common.Object) error { + var ref common.Hash + if _, err := r.r.Read(ref[:]); err != nil { return err } - buf, err := pr.inflate(raw.Size) - if err != nil { + buf := bytes.NewBuffer(nil) + if err := r.inflate(buf); err != nil { return err } - referenced, ok := pr.objects[ref] + referenced, ok := r.s.Get(ref) if !ok { - fmt.Println("not found", ref) - } else { - patched := PatchDelta(referenced.Bytes, buf[:]) - if patched == nil { - return NewError("error while patching %x", ref) - } + return ObjectNotFoundErr.n("%s", ref) + } - raw.Type = referenced.Type - raw.Bytes = patched - raw.Size = uint64(len(patched)) - raw.Hash = ComputeHash(raw.Type, raw.Bytes) + d, _ := ioutil.ReadAll(referenced.Reader()) + patched := patchDelta(d, buf.Bytes()) + if patched == nil { + return PatchingErr.n("hash %q", ref) } + raw.SetType(referenced.Type()) + raw.SetSize(int64(len(patched))) + raw.Writer().Write(patched) + return nil } -func (pr *PackfileReader) readOFSDelta(raw *RAWObject, steps int) error { - var pos = pr.Pos() - - // read negative offset - offset, err := decodeOffset(pr.r, steps) +func (r *Reader) readOFSDelta(raw common.Object, steps int64) error { + start := r.r.position + offset, err := decodeOffset(r.r, steps) if err != nil { return err } - buf, err := pr.inflate(raw.Size) - if err != nil { + buf := bytes.NewBuffer(nil) + if err := r.inflate(buf); err != nil { return err } - ref, ok := pr.offsets[pos+offset] + ref, ok := r.offsets[start+offset] if !ok { - return NewError("can't find a pack entry at %d", pos+offset) + return PackEntryNotFoundErr.n("offset %d", start+offset) } - patched := PatchDelta(ref.Bytes, buf) + referenced, _ := r.s.Get(ref) + d, _ := ioutil.ReadAll(referenced.Reader()) + patched := patchDelta(d, buf.Bytes()) if patched == nil { - return NewError("error while patching %q", ref) + return PatchingErr.n("hash %q", ref) } - raw.Type = ref.Type - raw.Bytes = patched - raw.Size = uint64(len(patched)) - raw.Hash = ComputeHash(raw.Type, raw.Bytes) + raw.SetType(referenced.Type()) + raw.SetSize(int64(len(patched))) + raw.Writer().Write(patched) return nil } -func (pr *PackfileReader) readObject(raw *RAWObject) error { - buf, err := pr.inflate(raw.Size) - if err != nil { - return err - } - - raw.Bytes = buf - raw.Hash = ComputeHash(raw.Type, raw.Bytes) - - return nil +func (r *Reader) readObject(raw common.Object) error { + return r.inflate(raw.Writer()) } -func (pr *PackfileReader) inflate(size uint64) ([]byte, error) { - zr, err := zlib.NewReader(pr.r) +func (r *Reader) inflate(w io.Writer) error { + zr, err := zlib.NewReader(r.r) if err != nil { if err == zlib.ErrHeader { - return nil, zlib.ErrHeader + return zlib.ErrHeader } - return nil, NewError("error opening packfile's object zlib: %v", err) + return ZLibErr.n("%s", err) } defer zr.Close() - if size > pr.MaxObjectSize { - return nil, NewError("the object size %q exceeed the allowed limit: %q", - size, pr.MaxObjectSize) - } - - var buf bytes.Buffer - io.Copy(&buf, zr) // also: io.CopyN(&buf, zr, int64(o.size)) - - if buf.Len() != int(size) { - return nil, NewError( - "inflated size mismatch, expected %d, got %d", size, buf.Len()) - } - - return buf.Bytes(), nil + _, err = io.Copy(w, zr) + return err } type ReaderError struct { - Msg string // description of error + reason, additional string +} + +func newError(reason string) *ReaderError { + return &ReaderError{reason: reason} } -func NewError(format string, args ...interface{}) error { - return &ReaderError{Msg: fmt.Sprintf(format, args...)} +func (e *ReaderError) Error() string { + if e.additional == "" { + return e.reason + } + + return fmt.Sprintf("%s: %s", e.reason, e.additional) } -func (e *ReaderError) Error() string { return e.Msg } +func (e *ReaderError) n(format string, args ...interface{}) *ReaderError { + return &ReaderError{ + reason: e.reason, + additional: fmt.Sprintf(format, args...), + } +} |