diff options
author | Máximo Cuadros <mcuadros@gmail.com> | 2016-09-06 19:59:44 +0200 |
---|---|---|
committer | Máximo Cuadros <mcuadros@gmail.com> | 2016-09-06 19:59:44 +0200 |
commit | 98a22e72a808aa0d5dd62339817404fd9e1c4db6 (patch) | |
tree | d5544dce0176b55172f2fd25564618e8c0f18558 /formats/packfile/parser.go | |
parent | ae2b10d50da5455b382ab9d543be4fe859afe9e0 (diff) | |
download | go-git-98a22e72a808aa0d5dd62339817404fd9e1c4db6.tar.gz |
format: packfile new interface (wip)
Diffstat (limited to 'formats/packfile/parser.go')
-rw-r--r-- | formats/packfile/parser.go | 339 |
1 files changed, 174 insertions, 165 deletions
diff --git a/formats/packfile/parser.go b/formats/packfile/parser.go index 2930dcb..c1653c8 100644 --- a/formats/packfile/parser.go +++ b/formats/packfile/parser.go @@ -6,6 +6,7 @@ import ( "encoding/binary" "fmt" "io" + "io/ioutil" "gopkg.in/src-d/go-git.v4/core" ) @@ -22,98 +23,167 @@ var ( const ( // VersionSupported is the packfile version supported by this parser. - VersionSupported = 2 + VersionSupported uint32 = 2 ) +type ObjectHeader struct { + Type core.ObjectType + Offset int64 + Length int64 + Reference core.Hash + OffsetReference int64 +} + // A Parser is a collection of functions to read and process data form a packfile. // Values from this type are not zero-value safe. See the NewParser function bellow. type Parser struct { - ReadRecaller - ObjectFactory func() core.Object + r *trackableReader + + // pendingObject is used to detect if an object has been read, or still + // is waiting to be read + pendingObject *ObjectHeader } // NewParser returns a new Parser that reads from the packfile represented by r. -func NewParser(r ReadRecaller) *Parser { - return &Parser{ReadRecaller: r} +func NewParser(r io.Reader) *Parser { + return &Parser{r: &trackableReader{Reader: r}} } -// ReadInt32 reads 4 bytes and returns them as a Big Endian int32. -func (p Parser) readInt32() (uint32, error) { - var v uint32 - if err := binary.Read(p, binary.BigEndian, &v); err != nil { - return 0, err +// Header reads the whole packfile header (signature, version and object count). +// It returns the version and the object count and performs checks on the +// validity of the signature and the version fields. +func (p *Parser) Header() (version, objects uint32, err error) { + sig, err := p.readSignature() + if err != nil { + if err == io.EOF { + err = ErrEmptyPackfile + } + + return } - return v, nil + if !p.isValidSignature(sig) { + err = ErrBadSignature + return + } + + version, err = p.readVersion() + if err != nil { + return + } + + if !p.isSupportedVersion(version) { + err = ErrUnsupportedVersion.AddDetails("%d", version) + return + } + + objects, err = p.readCount() + return } -// ReadSignature reads an returns the signature field in the packfile. -func (p *Parser) ReadSignature() ([]byte, error) { +// readSignature reads an returns the signature field in the packfile. +func (p *Parser) readSignature() ([]byte, error) { var sig = make([]byte, 4) - if _, err := io.ReadFull(p, sig); err != nil { + if _, err := io.ReadFull(p.r, sig); err != nil { return []byte{}, err } return sig, nil } -// IsValidSignature returns if sig is a valid packfile signature. -func (p Parser) IsValidSignature(sig []byte) bool { +// isValidSignature returns if sig is a valid packfile signature. +func (p *Parser) isValidSignature(sig []byte) bool { return bytes.Equal(sig, []byte{'P', 'A', 'C', 'K'}) } -// ReadVersion reads and returns the version field of a packfile. -func (p *Parser) ReadVersion() (uint32, error) { +// readVersion reads and returns the version field of a packfile. +func (p *Parser) readVersion() (uint32, error) { return p.readInt32() } -// IsSupportedVersion returns whether version v is supported by the parser. +// isSupportedVersion returns whether version v is supported by the parser. // The current supported version is VersionSupported, defined above. -func (p *Parser) IsSupportedVersion(v uint32) bool { +func (p *Parser) isSupportedVersion(v uint32) bool { return v == VersionSupported } -// ReadCount reads and returns the count of objects field of a packfile. -func (p *Parser) ReadCount() (uint32, error) { +// readCount reads and returns the count of objects field of a packfile. +func (p *Parser) readCount() (uint32, error) { return p.readInt32() } -// ReadHeader reads the whole packfile header (signature, version and -// object count). It returns the object count and performs checks on the -// validity of the signature and the version fields. -func (p Parser) ReadHeader() (uint32, error) { - sig, err := p.ReadSignature() - if err != nil { - if err == io.EOF { - return 0, ErrEmptyPackfile - } +// ReadInt32 reads 4 bytes and returns them as a Big Endian int32. +func (p *Parser) readInt32() (uint32, error) { + var v uint32 + if err := binary.Read(p.r, binary.BigEndian, &v); err != nil { return 0, err } - if !p.IsValidSignature(sig) { - return 0, ErrBadSignature + return v, nil +} + +func (p *Parser) NextObjectHeader() (*ObjectHeader, error) { + if err := p.discardObjectIfNeeded(); err != nil { + return nil, err } - ver, err := p.ReadVersion() + h := &ObjectHeader{} + p.pendingObject = h + + var err error + h.Offset, err = p.r.Offset() if err != nil { - return 0, err + return nil, err } - if !p.IsSupportedVersion(ver) { - return 0, ErrUnsupportedVersion.AddDetails("%d", ver) + h.Type, h.Length, err = p.readObjectTypeAndLength() + if err != nil { + return nil, err } - count, err := p.ReadCount() + switch h.Type { + case core.OFSDeltaObject: + no, err := p.readNegativeOffset() + if err != nil { + return nil, err + } + + h.OffsetReference = h.Offset + no + case core.REFDeltaObject: + var err error + h.Reference, err = p.readHash() + if err != nil { + return nil, err + } + } + + return h, nil +} + +func (s *Parser) discardObjectIfNeeded() error { + if s.pendingObject == nil { + return nil + } + + h := s.pendingObject + n, err := s.NextObject(ioutil.Discard) if err != nil { - return 0, err + return err } - return count, nil + if n != h.Length { + return fmt.Errorf( + "error discarding object, discarded %d, expected %d", + n, h.Length, + ) + } + + return nil } // ReadObjectTypeAndLength reads and returns the object type and the // length field from an object entry in a packfile. -func (p Parser) ReadObjectTypeAndLength() (core.ObjectType, int64, error) { +func (p Parser) readObjectTypeAndLength() (core.ObjectType, int64, error) { t, c, err := p.readType() if err != nil { return t, 0, err @@ -127,37 +197,25 @@ func (p Parser) ReadObjectTypeAndLength() (core.ObjectType, int64, error) { func (p Parser) readType() (core.ObjectType, byte, error) { var c byte var err error - if c, err = p.ReadByte(); err != nil { + if c, err = p.r.ReadByte(); err != nil { return core.ObjectType(0), 0, err } + typ := parseType(c) return typ, c, nil } -var ( - maskContinue = uint8(128) // 1000 0000 - maskType = uint8(112) // 0111 0000 - maskFirstLength = uint8(15) // 0000 1111 - firstLengthBits = uint8(4) // the first byte has 4 bits to store the length - maskLength = uint8(127) // 0111 1111 - lengthBits = uint8(7) // subsequent bytes has 7 bits to store the length -) - -func parseType(b byte) core.ObjectType { - return core.ObjectType((b & maskType) >> firstLengthBits) -} - // the length is codified in the last 4 bits of the first byte and in // the last 7 bits of subsequent bytes. Last byte has a 0 MSB. -func (p Parser) readLength(first byte) (int64, error) { +func (p *Parser) readLength(first byte) (int64, error) { length := int64(first & maskFirstLength) c := first shift := firstLengthBits var err error for moreBytesInLength(c) { - if c, err = p.ReadByte(); err != nil { + if c, err = p.r.ReadByte(); err != nil { return 0, err } @@ -168,56 +226,18 @@ func (p Parser) readLength(first byte) (int64, error) { return length, nil } -func moreBytesInLength(c byte) bool { - return c&maskContinue > 0 -} - -// FillObject fills the given object from an object entry in the packfile. -// Non-deltified and deltified objects are supported. -func (p Parser) FillObject(obj core.Object) error { - start, err := p.Offset() - if err != nil { - return err - } - - t, l, err := p.ReadObjectTypeAndLength() - if err != nil { - return err - } - - obj.SetSize(l) - - switch t { - case core.CommitObject, core.TreeObject, core.BlobObject, core.TagObject: - obj.SetType(t) - err = p.FillFromNonDeltaContent(obj) - case core.REFDeltaObject: - err = p.FillREFDeltaObjectContent(obj) - case core.OFSDeltaObject: - err = p.FillOFSDeltaObjectContent(obj, start) - default: - err = ErrInvalidObject.AddDetails("tag %q", t) - } - - return err +func (p *Parser) NextObject(w io.Writer) (written int64, err error) { + p.pendingObject = nil + return p.copyObject(w) } -// FillFromNonDeltaContent reads and fill a non-deltified object +// ReadRegularObject reads and write a non-deltified object // from it zlib stream in an object entry in the packfile. -func (p Parser) FillFromNonDeltaContent(obj core.Object) error { - w, err := obj.Writer() - if err != nil { - return err - } - - return p.inflate(w) -} - -func (p Parser) inflate(w io.Writer) (err error) { - zr, err := zlib.NewReader(p) +func (p *Parser) copyObject(w io.Writer) (int64, error) { + zr, err := zlib.NewReader(p.r) if err != nil { if err != zlib.ErrHeader { - return fmt.Errorf("zlib reading error: %s", err) + return -1, fmt.Errorf("zlib reading error: %s", err) } } @@ -228,78 +248,23 @@ func (p Parser) inflate(w io.Writer) (err error) { } }() - _, err = io.Copy(w, zr) - - return err + return io.Copy(w, zr) } -// FillREFDeltaObjectContent reads and returns an object specified by a -// REF-Delta entry in the packfile, form the hash onwards. -func (p Parser) FillREFDeltaObjectContent(obj core.Object) error { - refHash, err := p.ReadHash() - if err != nil { - return err - } - - base, err := p.RecallByHash(refHash) - if err != nil { - return err - } - - obj.SetType(base.Type()) - if err := p.ReadAndApplyDelta(obj, base); err != nil { - return err - } - - return nil +func (p *Parser) Checksum() (core.Hash, error) { + return p.readHash() } // ReadHash reads a hash. -func (p Parser) ReadHash() (core.Hash, error) { +func (p *Parser) readHash() (core.Hash, error) { var h core.Hash - if _, err := io.ReadFull(p, h[:]); err != nil { + if _, err := io.ReadFull(p.r, h[:]); err != nil { return core.ZeroHash, err } return h, nil } -// ReadAndApplyDelta reads and apply the base patched with the contents -// of a zlib compressed diff data in the delta portion of an object -// entry in the packfile. -func (p Parser) ReadAndApplyDelta(target, base core.Object) error { - buf := bytes.NewBuffer(nil) - if err := p.inflate(buf); err != nil { - return err - } - - return ApplyDelta(target, base, buf.Bytes()) -} - -// FillOFSDeltaObjectContent reads an fill an object specified by an -// OFS-delta entry in the packfile from it negative offset onwards. The -// start parameter is the offset of this particular object entry (the -// current offset minus the already processed type and length). -func (p Parser) FillOFSDeltaObjectContent(obj core.Object, start int64) error { - - jump, err := p.ReadNegativeOffset() - if err != nil { - return err - } - - base, err := p.RecallByOffset(start + jump) - if err != nil { - return err - } - - obj.SetType(base.Type()) - if err := p.ReadAndApplyDelta(obj, base); err != nil { - return err - } - - return nil -} - // ReadNegativeOffset reads and returns an offset from a OFS DELTA // object entry in a packfile. OFS DELTA offsets are specified in Git // VLQ special format: @@ -327,18 +292,18 @@ func (p Parser) FillOFSDeltaObjectContent(obj core.Object, start int64) error { // while (ofs >>= 7) // dheader[--pos] = 128 | (--ofs & 127); // -func (p Parser) ReadNegativeOffset() (int64, error) { +func (p *Parser) readNegativeOffset() (int64, error) { var c byte var err error - if c, err = p.ReadByte(); err != nil { + if c, err = p.r.ReadByte(); err != nil { return 0, err } var offset = int64(c & maskLength) for moreBytesInLength(c) { offset++ - if c, err = p.ReadByte(); err != nil { + if c, err = p.r.ReadByte(); err != nil { return 0, err } offset = (offset << lengthBits) + int64(c&maskLength) @@ -346,3 +311,47 @@ func (p Parser) ReadNegativeOffset() (int64, error) { return -offset, nil } + +func moreBytesInLength(c byte) bool { + return c&maskContinue > 0 +} + +var ( + maskContinue = uint8(128) // 1000 0000 + maskType = uint8(112) // 0111 0000 + maskFirstLength = uint8(15) // 0000 1111 + firstLengthBits = uint8(4) // the first byte has 4 bits to store the length + maskLength = uint8(127) // 0111 1111 + lengthBits = uint8(7) // subsequent bytes has 7 bits to store the length +) + +func parseType(b byte) core.ObjectType { + return core.ObjectType((b & maskType) >> firstLengthBits) +} + +type trackableReader struct { + io.Reader + count int64 +} + +// Read reads up to len(p) bytes into p. +func (r *trackableReader) Read(p []byte) (n int, err error) { + n, err = r.Reader.Read(p) + r.count += int64(n) + + return +} + +// ReadByte reads a byte. +func (r *trackableReader) ReadByte() (byte, error) { + var p [1]byte + _, err := r.Reader.Read(p[:]) + r.count++ + + return p[0], err +} + +// Offset returns the number of bytes read. +func (r *trackableReader) Offset() (int64, error) { + return r.count, nil +} |