aboutsummaryrefslogtreecommitdiffstats
path: root/formats/packfile/parser.go
diff options
context:
space:
mode:
authorMáximo Cuadros <mcuadros@gmail.com>2016-09-11 20:35:06 +0200
committerMáximo Cuadros <mcuadros@gmail.com>2016-09-11 20:35:06 +0200
commit58fe211f1b0e4863b425542d2fad15803276fd66 (patch)
treec200b6317b7b1c77c312faa411ab39e6e4468967 /formats/packfile/parser.go
parent6f1d1e00a7c615209cf6b25e314d033bda3b5d09 (diff)
downloadgo-git-58fe211f1b0e4863b425542d2fad15803276fd66.tar.gz
format: packfile fix ReadObjectAt without decode
Diffstat (limited to 'formats/packfile/parser.go')
-rw-r--r--formats/packfile/parser.go487
1 files changed, 0 insertions, 487 deletions
diff --git a/formats/packfile/parser.go b/formats/packfile/parser.go
deleted file mode 100644
index 6fa2f42..0000000
--- a/formats/packfile/parser.go
+++ /dev/null
@@ -1,487 +0,0 @@
-package packfile
-
-import (
- "bufio"
- "bytes"
- "compress/zlib"
- "encoding/binary"
- "fmt"
- "hash"
- "hash/crc32"
- "io"
- "io/ioutil"
-
- "gopkg.in/src-d/go-git.v4/core"
-)
-
-var (
- // ErrEmptyPackfile is returned by ReadHeader when no data is found in the packfile
- ErrEmptyPackfile = NewError("empty packfile")
- // ErrBadSignature is returned by ReadHeader when the signature in the packfile is incorrect.
- ErrBadSignature = NewError("malformed pack file signature")
- // ErrUnsupportedVersion is returned by ReadHeader when the packfile version is
- // different than VersionSupported.
- ErrUnsupportedVersion = NewError("unsupported packfile version")
- // ErrSeekNotSupported returned if seek is not support
- ErrSeekNotSupported = NewError("not seek support")
-)
-
-const (
- // VersionSupported is the packfile version supported by this parser.
- VersionSupported uint32 = 2
-)
-
-// ObjectHeader contains the information related to the object, this information
-// is collected from the previous bytes to the content of the object.
-type ObjectHeader struct {
- Type core.ObjectType
- Offset int64
- Length int64
- Reference core.Hash
- OffsetReference int64
-}
-
-// A Parser is a collection of functions to read and process data form a packfile.
-// Values from this type are not zero-value safe. See the NewParser function bellow.
-type Scanner struct {
- r reader
- crc hash.Hash32
-
- // pendingObject is used to detect if an object has been read, or still
- // is waiting to be read
- pendingObject *ObjectHeader
- version, objects uint32
-}
-
-// NewParser returns a new Parser that reads from the packfile represented by r.
-func NewScannerFromReader(r io.Reader) *Scanner {
- s := &trackableReader{Reader: r}
- return NewScanner(s)
-}
-
-func NewScanner(r io.ReadSeeker) *Scanner {
- crc := crc32.NewIEEE()
- seeker := newByteReadSeeker(r)
- tee := &teeReader{seeker, crc}
-
- return &Scanner{r: tee, crc: crc}
-}
-
-// Header reads the whole packfile header (signature, version and object count).
-// It returns the version and the object count and performs checks on the
-// validity of the signature and the version fields.
-func (s *Scanner) Header() (version, objects uint32, err error) {
- if s.version != 0 {
- return s.version, s.objects, nil
- }
-
- sig, err := s.readSignature()
- if err != nil {
- if err == io.EOF {
- err = ErrEmptyPackfile
- }
-
- return
- }
-
- if !s.isValidSignature(sig) {
- err = ErrBadSignature
- return
- }
-
- version, err = s.readVersion()
- s.version = version
- if err != nil {
- return
- }
-
- if !s.isSupportedVersion(version) {
- err = ErrUnsupportedVersion.AddDetails("%d", version)
- return
- }
-
- objects, err = s.readCount()
- s.objects = objects
- return
-}
-
-// readSignature reads an returns the signature field in the packfile.
-func (s *Scanner) readSignature() ([]byte, error) {
- var sig = make([]byte, 4)
- if _, err := io.ReadFull(s.r, sig); err != nil {
- return []byte{}, err
- }
-
- return sig, nil
-}
-
-// isValidSignature returns if sig is a valid packfile signature.
-func (s *Scanner) isValidSignature(sig []byte) bool {
- return bytes.Equal(sig, []byte{'P', 'A', 'C', 'K'})
-}
-
-// readVersion reads and returns the version field of a packfile.
-func (s *Scanner) readVersion() (uint32, error) {
- return s.readInt32()
-}
-
-// isSupportedVersion returns whether version v is supported by the parser.
-// The current supported version is VersionSupported, defined above.
-func (s *Scanner) isSupportedVersion(v uint32) bool {
- return v == VersionSupported
-}
-
-// readCount reads and returns the count of objects field of a packfile.
-func (s *Scanner) readCount() (uint32, error) {
- return s.readInt32()
-}
-
-// ReadInt32 reads 4 bytes and returns them as a Big Endian int32.
-func (s *Scanner) readInt32() (uint32, error) {
- var v uint32
- if err := binary.Read(s.r, binary.BigEndian, &v); err != nil {
- return 0, err
- }
-
- return v, nil
-}
-
-// NextObjectHeader returns the ObjectHeader for the next object in the reader
-func (s *Scanner) NextObjectHeader() (*ObjectHeader, error) {
- if err := s.doPending(); err != nil {
- return nil, err
- }
-
- s.crc.Reset()
-
- h := &ObjectHeader{}
- s.pendingObject = h
-
- var err error
- h.Offset, err = s.r.Seek(0, io.SeekCurrent)
- if err != nil {
- return nil, err
- }
-
- h.Type, h.Length, err = s.readObjectTypeAndLength()
- if err != nil {
- return nil, err
- }
-
- switch h.Type {
- case core.OFSDeltaObject:
- no, err := s.readNegativeOffset()
- if err != nil {
- return nil, err
- }
-
- h.OffsetReference = h.Offset + no
- case core.REFDeltaObject:
- var err error
- h.Reference, err = s.readHash()
- if err != nil {
- return nil, err
- }
- }
-
- return h, nil
-}
-
-func (s *Scanner) doPending() error {
- if s.version == 0 {
- var err error
- s.version, s.objects, err = s.Header()
- if err != nil {
- return err
- }
- }
-
- return s.discardObjectIfNeeded()
-}
-
-func (s *Scanner) discardObjectIfNeeded() error {
- if s.pendingObject == nil {
- return nil
- }
-
- h := s.pendingObject
- n, _, err := s.NextObject(ioutil.Discard)
- if err != nil {
- return err
- }
-
- if n != h.Length {
- return fmt.Errorf(
- "error discarding object, discarded %d, expected %d",
- n, h.Length,
- )
- }
-
- return nil
-}
-
-// ReadObjectTypeAndLength reads and returns the object type and the
-// length field from an object entry in a packfile.
-func (s *Scanner) readObjectTypeAndLength() (core.ObjectType, int64, error) {
- t, c, err := s.readType()
- if err != nil {
- return t, 0, err
- }
-
- l, err := s.readLength(c)
-
- return t, l, err
-}
-
-func (s *Scanner) readType() (core.ObjectType, byte, error) {
- var c byte
- var err error
- if c, err = s.readByte(); err != nil {
- return core.ObjectType(0), 0, err
- }
-
- typ := parseType(c)
-
- return typ, c, nil
-}
-
-// the length is codified in the last 4 bits of the first byte and in
-// the last 7 bits of subsequent bytes. Last byte has a 0 MSB.
-func (s *Scanner) readLength(first byte) (int64, error) {
- length := int64(first & maskFirstLength)
-
- c := first
- shift := firstLengthBits
- var err error
- for moreBytesInLength(c) {
- if c, err = s.readByte(); err != nil {
- return 0, err
- }
-
- length += int64(c&maskLength) << shift
- shift += lengthBits
- }
-
- return length, nil
-}
-
-func (s *Scanner) NextObject(w io.Writer) (written int64, crc32 uint32, err error) {
- defer s.crc.Reset()
-
- s.pendingObject = nil
- written, err = s.copyObject(w)
- crc32 = s.crc.Sum32()
- return
-}
-
-// ReadRegularObject reads and write a non-deltified object
-// from it zlib stream in an object entry in the packfile.
-func (s *Scanner) copyObject(w io.Writer) (int64, error) {
- zr, err := zlib.NewReader(s.r)
- if err != nil {
- return -1, fmt.Errorf("zlib reading error: %s", err)
- }
-
- defer func() {
- closeErr := zr.Close()
- if err == nil {
- err = closeErr
- }
- }()
-
- return io.Copy(w, zr)
-}
-
-// Seek sets a new offset from start, returns the old position before the change
-func (s *Scanner) Seek(offset int64) (previous int64, err error) {
- // if seeking we asume that you are not interested on the header
- if s.version == 0 {
- s.version = VersionSupported
- }
-
- previous, err = s.r.Seek(0, io.SeekCurrent)
- if err != nil {
- return -1, err
- }
-
- _, err = s.r.Seek(offset, io.SeekStart)
- return previous, err
-}
-
-func (s *Scanner) Checksum() (core.Hash, error) {
- err := s.discardObjectIfNeeded()
- if err != nil {
- return core.ZeroHash, err
- }
-
- return s.readHash()
-}
-
-// ReadHash reads a hash.
-func (s *Scanner) readHash() (core.Hash, error) {
- var h core.Hash
- if _, err := io.ReadFull(s.r, h[:]); err != nil {
- return core.ZeroHash, err
- }
-
- return h, nil
-}
-
-// ReadNegativeOffset reads and returns an offset from a OFS DELTA
-// object entry in a packfile. OFS DELTA offsets are specified in Git
-// VLQ special format:
-//
-// Ordinary VLQ has some redundancies, example: the number 358 can be
-// encoded as the 2-octet VLQ 0x8166 or the 3-octet VLQ 0x808166 or the
-// 4-octet VLQ 0x80808166 and so forth.
-//
-// To avoid these redundancies, the VLQ format used in Git removes this
-// prepending redundancy and extends the representable range of shorter
-// VLQs by adding an offset to VLQs of 2 or more octets in such a way
-// that the lowest possible value for such an (N+1)-octet VLQ becomes
-// exactly one more than the maximum possible value for an N-octet VLQ.
-// In particular, since a 1-octet VLQ can store a maximum value of 127,
-// the minimum 2-octet VLQ (0x8000) is assigned the value 128 instead of
-// 0. Conversely, the maximum value of such a 2-octet VLQ (0xff7f) is
-// 16511 instead of just 16383. Similarly, the minimum 3-octet VLQ
-// (0x808000) has a value of 16512 instead of zero, which means
-// that the maximum 3-octet VLQ (0xffff7f) is 2113663 instead of
-// just 2097151. And so forth.
-//
-// This is how the offset is saved in C:
-//
-// dheader[pos] = ofs & 127;
-// while (ofs >>= 7)
-// dheader[--pos] = 128 | (--ofs & 127);
-//
-func (s *Scanner) readNegativeOffset() (int64, error) {
- var c byte
- var err error
-
- if c, err = s.readByte(); err != nil {
- return 0, err
- }
-
- var offset = int64(c & maskLength)
- for moreBytesInLength(c) {
- offset++
- if c, err = s.readByte(); err != nil {
- return 0, err
- }
- offset = (offset << lengthBits) + int64(c&maskLength)
- }
-
- return -offset, nil
-}
-
-func (s *Scanner) readByte() (byte, error) {
- b, err := s.r.ReadByte()
- if err != nil {
- return 0, err
- }
-
- return b, err
-}
-
-func (s *Scanner) Close() error {
- _, err := io.Copy(ioutil.Discard, s.r)
- return err
-}
-
-func moreBytesInLength(c byte) bool {
- return c&maskContinue > 0
-}
-
-var (
- maskContinue = uint8(128) // 1000 0000
- maskType = uint8(112) // 0111 0000
- maskFirstLength = uint8(15) // 0000 1111
- firstLengthBits = uint8(4) // the first byte has 4 bits to store the length
- maskLength = uint8(127) // 0111 1111
- lengthBits = uint8(7) // subsequent bytes has 7 bits to store the length
-)
-
-func parseType(b byte) core.ObjectType {
- return core.ObjectType((b & maskType) >> firstLengthBits)
-}
-
-type trackableReader struct {
- count int64
- io.Reader
-}
-
-// Read reads up to len(p) bytes into p.
-func (r *trackableReader) Read(p []byte) (n int, err error) {
- n, err = r.Reader.Read(p)
- r.count += int64(n)
-
- return
-}
-
-// Seek only supports io.SeekCurrent, any other operation fails
-func (r *trackableReader) Seek(offset int64, whence int) (int64, error) {
- if whence != io.SeekCurrent {
- return -1, ErrSeekNotSupported
- }
-
- return r.count, nil
-}
-
-func newByteReadSeeker(r io.ReadSeeker) *bufferedSeeker {
- return &bufferedSeeker{
- r: r,
- Reader: *bufio.NewReader(r),
- }
-}
-
-type bufferedSeeker struct {
- r io.ReadSeeker
- bufio.Reader
-}
-
-func (r *bufferedSeeker) Seek(offset int64, whence int) (int64, error) {
- if whence == io.SeekCurrent {
- current, err := r.r.Seek(offset, whence)
- if err != nil {
- return current, err
- }
-
- return current - int64(r.Buffered()), nil
- }
-
- defer r.Reader.Reset(r.r)
- return r.r.Seek(offset, whence)
-}
-
-type reader interface {
- io.Reader
- io.ByteReader
- io.Seeker
-}
-
-type teeReader struct {
- reader
- w hash.Hash32
-}
-
-func (r *teeReader) Read(p []byte) (n int, err error) {
- n, err = r.reader.Read(p)
- if n > 0 {
- if n, err := r.w.Write(p[:n]); err != nil {
- return n, err
- }
- }
- return
-}
-
-func (r *teeReader) ReadByte() (b byte, err error) {
- b, err = r.reader.ReadByte()
- if err == nil {
- _, err := r.w.Write([]byte{b})
- if err != nil {
- return 0, err
- }
- }
-
- return
-}