aboutsummaryrefslogtreecommitdiffstats
path: root/formats/packfile/parser.go
diff options
context:
space:
mode:
authorAlberto Cortés <alcortesm@gmail.com>2016-07-04 17:09:22 +0200
committerMáximo Cuadros <mcuadros@gmail.com>2016-07-04 17:09:22 +0200
commit5e73f01cb2e027a8f02801635b79d3a9bc866914 (patch)
treec0e7eb355c9b8633d99bab9295cb72b6c3a9c0e1 /formats/packfile/parser.go
parent808076af869550a200a3a544c9ee2fa22a8b6a85 (diff)
downloadgo-git-5e73f01cb2e027a8f02801635b79d3a9bc866914.tar.gz
Adds support to open local repositories and to use file-based object storage (#55)v3.1.0
* remove some comments * idx writer/reader * Shut up ssh tests, they are annoying * Add file scheme test to clients * Add dummy file client * Add test fot file client * Make tests use fixture endpoint * add parser for packed-refs format * add parser for packed-refs format * WIP adding dir.Refs() tests * Add test for fixture refs * refs parser for the refs directory * Documentation * Add Capabilities to file client * tgz.Exatract now accpets a path instead of a Reader * fix bug in idxfile fanout calculation * remove dead code * packfile documentation * clean packfile parser code * add core.Object.Content() and returns errors for core.ObjectStorage.Iter() * add seekable storage * add dir repos to NewRepository * clean prints * Add dir client documentation to README * Organize the README * README * Clean tgz package * Clean temp dirs after tgz tests * Gometalinter on gitdir * Clean pattern function * metalinter tgz * metalinter gitdir * gitdir coverage and remove seekable packfile filedescriptor leak * gitdir Idxfile tests and remove file descriptor leak * gitdir Idxfile tests when no idx is found * clean storage/seekable/internal/index and some formats/idxfile API issues * clean storage/seekable * clean formats/idx * turn packfile/doc.go into packfile/doc.txt * move formats/packfile/reader to decoder * fix packfile decoder error names * improve documentation * comment packfile decoder errors * comment public API (format/packfile) * remve duplicated code in packfile decoder test * move tracking_reader into an internal package and clean it * use iota for packfile format * rename packfile parse.go to packfile object_at.go * clean packfile deltas * fix delta header size bug * improve delta documentation * clean packfile deltas * clean packfiles deltas * clean repository.go * Remove go 1.5 from Travis CI Because go 1.5 does not suport internal packages. 
* change local repo scheme to local:// * change "local://" to "file://" as the local scheme * fix broken indentation * shortens names of variables in short scopes * more shortening of variable names * more shortening of variable names * Rename git dir client to "file", as the scheme used for it * Fix file format ctor name, now that the package name has change * Sortcut local repo constructor to not use remotes The object storage is build directly in the repository ctor, instead of creating a remote and waiting for the user to pull it. * update README and fix some errors in it * remove file scheme client * Local respositories has now a new ctor This is, they are no longer identified by the scheme of the URL, but are created different from inception. * remove unused URL field form Repository * move all git dir logic to seekable sotrage ctor * fix documentation * Make formats/file/dir an internal package to storage/seekable * change package storage/seekable to storage/fs * clean storage/fs * overall storage/fs clean * more cleaning * some metalinter fixes * upgrade cshared to last changes * remove dead code * fix test error info * remove file scheme check from clients * fix test error message * fix test error message * fix error messages * style changes * fix comments everywhere * style changes * style changes * scaffolding and tests for local packfiles without ifx files * outsource index building from packfile to the packfile decoder * refactor packfile header reading into a new function * move code to generate index from packfile back to index package * add header parsing * fix documentation errata * add undeltified and OFS delta support for index building from the packfile * add tests for packfile with ref-deltas * support for packfiles with ref-deltas and no idx * refactor packfile format parser to reuse code * refactor packfile format parser to reuse code * refactor packfile format parser to reuse code * refactor packfile format parser to reuse code * refactor 
packfile format parser to reuse code * WIP refactor packfile format parser to reuse code * refactor packfile format parser to reuse code * remove prints from tests * remove prints from tests * refactor packfile.core into packfile.parser * rename packfile reader to something that shows it is a recaller * rename cannot recall error * rename packfile.Reader to packfile.ReadRecaller and document * speed up test by using StreamReader instead of SeekableReader when possible * clean packfile StreamReader * stream_reader tests * refactor packfile.StreamReader into packfile.StreamReadRecaller * refactor packfile.SeekableReader into packfile.SeekableReadRecaller and document it * generalize packfile.StreamReadRecaller test to all packfile.ReadRecaller implementations * speed up storage/fs tests * speed up tests in . by loading packfiles in memory * speed up repository tests by using and smaller fixture * restore doc.go files * rename packfile.ReadRecaller implementations to shorter names * update comments to type changes * packfile.Parser test (WIP) * packfile.Parser tests and add ForgetAll() to packfile.ReadRecaller * add test for packfile.ReadRecaller.ForgetAll() * clarify seekable being able to recallByOffset forgetted objects * use better names for internal maps * metalinter packfile package * speed up some tests * documentation fixes * change storage.fs package name to storage.proxy to avoid confusion with new filesystem support * New fs package and os transparent implementation Now NewRepositoryFromFS receives a fs and a path and tests are modified accordingly, but it is still not using for anything. * add fs to gitdir and proxy.store * reduce fs interface for easier implementation * remove garbage dirs from tgz tests * change file name gitdir/dir.go to gitdir/gitdir.go * fs.OS tests * metalinter utils/fs * add NewRepositoryFromFS documentation to README * Readability fixes to README * move tgz to an external dependency * move filesystem impl. 
example to example dir * rename proxy/store.go to proxy/storage.go for coherence with memory/storage.go * rename proxy package to seekable
Diffstat (limited to 'formats/packfile/parser.go')
-rw-r--r--formats/packfile/parser.go353
1 files changed, 353 insertions, 0 deletions
diff --git a/formats/packfile/parser.go b/formats/packfile/parser.go
new file mode 100644
index 0000000..d3463bd
--- /dev/null
+++ b/formats/packfile/parser.go
@@ -0,0 +1,353 @@
+package packfile
+
+import (
+ "bytes"
+ "compress/zlib"
+ "encoding/binary"
+ "fmt"
+ "io"
+
+ "gopkg.in/src-d/go-git.v3/core"
+ "gopkg.in/src-d/go-git.v3/storage/memory"
+)
+
+var (
+ // ErrEmptyPackfile is returned by ReadHeader when no data is found in the packfile.
+ ErrEmptyPackfile = NewError("empty packfile")
+ // ErrBadSignature is returned by ReadHeader when the signature in the packfile is incorrect.
+ ErrBadSignature = NewError("malformed pack file signature")
+ // ErrUnsupportedVersion is returned by ReadHeader when the packfile version is
+ // different from VersionSupported; ReadHeader adds the version found as details.
+ ErrUnsupportedVersion = NewError("unsupported packfile version")
+)
+
+const (
+ // VersionSupported is the packfile version supported by this parser; ReadHeader rejects any other.
+ VersionSupported = 2
+)
+
+// A Parser is a collection of functions to read and process data from a packfile.
+// Its zero value is not usable (the embedded ReadRecaller is nil); use the NewParser function below.
+type Parser struct {
+ ReadRecaller
+}
+
+// NewParser builds a Parser whose reads and recalls are all served by
+// r, the ReadRecaller wrapping the packfile.
+func NewParser(r ReadRecaller) *Parser {
+	parser := new(Parser)
+	parser.ReadRecaller = r
+
+	return parser
+}
+
+// readInt32 reads the next 4 bytes of the packfile and returns them
+// decoded as a big-endian uint32. Any error reported by io.ReadFull
+// is returned unchanged, together with a zero value.
+func (p Parser) readInt32() (uint32, error) {
+	var raw [4]byte
+	if _, err := io.ReadFull(p, raw[:]); err != nil {
+		return 0, err
+	}
+
+	return binary.BigEndian.Uint32(raw[:]), nil
+}
+
+// ReadSignature reads an returns the signature field in the packfile.
+func (p *Parser) ReadSignature() ([]byte, error) {
+ var sig = make([]byte, 4)
+ if _, err := io.ReadFull(p, sig); err != nil {
+ return []byte{}, err
+ }
+
+ return sig, nil
+}
+
+// IsValidSignature reports whether sig is exactly the four bytes
+// "PACK", the signature ReadHeader expects at the start of a packfile.
+func (p Parser) IsValidSignature(sig []byte) bool {
+	return string(sig) == "PACK"
+}
+
+// ReadVersion reads the version field of a packfile, a big-endian
+// 32-bit unsigned integer, and returns it.
+func (p *Parser) ReadVersion() (uint32, error) {
+	version, err := p.readInt32()
+
+	return version, err
+}
+
+// IsSupportedVersion reports whether the parser can handle packfiles
+// of version v, that is, whether v equals VersionSupported.
+func (p *Parser) IsSupportedVersion(v uint32) bool {
+	supported := v == VersionSupported
+
+	return supported
+}
+
+// ReadCount reads the object count field of a packfile, a big-endian
+// 32-bit unsigned integer, and returns it.
+func (p *Parser) ReadCount() (uint32, error) {
+	count, err := p.readInt32()
+
+	return count, err
+}
+
+// ReadHeader consumes the whole packfile header (signature, version
+// and object count, in that order) and returns the object count.
+//
+// It returns ErrEmptyPackfile if reading the signature fails with
+// io.EOF, ErrBadSignature if the signature is not valid and
+// ErrUnsupportedVersion, detailed with the version found, if the
+// version is not supported. Any other read error is returned as is.
+func (p Parser) ReadHeader() (uint32, error) {
+	signature, err := p.ReadSignature()
+	switch {
+	case err == io.EOF:
+		return 0, ErrEmptyPackfile
+	case err != nil:
+		return 0, err
+	case !p.IsValidSignature(signature):
+		return 0, ErrBadSignature
+	}
+
+	version, err := p.ReadVersion()
+	if err != nil {
+		return 0, err
+	}
+	if !p.IsSupportedVersion(version) {
+		return 0, ErrUnsupportedVersion.AddDetails("%d", version)
+	}
+
+	objects, err := p.ReadCount()
+	if err != nil {
+		return 0, err
+	}
+
+	return objects, nil
+}
+
+// ReadObjectTypeAndLength reads the header of an object entry in a
+// packfile and returns the object type and the length field stored
+// in it. If the length cannot be read, the type already decoded is
+// still returned along with the error.
+func (p Parser) ReadObjectTypeAndLength() (core.ObjectType, int64, error) {
+	typ, first, err := p.readType()
+	if err != nil {
+		return typ, 0, err
+	}
+
+	// The byte that carries the type also carries the first bits of
+	// the length, so it is handed over to readLength.
+	length, err := p.readLength(first)
+
+	return typ, length, err
+}
+
+func (p Parser) readType() (core.ObjectType, byte, error) {
+ var c byte
+ var err error
+ if c, err = p.ReadByte(); err != nil {
+ return core.ObjectType(0), 0, err
+ }
+ typ := parseType(c)
+
+ return typ, c, nil
+}
+
+// Bit masks and bit widths used to decode the variable-length headers
+// of object entries. They are fixed by the packfile format, so they
+// are declared as constants rather than as mutable variables.
+const (
+	maskContinue    uint8 = 0x80 // 1000 0000
+	maskType        uint8 = 0x70 // 0111 0000
+	maskFirstLength uint8 = 0x0f // 0000 1111
+	firstLengthBits uint8 = 4    // the first byte has 4 bits to store the length
+	maskLength      uint8 = 0x7f // 0111 1111
+	lengthBits      uint8 = 7    // subsequent bytes have 7 bits to store the length
+)
+
+// parseType extracts the object type from b, the first byte of an
+// object entry: the bits selected by maskType, moved down past the
+// length bits that share the byte.
+func parseType(b byte) core.ObjectType {
+	typeBits := (b & maskType) >> firstLengthBits
+
+	return core.ObjectType(typeBits)
+}
+
+// readLength decodes the length field of an object entry. The bits of
+// first selected by maskFirstLength are the least significant bits of
+// the length. While the last byte seen has its continuation bit set,
+// one more byte is read and the bits selected by maskLength are added
+// above the ones gathered so far. The last byte has a 0 MSB.
+func (p Parser) readLength(first byte) (int64, error) {
+	length := int64(first & maskFirstLength)
+
+	shift := firstLengthBits
+	for b := first; moreBytesInLength(b); shift += lengthBits {
+		var err error
+		if b, err = p.ReadByte(); err != nil {
+			return 0, err
+		}
+
+		length += int64(b&maskLength) << shift
+	}
+
+	return length, nil
+}
+
+// moreBytesInLength reports whether c has the continuation bit
+// (maskContinue) set, meaning another byte of the number follows.
+func moreBytesInLength(c byte) bool {
+	return c&maskContinue != 0
+}
+
+// ReadObject reads the object entry at the current position of the
+// packfile and returns it as a git object. Both non-deltified and
+// deltified (REF-delta and OFS-delta) entries are supported; for the
+// deltified ones the returned object has the type reported for its
+// base. Any other type yields ErrInvalidObject.
+func (p Parser) ReadObject() (core.Object, error) {
+	// Remember where the entry begins: OFS-delta entries locate their
+	// base relative to this offset.
+	start, err := p.Offset()
+	if err != nil {
+		return nil, err
+	}
+
+	// The length field is read to advance past it, but it is not used.
+	typ, _, err := p.ReadObjectTypeAndLength()
+	if err != nil {
+		return nil, err
+	}
+
+	var content []byte
+	switch typ {
+	case core.REFDeltaObject:
+		content, typ, err = p.ReadREFDeltaObjectContent()
+	case core.OFSDeltaObject:
+		content, typ, err = p.ReadOFSDeltaObjectContent(start)
+	case core.CommitObject, core.TreeObject, core.BlobObject, core.TagObject:
+		content, err = p.ReadNonDeltaObjectContent()
+	default:
+		err = ErrInvalidObject.AddDetails("tag %q", typ)
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	size := int64(len(content))
+
+	return memory.NewObject(typ, size, content), nil
+}
+
+// ReadNonDeltaObjectContent reads and returns the content of a
+// non-deltified object by inflating the zlib stream of its object
+// entry in the packfile.
+func (p Parser) ReadNonDeltaObjectContent() ([]byte, error) {
+	content, err := p.readZip()
+
+	return content, err
+}
+
+func (p Parser) readZip() ([]byte, error) {
+ buf := bytes.NewBuffer(nil)
+ err := p.inflate(buf)
+
+ return buf.Bytes(), err
+}
+
+// inflate decompresses the zlib stream found at the current position
+// of the packfile and writes the result to w.
+//
+// It returns an error if the zlib reader cannot be created (including
+// zlib.ErrHeader), if copying the inflated data to w fails or, when
+// none of those happened, if closing the zlib reader fails.
+func (p Parser) inflate(w io.Writer) (err error) {
+	zr, err := zlib.NewReader(p)
+	if err != nil {
+		// zlib.NewReader returns a nil reader along with any error,
+		// zlib.ErrHeader included, so there is nothing to read from or
+		// to close: return now instead of using the nil reader below.
+		return fmt.Errorf("zlib reading error: %s", err)
+	}
+
+	defer func() {
+		// A Close error must not hide an earlier copy error.
+		if closeErr := zr.Close(); err == nil {
+			err = closeErr
+		}
+	}()
+
+	_, err = io.Copy(w, zr)
+
+	return err
+}
+
+// ReadREFDeltaObjectContent reads a REF-delta entry of the packfile,
+// from the hash of its base object onwards, and returns the content
+// obtained by patching the base with the delta, together with the
+// type of the base. The base is looked up with RecallByHash.
+func (p Parser) ReadREFDeltaObjectContent() ([]byte, core.ObjectType, error) {
+	baseHash, err := p.ReadHash()
+	if err != nil {
+		return nil, core.ObjectType(0), err
+	}
+
+	base, err := p.RecallByHash(baseHash)
+	if err != nil {
+		return nil, core.ObjectType(0), err
+	}
+
+	patched, err := p.ReadSolveDelta(base.Content())
+	if err != nil {
+		// The base is known at this point, so its type is still reported.
+		return nil, base.Type(), err
+	}
+
+	return patched, base.Type(), nil
+}
+
+// ReadHash reads as many bytes as a core.Hash holds from the packfile
+// and returns them as a hash. If they cannot all be read it returns
+// core.ZeroHash and the error reported by io.ReadFull.
+func (p Parser) ReadHash() (core.Hash, error) {
+	var hash core.Hash
+
+	_, err := io.ReadFull(p, hash[:])
+	if err != nil {
+		return core.ZeroHash, err
+	}
+
+	return hash, nil
+}
+
+// ReadSolveDelta inflates the zlib compressed delta found in the delta
+// portion of an object entry in the packfile and returns the result
+// of applying it to base with PatchDelta.
+func (p Parser) ReadSolveDelta(base []byte) ([]byte, error) {
+	delta, err := p.readZip()
+	if err != nil {
+		return nil, err
+	}
+
+	patched := PatchDelta(base, delta)
+
+	return patched, nil
+}
+
+// ReadOFSDeltaObjectContent reads an OFS-delta entry of the packfile,
+// from its negative offset onwards, and returns the content obtained
+// by patching the base object with the delta, together with the type
+// of the base.
+//
+// The start parameter is the offset of this particular object entry
+// (the current offset minus the already processed type and length);
+// the base is recalled from start plus the negative offset read.
+func (p Parser) ReadOFSDeltaObjectContent(start int64) ([]byte, core.ObjectType, error) {
+	negOffset, err := p.ReadNegativeOffset()
+	if err != nil {
+		return nil, core.ObjectType(0), err
+	}
+
+	baseOffset := start + negOffset
+	base, err := p.RecallByOffset(baseOffset)
+	if err != nil {
+		return nil, core.ObjectType(0), err
+	}
+
+	patched, err := p.ReadSolveDelta(base.Content())
+	if err != nil {
+		// The base is known at this point, so its type is still reported.
+		return nil, base.Type(), err
+	}
+
+	return patched, base.Type(), nil
+}
+
+// ReadNegativeOffset reads and returns an offset from an OFS DELTA
+// object entry in a packfile. OFS DELTA offsets are specified in Git
+// VLQ special format:
+//
+// Ordinary VLQ has some redundancies, example: the number 358 can be
+// encoded as the 2-octet VLQ 0x8166 or the 3-octet VLQ 0x808166 or the
+// 4-octet VLQ 0x80808166 and so forth.
+//
+// To avoid these redundancies, the VLQ format used in Git removes this
+// prepending redundancy and extends the representable range of shorter
+// VLQs by adding an offset to VLQs of 2 or more octets in such a way
+// that the lowest possible value for such an (N+1)-octet VLQ becomes
+// exactly one more than the maximum possible value for an N-octet VLQ.
+// In particular, since a 1-octet VLQ can store a maximum value of 127,
+// the minimum 2-octet VLQ (0x8000) is assigned the value 128 instead of
+// 0. Conversely, the maximum value of such a 2-octet VLQ (0xff7f) is
+// 16511 instead of just 16383. Similarly, the minimum 3-octet VLQ
+// (0x808000) has a value of 16512 instead of zero, which means
+// that the maximum 3-octet VLQ (0xffff7f) is 2113663 instead of
+// just 2097151. And so forth.
+//
+// This is how the offset is saved in C:
+//
+// dheader[pos] = ofs & 127;
+// while (ofs >>= 7)
+// dheader[--pos] = 128 | (--ofs & 127);
+//
+func (p Parser) ReadNegativeOffset() (int64, error) {
+ var c byte
+ var err error
+
+ if c, err = p.ReadByte(); err != nil {
+ return 0, err
+ }
+
+ var offset = int64(c & maskLength)
+ for moreBytesInLength(c) {
+ offset++
+ if c, err = p.ReadByte(); err != nil {
+ return 0, err
+ }
+ offset = (offset << lengthBits) + int64(c&maskLength)
+ }
+
+ return -offset, nil
+}