aboutsummaryrefslogtreecommitdiffstats
path: root/formats/packfile
diff options
context:
space:
mode:
authorMáximo Cuadros <mcuadros@gmail.com>2016-11-08 23:46:38 +0100
committerGitHub <noreply@github.com>2016-11-08 23:46:38 +0100
commitac095bb12c4d29722b60ba9f20590fa7cfa6bc7d (patch)
tree223f36f336ba3414b1e45cac8af6c4744a5d7ef6 /formats/packfile
parente523701393598f4fa241dd407af9ff8925507a1a (diff)
downloadgo-git-ac095bb12c4d29722b60ba9f20590fa7cfa6bc7d.tar.gz
new plumbing package (#118)
* plumbing: now core was renamed to plumbing, and formats and clients moved inside
Diffstat (limited to 'formats/packfile')
-rw-r--r--formats/packfile/decoder.go306
-rw-r--r--formats/packfile/decoder_test.go182
-rw-r--r--formats/packfile/delta.go181
-rw-r--r--formats/packfile/doc.go168
-rw-r--r--formats/packfile/error.go30
-rw-r--r--formats/packfile/scanner.go418
-rw-r--r--formats/packfile/scanner_test.go189
7 files changed, 0 insertions, 1474 deletions
diff --git a/formats/packfile/decoder.go b/formats/packfile/decoder.go
deleted file mode 100644
index e96980a..0000000
--- a/formats/packfile/decoder.go
+++ /dev/null
@@ -1,306 +0,0 @@
-package packfile
-
-import (
- "bytes"
-
- "gopkg.in/src-d/go-git.v4/core"
-)
-
-// Format specifies if the packfile uses ref-deltas or ofs-deltas.
-type Format int
-
-// Possible values of the Format type.
-const (
- UnknownFormat Format = iota
- OFSDeltaFormat
- REFDeltaFormat
-)
-
-var (
- // ErrMaxObjectsLimitReached is returned by Decode when the number
- // of objects in the packfile is higher than
- // Decoder.MaxObjectsLimit.
- ErrMaxObjectsLimitReached = NewError("max. objects limit reached")
- // ErrInvalidObject is returned by Decode when an invalid object is
- // found in the packfile.
- ErrInvalidObject = NewError("invalid git object")
- // ErrPackEntryNotFound is returned by Decode when a reference in
- // the packfile references an unknown object.
- ErrPackEntryNotFound = NewError("can't find a pack entry")
- // ErrZLib is returned by Decode when there was an error unzipping
- // the packfile contents.
- ErrZLib = NewError("zlib reading error")
- // ErrCannotRecall is returned by RecallByOffset or RecallByHash if the object
- // to recall cannot be returned.
- ErrCannotRecall = NewError("cannot recall object")
- // ErrNonSeekable is returned if a NewDecoder is used with a non-seekable
- // reader and without a core.ObjectStorer, or if the ReadObjectAt method is
- // called without a seekable scanner
- ErrNonSeekable = NewError("non-seekable scanner")
- // ErrRollback error making Rollback over a transaction after an error
- ErrRollback = NewError("rollback error, during set error")
-)
-
-// Decoder reads and decodes packfiles from an input stream.
-type Decoder struct {
- s *Scanner
- o core.ObjectStorer
- tx core.Transaction
-
- offsetToHash map[int64]core.Hash
- hashToOffset map[core.Hash]int64
- crcs map[core.Hash]uint32
-}
-
-// NewDecoder returns a new Decoder that reads from r.
-func NewDecoder(s *Scanner, o core.ObjectStorer) (*Decoder, error) {
- if !s.IsSeekable && o == nil {
- return nil, ErrNonSeekable
- }
-
- return &Decoder{
- s: s,
- o: o,
-
- offsetToHash: make(map[int64]core.Hash, 0),
- hashToOffset: make(map[core.Hash]int64, 0),
- crcs: make(map[core.Hash]uint32, 0),
- }, nil
-}
-
-// Decode reads a packfile and stores it in the value pointed to by s.
-func (d *Decoder) Decode() (checksum core.Hash, err error) {
- if err := d.doDecode(); err != nil {
- return core.ZeroHash, err
- }
-
- return d.s.Checksum()
-}
-
-func (d *Decoder) doDecode() error {
- _, count, err := d.s.Header()
- if err != nil {
- return err
- }
-
- _, isTxStorer := d.o.(core.Transactioner)
- switch {
- case d.o == nil:
- return d.readObjects(int(count))
- case isTxStorer:
- return d.readObjectsWithObjectStorerTx(int(count))
- default:
- return d.readObjectsWithObjectStorer(int(count))
- }
-}
-
-func (d *Decoder) readObjects(count int) error {
- for i := 0; i < count; i++ {
- if _, err := d.ReadObject(); err != nil {
- return err
- }
- }
-
- return nil
-}
-
-func (d *Decoder) readObjectsWithObjectStorer(count int) error {
- for i := 0; i < count; i++ {
- obj, err := d.ReadObject()
- if err != nil {
- return err
- }
-
- if _, err := d.o.SetObject(obj); err != nil {
- return err
- }
- }
-
- return nil
-}
-
-func (d *Decoder) readObjectsWithObjectStorerTx(count int) error {
- tx := d.o.(core.Transactioner).Begin()
-
- for i := 0; i < count; i++ {
- obj, err := d.ReadObject()
- if err != nil {
- return err
- }
-
- if _, err := tx.SetObject(obj); err != nil {
- if rerr := d.tx.Rollback(); rerr != nil {
- return ErrRollback.AddDetails(
- "error: %s, during tx.Set error: %s", rerr, err,
- )
- }
-
- return err
- }
-
- }
-
- return tx.Commit()
-}
-
-// ReadObject reads a object from the stream and return it
-func (d *Decoder) ReadObject() (core.Object, error) {
- h, err := d.s.NextObjectHeader()
- if err != nil {
- return nil, err
- }
-
- obj := d.newObject()
- obj.SetSize(h.Length)
- obj.SetType(h.Type)
- var crc uint32
- switch h.Type {
- case core.CommitObject, core.TreeObject, core.BlobObject, core.TagObject:
- crc, err = d.fillRegularObjectContent(obj)
- case core.REFDeltaObject:
- crc, err = d.fillREFDeltaObjectContent(obj, h.Reference)
- case core.OFSDeltaObject:
- crc, err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference)
- default:
- err = ErrInvalidObject.AddDetails("type %q", h.Type)
- }
-
- if err != nil {
- return obj, err
- }
-
- hash := obj.Hash()
- d.setOffset(hash, h.Offset)
- d.setCRC(hash, crc)
-
- return obj, nil
-}
-
-func (d *Decoder) newObject() core.Object {
- if d.o == nil {
- return &core.MemoryObject{}
- }
-
- return d.o.NewObject()
-}
-
-// ReadObjectAt reads an object at the given location
-func (d *Decoder) ReadObjectAt(offset int64) (core.Object, error) {
- if !d.s.IsSeekable {
- return nil, ErrNonSeekable
- }
-
- beforeJump, err := d.s.Seek(offset)
- if err != nil {
- return nil, err
- }
-
- defer func() {
- _, seekErr := d.s.Seek(beforeJump)
- if err == nil {
- err = seekErr
- }
- }()
-
- return d.ReadObject()
-}
-
-func (d *Decoder) fillRegularObjectContent(obj core.Object) (uint32, error) {
- w, err := obj.Writer()
- if err != nil {
- return 0, err
- }
-
- _, crc, err := d.s.NextObject(w)
- return crc, err
-}
-
-func (d *Decoder) fillREFDeltaObjectContent(obj core.Object, ref core.Hash) (uint32, error) {
- buf := bytes.NewBuffer(nil)
- _, crc, err := d.s.NextObject(buf)
- if err != nil {
- return 0, err
- }
-
- base, err := d.recallByHash(ref)
- if err != nil {
- return 0, err
- }
-
- obj.SetType(base.Type())
- return crc, ApplyDelta(obj, base, buf.Bytes())
-}
-
-func (d *Decoder) fillOFSDeltaObjectContent(obj core.Object, offset int64) (uint32, error) {
- buf := bytes.NewBuffer(nil)
- _, crc, err := d.s.NextObject(buf)
- if err != nil {
- return 0, err
- }
-
- base, err := d.recallByOffset(offset)
- if err != nil {
- return 0, err
- }
-
- obj.SetType(base.Type())
- return crc, ApplyDelta(obj, base, buf.Bytes())
-}
-
-func (d *Decoder) setOffset(h core.Hash, offset int64) {
- d.offsetToHash[offset] = h
- d.hashToOffset[h] = offset
-}
-
-func (d *Decoder) setCRC(h core.Hash, crc uint32) {
- d.crcs[h] = crc
-}
-
-func (d *Decoder) recallByOffset(o int64) (core.Object, error) {
- if d.s.IsSeekable {
- return d.ReadObjectAt(o)
- }
-
- if h, ok := d.offsetToHash[o]; ok {
- return d.tx.Object(core.AnyObject, h)
- }
-
- return nil, core.ErrObjectNotFound
-}
-
-func (d *Decoder) recallByHash(h core.Hash) (core.Object, error) {
- if d.s.IsSeekable {
- if o, ok := d.hashToOffset[h]; ok {
- return d.ReadObjectAt(o)
- }
- }
-
- obj, err := d.tx.Object(core.AnyObject, h)
- if err != core.ErrObjectNotFound {
- return obj, err
- }
-
- return nil, core.ErrObjectNotFound
-}
-
-// SetOffsets sets the offsets, required when using the method ReadObjectAt,
-// without decoding the full packfile
-func (d *Decoder) SetOffsets(offsets map[core.Hash]int64) {
- d.hashToOffset = offsets
-}
-
-// Offsets returns the objects read offset
-func (d *Decoder) Offsets() map[core.Hash]int64 {
- return d.hashToOffset
-}
-
-// CRCs returns the CRC-32 for each object read
-func (d *Decoder) CRCs() map[core.Hash]uint32 {
- return d.crcs
-}
-
-// Close close the Scanner, usually this mean that the whole reader is read and
-// discarded
-func (d *Decoder) Close() error {
- return d.s.Close()
-}
diff --git a/formats/packfile/decoder_test.go b/formats/packfile/decoder_test.go
deleted file mode 100644
index aa178d7..0000000
--- a/formats/packfile/decoder_test.go
+++ /dev/null
@@ -1,182 +0,0 @@
-package packfile
-
-import (
- "io"
- "testing"
-
- "gopkg.in/src-d/go-git.v4/core"
- "gopkg.in/src-d/go-git.v4/fixtures"
- "gopkg.in/src-d/go-git.v4/formats/idxfile"
- "gopkg.in/src-d/go-git.v4/storage/memory"
-
- . "gopkg.in/check.v1"
-)
-
-func Test(t *testing.T) { TestingT(t) }
-
-type ReaderSuite struct {
- fixtures.Suite
-}
-
-var _ = Suite(&ReaderSuite{})
-
-func (s *ReaderSuite) TestNewDecodeNonSeekable(c *C) {
- scanner := NewScanner(nil)
- d, err := NewDecoder(scanner, nil)
-
- c.Assert(d, IsNil)
- c.Assert(err, NotNil)
-}
-
-func (s *ReaderSuite) TestDecode(c *C) {
- fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) {
- scanner := NewScanner(f.Packfile())
- storage := memory.NewStorage()
-
- d, err := NewDecoder(scanner, storage)
- c.Assert(err, IsNil)
- defer d.Close()
-
- ch, err := d.Decode()
- c.Assert(err, IsNil)
- c.Assert(ch, Equals, f.PackfileHash)
-
- assertObjects(c, storage, expectedHashes)
- })
-}
-
-func (s *ReaderSuite) TestDecodeInMemory(c *C) {
- fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) {
- scanner := NewScanner(f.Packfile())
- d, err := NewDecoder(scanner, nil)
- c.Assert(err, IsNil)
-
- ch, err := d.Decode()
- c.Assert(err, IsNil)
- c.Assert(ch, Equals, f.PackfileHash)
- })
-}
-
-var expectedHashes = []string{
- "918c48b83bd081e863dbe1b80f8998f058cd8294",
- "af2d6a6954d532f8ffb47615169c8fdf9d383a1a",
- "1669dce138d9b841a518c64b10914d88f5e488ea",
- "a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69",
- "b8e471f58bcbca63b07bda20e428190409c2db47",
- "35e85108805c84807bc66a02d91535e1e24b38b9",
- "b029517f6300c2da0f4b651b8642506cd6aaf45d",
- "32858aad3c383ed1ff0a0f9bdf231d54a00c9e88",
- "d3ff53e0564a9f87d8e84b6e28e5060e517008aa",
- "c192bd6a24ea1ab01d78686e417c8bdc7c3d197f",
- "d5c0f4ab811897cadf03aec358ae60d21f91c50d",
- "49c6bb89b17060d7b4deacb7b338fcc6ea2352a9",
- "cf4aa3b38974fb7d81f367c0830f7d78d65ab86b",
- "9dea2395f5403188298c1dabe8bdafe562c491e3",
- "586af567d0bb5e771e49bdd9434f5e0fb76d25fa",
- "9a48f23120e880dfbe41f7c9b7b708e9ee62a492",
- "5a877e6a906a2743ad6e45d99c1793642aaf8eda",
- "c8f1d8c61f9da76f4cb49fd86322b6e685dba956",
- "a8d315b2b1c615d43042c3a62402b8a54288cf5c",
- "a39771a7651f97faf5c72e08224d857fc35133db",
- "880cd14280f4b9b6ed3986d6671f907d7cc2a198",
- "fb72698cab7617ac416264415f13224dfd7a165e",
- "4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd",
- "eba74343e2f15d62adedfd8c883ee0262b5c8021",
- "c2d30fa8ef288618f65f6eed6e168e0d514886f4",
- "8dcef98b1d52143e1e2dbc458ffe38f925786bf2",
- "aa9b383c260e1d05fbbf6b30a02914555e20c725",
- "6ecf0ef2c2dffb796033e5a02219af86ec6584e5",
- "dbd3641b371024f44d0e469a9c8f5457b0660de1",
- "e8d3ffab552895c19b9fcf7aa264d277cde33881",
- "7e59600739c96546163833214c36459e324bad0a",
-}
-
-func (s *ReaderSuite) TestDecodeCRCs(c *C) {
- f := fixtures.Basic().ByTag("ofs-delta").One()
-
- scanner := NewScanner(f.Packfile())
- storage := memory.NewStorage()
-
- d, err := NewDecoder(scanner, storage)
- c.Assert(err, IsNil)
- _, err = d.Decode()
- c.Assert(err, IsNil)
-
- var sum uint64
- for _, crc := range d.CRCs() {
- sum += uint64(crc)
- }
-
- c.Assert(int(sum), Equals, 78022211966)
-}
-
-func (s *ReaderSuite) TestReadObjectAt(c *C) {
- f := fixtures.Basic().One()
- scanner := NewScanner(f.Packfile())
- d, err := NewDecoder(scanner, nil)
- c.Assert(err, IsNil)
-
- // when the packfile is ref-delta based, the offsets are required
- if f.Is("ref-delta") {
- offsets := getOffsetsFromIdx(f.Idx())
- d.SetOffsets(offsets)
- }
-
- // the object at offset 186 is a delta, so it should be recalled
- // without having been read before.
- obj, err := d.ReadObjectAt(186)
- c.Assert(err, IsNil)
- c.Assert(obj.Hash().String(), Equals, "6ecf0ef2c2dffb796033e5a02219af86ec6584e5")
-}
-
-func (s *ReaderSuite) TestOffsets(c *C) {
- f := fixtures.Basic().One()
- scanner := NewScanner(f.Packfile())
- d, err := NewDecoder(scanner, nil)
- c.Assert(err, IsNil)
-
- c.Assert(d.Offsets(), HasLen, 0)
-
- _, err = d.Decode()
- c.Assert(err, IsNil)
-
- c.Assert(d.Offsets(), HasLen, 31)
-}
-
-func (s *ReaderSuite) TestSetOffsets(c *C) {
- f := fixtures.Basic().One()
- scanner := NewScanner(f.Packfile())
- d, err := NewDecoder(scanner, nil)
- c.Assert(err, IsNil)
-
- h := core.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5")
- d.SetOffsets(map[core.Hash]int64{h: 42})
-
- o := d.Offsets()
- c.Assert(o, HasLen, 1)
- c.Assert(o[h], Equals, int64(42))
-}
-
-func assertObjects(c *C, s *memory.Storage, expects []string) {
- c.Assert(len(expects), Equals, len(s.Objects))
- for _, exp := range expects {
- obt, err := s.Object(core.AnyObject, core.NewHash(exp))
- c.Assert(err, IsNil)
- c.Assert(obt.Hash().String(), Equals, exp)
- }
-}
-
-func getOffsetsFromIdx(r io.Reader) map[core.Hash]int64 {
- idx := &idxfile.Idxfile{}
- err := idxfile.NewDecoder(r).Decode(idx)
- if err != nil {
- panic(err)
- }
-
- offsets := make(map[core.Hash]int64)
- for _, e := range idx.Entries {
- offsets[e.Hash] = int64(e.Offset)
- }
-
- return offsets
-}
diff --git a/formats/packfile/delta.go b/formats/packfile/delta.go
deleted file mode 100644
index d08f969..0000000
--- a/formats/packfile/delta.go
+++ /dev/null
@@ -1,181 +0,0 @@
-package packfile
-
-import (
- "io/ioutil"
-
- "gopkg.in/src-d/go-git.v4/core"
-)
-
-// See https://github.com/git/git/blob/49fa3dc76179e04b0833542fa52d0f287a4955ac/delta.h
-// https://github.com/git/git/blob/c2c5f6b1e479f2c38e0e01345350620944e3527f/patch-delta.c,
-// and https://github.com/tarruda/node-git-core/blob/master/src/js/delta.js
-// for details about the delta format.
-
-const deltaSizeMin = 4
-
-// ApplyDelta writes to target the result of applying the modification deltas in delta to base.
-func ApplyDelta(target, base core.Object, delta []byte) error {
- r, err := base.Reader()
- if err != nil {
- return err
- }
-
- w, err := target.Writer()
- if err != nil {
- return err
- }
-
- src, err := ioutil.ReadAll(r)
- if err != nil {
- return err
- }
-
- dst := PatchDelta(src, delta)
- target.SetSize(int64(len(dst)))
-
- if _, err := w.Write(dst); err != nil {
- return err
- }
-
- return nil
-}
-
-// PatchDelta returns the result of applying the modification deltas in delta to src.
-func PatchDelta(src, delta []byte) []byte {
- if len(delta) < deltaSizeMin {
- return nil
- }
-
- srcSz, delta := decodeLEB128(delta)
- if srcSz != uint(len(src)) {
- return nil
- }
-
- targetSz, delta := decodeLEB128(delta)
- remainingTargetSz := targetSz
-
- var dest []byte
- var cmd byte
- for {
- cmd = delta[0]
- delta = delta[1:]
- if isCopyFromSrc(cmd) {
- var offset, sz uint
- offset, delta = decodeOffset(cmd, delta)
- sz, delta = decodeSize(cmd, delta)
- if invalidSize(sz, targetSz) ||
- invalidOffsetSize(offset, sz, srcSz) {
- break
- }
- dest = append(dest, src[offset:offset+sz]...)
- remainingTargetSz -= sz
- } else if isCopyFromDelta(cmd) {
- sz := uint(cmd) // cmd is the size itself
- if invalidSize(sz, targetSz) {
- break
- }
- dest = append(dest, delta[0:sz]...)
- remainingTargetSz -= sz
- delta = delta[sz:]
- } else {
- return nil
- }
-
- if remainingTargetSz <= 0 {
- break
- }
- }
-
- return dest
-}
-
-// Decodes a number encoded as an unsigned LEB128 at the start of some
-// binary data and returns the decoded number and the rest of the
-// stream.
-//
-// This must be called twice on the delta data buffer, first to get the
-// expected source buffer size, and again to get the target buffer size.
-func decodeLEB128(input []byte) (uint, []byte) {
- var num, sz uint
- var b byte
- for {
- b = input[sz]
- num |= (uint(b) & payload) << (sz * 7) // concats 7 bits chunks
- sz++
-
- if uint(b)&continuation == 0 || sz == uint(len(input)) {
- break
- }
- }
-
- return num, input[sz:]
-}
-
-const (
- payload = 0x7f // 0111 1111
- continuation = 0x80 // 1000 0000
-)
-
-func isCopyFromSrc(cmd byte) bool {
- return (cmd & 0x80) != 0
-}
-
-func isCopyFromDelta(cmd byte) bool {
- return (cmd&0x80) == 0 && cmd != 0
-}
-
-func decodeOffset(cmd byte, delta []byte) (uint, []byte) {
- var offset uint
- if (cmd & 0x01) != 0 {
- offset = uint(delta[0])
- delta = delta[1:]
- }
- if (cmd & 0x02) != 0 {
- offset |= uint(delta[0]) << 8
- delta = delta[1:]
- }
- if (cmd & 0x04) != 0 {
- offset |= uint(delta[0]) << 16
- delta = delta[1:]
- }
- if (cmd & 0x08) != 0 {
- offset |= uint(delta[0]) << 24
- delta = delta[1:]
- }
-
- return offset, delta
-}
-
-func decodeSize(cmd byte, delta []byte) (uint, []byte) {
- var sz uint
- if (cmd & 0x10) != 0 {
- sz = uint(delta[0])
- delta = delta[1:]
- }
- if (cmd & 0x20) != 0 {
- sz |= uint(delta[0]) << 8
- delta = delta[1:]
- }
- if (cmd & 0x40) != 0 {
- sz |= uint(delta[0]) << 16
- delta = delta[1:]
- }
- if sz == 0 {
- sz = 0x10000
- }
-
- return sz, delta
-}
-
-func invalidSize(sz, targetSz uint) bool {
- return sz > targetSz
-}
-
-func invalidOffsetSize(offset, sz, srcSz uint) bool {
- return sumOverflows(offset, sz) ||
- offset+sz > srcSz
-}
-
-func sumOverflows(a, b uint) bool {
- return a+b < a
-}
diff --git a/formats/packfile/doc.go b/formats/packfile/doc.go
deleted file mode 100644
index 0b173ca..0000000
--- a/formats/packfile/doc.go
+++ /dev/null
@@ -1,168 +0,0 @@
-// Package packfile implements an encoder/decoder of the packfile format
-package packfile
-
-/*
-GIT pack format
-===============
-
-== pack-*.pack files have the following format:
-
- - A header appears at the beginning and consists of the following:
-
- 4-byte signature:
- The signature is: {'P', 'A', 'C', 'K'}
-
- 4-byte version number (network byte order):
- GIT currently accepts version number 2 or 3 but
- generates version 2 only.
-
- 4-byte number of objects contained in the pack (network byte order)
-
- Observation: we cannot have more than 4G versions ;-) and
- more than 4G objects in a pack.
-
- - The header is followed by number of object entries, each of
- which looks like this:
-
- (undeltified representation)
- n-byte type and length (3-bit type, (n-1)*7+4-bit length)
- compressed data
-
- (deltified representation)
- n-byte type and length (3-bit type, (n-1)*7+4-bit length)
- 20-byte base object name
- compressed delta data
-
- Observation: length of each object is encoded in a variable
- length format and is not constrained to 32-bit or anything.
-
- - The trailer records 20-byte SHA1 checksum of all of the above.
-
-== Original (version 1) pack-*.idx files have the following format:
-
- - The header consists of 256 4-byte network byte order
- integers. N-th entry of this table records the number of
- objects in the corresponding pack, the first byte of whose
- object name is less than or equal to N. This is called the
- 'first-level fan-out' table.
-
- - The header is followed by sorted 24-byte entries, one entry
- per object in the pack. Each entry is:
-
- 4-byte network byte order integer, recording where the
- object is stored in the packfile as the offset from the
- beginning.
-
- 20-byte object name.
-
- - The file is concluded with a trailer:
-
- A copy of the 20-byte SHA1 checksum at the end of
- corresponding packfile.
-
- 20-byte SHA1-checksum of all of the above.
-
-Pack Idx file:
-
- -- +--------------------------------+
-fanout | fanout[0] = 2 (for example) |-.
-table +--------------------------------+ |
- | fanout[1] | |
- +--------------------------------+ |
- | fanout[2] | |
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
- | fanout[255] = total objects |---.
- -- +--------------------------------+ | |
-main | offset | | |
-index | object name 00XXXXXXXXXXXXXXXX | | |
-table +--------------------------------+ | |
- | offset | | |
- | object name 00XXXXXXXXXXXXXXXX | | |
- +--------------------------------+<+ |
- .-| offset | |
- | | object name 01XXXXXXXXXXXXXXXX | |
- | +--------------------------------+ |
- | | offset | |
- | | object name 01XXXXXXXXXXXXXXXX | |
- | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
- | | offset | |
- | | object name FFXXXXXXXXXXXXXXXX | |
- --| +--------------------------------+<--+
-trailer | | packfile checksum |
- | +--------------------------------+
- | | idxfile checksum |
- | +--------------------------------+
- .-------.
- |
-Pack file entry: <+
-
- packed object header:
- 1-byte size extension bit (MSB)
- type (next 3 bit)
- size0 (lower 4-bit)
- n-byte sizeN (as long as MSB is set, each 7-bit)
- size0..sizeN form 4+7+7+..+7 bit integer, size0
- is the least significant part, and sizeN is the
- most significant part.
- packed object data:
- If it is not DELTA, then deflated bytes (the size above
- is the size before compression).
- If it is REF_DELTA, then
- 20-byte base object name SHA1 (the size above is the
- size of the delta data that follows).
- delta data, deflated.
- If it is OFS_DELTA, then
- n-byte offset (see below) interpreted as a negative
- offset from the type-byte of the header of the
- ofs-delta entry (the size above is the size of
- the delta data that follows).
- delta data, deflated.
-
- offset encoding:
- n bytes with MSB set in all but the last one.
- The offset is then the number constructed by
- concatenating the lower 7 bit of each byte, and
- for n >= 2 adding 2^7 + 2^14 + ... + 2^(7*(n-1))
- to the result.
-
-
-
-== Version 2 pack-*.idx files support packs larger than 4 GiB, and
- have some other reorganizations. They have the format:
-
- - A 4-byte magic number '\377tOc' which is an unreasonable
- fanout[0] value.
-
- - A 4-byte version number (= 2)
-
- - A 256-entry fan-out table just like v1.
-
- - A table of sorted 20-byte SHA1 object names. These are
- packed together without offset values to reduce the cache
- footprint of the binary search for a specific object name.
-
- - A table of 4-byte CRC32 values of the packed object data.
- This is new in v2 so compressed data can be copied directly
- from pack to pack during repacking without undetected
- data corruption.
-
- - A table of 4-byte offset values (in network byte order).
- These are usually 31-bit pack file offsets, but large
- offsets are encoded as an index into the next table with
- the msbit set.
-
- - A table of 8-byte offset entries (empty for pack files less
- than 2 GiB). Pack files are organized with heavily used
- objects toward the front, so most object references should
- not need to refer to this table.
-
- - The same trailer as a v1 pack file:
-
- A copy of the 20-byte SHA1 checksum at the end of
- corresponding packfile.
-
- 20-byte SHA1-checksum of all of the above.
-
-From:
-https://www.kernel.org/pub/software/scm/git/docs/v1.7.5/technical/pack-protocol.txt
-*/
diff --git a/formats/packfile/error.go b/formats/packfile/error.go
deleted file mode 100644
index c0b9163..0000000
--- a/formats/packfile/error.go
+++ /dev/null
@@ -1,30 +0,0 @@
-package packfile
-
-import "fmt"
-
-// Error specifies errors returned during packfile parsing.
-type Error struct {
- reason, details string
-}
-
-// NewError returns a new error.
-func NewError(reason string) *Error {
- return &Error{reason: reason}
-}
-
-// Error returns a text representation of the error.
-func (e *Error) Error() string {
- if e.details == "" {
- return e.reason
- }
-
- return fmt.Sprintf("%s: %s", e.reason, e.details)
-}
-
-// AddDetails adds details to an error, with additional text.
-func (e *Error) AddDetails(format string, args ...interface{}) *Error {
- return &Error{
- reason: e.reason,
- details: fmt.Sprintf(format, args...),
- }
-}
diff --git a/formats/packfile/scanner.go b/formats/packfile/scanner.go
deleted file mode 100644
index 69cc7d0..0000000
--- a/formats/packfile/scanner.go
+++ /dev/null
@@ -1,418 +0,0 @@
-package packfile
-
-import (
- "bufio"
- "bytes"
- "compress/zlib"
- "fmt"
- "hash"
- "hash/crc32"
- "io"
- "io/ioutil"
-
- "gopkg.in/src-d/go-git.v4/core"
- "gopkg.in/src-d/go-git.v4/utils/binary"
-)
-
-var (
- // ErrEmptyPackfile is returned by ReadHeader when no data is found in the packfile
- ErrEmptyPackfile = NewError("empty packfile")
- // ErrBadSignature is returned by ReadHeader when the signature in the packfile is incorrect.
- ErrBadSignature = NewError("malformed pack file signature")
- // ErrUnsupportedVersion is returned by ReadHeader when the packfile version is
- // different than VersionSupported.
- ErrUnsupportedVersion = NewError("unsupported packfile version")
- // ErrSeekNotSupported is returned if seek is not supported
- ErrSeekNotSupported = NewError("not seek support")
-)
-
-const (
- // VersionSupported is the packfile version supported by this parser.
- VersionSupported uint32 = 2
-)
-
-// ObjectHeader contains the information related to the object, this information
-// is collected from the previous bytes to the content of the object.
-type ObjectHeader struct {
- Type core.ObjectType
- Offset int64
- Length int64
- Reference core.Hash
- OffsetReference int64
-}
-
-type Scanner struct {
- r reader
- crc hash.Hash32
-
- // pendingObject is used to detect if an object has been read, or still
- // is waiting to be read
- pendingObject *ObjectHeader
- version, objects uint32
-
- // IsSeekable says if this scanner can do Seek or not; to have a seekable
- // Scanner, an r implementing io.Seeker is required
- IsSeekable bool
-}
-
-// NewScanner returns a new Scanner based on a reader, if the given reader
-// implements io.ReadSeeker the Scanner will be also Seekable
-func NewScanner(r io.Reader) *Scanner {
- seeker, ok := r.(io.ReadSeeker)
- if !ok {
- seeker = &trackableReader{Reader: r}
- }
-
- crc := crc32.NewIEEE()
- return &Scanner{
- r: &teeReader{
- newByteReadSeeker(seeker),
- crc,
- },
- crc: crc,
- IsSeekable: ok,
- }
-}
-
-// Header reads the whole packfile header (signature, version and object count).
-// It returns the version and the object count and performs checks on the
-// validity of the signature and the version fields.
-func (s *Scanner) Header() (version, objects uint32, err error) {
- if s.version != 0 {
- return s.version, s.objects, nil
- }
-
- sig, err := s.readSignature()
- if err != nil {
- if err == io.EOF {
- err = ErrEmptyPackfile
- }
-
- return
- }
-
- if !s.isValidSignature(sig) {
- err = ErrBadSignature
- return
- }
-
- version, err = s.readVersion()
- s.version = version
- if err != nil {
- return
- }
-
- if !s.isSupportedVersion(version) {
- err = ErrUnsupportedVersion.AddDetails("%d", version)
- return
- }
-
- objects, err = s.readCount()
- s.objects = objects
- return
-}
-
-// readSignature reads and returns the signature field in the packfile.
-func (s *Scanner) readSignature() ([]byte, error) {
- var sig = make([]byte, 4)
- if _, err := io.ReadFull(s.r, sig); err != nil {
- return []byte{}, err
- }
-
- return sig, nil
-}
-
-// isValidSignature returns if sig is a valid packfile signature.
-func (s *Scanner) isValidSignature(sig []byte) bool {
- return bytes.Equal(sig, []byte{'P', 'A', 'C', 'K'})
-}
-
-// readVersion reads and returns the version field of a packfile.
-func (s *Scanner) readVersion() (uint32, error) {
- return binary.ReadUint32(s.r)
-}
-
-// isSupportedVersion returns whether version v is supported by the parser.
-// The current supported version is VersionSupported, defined above.
-func (s *Scanner) isSupportedVersion(v uint32) bool {
- return v == VersionSupported
-}
-
-// readCount reads and returns the count of objects field of a packfile.
-func (s *Scanner) readCount() (uint32, error) {
- return binary.ReadUint32(s.r)
-}
-
-// NextObjectHeader returns the ObjectHeader for the next object in the reader
-func (s *Scanner) NextObjectHeader() (*ObjectHeader, error) {
- if err := s.doPending(); err != nil {
- return nil, err
- }
-
- s.crc.Reset()
-
- h := &ObjectHeader{}
- s.pendingObject = h
-
- var err error
- h.Offset, err = s.r.Seek(0, io.SeekCurrent)
- if err != nil {
- return nil, err
- }
-
- h.Type, h.Length, err = s.readObjectTypeAndLength()
- if err != nil {
- return nil, err
- }
-
- switch h.Type {
- case core.OFSDeltaObject:
- no, err := binary.ReadVariableWidthInt(s.r)
- if err != nil {
- return nil, err
- }
-
- h.OffsetReference = h.Offset - no
- case core.REFDeltaObject:
- var err error
- h.Reference, err = binary.ReadHash(s.r)
- if err != nil {
- return nil, err
- }
- }
-
- return h, nil
-}
-
-func (s *Scanner) doPending() error {
- if s.version == 0 {
- var err error
- s.version, s.objects, err = s.Header()
- if err != nil {
- return err
- }
- }
-
- return s.discardObjectIfNeeded()
-}
-
-func (s *Scanner) discardObjectIfNeeded() error {
- if s.pendingObject == nil {
- return nil
- }
-
- h := s.pendingObject
- n, _, err := s.NextObject(ioutil.Discard)
- if err != nil {
- return err
- }
-
- if n != h.Length {
- return fmt.Errorf(
- "error discarding object, discarded %d, expected %d",
- n, h.Length,
- )
- }
-
- return nil
-}
-
-// ReadObjectTypeAndLength reads and returns the object type and the
-// length field from an object entry in a packfile.
-func (s *Scanner) readObjectTypeAndLength() (core.ObjectType, int64, error) {
- t, c, err := s.readType()
- if err != nil {
- return t, 0, err
- }
-
- l, err := s.readLength(c)
-
- return t, l, err
-}
-
-const (
- maskType = uint8(112) // 0111 0000
- maskFirstLength = uint8(15) // 0000 1111
- maskContinue = uint8(128) // 1000 000
- firstLengthBits = uint8(4) // the first byte has 4 bits to store the length
- maskLength = uint8(127) // 0111 1111
- lengthBits = uint8(7) // subsequent bytes has 7 bits to store the length
-)
-
-func (s *Scanner) readType() (core.ObjectType, byte, error) {
- var c byte
- var err error
- if c, err = s.r.ReadByte(); err != nil {
- return core.ObjectType(0), 0, err
- }
-
- typ := parseType(c)
-
- return typ, c, nil
-}
-
-func parseType(b byte) core.ObjectType {
- return core.ObjectType((b & maskType) >> firstLengthBits)
-}
-
-// the length is codified in the last 4 bits of the first byte and in
-// the last 7 bits of subsequent bytes. Last byte has a 0 MSB.
-func (s *Scanner) readLength(first byte) (int64, error) {
- length := int64(first & maskFirstLength)
-
- c := first
- shift := firstLengthBits
- var err error
- for c&maskContinue > 0 {
- if c, err = s.r.ReadByte(); err != nil {
- return 0, err
- }
-
- length += int64(c&maskLength) << shift
- shift += lengthBits
- }
-
- return length, nil
-}
-
-// NextObject writes the content of the next object into the reader, returns
-// the number of bytes written, the CRC32 of the content and an error, if any
-func (s *Scanner) NextObject(w io.Writer) (written int64, crc32 uint32, err error) {
- defer s.crc.Reset()
-
- s.pendingObject = nil
- written, err = s.copyObject(w)
- crc32 = s.crc.Sum32()
- return
-}
-
-// copyObject reads and writes a non-deltified object
-// from its zlib stream in an object entry in the packfile.
-func (s *Scanner) copyObject(w io.Writer) (int64, error) {
- zr, err := zlib.NewReader(s.r)
- if err != nil {
- return -1, fmt.Errorf("zlib reading error: %s", err)
- }
-
- defer func() {
- closeErr := zr.Close()
- if err == nil {
- err = closeErr
- }
- }()
-
- return io.Copy(w, zr)
-}
-
-// Seek sets a new offset from start, returns the old position before the change
-func (s *Scanner) Seek(offset int64) (previous int64, err error) {
- // if seeking we assume that you are not interested in the header
- if s.version == 0 {
- s.version = VersionSupported
- }
-
- previous, err = s.r.Seek(0, io.SeekCurrent)
- if err != nil {
- return -1, err
- }
-
- _, err = s.r.Seek(offset, io.SeekStart)
- return previous, err
-}
-
-// Checksum returns the checksum of the packfile
-func (s *Scanner) Checksum() (core.Hash, error) {
- err := s.discardObjectIfNeeded()
- if err != nil {
- return core.ZeroHash, err
- }
-
- return binary.ReadHash(s.r)
-}
-
-// Close reads the reader until io.EOF
-func (s *Scanner) Close() error {
- _, err := io.Copy(ioutil.Discard, s.r)
- return err
-}
-
-type trackableReader struct {
- count int64
- io.Reader
-}
-
-// Read reads up to len(p) bytes into p.
-func (r *trackableReader) Read(p []byte) (n int, err error) {
- n, err = r.Reader.Read(p)
- r.count += int64(n)
-
- return
-}
-
-// Seek only supports io.SeekCurrent, any other operation fails
-func (r *trackableReader) Seek(offset int64, whence int) (int64, error) {
- if whence != io.SeekCurrent {
- return -1, ErrSeekNotSupported
- }
-
- return r.count, nil
-}
-
-func newByteReadSeeker(r io.ReadSeeker) *bufferedSeeker {
- return &bufferedSeeker{
- r: r,
- Reader: *bufio.NewReader(r),
- }
-}
-
-type bufferedSeeker struct {
- r io.ReadSeeker
- bufio.Reader
-}
-
-func (r *bufferedSeeker) Seek(offset int64, whence int) (int64, error) {
- if whence == io.SeekCurrent {
- current, err := r.r.Seek(offset, whence)
- if err != nil {
- return current, err
- }
-
- return current - int64(r.Buffered()), nil
- }
-
- defer r.Reader.Reset(r.r)
- return r.r.Seek(offset, whence)
-}
-
-type reader interface {
- io.Reader
- io.ByteReader
- io.Seeker
-}
-
-type teeReader struct {
- reader
- w hash.Hash32
-}
-
-func (r *teeReader) Read(p []byte) (n int, err error) {
- n, err = r.reader.Read(p)
- if n > 0 {
- if n, err := r.w.Write(p[:n]); err != nil {
- return n, err
- }
- }
- return
-}
-
-func (r *teeReader) ReadByte() (b byte, err error) {
- b, err = r.reader.ReadByte()
- if err == nil {
- _, err := r.w.Write([]byte{b})
- if err != nil {
- return 0, err
- }
- }
-
- return
-}
diff --git a/formats/packfile/scanner_test.go b/formats/packfile/scanner_test.go
deleted file mode 100644
index 5f80da0..0000000
--- a/formats/packfile/scanner_test.go
+++ /dev/null
@@ -1,189 +0,0 @@
-package packfile
-
-import (
- "bytes"
- "io"
-
- . "gopkg.in/check.v1"
- "gopkg.in/src-d/go-git.v4/core"
- "gopkg.in/src-d/go-git.v4/fixtures"
-)
-
-type ScannerSuite struct {
- fixtures.Suite
-}
-
-var _ = Suite(&ScannerSuite{})
-
-func (s *ScannerSuite) TestHeader(c *C) {
- r := fixtures.Basic().One().Packfile()
- p := NewScanner(r)
-
- version, objects, err := p.Header()
- c.Assert(err, IsNil)
- c.Assert(version, Equals, VersionSupported)
- c.Assert(objects, Equals, uint32(31))
-}
-
-func (s *ScannerSuite) TestNextObjectHeaderWithoutHeader(c *C) {
- r := fixtures.Basic().One().Packfile()
- p := NewScanner(r)
-
- h, err := p.NextObjectHeader()
- c.Assert(err, IsNil)
- c.Assert(h, DeepEquals, &expectedHeadersOFS[0])
-
- version, objects, err := p.Header()
- c.Assert(err, IsNil)
- c.Assert(version, Equals, VersionSupported)
- c.Assert(objects, Equals, uint32(31))
-}
-
-func (s *ScannerSuite) TestNextObjectHeaderREFDelta(c *C) {
- s.testNextObjectHeader(c, "ref-delta", expectedHeadersREF)
-}
-
-func (s *ScannerSuite) TestNextObjectHeaderOFSDelta(c *C) {
- s.testNextObjectHeader(c, "ofs-delta", expectedHeadersOFS)
-}
-
-func (s *ScannerSuite) testNextObjectHeader(c *C, tag string, expected []ObjectHeader) {
- r := fixtures.Basic().ByTag(tag).One().Packfile()
- p := NewScanner(r)
-
- _, objects, err := p.Header()
- c.Assert(err, IsNil)
-
- for i := 0; i < int(objects); i++ {
- h, err := p.NextObjectHeader()
- c.Assert(err, IsNil)
- c.Assert(*h, DeepEquals, expected[i])
-
- buf := bytes.NewBuffer(nil)
- n, _, err := p.NextObject(buf)
- c.Assert(err, IsNil)
- c.Assert(n, Equals, h.Length)
- }
-
- n, err := p.Checksum()
- c.Assert(err, IsNil)
- c.Assert(n, HasLen, 20)
-}
-
-func (s *ScannerSuite) TestNextObjectHeaderWithOutReadObject(c *C) {
- f := fixtures.Basic().ByTag("ref-delta").One()
- r := f.Packfile()
- p := NewScanner(r)
-
- _, objects, err := p.Header()
- c.Assert(err, IsNil)
-
- for i := 0; i < int(objects); i++ {
- h, _ := p.NextObjectHeader()
- c.Assert(err, IsNil)
- c.Assert(*h, DeepEquals, expectedHeadersREF[i])
- }
-
- err = p.discardObjectIfNeeded()
- c.Assert(err, IsNil)
-
- n, err := p.Checksum()
- c.Assert(err, IsNil)
- c.Assert(n, Equals, f.PackfileHash)
-}
-
-func (s *ScannerSuite) TestNextObjectHeaderWithOutReadObjectNonSeekable(c *C) {
- f := fixtures.Basic().ByTag("ref-delta").One()
- r := io.MultiReader(f.Packfile())
- p := NewScanner(r)
-
- _, objects, err := p.Header()
- c.Assert(err, IsNil)
-
- for i := 0; i < int(objects); i++ {
- h, _ := p.NextObjectHeader()
- c.Assert(err, IsNil)
- c.Assert(*h, DeepEquals, expectedHeadersREF[i])
- }
-
- err = p.discardObjectIfNeeded()
- c.Assert(err, IsNil)
-
- n, err := p.Checksum()
- c.Assert(err, IsNil)
- c.Assert(n, Equals, f.PackfileHash)
-}
-
-var expectedHeadersOFS = []ObjectHeader{
- {Type: core.CommitObject, Offset: 12, Length: 254},
- {Type: core.OFSDeltaObject, Offset: 186, Length: 93, OffsetReference: 12},
- {Type: core.CommitObject, Offset: 286, Length: 242},
- {Type: core.CommitObject, Offset: 449, Length: 242},
- {Type: core.CommitObject, Offset: 615, Length: 333},
- {Type: core.CommitObject, Offset: 838, Length: 332},
- {Type: core.CommitObject, Offset: 1063, Length: 244},
- {Type: core.CommitObject, Offset: 1230, Length: 243},
- {Type: core.CommitObject, Offset: 1392, Length: 187},
- {Type: core.BlobObject, Offset: 1524, Length: 189},
- {Type: core.BlobObject, Offset: 1685, Length: 18},
- {Type: core.BlobObject, Offset: 1713, Length: 1072},
- {Type: core.BlobObject, Offset: 2351, Length: 76110},
- {Type: core.BlobObject, Offset: 78050, Length: 2780},
- {Type: core.BlobObject, Offset: 78882, Length: 217848},
- {Type: core.BlobObject, Offset: 80725, Length: 706},
- {Type: core.BlobObject, Offset: 80998, Length: 11488},
- {Type: core.BlobObject, Offset: 84032, Length: 78},
- {Type: core.TreeObject, Offset: 84115, Length: 272},
- {Type: core.OFSDeltaObject, Offset: 84375, Length: 43, OffsetReference: 84115},
- {Type: core.TreeObject, Offset: 84430, Length: 38},
- {Type: core.TreeObject, Offset: 84479, Length: 75},
- {Type: core.TreeObject, Offset: 84559, Length: 38},
- {Type: core.TreeObject, Offset: 84608, Length: 34},
- {Type: core.BlobObject, Offset: 84653, Length: 9},
- {Type: core.OFSDeltaObject, Offset: 84671, Length: 6, OffsetReference: 84375},
- {Type: core.OFSDeltaObject, Offset: 84688, Length: 9, OffsetReference: 84375},
- {Type: core.OFSDeltaObject, Offset: 84708, Length: 6, OffsetReference: 84375},
- {Type: core.OFSDeltaObject, Offset: 84725, Length: 5, OffsetReference: 84115},
- {Type: core.OFSDeltaObject, Offset: 84741, Length: 8, OffsetReference: 84375},
- {Type: core.OFSDeltaObject, Offset: 84760, Length: 4, OffsetReference: 84741},
-}
-
-var expectedHeadersREF = []ObjectHeader{
- {Type: core.CommitObject, Offset: 12, Length: 254},
- {Type: core.REFDeltaObject, Offset: 186, Length: 93,
- Reference: core.NewHash("e8d3ffab552895c19b9fcf7aa264d277cde33881")},
- {Type: core.CommitObject, Offset: 304, Length: 242},
- {Type: core.CommitObject, Offset: 467, Length: 242},
- {Type: core.CommitObject, Offset: 633, Length: 333},
- {Type: core.CommitObject, Offset: 856, Length: 332},
- {Type: core.CommitObject, Offset: 1081, Length: 243},
- {Type: core.CommitObject, Offset: 1243, Length: 244},
- {Type: core.CommitObject, Offset: 1410, Length: 187},
- {Type: core.BlobObject, Offset: 1542, Length: 189},
- {Type: core.BlobObject, Offset: 1703, Length: 18},
- {Type: core.BlobObject, Offset: 1731, Length: 1072},
- {Type: core.BlobObject, Offset: 2369, Length: 76110},
- {Type: core.TreeObject, Offset: 78068, Length: 38},
- {Type: core.BlobObject, Offset: 78117, Length: 2780},
- {Type: core.TreeObject, Offset: 79049, Length: 75},
- {Type: core.BlobObject, Offset: 79129, Length: 217848},
- {Type: core.BlobObject, Offset: 80972, Length: 706},
- {Type: core.TreeObject, Offset: 81265, Length: 38},
- {Type: core.BlobObject, Offset: 81314, Length: 11488},
- {Type: core.TreeObject, Offset: 84752, Length: 34},
- {Type: core.BlobObject, Offset: 84797, Length: 78},
- {Type: core.TreeObject, Offset: 84880, Length: 271},
- {Type: core.REFDeltaObject, Offset: 85141, Length: 6,
- Reference: core.NewHash("a8d315b2b1c615d43042c3a62402b8a54288cf5c")},
- {Type: core.REFDeltaObject, Offset: 85176, Length: 37,
- Reference: core.NewHash("fb72698cab7617ac416264415f13224dfd7a165e")},
- {Type: core.BlobObject, Offset: 85244, Length: 9},
- {Type: core.REFDeltaObject, Offset: 85262, Length: 9,
- Reference: core.NewHash("fb72698cab7617ac416264415f13224dfd7a165e")},
- {Type: core.REFDeltaObject, Offset: 85300, Length: 6,
- Reference: core.NewHash("fb72698cab7617ac416264415f13224dfd7a165e")},
- {Type: core.TreeObject, Offset: 85335, Length: 110},
- {Type: core.REFDeltaObject, Offset: 85448, Length: 8,
- Reference: core.NewHash("eba74343e2f15d62adedfd8c883ee0262b5c8021")},
- {Type: core.TreeObject, Offset: 85485, Length: 73},
-}