Diffstat (limited to 'formats/packfile')
-rw-r--r-- | formats/packfile/decoder.go      | 306
-rw-r--r-- | formats/packfile/decoder_test.go | 182
-rw-r--r-- | formats/packfile/delta.go        | 181
-rw-r--r-- | formats/packfile/doc.go          | 168
-rw-r--r-- | formats/packfile/error.go        |  30
-rw-r--r-- | formats/packfile/scanner.go      | 418
-rw-r--r-- | formats/packfile/scanner_test.go | 189
7 files changed, 0 insertions, 1474 deletions
diff --git a/formats/packfile/decoder.go b/formats/packfile/decoder.go deleted file mode 100644 index e96980a..0000000 --- a/formats/packfile/decoder.go +++ /dev/null @@ -1,306 +0,0 @@ -package packfile - -import ( - "bytes" - - "gopkg.in/src-d/go-git.v4/core" -) - -// Format specifies if the packfile uses ref-deltas or ofs-deltas. -type Format int - -// Possible values of the Format type. -const ( - UnknownFormat Format = iota - OFSDeltaFormat - REFDeltaFormat -) - -var ( - // ErrMaxObjectsLimitReached is returned by Decode when the number - // of objects in the packfile is higher than - // Decoder.MaxObjectsLimit. - ErrMaxObjectsLimitReached = NewError("max. objects limit reached") - // ErrInvalidObject is returned by Decode when an invalid object is - // found in the packfile. - ErrInvalidObject = NewError("invalid git object") - // ErrPackEntryNotFound is returned by Decode when a reference in - // the packfile references and unknown object. - ErrPackEntryNotFound = NewError("can't find a pack entry") - // ErrZLib is returned by Decode when there was an error unzipping - // the packfile contents. - ErrZLib = NewError("zlib reading error") - // ErrCannotRecall is returned by RecallByOffset or RecallByHash if the object - // to recall cannot be returned. - ErrCannotRecall = NewError("cannot recall object") - // ErrNonSeekable is returned if a NewDecoder is used with a non-seekable - // reader and without a core.ObjectStorage or ReadObjectAt method is called - // without a seekable scanner - ErrNonSeekable = NewError("non-seekable scanner") - // ErrRollback error making Rollback over a transaction after an error - ErrRollback = NewError("rollback error, during set error") -) - -// Decoder reads and decodes packfiles from an input stream. -type Decoder struct { - s *Scanner - o core.ObjectStorer - tx core.Transaction - - offsetToHash map[int64]core.Hash - hashToOffset map[core.Hash]int64 - crcs map[core.Hash]uint32 -} - -// NewDecoder returns a new Decoder that reads from r. -func NewDecoder(s *Scanner, o core.ObjectStorer) (*Decoder, error) { - if !s.IsSeekable && o == nil { - return nil, ErrNonSeekable - } - - return &Decoder{ - s: s, - o: o, - - offsetToHash: make(map[int64]core.Hash, 0), - hashToOffset: make(map[core.Hash]int64, 0), - crcs: make(map[core.Hash]uint32, 0), - }, nil -} - -// Decode reads a packfile and stores it in the value pointed to by s. 
-func (d *Decoder) Decode() (checksum core.Hash, err error) { - if err := d.doDecode(); err != nil { - return core.ZeroHash, err - } - - return d.s.Checksum() -} - -func (d *Decoder) doDecode() error { - _, count, err := d.s.Header() - if err != nil { - return err - } - - _, isTxStorer := d.o.(core.Transactioner) - switch { - case d.o == nil: - return d.readObjects(int(count)) - case isTxStorer: - return d.readObjectsWithObjectStorerTx(int(count)) - default: - return d.readObjectsWithObjectStorer(int(count)) - } -} - -func (d *Decoder) readObjects(count int) error { - for i := 0; i < count; i++ { - if _, err := d.ReadObject(); err != nil { - return err - } - } - - return nil -} - -func (d *Decoder) readObjectsWithObjectStorer(count int) error { - for i := 0; i < count; i++ { - obj, err := d.ReadObject() - if err != nil { - return err - } - - if _, err := d.o.SetObject(obj); err != nil { - return err - } - } - - return nil -} - -func (d *Decoder) readObjectsWithObjectStorerTx(count int) error { - tx := d.o.(core.Transactioner).Begin() - - for i := 0; i < count; i++ { - obj, err := d.ReadObject() - if err != nil { - return err - } - - if _, err := tx.SetObject(obj); err != nil { - if rerr := d.tx.Rollback(); rerr != nil { - return ErrRollback.AddDetails( - "error: %s, during tx.Set error: %s", rerr, err, - ) - } - - return err - } - - } - - return tx.Commit() -} - -// ReadObject reads a object from the stream and return it -func (d *Decoder) ReadObject() (core.Object, error) { - h, err := d.s.NextObjectHeader() - if err != nil { - return nil, err - } - - obj := d.newObject() - obj.SetSize(h.Length) - obj.SetType(h.Type) - var crc uint32 - switch h.Type { - case core.CommitObject, core.TreeObject, core.BlobObject, core.TagObject: - crc, err = d.fillRegularObjectContent(obj) - case core.REFDeltaObject: - crc, err = d.fillREFDeltaObjectContent(obj, h.Reference) - case core.OFSDeltaObject: - crc, err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference) - default: - err = ErrInvalidObject.AddDetails("type %q", h.Type) - } - - if err != nil { - return obj, err - } - - hash := obj.Hash() - d.setOffset(hash, h.Offset) - d.setCRC(hash, crc) - - return obj, nil -} - -func (d *Decoder) newObject() core.Object { - if d.o == nil { - return &core.MemoryObject{} - } - - return d.o.NewObject() -} - -// ReadObjectAt reads an object at the given location -func (d *Decoder) ReadObjectAt(offset int64) (core.Object, error) { - if !d.s.IsSeekable { - return nil, ErrNonSeekable - } - - beforeJump, err := d.s.Seek(offset) - if err != nil { - return nil, err - } - - defer func() { - _, seekErr := d.s.Seek(beforeJump) - if err == nil { - err = seekErr - } - }() - - return d.ReadObject() -} - -func (d *Decoder) fillRegularObjectContent(obj core.Object) (uint32, error) { - w, err := obj.Writer() - if err != nil { - return 0, err - } - - _, crc, err := d.s.NextObject(w) - return crc, err -} - -func (d *Decoder) fillREFDeltaObjectContent(obj core.Object, ref core.Hash) (uint32, error) { - buf := bytes.NewBuffer(nil) - _, crc, err := d.s.NextObject(buf) - if err != nil { - return 0, err - } - - base, err := d.recallByHash(ref) - if err != nil { - return 0, err - } - - obj.SetType(base.Type()) - return crc, ApplyDelta(obj, base, buf.Bytes()) -} - -func (d *Decoder) fillOFSDeltaObjectContent(obj core.Object, offset int64) (uint32, error) { - buf := bytes.NewBuffer(nil) - _, crc, err := d.s.NextObject(buf) - if err != nil { - return 0, err - } - - base, err := d.recallByOffset(offset) - if err != nil { - return 0, err - } - 
- obj.SetType(base.Type()) - return crc, ApplyDelta(obj, base, buf.Bytes()) -} - -func (d *Decoder) setOffset(h core.Hash, offset int64) { - d.offsetToHash[offset] = h - d.hashToOffset[h] = offset -} - -func (d *Decoder) setCRC(h core.Hash, crc uint32) { - d.crcs[h] = crc -} - -func (d *Decoder) recallByOffset(o int64) (core.Object, error) { - if d.s.IsSeekable { - return d.ReadObjectAt(o) - } - - if h, ok := d.offsetToHash[o]; ok { - return d.tx.Object(core.AnyObject, h) - } - - return nil, core.ErrObjectNotFound -} - -func (d *Decoder) recallByHash(h core.Hash) (core.Object, error) { - if d.s.IsSeekable { - if o, ok := d.hashToOffset[h]; ok { - return d.ReadObjectAt(o) - } - } - - obj, err := d.tx.Object(core.AnyObject, h) - if err != core.ErrObjectNotFound { - return obj, err - } - - return nil, core.ErrObjectNotFound -} - -// SetOffsets sets the offsets, required when using the method ReadObjectAt, -// without decoding the full packfile -func (d *Decoder) SetOffsets(offsets map[core.Hash]int64) { - d.hashToOffset = offsets -} - -// Offsets returns the objects read offset -func (d *Decoder) Offsets() map[core.Hash]int64 { - return d.hashToOffset -} - -// CRCs returns the CRC-32 for each objected read -func (d *Decoder) CRCs() map[core.Hash]uint32 { - return d.crcs -} - -// Close close the Scanner, usually this mean that the whole reader is read and -// discarded -func (d *Decoder) Close() error { - return d.s.Close() -} diff --git a/formats/packfile/decoder_test.go b/formats/packfile/decoder_test.go deleted file mode 100644 index aa178d7..0000000 --- a/formats/packfile/decoder_test.go +++ /dev/null @@ -1,182 +0,0 @@ -package packfile - -import ( - "io" - "testing" - - "gopkg.in/src-d/go-git.v4/core" - "gopkg.in/src-d/go-git.v4/fixtures" - "gopkg.in/src-d/go-git.v4/formats/idxfile" - "gopkg.in/src-d/go-git.v4/storage/memory" - - . 
"gopkg.in/check.v1" -) - -func Test(t *testing.T) { TestingT(t) } - -type ReaderSuite struct { - fixtures.Suite -} - -var _ = Suite(&ReaderSuite{}) - -func (s *ReaderSuite) TestNewDecodeNonSeekable(c *C) { - scanner := NewScanner(nil) - d, err := NewDecoder(scanner, nil) - - c.Assert(d, IsNil) - c.Assert(err, NotNil) -} - -func (s *ReaderSuite) TestDecode(c *C) { - fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { - scanner := NewScanner(f.Packfile()) - storage := memory.NewStorage() - - d, err := NewDecoder(scanner, storage) - c.Assert(err, IsNil) - defer d.Close() - - ch, err := d.Decode() - c.Assert(err, IsNil) - c.Assert(ch, Equals, f.PackfileHash) - - assertObjects(c, storage, expectedHashes) - }) -} - -func (s *ReaderSuite) TestDecodeInMemory(c *C) { - fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { - scanner := NewScanner(f.Packfile()) - d, err := NewDecoder(scanner, nil) - c.Assert(err, IsNil) - - ch, err := d.Decode() - c.Assert(err, IsNil) - c.Assert(ch, Equals, f.PackfileHash) - }) -} - -var expectedHashes = []string{ - "918c48b83bd081e863dbe1b80f8998f058cd8294", - "af2d6a6954d532f8ffb47615169c8fdf9d383a1a", - "1669dce138d9b841a518c64b10914d88f5e488ea", - "a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69", - "b8e471f58bcbca63b07bda20e428190409c2db47", - "35e85108805c84807bc66a02d91535e1e24b38b9", - "b029517f6300c2da0f4b651b8642506cd6aaf45d", - "32858aad3c383ed1ff0a0f9bdf231d54a00c9e88", - "d3ff53e0564a9f87d8e84b6e28e5060e517008aa", - "c192bd6a24ea1ab01d78686e417c8bdc7c3d197f", - "d5c0f4ab811897cadf03aec358ae60d21f91c50d", - "49c6bb89b17060d7b4deacb7b338fcc6ea2352a9", - "cf4aa3b38974fb7d81f367c0830f7d78d65ab86b", - "9dea2395f5403188298c1dabe8bdafe562c491e3", - "586af567d0bb5e771e49bdd9434f5e0fb76d25fa", - "9a48f23120e880dfbe41f7c9b7b708e9ee62a492", - "5a877e6a906a2743ad6e45d99c1793642aaf8eda", - "c8f1d8c61f9da76f4cb49fd86322b6e685dba956", - "a8d315b2b1c615d43042c3a62402b8a54288cf5c", - "a39771a7651f97faf5c72e08224d857fc35133db", - "880cd14280f4b9b6ed3986d6671f907d7cc2a198", - "fb72698cab7617ac416264415f13224dfd7a165e", - "4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd", - "eba74343e2f15d62adedfd8c883ee0262b5c8021", - "c2d30fa8ef288618f65f6eed6e168e0d514886f4", - "8dcef98b1d52143e1e2dbc458ffe38f925786bf2", - "aa9b383c260e1d05fbbf6b30a02914555e20c725", - "6ecf0ef2c2dffb796033e5a02219af86ec6584e5", - "dbd3641b371024f44d0e469a9c8f5457b0660de1", - "e8d3ffab552895c19b9fcf7aa264d277cde33881", - "7e59600739c96546163833214c36459e324bad0a", -} - -func (s *ReaderSuite) TestDecodeCRCs(c *C) { - f := fixtures.Basic().ByTag("ofs-delta").One() - - scanner := NewScanner(f.Packfile()) - storage := memory.NewStorage() - - d, err := NewDecoder(scanner, storage) - c.Assert(err, IsNil) - _, err = d.Decode() - c.Assert(err, IsNil) - - var sum uint64 - for _, crc := range d.CRCs() { - sum += uint64(crc) - } - - c.Assert(int(sum), Equals, 78022211966) -} - -func (s *ReaderSuite) TestReadObjectAt(c *C) { - f := fixtures.Basic().One() - scanner := NewScanner(f.Packfile()) - d, err := NewDecoder(scanner, nil) - c.Assert(err, IsNil) - - // when the packfile is ref-delta based, the offsets are required - if f.Is("ref-delta") { - offsets := getOffsetsFromIdx(f.Idx()) - d.SetOffsets(offsets) - } - - // the objects at reference 186, is a delta, so should be recall, - // without being read before. 
- obj, err := d.ReadObjectAt(186) - c.Assert(err, IsNil) - c.Assert(obj.Hash().String(), Equals, "6ecf0ef2c2dffb796033e5a02219af86ec6584e5") -} - -func (s *ReaderSuite) TestOffsets(c *C) { - f := fixtures.Basic().One() - scanner := NewScanner(f.Packfile()) - d, err := NewDecoder(scanner, nil) - c.Assert(err, IsNil) - - c.Assert(d.Offsets(), HasLen, 0) - - _, err = d.Decode() - c.Assert(err, IsNil) - - c.Assert(d.Offsets(), HasLen, 31) -} - -func (s *ReaderSuite) TestSetOffsets(c *C) { - f := fixtures.Basic().One() - scanner := NewScanner(f.Packfile()) - d, err := NewDecoder(scanner, nil) - c.Assert(err, IsNil) - - h := core.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5") - d.SetOffsets(map[core.Hash]int64{h: 42}) - - o := d.Offsets() - c.Assert(o, HasLen, 1) - c.Assert(o[h], Equals, int64(42)) -} - -func assertObjects(c *C, s *memory.Storage, expects []string) { - c.Assert(len(expects), Equals, len(s.Objects)) - for _, exp := range expects { - obt, err := s.Object(core.AnyObject, core.NewHash(exp)) - c.Assert(err, IsNil) - c.Assert(obt.Hash().String(), Equals, exp) - } -} - -func getOffsetsFromIdx(r io.Reader) map[core.Hash]int64 { - idx := &idxfile.Idxfile{} - err := idxfile.NewDecoder(r).Decode(idx) - if err != nil { - panic(err) - } - - offsets := make(map[core.Hash]int64) - for _, e := range idx.Entries { - offsets[e.Hash] = int64(e.Offset) - } - - return offsets -} diff --git a/formats/packfile/delta.go b/formats/packfile/delta.go deleted file mode 100644 index d08f969..0000000 --- a/formats/packfile/delta.go +++ /dev/null @@ -1,181 +0,0 @@ -package packfile - -import ( - "io/ioutil" - - "gopkg.in/src-d/go-git.v4/core" -) - -// See https://github.com/git/git/blob/49fa3dc76179e04b0833542fa52d0f287a4955ac/delta.h -// https://github.com/git/git/blob/c2c5f6b1e479f2c38e0e01345350620944e3527f/patch-delta.c, -// and https://github.com/tarruda/node-git-core/blob/master/src/js/delta.js -// for details about the delta format. - -const deltaSizeMin = 4 - -// ApplyDelta writes to taget the result of applying the modification deltas in delta to base. -func ApplyDelta(target, base core.Object, delta []byte) error { - r, err := base.Reader() - if err != nil { - return err - } - - w, err := target.Writer() - if err != nil { - return err - } - - src, err := ioutil.ReadAll(r) - if err != nil { - return err - } - - dst := PatchDelta(src, delta) - target.SetSize(int64(len(dst))) - - if _, err := w.Write(dst); err != nil { - return err - } - - return nil -} - -// PatchDelta returns the result of applying the modification deltas in delta to src. -func PatchDelta(src, delta []byte) []byte { - if len(delta) < deltaSizeMin { - return nil - } - - srcSz, delta := decodeLEB128(delta) - if srcSz != uint(len(src)) { - return nil - } - - targetSz, delta := decodeLEB128(delta) - remainingTargetSz := targetSz - - var dest []byte - var cmd byte - for { - cmd = delta[0] - delta = delta[1:] - if isCopyFromSrc(cmd) { - var offset, sz uint - offset, delta = decodeOffset(cmd, delta) - sz, delta = decodeSize(cmd, delta) - if invalidSize(sz, targetSz) || - invalidOffsetSize(offset, sz, srcSz) { - break - } - dest = append(dest, src[offset:offset+sz]...) - remainingTargetSz -= sz - } else if isCopyFromDelta(cmd) { - sz := uint(cmd) // cmd is the size itself - if invalidSize(sz, targetSz) { - break - } - dest = append(dest, delta[0:sz]...) 
- remainingTargetSz -= sz - delta = delta[sz:] - } else { - return nil - } - - if remainingTargetSz <= 0 { - break - } - } - - return dest -} - -// Decodes a number encoded as an unsigned LEB128 at the start of some -// binary data and returns the decoded number and the rest of the -// stream. -// -// This must be called twice on the delta data buffer, first to get the -// expected source buffer size, and again to get the target buffer size. -func decodeLEB128(input []byte) (uint, []byte) { - var num, sz uint - var b byte - for { - b = input[sz] - num |= (uint(b) & payload) << (sz * 7) // concats 7 bits chunks - sz++ - - if uint(b)&continuation == 0 || sz == uint(len(input)) { - break - } - } - - return num, input[sz:] -} - -const ( - payload = 0x7f // 0111 1111 - continuation = 0x80 // 1000 0000 -) - -func isCopyFromSrc(cmd byte) bool { - return (cmd & 0x80) != 0 -} - -func isCopyFromDelta(cmd byte) bool { - return (cmd&0x80) == 0 && cmd != 0 -} - -func decodeOffset(cmd byte, delta []byte) (uint, []byte) { - var offset uint - if (cmd & 0x01) != 0 { - offset = uint(delta[0]) - delta = delta[1:] - } - if (cmd & 0x02) != 0 { - offset |= uint(delta[0]) << 8 - delta = delta[1:] - } - if (cmd & 0x04) != 0 { - offset |= uint(delta[0]) << 16 - delta = delta[1:] - } - if (cmd & 0x08) != 0 { - offset |= uint(delta[0]) << 24 - delta = delta[1:] - } - - return offset, delta -} - -func decodeSize(cmd byte, delta []byte) (uint, []byte) { - var sz uint - if (cmd & 0x10) != 0 { - sz = uint(delta[0]) - delta = delta[1:] - } - if (cmd & 0x20) != 0 { - sz |= uint(delta[0]) << 8 - delta = delta[1:] - } - if (cmd & 0x40) != 0 { - sz |= uint(delta[0]) << 16 - delta = delta[1:] - } - if sz == 0 { - sz = 0x10000 - } - - return sz, delta -} - -func invalidSize(sz, targetSz uint) bool { - return sz > targetSz -} - -func invalidOffsetSize(offset, sz, srcSz uint) bool { - return sumOverflows(offset, sz) || - offset+sz > srcSz -} - -func sumOverflows(a, b uint) bool { - return a+b < a -} diff --git a/formats/packfile/doc.go b/formats/packfile/doc.go deleted file mode 100644 index 0b173ca..0000000 --- a/formats/packfile/doc.go +++ /dev/null @@ -1,168 +0,0 @@ -// Package packfile implements a encoder/decoder of packfile format -package packfile - -/* -GIT pack format -=============== - -== pack-*.pack files have the following format: - - - A header appears at the beginning and consists of the following: - - 4-byte signature: - The signature is: {'P', 'A', 'C', 'K'} - - 4-byte version number (network byte order): - GIT currently accepts version number 2 or 3 but - generates version 2 only. - - 4-byte number of objects contained in the pack (network byte order) - - Observation: we cannot have more than 4G versions ;-) and - more than 4G objects in a pack. - - - The header is followed by number of object entries, each of - which looks like this: - - (undeltified representation) - n-byte type and length (3-bit type, (n-1)*7+4-bit length) - compressed data - - (deltified representation) - n-byte type and length (3-bit type, (n-1)*7+4-bit length) - 20-byte base object name - compressed delta data - - Observation: length of each object is encoded in a variable - length format and is not constrained to 32-bit or anything. - - - The trailer records 20-byte SHA1 checksum of all of the above. - -== Original (version 1) pack-*.idx files have the following format: - - - The header consists of 256 4-byte network byte order - integers. 
N-th entry of this table records the number of - objects in the corresponding pack, the first byte of whose - object name is less than or equal to N. This is called the - 'first-level fan-out' table. - - - The header is followed by sorted 24-byte entries, one entry - per object in the pack. Each entry is: - - 4-byte network byte order integer, recording where the - object is stored in the packfile as the offset from the - beginning. - - 20-byte object name. - - - The file is concluded with a trailer: - - A copy of the 20-byte SHA1 checksum at the end of - corresponding packfile. - - 20-byte SHA1-checksum of all of the above. - -Pack Idx file: - - -- +--------------------------------+ -fanout | fanout[0] = 2 (for example) |-. -table +--------------------------------+ | - | fanout[1] | | - +--------------------------------+ | - | fanout[2] | | - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | - | fanout[255] = total objects |---. - -- +--------------------------------+ | | -main | offset | | | -index | object name 00XXXXXXXXXXXXXXXX | | | -table +--------------------------------+ | | - | offset | | | - | object name 00XXXXXXXXXXXXXXXX | | | - +--------------------------------+<+ | - .-| offset | | - | | object name 01XXXXXXXXXXXXXXXX | | - | +--------------------------------+ | - | | offset | | - | | object name 01XXXXXXXXXXXXXXXX | | - | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | - | | offset | | - | | object name FFXXXXXXXXXXXXXXXX | | - --| +--------------------------------+<--+ -trailer | | packfile checksum | - | +--------------------------------+ - | | idxfile checksum | - | +--------------------------------+ - .-------. - | -Pack file entry: <+ - - packed object header: - 1-byte size extension bit (MSB) - type (next 3 bit) - size0 (lower 4-bit) - n-byte sizeN (as long as MSB is set, each 7-bit) - size0..sizeN form 4+7+7+..+7 bit integer, size0 - is the least significant part, and sizeN is the - most significant part. - packed object data: - If it is not DELTA, then deflated bytes (the size above - is the size before compression). - If it is REF_DELTA, then - 20-byte base object name SHA1 (the size above is the - size of the delta data that follows). - delta data, deflated. - If it is OFS_DELTA, then - n-byte offset (see below) interpreted as a negative - offset from the type-byte of the header of the - ofs-delta entry (the size above is the size of - the delta data that follows). - delta data, deflated. - - offset encoding: - n bytes with MSB set in all but the last one. - The offset is then the number constructed by - concatenating the lower 7 bit of each byte, and - for n >= 2 adding 2^7 + 2^14 + ... + 2^(7*(n-1)) - to the result. - - - -== Version 2 pack-*.idx files support packs larger than 4 GiB, and - have some other reorganizations. They have the format: - - - A 4-byte magic number '\377tOc' which is an unreasonable - fanout[0] value. - - - A 4-byte version number (= 2) - - - A 256-entry fan-out table just like v1. - - - A table of sorted 20-byte SHA1 object names. These are - packed together without offset values to reduce the cache - footprint of the binary search for a specific object name. - - - A table of 4-byte CRC32 values of the packed object data. - This is new in v2 so compressed data can be copied directly - from pack to pack during repacking without undetected - data corruption. - - - A table of 4-byte offset values (in network byte order). - These are usually 31-bit pack file offsets, but large - offsets are encoded as an index into the next table with - the msbit set. 
- - - A table of 8-byte offset entries (empty for pack files less - than 2 GiB). Pack files are organized with heavily used - objects toward the front, so most object references should - not need to refer to this table. - - - The same trailer as a v1 pack file: - - A copy of the 20-byte SHA1 checksum at the end of - corresponding packfile. - - 20-byte SHA1-checksum of all of the above. - -From: -https://www.kernel.org/pub/software/scm/git/docs/v1.7.5/technical/pack-protocol.txt -*/ diff --git a/formats/packfile/error.go b/formats/packfile/error.go deleted file mode 100644 index c0b9163..0000000 --- a/formats/packfile/error.go +++ /dev/null @@ -1,30 +0,0 @@ -package packfile - -import "fmt" - -// Error specifies errors returned during packfile parsing. -type Error struct { - reason, details string -} - -// NewError returns a new error. -func NewError(reason string) *Error { - return &Error{reason: reason} -} - -// Error returns a text representation of the error. -func (e *Error) Error() string { - if e.details == "" { - return e.reason - } - - return fmt.Sprintf("%s: %s", e.reason, e.details) -} - -// AddDetails adds details to an error, with additional text. -func (e *Error) AddDetails(format string, args ...interface{}) *Error { - return &Error{ - reason: e.reason, - details: fmt.Sprintf(format, args...), - } -} diff --git a/formats/packfile/scanner.go b/formats/packfile/scanner.go deleted file mode 100644 index 69cc7d0..0000000 --- a/formats/packfile/scanner.go +++ /dev/null @@ -1,418 +0,0 @@ -package packfile - -import ( - "bufio" - "bytes" - "compress/zlib" - "fmt" - "hash" - "hash/crc32" - "io" - "io/ioutil" - - "gopkg.in/src-d/go-git.v4/core" - "gopkg.in/src-d/go-git.v4/utils/binary" -) - -var ( - // ErrEmptyPackfile is returned by ReadHeader when no data is found in the packfile - ErrEmptyPackfile = NewError("empty packfile") - // ErrBadSignature is returned by ReadHeader when the signature in the packfile is incorrect. - ErrBadSignature = NewError("malformed pack file signature") - // ErrUnsupportedVersion is returned by ReadHeader when the packfile version is - // different than VersionSupported. - ErrUnsupportedVersion = NewError("unsupported packfile version") - // ErrSeekNotSupported returned if seek is not support - ErrSeekNotSupported = NewError("not seek support") -) - -const ( - // VersionSupported is the packfile version supported by this parser. - VersionSupported uint32 = 2 -) - -// ObjectHeader contains the information related to the object, this information -// is collected from the previous bytes to the content of the object. 
-type ObjectHeader struct { - Type core.ObjectType - Offset int64 - Length int64 - Reference core.Hash - OffsetReference int64 -} - -type Scanner struct { - r reader - crc hash.Hash32 - - // pendingObject is used to detect if an object has been read, or still - // is waiting to be read - pendingObject *ObjectHeader - version, objects uint32 - - // lsSeekable says if this scanner can do Seek or not, to have a Scanner - // seekable a r implementing io.Seeker is required - IsSeekable bool -} - -// NewScanner returns a new Scanner based on a reader, if the given reader -// implements io.ReadSeeker the Scanner will be also Seekable -func NewScanner(r io.Reader) *Scanner { - seeker, ok := r.(io.ReadSeeker) - if !ok { - seeker = &trackableReader{Reader: r} - } - - crc := crc32.NewIEEE() - return &Scanner{ - r: &teeReader{ - newByteReadSeeker(seeker), - crc, - }, - crc: crc, - IsSeekable: ok, - } -} - -// Header reads the whole packfile header (signature, version and object count). -// It returns the version and the object count and performs checks on the -// validity of the signature and the version fields. -func (s *Scanner) Header() (version, objects uint32, err error) { - if s.version != 0 { - return s.version, s.objects, nil - } - - sig, err := s.readSignature() - if err != nil { - if err == io.EOF { - err = ErrEmptyPackfile - } - - return - } - - if !s.isValidSignature(sig) { - err = ErrBadSignature - return - } - - version, err = s.readVersion() - s.version = version - if err != nil { - return - } - - if !s.isSupportedVersion(version) { - err = ErrUnsupportedVersion.AddDetails("%d", version) - return - } - - objects, err = s.readCount() - s.objects = objects - return -} - -// readSignature reads an returns the signature field in the packfile. -func (s *Scanner) readSignature() ([]byte, error) { - var sig = make([]byte, 4) - if _, err := io.ReadFull(s.r, sig); err != nil { - return []byte{}, err - } - - return sig, nil -} - -// isValidSignature returns if sig is a valid packfile signature. -func (s *Scanner) isValidSignature(sig []byte) bool { - return bytes.Equal(sig, []byte{'P', 'A', 'C', 'K'}) -} - -// readVersion reads and returns the version field of a packfile. -func (s *Scanner) readVersion() (uint32, error) { - return binary.ReadUint32(s.r) -} - -// isSupportedVersion returns whether version v is supported by the parser. -// The current supported version is VersionSupported, defined above. -func (s *Scanner) isSupportedVersion(v uint32) bool { - return v == VersionSupported -} - -// readCount reads and returns the count of objects field of a packfile. 
-func (s *Scanner) readCount() (uint32, error) { - return binary.ReadUint32(s.r) -} - -// NextObjectHeader returns the ObjectHeader for the next object in the reader -func (s *Scanner) NextObjectHeader() (*ObjectHeader, error) { - if err := s.doPending(); err != nil { - return nil, err - } - - s.crc.Reset() - - h := &ObjectHeader{} - s.pendingObject = h - - var err error - h.Offset, err = s.r.Seek(0, io.SeekCurrent) - if err != nil { - return nil, err - } - - h.Type, h.Length, err = s.readObjectTypeAndLength() - if err != nil { - return nil, err - } - - switch h.Type { - case core.OFSDeltaObject: - no, err := binary.ReadVariableWidthInt(s.r) - if err != nil { - return nil, err - } - - h.OffsetReference = h.Offset - no - case core.REFDeltaObject: - var err error - h.Reference, err = binary.ReadHash(s.r) - if err != nil { - return nil, err - } - } - - return h, nil -} - -func (s *Scanner) doPending() error { - if s.version == 0 { - var err error - s.version, s.objects, err = s.Header() - if err != nil { - return err - } - } - - return s.discardObjectIfNeeded() -} - -func (s *Scanner) discardObjectIfNeeded() error { - if s.pendingObject == nil { - return nil - } - - h := s.pendingObject - n, _, err := s.NextObject(ioutil.Discard) - if err != nil { - return err - } - - if n != h.Length { - return fmt.Errorf( - "error discarding object, discarded %d, expected %d", - n, h.Length, - ) - } - - return nil -} - -// ReadObjectTypeAndLength reads and returns the object type and the -// length field from an object entry in a packfile. -func (s *Scanner) readObjectTypeAndLength() (core.ObjectType, int64, error) { - t, c, err := s.readType() - if err != nil { - return t, 0, err - } - - l, err := s.readLength(c) - - return t, l, err -} - -const ( - maskType = uint8(112) // 0111 0000 - maskFirstLength = uint8(15) // 0000 1111 - maskContinue = uint8(128) // 1000 000 - firstLengthBits = uint8(4) // the first byte has 4 bits to store the length - maskLength = uint8(127) // 0111 1111 - lengthBits = uint8(7) // subsequent bytes has 7 bits to store the length -) - -func (s *Scanner) readType() (core.ObjectType, byte, error) { - var c byte - var err error - if c, err = s.r.ReadByte(); err != nil { - return core.ObjectType(0), 0, err - } - - typ := parseType(c) - - return typ, c, nil -} - -func parseType(b byte) core.ObjectType { - return core.ObjectType((b & maskType) >> firstLengthBits) -} - -// the length is codified in the last 4 bits of the first byte and in -// the last 7 bits of subsequent bytes. Last byte has a 0 MSB. -func (s *Scanner) readLength(first byte) (int64, error) { - length := int64(first & maskFirstLength) - - c := first - shift := firstLengthBits - var err error - for c&maskContinue > 0 { - if c, err = s.r.ReadByte(); err != nil { - return 0, err - } - - length += int64(c&maskLength) << shift - shift += lengthBits - } - - return length, nil -} - -// NextObject writes the content of the next object into the reader, returns -// the number of bytes written, the CRC32 of the content and an error, if any -func (s *Scanner) NextObject(w io.Writer) (written int64, crc32 uint32, err error) { - defer s.crc.Reset() - - s.pendingObject = nil - written, err = s.copyObject(w) - crc32 = s.crc.Sum32() - return -} - -// ReadRegularObject reads and write a non-deltified object -// from it zlib stream in an object entry in the packfile. 
-func (s *Scanner) copyObject(w io.Writer) (int64, error) { - zr, err := zlib.NewReader(s.r) - if err != nil { - return -1, fmt.Errorf("zlib reading error: %s", err) - } - - defer func() { - closeErr := zr.Close() - if err == nil { - err = closeErr - } - }() - - return io.Copy(w, zr) -} - -// Seek sets a new offset from start, returns the old position before the change -func (s *Scanner) Seek(offset int64) (previous int64, err error) { - // if seeking we asume that you are not interested on the header - if s.version == 0 { - s.version = VersionSupported - } - - previous, err = s.r.Seek(0, io.SeekCurrent) - if err != nil { - return -1, err - } - - _, err = s.r.Seek(offset, io.SeekStart) - return previous, err -} - -// Checksum returns the checksum of the packfile -func (s *Scanner) Checksum() (core.Hash, error) { - err := s.discardObjectIfNeeded() - if err != nil { - return core.ZeroHash, err - } - - return binary.ReadHash(s.r) -} - -// Close reads the reader until io.EOF -func (s *Scanner) Close() error { - _, err := io.Copy(ioutil.Discard, s.r) - return err -} - -type trackableReader struct { - count int64 - io.Reader -} - -// Read reads up to len(p) bytes into p. -func (r *trackableReader) Read(p []byte) (n int, err error) { - n, err = r.Reader.Read(p) - r.count += int64(n) - - return -} - -// Seek only supports io.SeekCurrent, any other operation fails -func (r *trackableReader) Seek(offset int64, whence int) (int64, error) { - if whence != io.SeekCurrent { - return -1, ErrSeekNotSupported - } - - return r.count, nil -} - -func newByteReadSeeker(r io.ReadSeeker) *bufferedSeeker { - return &bufferedSeeker{ - r: r, - Reader: *bufio.NewReader(r), - } -} - -type bufferedSeeker struct { - r io.ReadSeeker - bufio.Reader -} - -func (r *bufferedSeeker) Seek(offset int64, whence int) (int64, error) { - if whence == io.SeekCurrent { - current, err := r.r.Seek(offset, whence) - if err != nil { - return current, err - } - - return current - int64(r.Buffered()), nil - } - - defer r.Reader.Reset(r.r) - return r.r.Seek(offset, whence) -} - -type reader interface { - io.Reader - io.ByteReader - io.Seeker -} - -type teeReader struct { - reader - w hash.Hash32 -} - -func (r *teeReader) Read(p []byte) (n int, err error) { - n, err = r.reader.Read(p) - if n > 0 { - if n, err := r.w.Write(p[:n]); err != nil { - return n, err - } - } - return -} - -func (r *teeReader) ReadByte() (b byte, err error) { - b, err = r.reader.ReadByte() - if err == nil { - _, err := r.w.Write([]byte{b}) - if err != nil { - return 0, err - } - } - - return -} diff --git a/formats/packfile/scanner_test.go b/formats/packfile/scanner_test.go deleted file mode 100644 index 5f80da0..0000000 --- a/formats/packfile/scanner_test.go +++ /dev/null @@ -1,189 +0,0 @@ -package packfile - -import ( - "bytes" - "io" - - . 
"gopkg.in/check.v1" - "gopkg.in/src-d/go-git.v4/core" - "gopkg.in/src-d/go-git.v4/fixtures" -) - -type ScannerSuite struct { - fixtures.Suite -} - -var _ = Suite(&ScannerSuite{}) - -func (s *ScannerSuite) TestHeader(c *C) { - r := fixtures.Basic().One().Packfile() - p := NewScanner(r) - - version, objects, err := p.Header() - c.Assert(err, IsNil) - c.Assert(version, Equals, VersionSupported) - c.Assert(objects, Equals, uint32(31)) -} - -func (s *ScannerSuite) TestNextObjectHeaderWithoutHeader(c *C) { - r := fixtures.Basic().One().Packfile() - p := NewScanner(r) - - h, err := p.NextObjectHeader() - c.Assert(err, IsNil) - c.Assert(h, DeepEquals, &expectedHeadersOFS[0]) - - version, objects, err := p.Header() - c.Assert(err, IsNil) - c.Assert(version, Equals, VersionSupported) - c.Assert(objects, Equals, uint32(31)) -} - -func (s *ScannerSuite) TestNextObjectHeaderREFDelta(c *C) { - s.testNextObjectHeader(c, "ref-delta", expectedHeadersREF) -} - -func (s *ScannerSuite) TestNextObjectHeaderOFSDelta(c *C) { - s.testNextObjectHeader(c, "ofs-delta", expectedHeadersOFS) -} - -func (s *ScannerSuite) testNextObjectHeader(c *C, tag string, expected []ObjectHeader) { - r := fixtures.Basic().ByTag(tag).One().Packfile() - p := NewScanner(r) - - _, objects, err := p.Header() - c.Assert(err, IsNil) - - for i := 0; i < int(objects); i++ { - h, err := p.NextObjectHeader() - c.Assert(err, IsNil) - c.Assert(*h, DeepEquals, expected[i]) - - buf := bytes.NewBuffer(nil) - n, _, err := p.NextObject(buf) - c.Assert(err, IsNil) - c.Assert(n, Equals, h.Length) - } - - n, err := p.Checksum() - c.Assert(err, IsNil) - c.Assert(n, HasLen, 20) -} - -func (s *ScannerSuite) TestNextObjectHeaderWithOutReadObject(c *C) { - f := fixtures.Basic().ByTag("ref-delta").One() - r := f.Packfile() - p := NewScanner(r) - - _, objects, err := p.Header() - c.Assert(err, IsNil) - - for i := 0; i < int(objects); i++ { - h, _ := p.NextObjectHeader() - c.Assert(err, IsNil) - c.Assert(*h, DeepEquals, expectedHeadersREF[i]) - } - - err = p.discardObjectIfNeeded() - c.Assert(err, IsNil) - - n, err := p.Checksum() - c.Assert(err, IsNil) - c.Assert(n, Equals, f.PackfileHash) -} - -func (s *ScannerSuite) TestNextObjectHeaderWithOutReadObjectNonSeekable(c *C) { - f := fixtures.Basic().ByTag("ref-delta").One() - r := io.MultiReader(f.Packfile()) - p := NewScanner(r) - - _, objects, err := p.Header() - c.Assert(err, IsNil) - - for i := 0; i < int(objects); i++ { - h, _ := p.NextObjectHeader() - c.Assert(err, IsNil) - c.Assert(*h, DeepEquals, expectedHeadersREF[i]) - } - - err = p.discardObjectIfNeeded() - c.Assert(err, IsNil) - - n, err := p.Checksum() - c.Assert(err, IsNil) - c.Assert(n, Equals, f.PackfileHash) -} - -var expectedHeadersOFS = []ObjectHeader{ - {Type: core.CommitObject, Offset: 12, Length: 254}, - {Type: core.OFSDeltaObject, Offset: 186, Length: 93, OffsetReference: 12}, - {Type: core.CommitObject, Offset: 286, Length: 242}, - {Type: core.CommitObject, Offset: 449, Length: 242}, - {Type: core.CommitObject, Offset: 615, Length: 333}, - {Type: core.CommitObject, Offset: 838, Length: 332}, - {Type: core.CommitObject, Offset: 1063, Length: 244}, - {Type: core.CommitObject, Offset: 1230, Length: 243}, - {Type: core.CommitObject, Offset: 1392, Length: 187}, - {Type: core.BlobObject, Offset: 1524, Length: 189}, - {Type: core.BlobObject, Offset: 1685, Length: 18}, - {Type: core.BlobObject, Offset: 1713, Length: 1072}, - {Type: core.BlobObject, Offset: 2351, Length: 76110}, - {Type: core.BlobObject, Offset: 78050, Length: 2780}, - {Type: 
core.BlobObject, Offset: 78882, Length: 217848}, - {Type: core.BlobObject, Offset: 80725, Length: 706}, - {Type: core.BlobObject, Offset: 80998, Length: 11488}, - {Type: core.BlobObject, Offset: 84032, Length: 78}, - {Type: core.TreeObject, Offset: 84115, Length: 272}, - {Type: core.OFSDeltaObject, Offset: 84375, Length: 43, OffsetReference: 84115}, - {Type: core.TreeObject, Offset: 84430, Length: 38}, - {Type: core.TreeObject, Offset: 84479, Length: 75}, - {Type: core.TreeObject, Offset: 84559, Length: 38}, - {Type: core.TreeObject, Offset: 84608, Length: 34}, - {Type: core.BlobObject, Offset: 84653, Length: 9}, - {Type: core.OFSDeltaObject, Offset: 84671, Length: 6, OffsetReference: 84375}, - {Type: core.OFSDeltaObject, Offset: 84688, Length: 9, OffsetReference: 84375}, - {Type: core.OFSDeltaObject, Offset: 84708, Length: 6, OffsetReference: 84375}, - {Type: core.OFSDeltaObject, Offset: 84725, Length: 5, OffsetReference: 84115}, - {Type: core.OFSDeltaObject, Offset: 84741, Length: 8, OffsetReference: 84375}, - {Type: core.OFSDeltaObject, Offset: 84760, Length: 4, OffsetReference: 84741}, -} - -var expectedHeadersREF = []ObjectHeader{ - {Type: core.CommitObject, Offset: 12, Length: 254}, - {Type: core.REFDeltaObject, Offset: 186, Length: 93, - Reference: core.NewHash("e8d3ffab552895c19b9fcf7aa264d277cde33881")}, - {Type: core.CommitObject, Offset: 304, Length: 242}, - {Type: core.CommitObject, Offset: 467, Length: 242}, - {Type: core.CommitObject, Offset: 633, Length: 333}, - {Type: core.CommitObject, Offset: 856, Length: 332}, - {Type: core.CommitObject, Offset: 1081, Length: 243}, - {Type: core.CommitObject, Offset: 1243, Length: 244}, - {Type: core.CommitObject, Offset: 1410, Length: 187}, - {Type: core.BlobObject, Offset: 1542, Length: 189}, - {Type: core.BlobObject, Offset: 1703, Length: 18}, - {Type: core.BlobObject, Offset: 1731, Length: 1072}, - {Type: core.BlobObject, Offset: 2369, Length: 76110}, - {Type: core.TreeObject, Offset: 78068, Length: 38}, - {Type: core.BlobObject, Offset: 78117, Length: 2780}, - {Type: core.TreeObject, Offset: 79049, Length: 75}, - {Type: core.BlobObject, Offset: 79129, Length: 217848}, - {Type: core.BlobObject, Offset: 80972, Length: 706}, - {Type: core.TreeObject, Offset: 81265, Length: 38}, - {Type: core.BlobObject, Offset: 81314, Length: 11488}, - {Type: core.TreeObject, Offset: 84752, Length: 34}, - {Type: core.BlobObject, Offset: 84797, Length: 78}, - {Type: core.TreeObject, Offset: 84880, Length: 271}, - {Type: core.REFDeltaObject, Offset: 85141, Length: 6, - Reference: core.NewHash("a8d315b2b1c615d43042c3a62402b8a54288cf5c")}, - {Type: core.REFDeltaObject, Offset: 85176, Length: 37, - Reference: core.NewHash("fb72698cab7617ac416264415f13224dfd7a165e")}, - {Type: core.BlobObject, Offset: 85244, Length: 9}, - {Type: core.REFDeltaObject, Offset: 85262, Length: 9, - Reference: core.NewHash("fb72698cab7617ac416264415f13224dfd7a165e")}, - {Type: core.REFDeltaObject, Offset: 85300, Length: 6, - Reference: core.NewHash("fb72698cab7617ac416264415f13224dfd7a165e")}, - {Type: core.TreeObject, Offset: 85335, Length: 110}, - {Type: core.REFDeltaObject, Offset: 85448, Length: 8, - Reference: core.NewHash("eba74343e2f15d62adedfd8c883ee0262b5c8021")}, - {Type: core.TreeObject, Offset: 85485, Length: 73}, -} |
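The deleted decoder.go and scanner.go formed the streaming packfile reader: NewScanner wraps any io.Reader (and is seekable when given an io.ReadSeeker), and NewDecoder drains it into a core.ObjectStorer. Below is a minimal usage sketch of that removed API, mirroring decoder_test.go; the pack file path and the formats/packfile import path are assumptions.

package main

import (
	"fmt"
	"os"

	"gopkg.in/src-d/go-git.v4/core"
	"gopkg.in/src-d/go-git.v4/formats/packfile" // assumed import path of the removed package
	"gopkg.in/src-d/go-git.v4/storage/memory"
)

func main() {
	// Hypothetical packfile on disk; *os.File is an io.ReadSeeker, so the
	// scanner is seekable and deltas can be resolved by offset.
	f, err := os.Open("objects/pack/pack-example.pack")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	scanner := packfile.NewScanner(f)
	storage := memory.NewStorage()

	d, err := packfile.NewDecoder(scanner, storage)
	if err != nil {
		panic(err)
	}
	defer d.Close()

	// Decode reads every object entry, resolves ref- and ofs-deltas, stores
	// the results, and returns the packfile trailer checksum.
	checksum, err := d.Decode()
	if err != nil {
		panic(err)
	}
	fmt.Println("pack checksum:", checksum)

	// Objects are now addressable by hash through the storage.
	obj, err := storage.Object(core.AnyObject,
		core.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5"))
	if err != nil {
		panic(err)
	}
	fmt.Println(obj.Type(), obj.Hash())
}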
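doc.go documents each object entry as starting with an n-byte type-and-length field: 3 type bits and 4 size bits in the first byte, then 7 more size bits per byte while the MSB is set. The removed Scanner.readObjectTypeAndLength implemented exactly this; the standalone sketch below (illustrative names, no error handling) shows the bit layout.

package main

import "fmt"

// parseEntryHeader decodes the type-and-length field that opens every object
// entry: 1 continuation bit, 3 type bits and 4 size bits in the first byte,
// then 7 size bits per additional byte while the continuation bit is set.
func parseEntryHeader(buf []byte) (typ byte, length int64, read int) {
	const (
		maskContinue    = 0x80 // 1000 0000: another length byte follows
		maskType        = 0x70 // 0111 0000: object type
		maskFirstLength = 0x0f // 0000 1111: low 4 bits of the length
		maskLength      = 0x7f // 0111 1111: 7 length bits in later bytes
	)

	c := buf[read]
	read++
	typ = (c & maskType) >> 4
	length = int64(c & maskFirstLength)

	for shift := uint(4); c&maskContinue != 0; shift += 7 {
		c = buf[read]
		read++
		length |= int64(c&maskLength) << shift
	}

	return typ, length, read
}

func main() {
	// 0x95 = 1001 0101: continuation set, type 1 (commit), low size bits 0101
	// 0x0f = 0000 1111: last byte, contributes 15 << 4
	typ, length, n := parseEntryHeader([]byte{0x95, 0x0f})
	fmt.Println(typ, length, n) // 1 245 2
}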
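For OFS_DELTA entries, doc.go describes the base offset as "n bytes with MSB set in all but the last one", concatenating the lower 7 bits of each byte and adding 2^7 + 2^14 + ... for each extra byte; the removed scanner delegates this to binary.ReadVariableWidthInt and then subtracts the result from the entry's own offset. The sketch below reproduces that encoding under an assumed function name.

package main

import "fmt"

// readNegativeOffset decodes the relative-offset field of an OFS_DELTA entry:
// big-endian 7-bit groups, MSB set on every byte but the last, with an extra
// (offset+1)<<7 step per continuation byte supplying the 2^7 + 2^14 + ... bias.
func readNegativeOffset(buf []byte) (offset int64, read int) {
	c := buf[read]
	read++
	offset = int64(c & 0x7f)

	for c&0x80 != 0 {
		c = buf[read]
		read++
		offset = ((offset + 1) << 7) | int64(c&0x7f)
	}

	return offset, read
}

func main() {
	// 0x80 0x00 is the smallest two-byte encoding and decodes to 128 (2^7),
	// i.e. the delta base starts 128 bytes before this entry's header.
	off, n := readNegativeOffset([]byte{0x80, 0x00})
	fmt.Println(off, n) // 128 2
}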
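delta.go's PatchDelta first reads two little-endian base-128 varints from the delta buffer, the expected source size and the target size, via decodeLEB128, and only then walks the copy/insert command stream. The varint decoding on its own, with a worked value:

package main

import "fmt"

// decodeLEB128 reads an unsigned little-endian base-128 varint from the start
// of input and returns the value plus the remaining bytes, as the removed
// delta.go did for the source and target sizes of a delta.
func decodeLEB128(input []byte) (uint, []byte) {
	const (
		payload      = 0x7f // 0111 1111: value bits
		continuation = 0x80 // 1000 0000: more bytes follow
	)

	var num, sz uint
	for {
		b := input[sz]
		num |= (uint(b) & payload) << (sz * 7) // low 7-bit chunk first
		sz++
		if b&continuation == 0 || sz == uint(len(input)) {
			break
		}
	}

	return num, input[sz:]
}

func main() {
	// 0xe5 0x8e 0x26 is the textbook LEB128 example and decodes to 624485;
	// the trailing 0x05 stands in for the start of the next field.
	n, rest := decodeLEB128([]byte{0xe5, 0x8e, 0x26, 0x05})
	fmt.Println(n, rest) // 624485 [5]
}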