diff options
author | Máximo Cuadros <mcuadros@gmail.com> | 2016-11-08 23:46:38 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-11-08 23:46:38 +0100 |
commit | ac095bb12c4d29722b60ba9f20590fa7cfa6bc7d (patch) | |
tree | 223f36f336ba3414b1e45cac8af6c4744a5d7ef6 /plumbing/format/idxfile | |
parent | e523701393598f4fa241dd407af9ff8925507a1a (diff) | |
download | go-git-ac095bb12c4d29722b60ba9f20590fa7cfa6bc7d.tar.gz |
new plumbing package (#118)
* plumbing: core was renamed to plumbing, and formats and clients moved inside it
Diffstat (limited to 'plumbing/format/idxfile')
-rw-r--r-- | plumbing/format/idxfile/decoder.go | 148 | ||||
-rw-r--r-- | plumbing/format/idxfile/decoder_test.go | 69 | ||||
-rw-r--r-- | plumbing/format/idxfile/doc.go | 132 | ||||
-rw-r--r-- | plumbing/format/idxfile/encoder.go | 131 | ||||
-rw-r--r-- | plumbing/format/idxfile/encoder_test.go | 48 | ||||
-rw-r--r-- | plumbing/format/idxfile/idxfile.go | 62 |
6 files changed, 590 insertions, 0 deletions
diff --git a/plumbing/format/idxfile/decoder.go b/plumbing/format/idxfile/decoder.go new file mode 100644 index 0000000..e3ffc4b --- /dev/null +++ b/plumbing/format/idxfile/decoder.go @@ -0,0 +1,148 @@ +package idxfile + +import ( + "bytes" + "errors" + "io" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/utils/binary" +) + +var ( + // ErrUnsupportedVersion is returned by Decode when the idx file version + // is not supported. + ErrUnsupportedVersion = errors.New("Unsuported version") + // ErrMalformedIdxFile is returned by Decode when the idx file is corrupted. + ErrMalformedIdxFile = errors.New("Malformed IDX file") +) + +// A Decoder reads and decodes idx files from an input stream. +type Decoder struct { + io.Reader +} + +// NewDecoder returns a new decoder that reads from r. +func NewDecoder(r io.Reader) *Decoder { + return &Decoder{r} +} + +// Decode reads the whole idx object from its input and stores it in the +// value pointed to by idx. +func (d *Decoder) Decode(idx *Idxfile) error { + if err := validateHeader(d); err != nil { + return err + } + + flow := []func(*Idxfile, io.Reader) error{ + readVersion, + readFanout, + readObjectNames, + readCRC32, + readOffsets, + readChecksums, + } + + for _, f := range flow { + if err := f(idx, d); err != nil { + return err + } + } + + if !idx.isValid() { + return ErrMalformedIdxFile + } + + return nil +} + +func validateHeader(r io.Reader) error { + var h = make([]byte, 4) + if _, err := r.Read(h); err != nil { + return err + } + + if !bytes.Equal(h, idxHeader) { + return ErrMalformedIdxFile + } + + return nil +} + +func readVersion(idx *Idxfile, r io.Reader) error { + v, err := binary.ReadUint32(r) + if err != nil { + return err + } + + if v > VersionSupported { + return ErrUnsupportedVersion + } + + idx.Version = v + return nil +} + +func readFanout(idx *Idxfile, r io.Reader) error { + var err error + for i := 0; i < 255; i++ { + idx.Fanout[i], err = binary.ReadUint32(r) + if err != nil { + 
return err + } + } + + idx.ObjectCount, err = binary.ReadUint32(r) + return err +} + +func readObjectNames(idx *Idxfile, r io.Reader) error { + c := int(idx.ObjectCount) + for i := 0; i < c; i++ { + var ref plumbing.Hash + if _, err := r.Read(ref[:]); err != nil { + return err + } + + idx.Entries = append(idx.Entries, Entry{Hash: ref}) + } + + return nil +} + +func readCRC32(idx *Idxfile, r io.Reader) error { + c := int(idx.ObjectCount) + for i := 0; i < c; i++ { + if err := binary.Read(r, &idx.Entries[i].CRC32); err != nil { + return err + } + } + + return nil +} + +func readOffsets(idx *Idxfile, r io.Reader) error { + c := int(idx.ObjectCount) + for i := 0; i < c; i++ { + o, err := binary.ReadUint32(r) + if err != nil { + return err + } + + idx.Entries[i].Offset = uint64(o) + } + + return nil +} + +func readChecksums(idx *Idxfile, r io.Reader) error { + if _, err := r.Read(idx.PackfileChecksum[:]); err != nil { + return err + } + + if _, err := r.Read(idx.IdxChecksum[:]); err != nil { + return err + } + + return nil +} diff --git a/plumbing/format/idxfile/decoder_test.go b/plumbing/format/idxfile/decoder_test.go new file mode 100644 index 0000000..5231e64 --- /dev/null +++ b/plumbing/format/idxfile/decoder_test.go @@ -0,0 +1,69 @@ +package idxfile + +import ( + "bytes" + "fmt" + "testing" + + . 
"gopkg.in/check.v1" + "gopkg.in/src-d/go-git.v4/fixtures" + "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" + "gopkg.in/src-d/go-git.v4/storage/memory" +) + +func Test(t *testing.T) { TestingT(t) } + +type IdxfileSuite struct { + fixtures.Suite +} + +var _ = Suite(&IdxfileSuite{}) + +func (s *IdxfileSuite) TestDecode(c *C) { + f := fixtures.Basic().One() + + d := NewDecoder(f.Idx()) + idx := &Idxfile{} + err := d.Decode(idx) + c.Assert(err, IsNil) + + c.Assert(idx.Entries, HasLen, 31) + c.Assert(idx.Entries[0].Hash.String(), Equals, "1669dce138d9b841a518c64b10914d88f5e488ea") + c.Assert(idx.Entries[0].Offset, Equals, uint64(615)) + c.Assert(idx.Entries[0].CRC32, Equals, uint32(3645019190)) + + c.Assert(fmt.Sprintf("%x", idx.IdxChecksum), Equals, "fb794f1ec720b9bc8e43257451bd99c4be6fa1c9") + c.Assert(fmt.Sprintf("%x", idx.PackfileChecksum), Equals, f.PackfileHash.String()) +} + +func (s *IdxfileSuite) TestDecodeCRCs(c *C) { + f := fixtures.Basic().ByTag("ofs-delta").One() + + scanner := packfile.NewScanner(f.Packfile()) + storage := memory.NewStorage() + + pd, err := packfile.NewDecoder(scanner, storage) + c.Assert(err, IsNil) + _, err = pd.Decode() + c.Assert(err, IsNil) + + i := &Idxfile{Version: VersionSupported} + + offsets := pd.Offsets() + for h, crc := range pd.CRCs() { + i.Add(h, uint64(offsets[h]), crc) + } + + buf := bytes.NewBuffer(nil) + e := NewEncoder(buf) + _, err = e.Encode(i) + c.Assert(err, IsNil) + + idx := &Idxfile{} + + d := NewDecoder(buf) + err = d.Decode(idx) + c.Assert(err, IsNil) + + c.Assert(idx.Entries, DeepEquals, i.Entries) +} diff --git a/plumbing/format/idxfile/doc.go b/plumbing/format/idxfile/doc.go new file mode 100644 index 0000000..8a76853 --- /dev/null +++ b/plumbing/format/idxfile/doc.go @@ -0,0 +1,132 @@ +// Package idxfile implements a encoder/decoder of idx files +package idxfile + +/* +== Original (version 1) pack-*.idx files have the following format: + + - The header consists of 256 4-byte network byte order + 
integers. N-th entry of this table records the number of + objects in the corresponding pack, the first byte of whose + object name is less than or equal to N. This is called the + 'first-level fan-out' table. + + - The header is followed by sorted 24-byte entries, one entry + per object in the pack. Each entry is: + + 4-byte network byte order integer, recording where the + object is stored in the packfile as the offset from the + beginning. + + 20-byte object name. + + - The file is concluded with a trailer: + + A copy of the 20-byte SHA1 checksum at the end of + corresponding packfile. + + 20-byte SHA1-checksum of all of the above. + +Pack Idx file: + + -- +--------------------------------+ +fanout | fanout[0] = 2 (for example) |-. +table +--------------------------------+ | + | fanout[1] | | + +--------------------------------+ | + | fanout[2] | | + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | + | fanout[255] = total objects |---. + -- +--------------------------------+ | | +main | offset | | | +index | object name 00XXXXXXXXXXXXXXXX | | | +table +--------------------------------+ | | + | offset | | | + | object name 00XXXXXXXXXXXXXXXX | | | + +--------------------------------+<+ | + .-| offset | | + | | object name 01XXXXXXXXXXXXXXXX | | + | +--------------------------------+ | + | | offset | | + | | object name 01XXXXXXXXXXXXXXXX | | + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | + | | offset | | + | | object name FFXXXXXXXXXXXXXXXX | | + --| +--------------------------------+<--+ +trailer | | packfile checksum | + | +--------------------------------+ + | | idxfile checksum | + | +--------------------------------+ + .-------. + | +Pack file entry: <+ + + packed object header: + 1-byte size extension bit (MSB) + type (next 3 bit) + size0 (lower 4-bit) + n-byte sizeN (as long as MSB is set, each 7-bit) + size0..sizeN form 4+7+7+..+7 bit integer, size0 + is the least significant part, and sizeN is the + most significant part. 
+ packed object data: + If it is not DELTA, then deflated bytes (the size above + is the size before compression). + If it is REF_DELTA, then + 20-byte base object name SHA1 (the size above is the + size of the delta data that follows). + delta data, deflated. + If it is OFS_DELTA, then + n-byte offset (see below) interpreted as a negative + offset from the type-byte of the header of the + ofs-delta entry (the size above is the size of + the delta data that follows). + delta data, deflated. + + offset encoding: + n bytes with MSB set in all but the last one. + The offset is then the number constructed by + concatenating the lower 7 bit of each byte, and + for n >= 2 adding 2^7 + 2^14 + ... + 2^(7*(n-1)) + to the result. + + + +== Version 2 pack-*.idx files support packs larger than 4 GiB, and + have some other reorganizations. They have the format: + + - A 4-byte magic number '\377tOc' which is an unreasonable + fanout[0] value. + + - A 4-byte version number (= 2) + + - A 256-entry fan-out table just like v1. + + - A table of sorted 20-byte SHA1 object names. These are + packed together without offset values to reduce the cache + footprint of the binary search for a specific object name. + + - A table of 4-byte CRC32 values of the packed object data. + This is new in v2 so compressed data can be copied directly + from pack to pack during repacking without undetected + data corruption. + + - A table of 4-byte offset values (in network byte order). + These are usually 31-bit pack file offsets, but large + offsets are encoded as an index into the next table with + the msbit set. + + - A table of 8-byte offset entries (empty for pack files less + than 2 GiB). Pack files are organized with heavily used + objects toward the front, so most object references should + not need to refer to this table. + + - The same trailer as a v1 pack file: + + A copy of the 20-byte SHA1 checksum at the end of + corresponding packfile. + + 20-byte SHA1-checksum of all of the above. 
+ +From: +https://www.kernel.org/pub/software/scm/git/docs/v1.7.5/technical/pack-protocol.txt +*/ diff --git a/plumbing/format/idxfile/encoder.go b/plumbing/format/idxfile/encoder.go new file mode 100644 index 0000000..164414a --- /dev/null +++ b/plumbing/format/idxfile/encoder.go @@ -0,0 +1,131 @@ +package idxfile + +import ( + "crypto/sha1" + "hash" + "io" + "sort" + + "gopkg.in/src-d/go-git.v4/utils/binary" +) + +// An Encoder writes idx files to an output stream. +type Encoder struct { + io.Writer + hash hash.Hash +} + +// NewEncoder returns a new encoder that writes to w. +func NewEncoder(w io.Writer) *Encoder { + h := sha1.New() + mw := io.MultiWriter(w, h) + return &Encoder{mw, h} +} + +// Encode writes the idx in an idx file format to the stream of the encoder. +func (e *Encoder) Encode(idx *Idxfile) (int, error) { + idx.Entries.Sort() + + flow := []func(*Idxfile) (int, error){ + e.encodeHeader, + e.encodeFanout, + e.encodeHashes, + e.encodeCRC32, + e.encodeOffsets, + e.encodeChecksums, + } + + sz := 0 + for _, f := range flow { + i, err := f(idx) + sz += i + + if err != nil { + return sz, err + } + } + + return sz, nil +} + +func (e *Encoder) encodeHeader(idx *Idxfile) (int, error) { + c, err := e.Write(idxHeader) + if err != nil { + return c, err + } + + return c + 4, binary.WriteUint32(e, idx.Version) +} + +func (e *Encoder) encodeFanout(idx *Idxfile) (int, error) { + fanout := idx.calculateFanout() + for _, c := range fanout { + if err := binary.WriteUint32(e, c); err != nil { + return 0, err + } + } + + return 1024, nil +} + +func (e *Encoder) encodeHashes(idx *Idxfile) (int, error) { + sz := 0 + for _, ent := range idx.Entries { + i, err := e.Write(ent.Hash[:]) + sz += i + + if err != nil { + return sz, err + } + } + + return sz, nil +} + +func (e *Encoder) encodeCRC32(idx *Idxfile) (int, error) { + sz := 0 + for _, ent := range idx.Entries { + err := binary.Write(e, ent.CRC32) + sz += 4 + + if err != nil { + return sz, err + } + } + + return sz, nil 
+} + +func (e *Encoder) encodeOffsets(idx *Idxfile) (int, error) { + sz := 0 + for _, ent := range idx.Entries { + if err := binary.WriteUint32(e, uint32(ent.Offset)); err != nil { + return sz, err + } + + sz += 4 + + } + + return sz, nil +} + +func (e *Encoder) encodeChecksums(idx *Idxfile) (int, error) { + if _, err := e.Write(idx.PackfileChecksum[:]); err != nil { + return 0, err + } + + copy(idx.IdxChecksum[:], e.hash.Sum(nil)[:20]) + if _, err := e.Write(idx.IdxChecksum[:]); err != nil { + return 0, err + } + + return 40, nil +} + +type EntryList []Entry + +func (p EntryList) Len() int { return len(p) } +func (p EntryList) Less(i, j int) bool { return p[i].Hash.String() < p[j].Hash.String() } +func (p EntryList) Swap(i, j int) { p[i], p[j] = p[j], p[i] } +func (p EntryList) Sort() { sort.Sort(p) } diff --git a/plumbing/format/idxfile/encoder_test.go b/plumbing/format/idxfile/encoder_test.go new file mode 100644 index 0000000..9a53863 --- /dev/null +++ b/plumbing/format/idxfile/encoder_test.go @@ -0,0 +1,48 @@ +package idxfile + +import ( + "bytes" + "io/ioutil" + + . 
"gopkg.in/check.v1" + "gopkg.in/src-d/go-git.v4/fixtures" + "gopkg.in/src-d/go-git.v4/plumbing" +) + +func (s *IdxfileSuite) TestEncode(c *C) { + expected := &Idxfile{} + expected.Add(plumbing.NewHash("4bfc730165c370df4a012afbb45ba3f9c332c0d4"), 82, 82) + expected.Add(plumbing.NewHash("8fa2238efdae08d83c12ee176fae65ff7c99af46"), 42, 42) + + buf := bytes.NewBuffer(nil) + e := NewEncoder(buf) + _, err := e.Encode(expected) + c.Assert(err, IsNil) + + idx := &Idxfile{} + d := NewDecoder(buf) + err = d.Decode(idx) + c.Assert(err, IsNil) + + c.Assert(idx.Entries, DeepEquals, expected.Entries) +} + +func (s *IdxfileSuite) TestDecodeEncode(c *C) { + fixtures.ByTag("packfile").Test(c, func(f *fixtures.Fixture) { + expected, err := ioutil.ReadAll(f.Idx()) + c.Assert(err, IsNil) + + idx := &Idxfile{} + d := NewDecoder(bytes.NewBuffer(expected)) + err = d.Decode(idx) + c.Assert(err, IsNil) + + result := bytes.NewBuffer(nil) + e := NewEncoder(result) + size, err := e.Encode(idx) + c.Assert(err, IsNil) + + c.Assert(size, Equals, len(expected)) + c.Assert(result.Bytes(), DeepEquals, expected) + }) +} diff --git a/plumbing/format/idxfile/idxfile.go b/plumbing/format/idxfile/idxfile.go new file mode 100644 index 0000000..8329c23 --- /dev/null +++ b/plumbing/format/idxfile/idxfile.go @@ -0,0 +1,62 @@ +package idxfile + +import "gopkg.in/src-d/go-git.v4/plumbing" + +const ( + // VersionSupported is the only idx version supported. + VersionSupported = 2 +) + +var ( + idxHeader = []byte{255, 't', 'O', 'c'} +) + +// An Idxfile represents an idx file in memory. +type Idxfile struct { + Version uint32 + Fanout [255]uint32 + ObjectCount uint32 + Entries EntryList + PackfileChecksum [20]byte + IdxChecksum [20]byte +} + +// An Entry represents data about an object in the packfile: its hash, +// offset and CRC32 checksum. 
+type Entry struct { + Hash plumbing.Hash + CRC32 uint32 + Offset uint64 +} + +func (idx *Idxfile) Add(h plumbing.Hash, offset uint64, crc32 uint32) { + idx.Entries = append(idx.Entries, Entry{ + Hash: h, + Offset: offset, + CRC32: crc32, + }) +} + +func (idx *Idxfile) isValid() bool { + fanout := idx.calculateFanout() + for k, c := range idx.Fanout { + if fanout[k] != c { + return false + } + } + + return true +} + +func (idx *Idxfile) calculateFanout() [256]uint32 { + fanout := [256]uint32{} + for _, e := range idx.Entries { + fanout[e.Hash[0]]++ + } + + for i := 1; i < 256; i++ { + fanout[i] += fanout[i-1] + } + + return fanout +} |