From 81c5d2c6c672509ee7f30a346b890f3920ff20c1 Mon Sep 17 00:00:00 2001
From: Antonio Navarro Perez
Date: Thu, 24 Nov 2016 15:15:59 +0100
Subject: plumbing/packfile: PACK encoder (#131)

* plumbing/packfile: PACK encoder

- Added simple PACK encoder, deltas not supported by now

* Requested changes

* Requested changes

* Requested changes
---
 plumbing/format/packfile/common.go       |  15 ++++
 plumbing/format/packfile/encoder.go      | 124 +++++++++++++++++++++++++
 plumbing/format/packfile/encoder_test.go | 140 +++++++++++++++++++++++++++++++
 plumbing/format/packfile/scanner.go      |  16 +---
 4 files changed, 280 insertions(+), 15 deletions(-)
 create mode 100644 plumbing/format/packfile/common.go
 create mode 100644 plumbing/format/packfile/encoder.go
 create mode 100644 plumbing/format/packfile/encoder_test.go

(limited to 'plumbing/format')

diff --git a/plumbing/format/packfile/common.go b/plumbing/format/packfile/common.go
new file mode 100644
index 0000000..1656551
--- /dev/null
+++ b/plumbing/format/packfile/common.go
@@ -0,0 +1,15 @@
+package packfile
+
+var signature = []byte{'P', 'A', 'C', 'K'}
+
+const (
+	// VersionSupported is the packfile version supported by this package
+	VersionSupported uint32 = 2
+
+	firstLengthBits = uint8(4)   // the first byte of the object header has 4 bits to store the length
+	lengthBits      = uint8(7)   // subsequent bytes have 7 bits to store the length
+	maskFirstLength = 15         // 0000 1111
+	maskContinue    = 0x80       // 1000 0000
+	maskLength      = uint8(127) // 0111 1111
+	maskType        = uint8(112) // 0111 0000
+)
diff --git a/plumbing/format/packfile/encoder.go b/plumbing/format/packfile/encoder.go
new file mode 100644
index 0000000..1404dbe
--- /dev/null
+++ b/plumbing/format/packfile/encoder.go
@@ -0,0 +1,124 @@
+package packfile
+
+import (
+	"compress/zlib"
+	"crypto/sha1"
+	"fmt"
+	"io"
+
+	"gopkg.in/src-d/go-git.v4/plumbing"
+	"gopkg.in/src-d/go-git.v4/plumbing/storer"
+	"gopkg.in/src-d/go-git.v4/utils/binary"
+)
+
+// Encoder gets the data from the storage and writes it into the writer in PACK
+// format.
+type Encoder struct {
+	storage storer.ObjectStorer
+	w       io.Writer
+	zw      *zlib.Writer
+	hasher  plumbing.Hasher
+}
+
+// NewEncoder creates a new packfile encoder using a specific Writer and
+// ObjectStorer.
+func NewEncoder(w io.Writer, s storer.ObjectStorer) *Encoder {
+	h := plumbing.Hasher{
+		Hash: sha1.New(),
+	}
+	mw := io.MultiWriter(w, h)
+	zw := zlib.NewWriter(mw)
+	return &Encoder{
+		storage: s,
+		w:       mw,
+		zw:      zw,
+		hasher:  h,
+	}
+}
+
+// Encode creates a packfile containing all the objects referenced in hashes
+// and writes it to the writer in the Encoder.
+func (e *Encoder) Encode(hashes []plumbing.Hash) (plumbing.Hash, error) {
+	if err := e.head(len(hashes)); err != nil {
+		return plumbing.ZeroHash, err
+	}
+
+	for _, h := range hashes {
+		o, err := e.storage.Object(plumbing.AnyObject, h)
+		if err != nil {
+			return plumbing.ZeroHash, err
+		}
+
+		if err := e.entry(o); err != nil {
+			return plumbing.ZeroHash, err
+		}
+	}
+
+	return e.footer()
+}
+
+// head writes the packfile header: the PACK signature, the version and the
+// number of entries.
+func (e *Encoder) head(numEntries int) error {
+	return binary.Write(
+		e.w,
+		signature,
+		int32(VersionSupported),
+		int32(numEntries),
+	)
+}
+
+// entry writes a single object: its type and size header followed by its
+// zlib-compressed content. Delta objects are rejected with an error.
+func (e *Encoder) entry(o plumbing.Object) error {
+	t := o.Type()
+	if t == plumbing.OFSDeltaObject || t == plumbing.REFDeltaObject {
+		// TODO: implement delta objects
+		return fmt.Errorf("delta object not supported: %v", t)
+	}
+
+	if err := e.entryHead(t, o.Size()); err != nil {
+		return err
+	}
+
+	e.zw.Reset(e.w)
+	or, err := o.Reader()
+	if err != nil {
+		return err
+	}
+	// Release the object reader on every return path.
+	defer or.Close()
+
+	if _, err = io.Copy(e.zw, or); err != nil {
+		return err
+	}
+
+	return e.zw.Close()
+}
+
+// entryHead writes the entry header of an object. The first byte holds the
+// type in bits 4-6 and the 4 least significant bits of the size; the rest of
+// the size follows in groups of 7 bits, least significant group first. Every
+// byte except the last one has its most significant bit set.
+func (e *Encoder) entryHead(typeNum plumbing.ObjectType, size int64) error {
+	t := int64(typeNum)
+	var header []byte
+	c := (t << firstLengthBits) | (size & maskFirstLength)
+	size >>= firstLengthBits
+	for size != 0 {
+		header = append(header, byte(c|maskContinue))
+		c = size & int64(maskLength)
+		size >>= lengthBits
+	}
+
+	header = append(header, byte(c))
+	_, err := e.w.Write(header)
+
+	return err
+}
+
+// footer writes the checksum of all the data written so far and returns it.
+func (e *Encoder) footer() (plumbing.Hash, error) {
+	h := e.hasher.Sum()
+	return h, binary.Write(e.w, h)
+}
diff --git a/plumbing/format/packfile/encoder_test.go b/plumbing/format/packfile/encoder_test.go
new file mode 100644
index 0000000..729843d
--- /dev/null
+++ b/plumbing/format/packfile/encoder_test.go
@@ -0,0 +1,140 @@
+package packfile
+
+import (
+	"bytes"
+
+	"gopkg.in/src-d/go-git.v4/fixtures"
+	"gopkg.in/src-d/go-git.v4/plumbing"
+	"gopkg.in/src-d/go-git.v4/storage/memory"
+
+	. "gopkg.in/check.v1"
+)
+
+type EncoderSuite struct {
+	fixtures.Suite
+	buf   *bytes.Buffer
+	store *memory.Storage
+	enc   *Encoder
+}
+
+var _ = Suite(&EncoderSuite{})
+
+func (s *EncoderSuite) SetUpTest(c *C) {
+	s.buf = bytes.NewBuffer(nil)
+	s.store = memory.NewStorage()
+	s.enc = NewEncoder(s.buf, s.store)
+}
+
+func (s *EncoderSuite) TestCorrectPackHeader(c *C) {
+	hash, err := s.enc.Encode([]plumbing.Hash{})
+	c.Assert(err, IsNil)
+
+	hb := [20]byte(hash)
+
+	// PACK + VERSION + OBJECTS + HASH
+	expectedResult := []byte{'P', 'A', 'C', 'K', 0, 0, 0, 2, 0, 0, 0, 0}
+	expectedResult = append(expectedResult, hb[:]...)
+
+	result := s.buf.Bytes()
+
+	c.Assert(result, DeepEquals, expectedResult)
+}
+
+func (s *EncoderSuite) TestCorrectPackWithOneEmptyObject(c *C) {
+	o := &plumbing.MemoryObject{}
+	o.SetType(plumbing.CommitObject)
+	o.SetSize(0)
+	_, err := s.store.SetObject(o)
+	c.Assert(err, IsNil)
+
+	hash, err := s.enc.Encode([]plumbing.Hash{o.Hash()})
+	c.Assert(err, IsNil)
+
+	// PACK + VERSION(2) + OBJECT NUMBER(1)
+	expectedResult := []byte{'P', 'A', 'C', 'K', 0, 0, 0, 2, 0, 0, 0, 1}
+	// OBJECT HEADER(TYPE + SIZE)= 0001 0000
+	expectedResult = append(expectedResult, []byte{16}...)
+
+	// Zlib stream for empty content: header, empty stored block and Adler-32
+	expectedResult = append(expectedResult,
+		[]byte{120, 156, 1, 0, 0, 255, 255, 0, 0, 0, 1}...)
+
+	// + HASH
+	hb := [20]byte(hash)
+	expectedResult = append(expectedResult, hb[:]...)
+
+	result := s.buf.Bytes()
+
+	c.Assert(result, DeepEquals, expectedResult)
+}
+
+func (s *EncoderSuite) TestMaxObjectSize(c *C) {
+	o := s.store.NewObject()
+	o.SetSize(9223372036854775807)
+	o.SetType(plumbing.CommitObject)
+	_, err := s.store.SetObject(o)
+	c.Assert(err, IsNil)
+	hash, err := s.enc.Encode([]plumbing.Hash{o.Hash()})
+	c.Assert(err, IsNil)
+	c.Assert(hash.IsZero(), Not(Equals), true)
+}
+
+func (s *EncoderSuite) TestDecodeEncodeDecode(c *C) {
+	fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) {
+		scanner := NewScanner(f.Packfile())
+		storage := memory.NewStorage()
+
+		d, err := NewDecoder(scanner, storage)
+		c.Assert(err, IsNil)
+
+		ch, err := d.Decode()
+		c.Assert(err, IsNil)
+		c.Assert(ch, Equals, f.PackfileHash)
+
+		commitIter, err := d.o.IterObjects(plumbing.AnyObject)
+		c.Assert(err, IsNil)
+
+		objects := []plumbing.Object{}
+		hashes := []plumbing.Hash{}
+		err = commitIter.ForEach(func(o plumbing.Object) error {
+			objects = append(objects, o)
+			hash, err := s.store.SetObject(o)
+			hashes = append(hashes, hash)
+
+			return err
+
+		})
+		c.Assert(err, IsNil)
+		_, err = s.enc.Encode(hashes)
+		c.Assert(err, IsNil)
+
+		scanner = NewScanner(s.buf)
+		storage = memory.NewStorage()
+		d, err = NewDecoder(scanner, storage)
+		c.Assert(err, IsNil)
+		_, err = d.Decode()
+		c.Assert(err, IsNil)
+
+		commitIter, err = d.o.IterObjects(plumbing.AnyObject)
+		c.Assert(err, IsNil)
+		obtainedObjects := []plumbing.Object{}
+		err = commitIter.ForEach(func(o plumbing.Object) error {
+			obtainedObjects = append(obtainedObjects, o)
+
+			return nil
+		})
+		c.Assert(err, IsNil)
+		c.Assert(len(obtainedObjects), Equals, len(objects))
+
+		equals := 0
+		for _, oo := range obtainedObjects {
+			for _, o := range objects {
+				if o.Hash() == oo.Hash() {
+					equals++
+				}
+			}
+		}
+
+		c.Assert(len(obtainedObjects), Equals, equals)
+	})
+}
diff --git a/plumbing/format/packfile/scanner.go b/plumbing/format/packfile/scanner.go
index 130bb94..3adc26a 100644
--- a/plumbing/format/packfile/scanner.go
+++ b/plumbing/format/packfile/scanner.go
@@ -26,11 +26,6 @@ var (
 	ErrSeekNotSupported = NewError("not seek support")
 )
 
-const (
-	// VersionSupported is the packfile version supported by this parser.
-	VersionSupported uint32 = 2
-)
-
 // ObjectHeader contains the information related to the object, this information
 // is collected from the previous bytes to the content of the object.
 type ObjectHeader struct {
@@ -124,7 +119,7 @@ func (s *Scanner) readSignature() ([]byte, error) {
 
 // isValidSignature returns if sig is a valid packfile signature.
 func (s *Scanner) isValidSignature(sig []byte) bool {
-	return bytes.Equal(sig, []byte{'P', 'A', 'C', 'K'})
+	return bytes.Equal(sig, signature)
 }
 
 // readVersion reads and returns the version field of a packfile.
@@ -230,15 +225,6 @@ func (s *Scanner) readObjectTypeAndLength() (plumbing.ObjectType, int64, error)
 	return t, l, err
 }
 
-const (
-	maskType        = uint8(112) // 0111 0000
-	maskFirstLength = uint8(15)  // 0000 1111
-	maskContinue    = uint8(128) // 1000 000
-	firstLengthBits = uint8(4)   // the first byte has 4 bits to store the length
-	maskLength      = uint8(127) // 0111 1111
-	lengthBits      = uint8(7)   // subsequent bytes has 7 bits to store the length
-)
-
 func (s *Scanner) readType() (plumbing.ObjectType, byte, error) {
 	var c byte
 	var err error
-- 
cgit