From 6b7464a22c6177d9e0cf96e1aaaae13c127c3149 Mon Sep 17 00:00:00 2001 From: Máximo Cuadros Date: Mon, 31 Oct 2016 19:44:29 +0000 Subject: format: index encoder and index decoder improvements (#105) --- formats/index/decoder.go | 163 ++++++++++++++++++++++++++---------------- formats/index/decoder_test.go | 10 +-- formats/index/doc.go | 2 +- formats/index/encoder.go | 141 ++++++++++++++++++++++++++++++++++++ formats/index/encoder_test.go | 78 ++++++++++++++++++++ formats/index/index.go | 46 ++++++++---- 6 files changed, 362 insertions(+), 78 deletions(-) create mode 100644 formats/index/encoder.go create mode 100644 formats/index/encoder_test.go (limited to 'formats') diff --git a/formats/index/decoder.go b/formats/index/decoder.go index 8e37fd1..f3d4343 100644 --- a/formats/index/decoder.go +++ b/formats/index/decoder.go @@ -2,7 +2,9 @@ package index import ( "bytes" + "crypto/sha1" "errors" + "hash" "io" "io/ioutil" "strconv" @@ -13,38 +15,42 @@ import ( ) var ( - // IndexVersionSupported is the range of supported index versions - IndexVersionSupported = struct{ Min, Max uint32 }{Min: 2, Max: 4} + // DecodeVersionSupported is the range of supported index versions + DecodeVersionSupported = struct{ Min, Max uint32 }{Min: 2, Max: 4} - // ErrUnsupportedVersion is returned by Decode when the idxindex file - // version is not supported. - ErrUnsupportedVersion = errors.New("Unsuported version") // ErrMalformedSignature is returned by Decode when the index header file is // malformed - ErrMalformedSignature = errors.New("Malformed index signature file") + ErrMalformedSignature = errors.New("malformed index signature file") + // ErrInvalidChecksum is returned by Decode if the SHA1 hash missmatch with + // the read content + ErrInvalidChecksum = errors.New("invalid checksum") - indexSignature = []byte{'D', 'I', 'R', 'C'} - treeExtSignature = []byte{'T', 'R', 'E', 'E'} - resolveUndoExtSignature = []byte{'R', 'E', 'U', 'C'} + errUnknownExtension = errors.New("unknown extension") ) const ( - EntryExtended = 0x4000 - EntryValid = 0x8000 - - nameMask = 0xfff - intentToAddMask = 1 << 13 - skipWorkTreeMask = 1 << 14 + entryHeaderLength = 62 + entryExtended = 0x4000 + entryValid = 0x8000 + nameMask = 0xfff + intentToAddMask = 1 << 13 + skipWorkTreeMask = 1 << 14 ) +// A Decoder reads and decodes idx files from an input stream. type Decoder struct { r io.Reader + hash hash.Hash lastEntry *Entry } // NewDecoder returns a new decoder that reads from r. func NewDecoder(r io.Reader) *Decoder { - return &Decoder{r: r} + h := sha1.New() + return &Decoder{ + r: io.TeeReader(r, h), + hash: h, + } } // Decode reads the whole index object from its input and stores it in the @@ -56,20 +62,20 @@ func (d *Decoder) Decode(idx *Index) error { return err } - idx.EntryCount, err = binary.ReadUint32(d.r) + entryCount, err := binary.ReadUint32(d.r) if err != nil { return err } - if err := d.readEntries(idx); err != nil { + if err := d.readEntries(idx, int(entryCount)); err != nil { return err } return d.readExtensions(idx) } -func (d *Decoder) readEntries(idx *Index) error { - for i := 0; i < int(idx.EntryCount); i++ { +func (d *Decoder) readEntries(idx *Index, count int) error { + for i := 0; i < count; i++ { e, err := d.readEntry(idx) if err != nil { return err @@ -86,11 +92,11 @@ func (d *Decoder) readEntry(idx *Index) (*Entry, error) { e := &Entry{} var msec, mnsec, sec, nsec uint32 + var flags uint16 - flowSize := 62 flow := []interface{}{ - &msec, &mnsec, &sec, &nsec, + &msec, &mnsec, &e.Dev, &e.Inode, &e.Mode, @@ -98,19 +104,19 @@ func (d *Decoder) readEntry(idx *Index) (*Entry, error) { &e.GID, &e.Size, &e.Hash, - &e.Flags, + &flags, } if err := binary.Read(d.r, flow...); err != nil { return nil, err } - read := flowSize - e.CreatedAt = time.Unix(int64(msec), int64(mnsec)) - e.ModifiedAt = time.Unix(int64(sec), int64(nsec)) - e.Stage = Stage(e.Flags>>12) & 0x3 + read := entryHeaderLength + e.CreatedAt = time.Unix(int64(sec), int64(nsec)) + e.ModifiedAt = time.Unix(int64(msec), int64(mnsec)) + e.Stage = Stage(flags>>12) & 0x3 - if e.Flags&EntryExtended != 0 { + if flags&entryExtended != 0 { extended, err := binary.ReadUint16(d.r) if err != nil { return nil, err @@ -121,20 +127,21 @@ func (d *Decoder) readEntry(idx *Index) (*Entry, error) { e.SkipWorktree = extended&skipWorkTreeMask != 0 } - if err := d.readEntryName(idx, e); err != nil { + if err := d.readEntryName(idx, e, flags); err != nil { return nil, err } return e, d.padEntry(idx, e, read) } -func (d *Decoder) readEntryName(idx *Index, e *Entry) error { +func (d *Decoder) readEntryName(idx *Index, e *Entry, flags uint16) error { var name string var err error switch idx.Version { case 2, 3: - name, err = d.doReadEntryName(e) + len := flags & nameMask + name, err = d.doReadEntryName(len) case 4: name, err = d.doReadEntryNameV4() default: @@ -168,10 +175,8 @@ func (d *Decoder) doReadEntryNameV4() (string, error) { return base + string(name), nil } -func (d *Decoder) doReadEntryName(e *Entry) (string, error) { - pLen := e.Flags & nameMask - - name := make([]byte, int64(pLen)) +func (d *Decoder) doReadEntryName(len uint16) (string, error) { + name := make([]byte, len) if err := binary.Read(d.r, &name); err != nil { return "", err } @@ -195,50 +200,88 @@ func (d *Decoder) padEntry(idx *Index, e *Entry, read int) error { return nil } +// TODO: support 'Split index' and 'Untracked cache' extensions, take in count +// that they are not supported by jgit or libgit func (d *Decoder) readExtensions(idx *Index) error { + var expected []byte var err error + + var header [4]byte for { - err = d.readExtension(idx) + expected = d.hash.Sum(nil) + + var n int + if n, err = io.ReadFull(d.r, header[:]); err != nil { + if n == 0 { + err = io.EOF + } + + break + } + + err = d.readExtension(idx, header[:]) if err != nil { break } } - if err == io.EOF { - return nil + if err != errUnknownExtension { + return err } - return err + return d.readChecksum(expected, header) } -func (d *Decoder) readExtension(idx *Index) error { - var s = make([]byte, 4) - if _, err := io.ReadFull(d.r, s); err != nil { - return err +func (d *Decoder) readExtension(idx *Index, header []byte) error { + switch { + case bytes.Equal(header, treeExtSignature): + r, err := d.getExtensionReader() + if err != nil { + return err + } + + idx.Cache = &Tree{} + d := &treeExtensionDecoder{r} + if err := d.Decode(idx.Cache); err != nil { + return err + } + case bytes.Equal(header, resolveUndoExtSignature): + r, err := d.getExtensionReader() + if err != nil { + return err + } + + idx.ResolveUndo = &ResolveUndo{} + d := &resolveUndoDecoder{r} + if err := d.Decode(idx.ResolveUndo); err != nil { + return err + } + default: + return errUnknownExtension } + return nil +} + +func (d *Decoder) getExtensionReader() (io.Reader, error) { len, err := binary.ReadUint32(d.r) if err != nil { - return err + return nil, err } - switch { - case bytes.Equal(s, treeExtSignature): - t := &Tree{} - td := &treeExtensionDecoder{&io.LimitedReader{R: d.r, N: int64(len)}} - if err := td.Decode(t); err != nil { - return err - } + return &io.LimitedReader{R: d.r, N: int64(len)}, nil +} - idx.Cache = t - case bytes.Equal(s, resolveUndoExtSignature): - ru := &ResolveUndo{} - rud := &resolveUndoDecoder{&io.LimitedReader{R: d.r, N: int64(len)}} - if err := rud.Decode(ru); err != nil { - return err - } +func (d *Decoder) readChecksum(expected []byte, alreadyRead [4]byte) error { + var h core.Hash + copy(h[:4], alreadyRead[:]) + + if err := binary.Read(d.r, h[4:]); err != nil { + return err + } - idx.ResolveUndo = ru + if bytes.Compare(h[:], expected) != 0 { + return ErrInvalidChecksum } return nil @@ -259,7 +302,7 @@ func validateHeader(r io.Reader) (version uint32, err error) { return 0, err } - if version < IndexVersionSupported.Min || version > IndexVersionSupported.Max { + if version < DecodeVersionSupported.Min || version > DecodeVersionSupported.Max { return 0, ErrUnsupportedVersion } diff --git a/formats/index/decoder_test.go b/formats/index/decoder_test.go index cf4c872..a05417d 100644 --- a/formats/index/decoder_test.go +++ b/formats/index/decoder_test.go @@ -26,7 +26,7 @@ func (s *IdxfileSuite) TestDecode(c *C) { c.Assert(err, IsNil) c.Assert(idx.Version, Equals, uint32(2)) - c.Assert(idx.EntryCount, Equals, uint32(9)) + c.Assert(idx.Entries, HasLen, 9) } func (s *IdxfileSuite) TestDecodeEntries(c *C) { @@ -97,7 +97,7 @@ func (s *IdxfileSuite) TestDecodeMergeConflict(c *C) { c.Assert(err, IsNil) c.Assert(idx.Version, Equals, uint32(2)) - c.Assert(idx.EntryCount, Equals, uint32(13)) + c.Assert(idx.Entries, HasLen, 13) expected := []struct { Stage Stage @@ -136,7 +136,7 @@ func (s *IdxfileSuite) TestDecodeExtendedV3(c *C) { c.Assert(err, IsNil) c.Assert(idx.Version, Equals, uint32(3)) - c.Assert(idx.EntryCount, Equals, uint32(11)) + c.Assert(idx.Entries, HasLen, 11) c.Assert(idx.Entries[6].Name, Equals, "intent-to-add") c.Assert(idx.Entries[6].IntentToAdd, Equals, true) @@ -153,7 +153,7 @@ func (s *IdxfileSuite) TestDecodeResolveUndo(c *C) { c.Assert(err, IsNil) c.Assert(idx.Version, Equals, uint32(2)) - c.Assert(idx.EntryCount, Equals, uint32(8)) + c.Assert(idx.Entries, HasLen, 8) ru := idx.ResolveUndo c.Assert(ru.Entries, HasLen, 2) @@ -178,7 +178,7 @@ func (s *IdxfileSuite) TestDecodeV4(c *C) { c.Assert(err, IsNil) c.Assert(idx.Version, Equals, uint32(4)) - c.Assert(idx.EntryCount, Equals, uint32(11)) + c.Assert(idx.Entries, HasLen, 11) names := []string{ ".gitignore", "CHANGELOG", "LICENSE", "binary.jpg", "go/example.go", diff --git a/formats/index/doc.go b/formats/index/doc.go index 285eade..00466af 100644 --- a/formats/index/doc.go +++ b/formats/index/doc.go @@ -1,4 +1,4 @@ -// Package idxfile implements a encoder/decoder of index format files +// Package index implements a encoder/decoder of index format files package index /* diff --git a/formats/index/encoder.go b/formats/index/encoder.go new file mode 100644 index 0000000..94fbc68 --- /dev/null +++ b/formats/index/encoder.go @@ -0,0 +1,141 @@ +package index + +import ( + "bytes" + "crypto/sha1" + "errors" + "hash" + "io" + "time" + + "gopkg.in/src-d/go-git.v4/utils/binary" +) + +var ( + // EncodeVersionSupported is the range of supported index versions + EncodeVersionSupported uint32 = 2 + + // ErrInvalidTimestamp is returned by Encode if a Index with a Entry with + // negative timestamp values + ErrInvalidTimestamp = errors.New("negative timestamps are not allowed") +) + +// An Encoder writes an Index to an output stream. +type Encoder struct { + w io.Writer + hash hash.Hash +} + +// NewEncoder returns a new encoder that writes to w. +func NewEncoder(w io.Writer) *Encoder { + h := sha1.New() + mw := io.MultiWriter(w, h) + return &Encoder{mw, h} +} + +// Encode writes the Index to the stream of the encoder. +func (e *Encoder) Encode(idx *Index) error { + // TODO: support versions v3 and v4 + // TODO: support extensions + if idx.Version != EncodeVersionSupported { + return ErrUnsupportedVersion + } + + if err := e.encodeHeader(idx); err != nil { + return err + } + + if err := e.encodeEntries(idx); err != nil { + return err + } + + return e.encodeFooter() +} + +func (e *Encoder) encodeHeader(idx *Index) error { + return binary.Write(e.w, + indexSignature, + idx.Version, + uint32(len(idx.Entries)), + ) +} + +func (e *Encoder) encodeEntries(idx *Index) error { + for _, entry := range idx.Entries { + if err := e.encodeEntry(&entry); err != nil { + return err + } + + wrote := entryHeaderLength + len(entry.Name) + if err := e.padEntry(wrote); err != nil { + return err + } + } + + return nil +} + +func (e *Encoder) encodeEntry(entry *Entry) error { + if entry.IntentToAdd || entry.SkipWorktree { + return ErrUnsupportedVersion + } + + sec, nsec, err := e.timeToUint32(&entry.CreatedAt) + if err != nil { + return err + } + + msec, mnsec, err := e.timeToUint32(&entry.ModifiedAt) + if err != nil { + return err + } + + flags := uint16(entry.Stage&0x3) << 12 + if l := len(entry.Name); l < nameMask { + flags |= uint16(l) + } else { + flags |= nameMask + } + + flow := []interface{}{ + sec, nsec, + msec, mnsec, + entry.Dev, + entry.Inode, + entry.Mode, + entry.UID, + entry.GID, + entry.Size, + entry.Hash[:], + flags, + } + + if err := binary.Write(e.w, flow...); err != nil { + return err + } + + return binary.Write(e.w, []byte(entry.Name)) +} + +func (e *Encoder) timeToUint32(t *time.Time) (uint32, uint32, error) { + if t.IsZero() { + return 0, 0, nil + } + + if t.Unix() < 0 || t.UnixNano() < 0 { + return 0, 0, ErrInvalidTimestamp + } + + return uint32(t.Unix()), uint32(t.Nanosecond()), nil +} + +func (e *Encoder) padEntry(wrote int) error { + padLen := 8 - wrote%8 + + _, err := e.w.Write(bytes.Repeat([]byte{'\x00'}, padLen)) + return err +} + +func (e *Encoder) encodeFooter() error { + return binary.Write(e.w, e.hash.Sum(nil)) +} diff --git a/formats/index/encoder_test.go b/formats/index/encoder_test.go new file mode 100644 index 0000000..3085988 --- /dev/null +++ b/formats/index/encoder_test.go @@ -0,0 +1,78 @@ +package index + +import ( + "bytes" + "strings" + "time" + + . "gopkg.in/check.v1" + "gopkg.in/src-d/go-git.v4/core" +) + +func (s *IdxfileSuite) TestEncode(c *C) { + idx := &Index{ + Version: 2, + Entries: []Entry{{ + CreatedAt: time.Now(), + ModifiedAt: time.Now(), + Dev: 4242, + Inode: 424242, + UID: 84, + GID: 8484, + Size: 42, + Stage: TheirMode, + Hash: core.NewHash("e25b29c8946e0e192fae2edc1dabf7be71e8ecf3"), + Name: "foo", + }, { + CreatedAt: time.Now(), + ModifiedAt: time.Now(), + Name: strings.Repeat(" ", 20), + Size: 82, + }}, + } + + buf := bytes.NewBuffer(nil) + e := NewEncoder(buf) + err := e.Encode(idx) + c.Assert(err, IsNil) + + output := &Index{} + d := NewDecoder(buf) + err = d.Decode(output) + c.Assert(err, IsNil) + + c.Assert(idx, DeepEquals, output) +} + +func (s *IdxfileSuite) TestEncodeUnsuportedVersion(c *C) { + idx := &Index{Version: 3} + + buf := bytes.NewBuffer(nil) + e := NewEncoder(buf) + err := e.Encode(idx) + c.Assert(err, Equals, ErrUnsupportedVersion) +} + +func (s *IdxfileSuite) TestEncodeWithIntentToAddUnsuportedVersion(c *C) { + idx := &Index{ + Version: 2, + Entries: []Entry{{IntentToAdd: true}}, + } + + buf := bytes.NewBuffer(nil) + e := NewEncoder(buf) + err := e.Encode(idx) + c.Assert(err, Equals, ErrUnsupportedVersion) +} + +func (s *IdxfileSuite) TestEncodeWithSkipWorktreeUnsuportedVersion(c *C) { + idx := &Index{ + Version: 2, + Entries: []Entry{{SkipWorktree: true}}, + } + + buf := bytes.NewBuffer(nil) + e := NewEncoder(buf) + err := e.Encode(idx) + c.Assert(err, Equals, ErrUnsupportedVersion) +} diff --git a/formats/index/index.go b/formats/index/index.go index bea199e..35a5391 100644 --- a/formats/index/index.go +++ b/formats/index/index.go @@ -1,12 +1,24 @@ package index import ( + "errors" "os" "time" "gopkg.in/src-d/go-git.v4/core" ) +var ( + // ErrUnsupportedVersion is returned by Decode when the idxindex file + // version is not supported. + ErrUnsupportedVersion = errors.New("Unsuported version") + + indexSignature = []byte{'D', 'I', 'R', 'C'} + treeExtSignature = []byte{'T', 'R', 'E', 'E'} + resolveUndoExtSignature = []byte{'R', 'E', 'U', 'C'} +) + +// Stage during merge type Stage int const ( @@ -25,7 +37,6 @@ const ( // worktree are detected using this Index. The Index is also used during merges type Index struct { Version uint32 - EntryCount uint32 Entries []Entry Cache *Tree ResolveUndo *ResolveUndo @@ -35,20 +46,31 @@ type Index struct { // represents exactly one stage of a file. If a file path is unmerged then // multiple Entry instances may appear for the same path name. type Entry struct { - CreatedAt time.Time + // Hash is the SHA1 of the represented file + Hash core.Hash + // Name is the Entry path name relative to top level directory + Name string + // CreatedAt time when the tracked path was created + CreatedAt time.Time + // ModifiedAt time when the tracked path was changed ModifiedAt time.Time + // Dev and Inode of the tracked path Dev, Inode uint32 - Mode os.FileMode - UID, GID uint32 - Size uint32 - Flags uint16 - Stage Stage - + // Mode of the path + Mode os.FileMode + // UID and GID, userid and group id of the owner + UID, GID uint32 + // Size is the length in bytes for regular files + Size uint32 + // Stage on a merge is defines what stage is representing this entry + // https://git-scm.com/book/en/v2/Git-Tools-Advanced-Merging + Stage Stage + // SkipWorktree used in sparse checkouts + // https://git-scm.com/docs/git-read-tree#_sparse_checkout SkipWorktree bool - IntentToAdd bool - - Hash core.Hash - Name string + // IntentToAdd record only the fact that the path will be added later + // https://git-scm.com/docs/git-add ("git add -N") + IntentToAdd bool } // Tree contains pre-computed hashes for trees that can be derived from the -- cgit