aboutsummaryrefslogtreecommitdiffstats
path: root/formats
diff options
context:
space:
mode:
authorMáximo Cuadros <mcuadros@gmail.com>2016-10-31 19:44:29 +0000
committerGitHub <noreply@github.com>2016-10-31 19:44:29 +0000
commit6b7464a22c6177d9e0cf96e1aaaae13c127c3149 (patch)
tree70ac03894fafe43deb5b62ba18afa45f79507695 /formats
parent5078f52a9f2217027b0f475d5a91e677b3228588 (diff)
downloadgo-git-6b7464a22c6177d9e0cf96e1aaaae13c127c3149.tar.gz
format: index encoder and index decoder improvements (#105)
Diffstat (limited to 'formats')
-rw-r--r--formats/index/decoder.go163
-rw-r--r--formats/index/decoder_test.go10
-rw-r--r--formats/index/doc.go2
-rw-r--r--formats/index/encoder.go141
-rw-r--r--formats/index/encoder_test.go78
-rw-r--r--formats/index/index.go46
6 files changed, 362 insertions, 78 deletions
diff --git a/formats/index/decoder.go b/formats/index/decoder.go
index 8e37fd1..f3d4343 100644
--- a/formats/index/decoder.go
+++ b/formats/index/decoder.go
@@ -2,7 +2,9 @@ package index
import (
"bytes"
+ "crypto/sha1"
"errors"
+ "hash"
"io"
"io/ioutil"
"strconv"
@@ -13,38 +15,42 @@ import (
)
var (
- // IndexVersionSupported is the range of supported index versions
- IndexVersionSupported = struct{ Min, Max uint32 }{Min: 2, Max: 4}
+ // DecodeVersionSupported is the range of supported index versions
+ DecodeVersionSupported = struct{ Min, Max uint32 }{Min: 2, Max: 4}
- // ErrUnsupportedVersion is returned by Decode when the idxindex file
- // version is not supported.
- ErrUnsupportedVersion = errors.New("Unsuported version")
// ErrMalformedSignature is returned by Decode when the index header file is
// malformed
- ErrMalformedSignature = errors.New("Malformed index signature file")
+ ErrMalformedSignature = errors.New("malformed index signature file")
+ // ErrInvalidChecksum is returned by Decode if the SHA1 hash does not match
+ // the read content
+ ErrInvalidChecksum = errors.New("invalid checksum")
- indexSignature = []byte{'D', 'I', 'R', 'C'}
- treeExtSignature = []byte{'T', 'R', 'E', 'E'}
- resolveUndoExtSignature = []byte{'R', 'E', 'U', 'C'}
+ errUnknownExtension = errors.New("unknown extension")
)
const (
- EntryExtended = 0x4000
- EntryValid = 0x8000
-
- nameMask = 0xfff
- intentToAddMask = 1 << 13
- skipWorkTreeMask = 1 << 14
+ entryHeaderLength = 62
+ entryExtended = 0x4000
+ entryValid = 0x8000
+ nameMask = 0xfff
+ intentToAddMask = 1 << 13
+ skipWorkTreeMask = 1 << 14
)
+// A Decoder reads and decodes index files from an input stream.
type Decoder struct {
r io.Reader
+ hash hash.Hash
lastEntry *Entry
}
// NewDecoder returns a new decoder that reads from r.
func NewDecoder(r io.Reader) *Decoder {
- return &Decoder{r: r}
+ h := sha1.New()
+ return &Decoder{
+ r: io.TeeReader(r, h),
+ hash: h,
+ }
}
// Decode reads the whole index object from its input and stores it in the
@@ -56,20 +62,20 @@ func (d *Decoder) Decode(idx *Index) error {
return err
}
- idx.EntryCount, err = binary.ReadUint32(d.r)
+ entryCount, err := binary.ReadUint32(d.r)
if err != nil {
return err
}
- if err := d.readEntries(idx); err != nil {
+ if err := d.readEntries(idx, int(entryCount)); err != nil {
return err
}
return d.readExtensions(idx)
}
-func (d *Decoder) readEntries(idx *Index) error {
- for i := 0; i < int(idx.EntryCount); i++ {
+func (d *Decoder) readEntries(idx *Index, count int) error {
+ for i := 0; i < count; i++ {
e, err := d.readEntry(idx)
if err != nil {
return err
@@ -86,11 +92,11 @@ func (d *Decoder) readEntry(idx *Index) (*Entry, error) {
e := &Entry{}
var msec, mnsec, sec, nsec uint32
+ var flags uint16
- flowSize := 62
flow := []interface{}{
- &msec, &mnsec,
&sec, &nsec,
+ &msec, &mnsec,
&e.Dev,
&e.Inode,
&e.Mode,
@@ -98,19 +104,19 @@ func (d *Decoder) readEntry(idx *Index) (*Entry, error) {
&e.GID,
&e.Size,
&e.Hash,
- &e.Flags,
+ &flags,
}
if err := binary.Read(d.r, flow...); err != nil {
return nil, err
}
- read := flowSize
- e.CreatedAt = time.Unix(int64(msec), int64(mnsec))
- e.ModifiedAt = time.Unix(int64(sec), int64(nsec))
- e.Stage = Stage(e.Flags>>12) & 0x3
+ read := entryHeaderLength
+ e.CreatedAt = time.Unix(int64(sec), int64(nsec))
+ e.ModifiedAt = time.Unix(int64(msec), int64(mnsec))
+ e.Stage = Stage(flags>>12) & 0x3
- if e.Flags&EntryExtended != 0 {
+ if flags&entryExtended != 0 {
extended, err := binary.ReadUint16(d.r)
if err != nil {
return nil, err
@@ -121,20 +127,21 @@ func (d *Decoder) readEntry(idx *Index) (*Entry, error) {
e.SkipWorktree = extended&skipWorkTreeMask != 0
}
- if err := d.readEntryName(idx, e); err != nil {
+ if err := d.readEntryName(idx, e, flags); err != nil {
return nil, err
}
return e, d.padEntry(idx, e, read)
}
-func (d *Decoder) readEntryName(idx *Index, e *Entry) error {
+func (d *Decoder) readEntryName(idx *Index, e *Entry, flags uint16) error {
var name string
var err error
switch idx.Version {
case 2, 3:
- name, err = d.doReadEntryName(e)
+ len := flags & nameMask
+ name, err = d.doReadEntryName(len)
case 4:
name, err = d.doReadEntryNameV4()
default:
@@ -168,10 +175,8 @@ func (d *Decoder) doReadEntryNameV4() (string, error) {
return base + string(name), nil
}
-func (d *Decoder) doReadEntryName(e *Entry) (string, error) {
- pLen := e.Flags & nameMask
-
- name := make([]byte, int64(pLen))
+func (d *Decoder) doReadEntryName(len uint16) (string, error) {
+ name := make([]byte, len)
if err := binary.Read(d.r, &name); err != nil {
return "", err
}
@@ -195,50 +200,88 @@ func (d *Decoder) padEntry(idx *Index, e *Entry, read int) error {
return nil
}
+// TODO: support 'Split index' and 'Untracked cache' extensions; take into
+// account that they are not supported by jgit or libgit
func (d *Decoder) readExtensions(idx *Index) error {
+ var expected []byte
var err error
+
+ var header [4]byte
for {
- err = d.readExtension(idx)
+ expected = d.hash.Sum(nil)
+
+ var n int
+ if n, err = io.ReadFull(d.r, header[:]); err != nil {
+ if n == 0 {
+ err = io.EOF
+ }
+
+ break
+ }
+
+ err = d.readExtension(idx, header[:])
if err != nil {
break
}
}
- if err == io.EOF {
- return nil
+ if err != errUnknownExtension {
+ return err
}
- return err
+ return d.readChecksum(expected, header)
}
-func (d *Decoder) readExtension(idx *Index) error {
- var s = make([]byte, 4)
- if _, err := io.ReadFull(d.r, s); err != nil {
- return err
+func (d *Decoder) readExtension(idx *Index, header []byte) error {
+ switch {
+ case bytes.Equal(header, treeExtSignature):
+ r, err := d.getExtensionReader()
+ if err != nil {
+ return err
+ }
+
+ idx.Cache = &Tree{}
+ d := &treeExtensionDecoder{r}
+ if err := d.Decode(idx.Cache); err != nil {
+ return err
+ }
+ case bytes.Equal(header, resolveUndoExtSignature):
+ r, err := d.getExtensionReader()
+ if err != nil {
+ return err
+ }
+
+ idx.ResolveUndo = &ResolveUndo{}
+ d := &resolveUndoDecoder{r}
+ if err := d.Decode(idx.ResolveUndo); err != nil {
+ return err
+ }
+ default:
+ return errUnknownExtension
}
+ return nil
+}
+
+func (d *Decoder) getExtensionReader() (io.Reader, error) {
len, err := binary.ReadUint32(d.r)
if err != nil {
- return err
+ return nil, err
}
- switch {
- case bytes.Equal(s, treeExtSignature):
- t := &Tree{}
- td := &treeExtensionDecoder{&io.LimitedReader{R: d.r, N: int64(len)}}
- if err := td.Decode(t); err != nil {
- return err
- }
+ return &io.LimitedReader{R: d.r, N: int64(len)}, nil
+}
- idx.Cache = t
- case bytes.Equal(s, resolveUndoExtSignature):
- ru := &ResolveUndo{}
- rud := &resolveUndoDecoder{&io.LimitedReader{R: d.r, N: int64(len)}}
- if err := rud.Decode(ru); err != nil {
- return err
- }
+func (d *Decoder) readChecksum(expected []byte, alreadyRead [4]byte) error {
+ var h core.Hash
+ copy(h[:4], alreadyRead[:])
+
+ if err := binary.Read(d.r, h[4:]); err != nil {
+ return err
+ }
- idx.ResolveUndo = ru
+ if bytes.Compare(h[:], expected) != 0 {
+ return ErrInvalidChecksum
}
return nil
@@ -259,7 +302,7 @@ func validateHeader(r io.Reader) (version uint32, err error) {
return 0, err
}
- if version < IndexVersionSupported.Min || version > IndexVersionSupported.Max {
+ if version < DecodeVersionSupported.Min || version > DecodeVersionSupported.Max {
return 0, ErrUnsupportedVersion
}
diff --git a/formats/index/decoder_test.go b/formats/index/decoder_test.go
index cf4c872..a05417d 100644
--- a/formats/index/decoder_test.go
+++ b/formats/index/decoder_test.go
@@ -26,7 +26,7 @@ func (s *IdxfileSuite) TestDecode(c *C) {
c.Assert(err, IsNil)
c.Assert(idx.Version, Equals, uint32(2))
- c.Assert(idx.EntryCount, Equals, uint32(9))
+ c.Assert(idx.Entries, HasLen, 9)
}
func (s *IdxfileSuite) TestDecodeEntries(c *C) {
@@ -97,7 +97,7 @@ func (s *IdxfileSuite) TestDecodeMergeConflict(c *C) {
c.Assert(err, IsNil)
c.Assert(idx.Version, Equals, uint32(2))
- c.Assert(idx.EntryCount, Equals, uint32(13))
+ c.Assert(idx.Entries, HasLen, 13)
expected := []struct {
Stage Stage
@@ -136,7 +136,7 @@ func (s *IdxfileSuite) TestDecodeExtendedV3(c *C) {
c.Assert(err, IsNil)
c.Assert(idx.Version, Equals, uint32(3))
- c.Assert(idx.EntryCount, Equals, uint32(11))
+ c.Assert(idx.Entries, HasLen, 11)
c.Assert(idx.Entries[6].Name, Equals, "intent-to-add")
c.Assert(idx.Entries[6].IntentToAdd, Equals, true)
@@ -153,7 +153,7 @@ func (s *IdxfileSuite) TestDecodeResolveUndo(c *C) {
c.Assert(err, IsNil)
c.Assert(idx.Version, Equals, uint32(2))
- c.Assert(idx.EntryCount, Equals, uint32(8))
+ c.Assert(idx.Entries, HasLen, 8)
ru := idx.ResolveUndo
c.Assert(ru.Entries, HasLen, 2)
@@ -178,7 +178,7 @@ func (s *IdxfileSuite) TestDecodeV4(c *C) {
c.Assert(err, IsNil)
c.Assert(idx.Version, Equals, uint32(4))
- c.Assert(idx.EntryCount, Equals, uint32(11))
+ c.Assert(idx.Entries, HasLen, 11)
names := []string{
".gitignore", "CHANGELOG", "LICENSE", "binary.jpg", "go/example.go",
diff --git a/formats/index/doc.go b/formats/index/doc.go
index 285eade..00466af 100644
--- a/formats/index/doc.go
+++ b/formats/index/doc.go
@@ -1,4 +1,4 @@
-// Package idxfile implements a encoder/decoder of index format files
+// Package index implements an encoder/decoder of index format files
package index
/*
diff --git a/formats/index/encoder.go b/formats/index/encoder.go
new file mode 100644
index 0000000..94fbc68
--- /dev/null
+++ b/formats/index/encoder.go
@@ -0,0 +1,141 @@
+package index
+
+import (
+ "bytes"
+ "crypto/sha1"
+ "errors"
+ "hash"
+ "io"
+ "time"
+
+ "gopkg.in/src-d/go-git.v4/utils/binary"
+)
+
+var (
+ // EncodeVersionSupported is the only index version supported by Encode
+ EncodeVersionSupported uint32 = 2
+
+ // ErrInvalidTimestamp is returned by Encode if the Index contains an Entry
+ // with negative timestamp values
+ ErrInvalidTimestamp = errors.New("negative timestamps are not allowed")
+)
+
+// An Encoder writes an Index to an output stream.
+type Encoder struct {
+ w io.Writer
+ hash hash.Hash
+}
+
+// NewEncoder returns a new encoder that writes to w.
+func NewEncoder(w io.Writer) *Encoder {
+ h := sha1.New()
+ mw := io.MultiWriter(w, h)
+ return &Encoder{mw, h}
+}
+
+// Encode writes the Index to the stream of the encoder.
+func (e *Encoder) Encode(idx *Index) error {
+ // TODO: support versions v3 and v4
+ // TODO: support extensions
+ if idx.Version != EncodeVersionSupported {
+ return ErrUnsupportedVersion
+ }
+
+ if err := e.encodeHeader(idx); err != nil {
+ return err
+ }
+
+ if err := e.encodeEntries(idx); err != nil {
+ return err
+ }
+
+ return e.encodeFooter()
+}
+
+func (e *Encoder) encodeHeader(idx *Index) error {
+ return binary.Write(e.w,
+ indexSignature,
+ idx.Version,
+ uint32(len(idx.Entries)),
+ )
+}
+
+func (e *Encoder) encodeEntries(idx *Index) error {
+ for _, entry := range idx.Entries {
+ if err := e.encodeEntry(&entry); err != nil {
+ return err
+ }
+
+ wrote := entryHeaderLength + len(entry.Name)
+ if err := e.padEntry(wrote); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func (e *Encoder) encodeEntry(entry *Entry) error {
+ if entry.IntentToAdd || entry.SkipWorktree {
+ return ErrUnsupportedVersion
+ }
+
+ sec, nsec, err := e.timeToUint32(&entry.CreatedAt)
+ if err != nil {
+ return err
+ }
+
+ msec, mnsec, err := e.timeToUint32(&entry.ModifiedAt)
+ if err != nil {
+ return err
+ }
+
+ flags := uint16(entry.Stage&0x3) << 12
+ if l := len(entry.Name); l < nameMask {
+ flags |= uint16(l)
+ } else {
+ flags |= nameMask
+ }
+
+ flow := []interface{}{
+ sec, nsec,
+ msec, mnsec,
+ entry.Dev,
+ entry.Inode,
+ entry.Mode,
+ entry.UID,
+ entry.GID,
+ entry.Size,
+ entry.Hash[:],
+ flags,
+ }
+
+ if err := binary.Write(e.w, flow...); err != nil {
+ return err
+ }
+
+ return binary.Write(e.w, []byte(entry.Name))
+}
+
+func (e *Encoder) timeToUint32(t *time.Time) (uint32, uint32, error) {
+ if t.IsZero() {
+ return 0, 0, nil
+ }
+
+ if t.Unix() < 0 || t.UnixNano() < 0 {
+ return 0, 0, ErrInvalidTimestamp
+ }
+
+ return uint32(t.Unix()), uint32(t.Nanosecond()), nil
+}
+
+func (e *Encoder) padEntry(wrote int) error {
+ padLen := 8 - wrote%8
+
+ _, err := e.w.Write(bytes.Repeat([]byte{'\x00'}, padLen))
+ return err
+}
+
+func (e *Encoder) encodeFooter() error {
+ return binary.Write(e.w, e.hash.Sum(nil))
+}
diff --git a/formats/index/encoder_test.go b/formats/index/encoder_test.go
new file mode 100644
index 0000000..3085988
--- /dev/null
+++ b/formats/index/encoder_test.go
@@ -0,0 +1,78 @@
+package index
+
+import (
+ "bytes"
+ "strings"
+ "time"
+
+ . "gopkg.in/check.v1"
+ "gopkg.in/src-d/go-git.v4/core"
+)
+
+func (s *IdxfileSuite) TestEncode(c *C) {
+ idx := &Index{
+ Version: 2,
+ Entries: []Entry{{
+ CreatedAt: time.Now(),
+ ModifiedAt: time.Now(),
+ Dev: 4242,
+ Inode: 424242,
+ UID: 84,
+ GID: 8484,
+ Size: 42,
+ Stage: TheirMode,
+ Hash: core.NewHash("e25b29c8946e0e192fae2edc1dabf7be71e8ecf3"),
+ Name: "foo",
+ }, {
+ CreatedAt: time.Now(),
+ ModifiedAt: time.Now(),
+ Name: strings.Repeat(" ", 20),
+ Size: 82,
+ }},
+ }
+
+ buf := bytes.NewBuffer(nil)
+ e := NewEncoder(buf)
+ err := e.Encode(idx)
+ c.Assert(err, IsNil)
+
+ output := &Index{}
+ d := NewDecoder(buf)
+ err = d.Decode(output)
+ c.Assert(err, IsNil)
+
+ c.Assert(idx, DeepEquals, output)
+}
+
+func (s *IdxfileSuite) TestEncodeUnsuportedVersion(c *C) {
+ idx := &Index{Version: 3}
+
+ buf := bytes.NewBuffer(nil)
+ e := NewEncoder(buf)
+ err := e.Encode(idx)
+ c.Assert(err, Equals, ErrUnsupportedVersion)
+}
+
+func (s *IdxfileSuite) TestEncodeWithIntentToAddUnsuportedVersion(c *C) {
+ idx := &Index{
+ Version: 2,
+ Entries: []Entry{{IntentToAdd: true}},
+ }
+
+ buf := bytes.NewBuffer(nil)
+ e := NewEncoder(buf)
+ err := e.Encode(idx)
+ c.Assert(err, Equals, ErrUnsupportedVersion)
+}
+
+func (s *IdxfileSuite) TestEncodeWithSkipWorktreeUnsuportedVersion(c *C) {
+ idx := &Index{
+ Version: 2,
+ Entries: []Entry{{SkipWorktree: true}},
+ }
+
+ buf := bytes.NewBuffer(nil)
+ e := NewEncoder(buf)
+ err := e.Encode(idx)
+ c.Assert(err, Equals, ErrUnsupportedVersion)
+}
diff --git a/formats/index/index.go b/formats/index/index.go
index bea199e..35a5391 100644
--- a/formats/index/index.go
+++ b/formats/index/index.go
@@ -1,12 +1,24 @@
package index
import (
+ "errors"
"os"
"time"
"gopkg.in/src-d/go-git.v4/core"
)
+var (
+ // ErrUnsupportedVersion is returned by Decode and Encode when the index
+ // file version is not supported.
+ ErrUnsupportedVersion = errors.New("Unsuported version")
+
+ indexSignature = []byte{'D', 'I', 'R', 'C'}
+ treeExtSignature = []byte{'T', 'R', 'E', 'E'}
+ resolveUndoExtSignature = []byte{'R', 'E', 'U', 'C'}
+)
+
+// Stage during merge
type Stage int
const (
@@ -25,7 +37,6 @@ const (
// worktree are detected using this Index. The Index is also used during merges
type Index struct {
Version uint32
- EntryCount uint32
Entries []Entry
Cache *Tree
ResolveUndo *ResolveUndo
@@ -35,20 +46,31 @@ type Index struct {
// represents exactly one stage of a file. If a file path is unmerged then
// multiple Entry instances may appear for the same path name.
type Entry struct {
- CreatedAt time.Time
+ // Hash is the SHA1 of the represented file
+ Hash core.Hash
+ // Name is the Entry path name relative to top level directory
+ Name string
+ // CreatedAt time when the tracked path was created
+ CreatedAt time.Time
+ // ModifiedAt time when the tracked path was changed
ModifiedAt time.Time
+ // Dev and Inode of the tracked path
Dev, Inode uint32
- Mode os.FileMode
- UID, GID uint32
- Size uint32
- Flags uint16
- Stage Stage
-
+ // Mode of the path
+ Mode os.FileMode
+ // UID and GID, userid and group id of the owner
+ UID, GID uint32
+ // Size is the length in bytes for regular files
+ Size uint32
+ // Stage on a merge defines what stage this entry represents
+ // https://git-scm.com/book/en/v2/Git-Tools-Advanced-Merging
+ Stage Stage
+ // SkipWorktree used in sparse checkouts
+ // https://git-scm.com/docs/git-read-tree#_sparse_checkout
SkipWorktree bool
- IntentToAdd bool
-
- Hash core.Hash
- Name string
+ // IntentToAdd records only the fact that the path will be added later
+ // https://git-scm.com/docs/git-add ("git add -N")
+ IntentToAdd bool
}
// Tree contains pre-computed hashes for trees that can be derived from the