aboutsummaryrefslogtreecommitdiffstats
path: root/plumbing/format/index
diff options
context:
space:
mode:
Diffstat (limited to 'plumbing/format/index')
-rw-r--r--plumbing/format/index/decoder.go446
-rw-r--r--plumbing/format/index/decoder_test.go196
-rw-r--r--plumbing/format/index/doc.go302
-rw-r--r--plumbing/format/index/encoder.go141
-rw-r--r--plumbing/format/index/encoder_test.go78
-rw-r--r--plumbing/format/index/index.go108
6 files changed, 1271 insertions, 0 deletions
diff --git a/plumbing/format/index/decoder.go b/plumbing/format/index/decoder.go
new file mode 100644
index 0000000..9069c9e
--- /dev/null
+++ b/plumbing/format/index/decoder.go
@@ -0,0 +1,446 @@
+package index
+
+import (
+ "bytes"
+ "crypto/sha1"
+ "errors"
+ "hash"
+ "io"
+ "io/ioutil"
+ "strconv"
+ "time"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/utils/binary"
+)
+
+var (
+ // DecodeVersionSupported is the inclusive range of index format versions
+ // accepted by Decode.
+ DecodeVersionSupported = struct{ Min, Max uint32 }{Min: 2, Max: 4}
+
+ // ErrMalformedSignature is returned by Decode when the index file does not
+ // start with the expected signature.
+ ErrMalformedSignature = errors.New("malformed index signature file")
+ // ErrInvalidChecksum is returned by Decode if the SHA-1 checksum stored at
+ // the end of the file does not match the hash of the content read.
+ ErrInvalidChecksum = errors.New("invalid checksum")
+
+ // errUnknownExtension is returned internally by readExtension when the
+ // 4-byte signature is not one of the extensions this decoder understands.
+ errUnknownExtension = errors.New("unknown extension")
+)
+
+const (
+ // entryHeaderLength is the size in bytes of the fixed-length part of an
+ // index entry: ten 32-bit fields, a 160-bit hash and the 16-bit flags.
+ entryHeaderLength = 62
+ // entryExtended is the "extended" bit of the 16-bit flags field; when set
+ // a second 16-bit flags field follows.
+ entryExtended = 0x4000
+ // entryValid is the top bit of the 16-bit flags field (the assume-valid
+ // flag in the format description); it is not referenced in this patch.
+ entryValid = 0x8000
+ // nameMask selects the low 12 bits of the flags field, which hold the
+ // entry name length (capped at 0xfff).
+ nameMask = 0xfff
+ // intentToAddMask and skipWorkTreeMask select bits of the extended flags.
+ intentToAddMask = 1 << 13
+ skipWorkTreeMask = 1 << 14
+)
+
+// A Decoder reads and decodes index files from an input stream.
+type Decoder struct {
+ // r is the input wrapped so that every byte read is also written to hash.
+ r io.Reader
+ // hash accumulates the SHA-1 of the bytes read so far.
+ hash hash.Hash
+ // lastEntry is the most recently decoded entry; version 4 names are
+ // prefix-compressed against it.
+ lastEntry *Entry
+}
+
+// NewDecoder returns a Decoder reading from r. Everything consumed through
+// the decoder is also fed to a SHA-1 hash so the trailing checksum can be
+// verified.
+func NewDecoder(r io.Reader) *Decoder {
+	sum := sha1.New()
+	d := &Decoder{hash: sum}
+	d.r = io.TeeReader(r, sum)
+
+	return d
+}
+
+// Decode consumes a complete index from the decoder input and fills idx with
+// its version, entries and any recognised extensions.
+func (d *Decoder) Decode(idx *Index) error {
+	version, err := validateHeader(d.r)
+	idx.Version = version
+	if err != nil {
+		return err
+	}
+
+	total, err := binary.ReadUint32(d.r)
+	if err != nil {
+		return err
+	}
+
+	err = d.readEntries(idx, int(total))
+	if err != nil {
+		return err
+	}
+
+	return d.readExtensions(idx)
+}
+
+// readEntries decodes count consecutive entries and appends them to
+// idx.Entries, remembering the latest one for version 4 name decompression.
+func (d *Decoder) readEntries(idx *Index, count int) error {
+	for remaining := count; remaining > 0; remaining-- {
+		entry, err := d.readEntry(idx)
+		if err != nil {
+			return err
+		}
+
+		idx.Entries = append(idx.Entries, *entry)
+		d.lastEntry = entry
+	}
+
+	return nil
+}
+
+// readEntry decodes one index entry: the fixed-size header, the optional
+// extended flags, the name, and finally the alignment padding.
+func (d *Decoder) readEntry(idx *Index) (*Entry, error) {
+	var (
+		ctimeSec, ctimeNsec uint32
+		mtimeSec, mtimeNsec uint32
+		flags               uint16
+	)
+
+	entry := &Entry{}
+	err := binary.Read(d.r,
+		&ctimeSec, &ctimeNsec,
+		&mtimeSec, &mtimeNsec,
+		&entry.Dev,
+		&entry.Inode,
+		&entry.Mode,
+		&entry.UID,
+		&entry.GID,
+		&entry.Size,
+		&entry.Hash,
+		&flags,
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	entry.CreatedAt = time.Unix(int64(ctimeSec), int64(ctimeNsec))
+	entry.ModifiedAt = time.Unix(int64(mtimeSec), int64(mtimeNsec))
+	entry.Stage = Stage(flags>>12) & 0x3
+
+	// consumed tracks the bytes read before the name, needed for padding.
+	consumed := entryHeaderLength
+	if flags&entryExtended != 0 {
+		extra, err := binary.ReadUint16(d.r)
+		if err != nil {
+			return nil, err
+		}
+
+		consumed += 2
+		entry.IntentToAdd = extra&intentToAddMask != 0
+		entry.SkipWorktree = extra&skipWorkTreeMask != 0
+	}
+
+	if err := d.readEntryName(idx, entry, flags); err != nil {
+		return nil, err
+	}
+
+	err = d.padEntry(idx, entry, consumed)
+	return entry, err
+}
+
+// readEntryName reads the entry name using the encoding of idx.Version and
+// stores it in e.Name. Versions 2 and 3 take the length from the low 12 bits
+// of flags; version 4 uses prefix compression.
+func (d *Decoder) readEntryName(idx *Index, e *Entry, flags uint16) error {
+	var (
+		name string
+		err  error
+	)
+
+	switch idx.Version {
+	case 4:
+		name, err = d.doReadEntryNameV4()
+	case 2, 3:
+		name, err = d.doReadEntryName(flags & nameMask)
+	default:
+		return ErrUnsupportedVersion
+	}
+
+	if err == nil {
+		e.Name = name
+	}
+
+	return err
+}
+
+// doReadEntryNameV4 reads a version 4 prefix-compressed name: a variable
+// width integer N followed by a NUL-terminated suffix. The name is the
+// previous entry's name with its last N bytes removed, plus the suffix. It
+// returns an error instead of panicking when N does not fit the previous
+// name, which can only happen with a corrupt index.
+func (d *Decoder) doReadEntryNameV4() (string, error) {
+	l, err := binary.ReadVariableWidthInt(d.r)
+	if err != nil {
+		return "", err
+	}
+
+	var base string
+	if d.lastEntry != nil {
+		prev := d.lastEntry.Name
+		strip := int(l)
+		// Guard the slice expression below against malformed input.
+		if strip < 0 || strip > len(prev) {
+			return "", errors.New("malformed index: invalid entry name prefix length")
+		}
+
+		base = prev[:len(prev)-strip]
+	}
+
+	name, err := binary.ReadUntil(d.r, '\x00')
+	if err != nil {
+		return "", err
+	}
+
+	return base + string(name), nil
+}
+
+// doReadEntryName reads exactly len bytes from the input and returns them as
+// the entry name (versions 2 and 3).
+func (d *Decoder) doReadEntryName(len uint16) (string, error) {
+	buf := make([]byte, len)
+
+	err := binary.Read(d.r, &buf)
+	if err != nil {
+		return "", err
+	}
+
+	return string(buf), nil
+}
+
+// padEntry discards the 1-8 padding bytes that follow an entry name so the
+// next entry starts on an 8 byte boundary. Version 4 has no padding.
+func (d *Decoder) padEntry(idx *Index, e *Entry, read int) error {
+	if idx.Version == 4 {
+		return nil
+	}
+
+	pad := int64(8 - (read+len(e.Name))%8)
+	_, err := io.CopyN(ioutil.Discard, d.r, pad)
+
+	return err
+}
+
+// readExtensions reads extensions until it meets a 4-byte signature it does
+// not recognise, which it then treats as the start of the trailing checksum.
+//
+// TODO: support 'Split index' and 'Untracked cache' extensions, take into
+// account that they are not supported by jgit or libgit
+func (d *Decoder) readExtensions(idx *Index) error {
+ var expected []byte
+ var err error
+
+ var header [4]byte
+ for {
+ // Snapshot the hash before reading the next 4 bytes: if they turn out to
+ // be the checksum, it must not include itself.
+ expected = d.hash.Sum(nil)
+
+ var n int
+ if n, err = io.ReadFull(d.r, header[:]); err != nil {
+ if n == 0 {
+ err = io.EOF
+ }
+
+ break
+ }
+
+ err = d.readExtension(idx, header[:])
+ if err != nil {
+ break
+ }
+ }
+
+ // Any error other than an unknown signature (including reaching the end of
+ // the input without a checksum) is returned as is.
+ if err != errUnknownExtension {
+ return err
+ }
+
+ return d.readChecksum(expected, header)
+}
+
+// readExtension decodes the extension identified by header into idx. It
+// returns errUnknownExtension when the signature is neither the cached tree
+// nor the resolve undo extension.
+func (d *Decoder) readExtension(idx *Index, header []byte) error {
+	if bytes.Equal(header, treeExtSignature) {
+		r, err := d.getExtensionReader()
+		if err != nil {
+			return err
+		}
+
+		idx.Cache = &Tree{}
+		treeDecoder := &treeExtensionDecoder{r}
+		return treeDecoder.Decode(idx.Cache)
+	}
+
+	if bytes.Equal(header, resolveUndoExtSignature) {
+		r, err := d.getExtensionReader()
+		if err != nil {
+			return err
+		}
+
+		idx.ResolveUndo = &ResolveUndo{}
+		undoDecoder := &resolveUndoDecoder{r}
+		return undoDecoder.Decode(idx.ResolveUndo)
+	}
+
+	return errUnknownExtension
+}
+
+// getExtensionReader reads the 32-bit extension size and returns a reader
+// limited to that many bytes of the decoder input.
+func (d *Decoder) getExtensionReader() (io.Reader, error) {
+	size, err := binary.ReadUint32(d.r)
+	if err != nil {
+		return nil, err
+	}
+
+	return io.LimitReader(d.r, int64(size)), nil
+}
+
+// readChecksum completes the trailing checksum, whose first four bytes were
+// already consumed as alreadyRead, and compares it with expected. It returns
+// ErrInvalidChecksum when they differ.
+func (d *Decoder) readChecksum(expected []byte, alreadyRead [4]byte) error {
+	var h plumbing.Hash
+	copy(h[:4], alreadyRead[:])
+
+	if err := binary.Read(d.r, h[4:]); err != nil {
+		return err
+	}
+
+	if !bytes.Equal(h[:], expected) {
+		return ErrInvalidChecksum
+	}
+
+	return nil
+}
+
+// validateHeader reads the 4-byte signature and the 32-bit version from r.
+// It returns ErrMalformedSignature for an unexpected signature and
+// ErrUnsupportedVersion for a version outside DecodeVersionSupported.
+func validateHeader(r io.Reader) (version uint32, err error) {
+	var signature [4]byte
+	if _, err = io.ReadFull(r, signature[:]); err != nil {
+		return 0, err
+	}
+
+	if !bytes.Equal(signature[:], indexSignature) {
+		return 0, ErrMalformedSignature
+	}
+
+	if version, err = binary.ReadUint32(r); err != nil {
+		return 0, err
+	}
+
+	tooOld := version < DecodeVersionSupported.Min
+	tooNew := version > DecodeVersionSupported.Max
+	if tooOld || tooNew {
+		return 0, ErrUnsupportedVersion
+	}
+
+	return version, nil
+}
+
+// treeExtensionDecoder decodes the content of a cached tree extension from r.
+type treeExtensionDecoder struct {
+ r io.Reader
+}
+
+// Decode reads cached tree entries until the reader reports io.EOF and
+// appends them to t.Entries. A nil entry from readEntry is skipped.
+func (d *treeExtensionDecoder) Decode(t *Tree) error {
+	for {
+		entry, err := d.readEntry()
+		switch {
+		case err == io.EOF:
+			return nil
+		case err != nil:
+			return err
+		case entry != nil:
+			t.Entries = append(t.Entries, *entry)
+		}
+	}
+}
+
+// readEntry decodes one cached tree entry: a NUL-terminated path, the ASCII
+// entry count, a space, the ASCII subtree count, a newline and, for valid
+// entries only, the 160-bit object name.
+//
+// It returns (nil, nil) for an invalidated entry, i.e. one with a negative
+// entry count. The subtree count and newline are still consumed in that case
+// so the next entry starts at the right offset.
+func (d *treeExtensionDecoder) readEntry() (*TreeEntry, error) {
+	e := &TreeEntry{}
+
+	path, err := binary.ReadUntil(d.r, '\x00')
+	if err != nil {
+		return nil, err
+	}
+
+	e.Path = string(path)
+
+	count, err := binary.ReadUntil(d.r, ' ')
+	if err != nil {
+		return nil, err
+	}
+
+	entries, err := strconv.Atoi(string(count))
+	if err != nil {
+		return nil, err
+	}
+
+	// The subtree count and the newline are present even for invalidated
+	// entries, so they must be read before deciding to skip the entry.
+	subtrees, err := binary.ReadUntil(d.r, '\n')
+	if err != nil {
+		return nil, err
+	}
+
+	trees, err := strconv.Atoi(string(subtrees))
+	if err != nil {
+		return nil, err
+	}
+
+	// An entry can be in an invalidated state and is represented by having a
+	// negative number in the entry_count field; no object name follows.
+	if entries < 0 {
+		return nil, nil
+	}
+
+	e.Entries = entries
+	e.Trees = trees
+
+	if err := binary.Read(d.r, &e.Hash); err != nil {
+		return nil, err
+	}
+
+	return e, nil
+}
+
+// resolveUndoDecoder decodes the content of a resolve undo extension from r.
+type resolveUndoDecoder struct {
+ r io.Reader
+}
+
+// Decode reads resolve undo entries until the reader reports io.EOF and
+// appends them to ru.Entries.
+func (d *resolveUndoDecoder) Decode(ru *ResolveUndo) error {
+	for {
+		entry, err := d.readEntry()
+		if err == io.EOF {
+			return nil
+		}
+
+		if err != nil {
+			return err
+		}
+
+		ru.Entries = append(ru.Entries, *entry)
+	}
+}
+
+// readEntry decodes one resolve undo entry: a NUL-terminated path, three
+// NUL-terminated octal modes (stages 1 to 3) and then one 160-bit object
+// name for every stage whose mode is not zero, in stage order.
+func (d *resolveUndoDecoder) readEntry() (*ResolveUndoEntry, error) {
+	e := &ResolveUndoEntry{
+		Stages: make(map[Stage]plumbing.Hash),
+	}
+
+	path, err := binary.ReadUntil(d.r, '\x00')
+	if err != nil {
+		return nil, err
+	}
+
+	e.Path = string(path)
+
+	for i := 0; i < 3; i++ {
+		if err := d.readStage(e, Stage(i+1)); err != nil {
+			return nil, err
+		}
+	}
+
+	// The object names are stored in ascending stage order. Walk the stages
+	// explicitly: ranging over the map would visit them in an unspecified
+	// order and could attach a hash to the wrong stage.
+	for i := 0; i < 3; i++ {
+		s := Stage(i + 1)
+		if _, present := e.Stages[s]; !present {
+			continue
+		}
+
+		var hash plumbing.Hash
+		if err := binary.Read(d.r, hash[:]); err != nil {
+			return nil, err
+		}
+
+		e.Stages[s] = hash
+	}
+
+	return e, nil
+}
+
+// readStage reads one NUL-terminated octal mode. A non-zero mode marks stage
+// s as present by storing a placeholder zero hash in e.Stages; the parsed
+// mode itself is not kept.
+func (d *resolveUndoDecoder) readStage(e *ResolveUndoEntry, s Stage) error {
+	raw, err := binary.ReadUntil(d.r, '\x00')
+	if err != nil {
+		return err
+	}
+
+	mode, err := strconv.ParseInt(string(raw), 8, 64)
+	if err != nil {
+		return err
+	}
+
+	if mode == 0 {
+		return nil
+	}
+
+	e.Stages[s] = plumbing.ZeroHash
+	return nil
+}
diff --git a/plumbing/format/index/decoder_test.go b/plumbing/format/index/decoder_test.go
new file mode 100644
index 0000000..44ecb69
--- /dev/null
+++ b/plumbing/format/index/decoder_test.go
@@ -0,0 +1,196 @@
+package index
+
+import (
+ "testing"
+
+ . "gopkg.in/check.v1"
+ "gopkg.in/src-d/go-git.v4/fixtures"
+ "gopkg.in/src-d/go-git.v4/plumbing"
+)
+
+// Test hooks the gocheck suites of this package into "go test".
+func Test(t *testing.T) { TestingT(t) }
+
+// IdxfileSuite groups the index encoder and decoder tests; it embeds
+// fixtures.Suite.
+type IdxfileSuite struct {
+ fixtures.Suite
+}
+
+// Register the suite with gocheck.
+var _ = Suite(&IdxfileSuite{})
+
+// TestDecode checks the version and entry count of the basic fixture index.
+func (s *IdxfileSuite) TestDecode(c *C) {
+	file, err := fixtures.Basic().One().DotGit().Open("index")
+	c.Assert(err, IsNil)
+
+	decoded := &Index{}
+	err = NewDecoder(file).Decode(decoded)
+	c.Assert(err, IsNil)
+
+	c.Assert(decoded.Version, Equals, uint32(2))
+	c.Assert(decoded.Entries, HasLen, 9)
+}
+
+// TestDecodeEntries checks every field of the first entry of the basic
+// fixture index and the name of the second one.
+func (s *IdxfileSuite) TestDecodeEntries(c *C) {
+	file, err := fixtures.Basic().One().DotGit().Open("index")
+	c.Assert(err, IsNil)
+
+	decoded := &Index{}
+	err = NewDecoder(file).Decode(decoded)
+	c.Assert(err, IsNil)
+
+	c.Assert(decoded.Entries, HasLen, 9)
+
+	first := decoded.Entries[0]
+	c.Assert(first.CreatedAt.Unix(), Equals, int64(1473350251))
+	c.Assert(first.CreatedAt.Nanosecond(), Equals, 12059307)
+	c.Assert(first.ModifiedAt.Unix(), Equals, int64(1473350251))
+	c.Assert(first.ModifiedAt.Nanosecond(), Equals, 12059307)
+	c.Assert(first.Dev, Equals, uint32(38))
+	c.Assert(first.Inode, Equals, uint32(1715795))
+	c.Assert(first.UID, Equals, uint32(1000))
+	c.Assert(first.GID, Equals, uint32(100))
+	c.Assert(first.Size, Equals, uint32(189))
+	c.Assert(first.Hash.String(), Equals, "32858aad3c383ed1ff0a0f9bdf231d54a00c9e88")
+	c.Assert(first.Name, Equals, ".gitignore")
+	c.Assert(first.Mode.String(), Equals, "-rw-r--r--")
+
+	second := decoded.Entries[1]
+	c.Assert(second.Name, Equals, "CHANGELOG")
+}
+
+// TestDecodeCacheTree compares the decoded cached tree extension of the
+// basic fixture index with expectedEntries.
+func (s *IdxfileSuite) TestDecodeCacheTree(c *C) {
+	file, err := fixtures.Basic().One().DotGit().Open("index")
+	c.Assert(err, IsNil)
+
+	decoded := &Index{}
+	err = NewDecoder(file).Decode(decoded)
+	c.Assert(err, IsNil)
+
+	c.Assert(decoded.Entries, HasLen, 9)
+	c.Assert(decoded.Cache.Entries, HasLen, 5)
+
+	for i := range expectedEntries {
+		want := expectedEntries[i]
+		c.Assert(decoded.Cache.Entries[i].Path, Equals, want.Path)
+		c.Assert(decoded.Cache.Entries[i].Entries, Equals, want.Entries)
+		c.Assert(decoded.Cache.Entries[i].Trees, Equals, want.Trees)
+		c.Assert(decoded.Cache.Entries[i].Hash.String(), Equals, want.Hash.String())
+	}
+}
+
+// expectedEntries is the cached tree content TestDecodeCacheTree expects to
+// find in the basic fixture index.
+var expectedEntries = []TreeEntry{
+ {Path: "", Entries: 9, Trees: 4, Hash: plumbing.NewHash("a8d315b2b1c615d43042c3a62402b8a54288cf5c")},
+ {Path: "go", Entries: 1, Trees: 0, Hash: plumbing.NewHash("a39771a7651f97faf5c72e08224d857fc35133db")},
+ {Path: "php", Entries: 1, Trees: 0, Hash: plumbing.NewHash("586af567d0bb5e771e49bdd9434f5e0fb76d25fa")},
+ {Path: "json", Entries: 2, Trees: 0, Hash: plumbing.NewHash("5a877e6a906a2743ad6e45d99c1793642aaf8eda")},
+ {Path: "vendor", Entries: 1, Trees: 0, Hash: plumbing.NewHash("cf4aa3b38974fb7d81f367c0830f7d78d65ab86b")},
+}
+
+// TestDecodeMergeConflict checks that the three staged versions of a
+// conflicted path are decoded with the right stage and hash.
+func (s *IdxfileSuite) TestDecodeMergeConflict(c *C) {
+	file, err := fixtures.Basic().ByTag("merge-conflict").One().DotGit().Open("index")
+	c.Assert(err, IsNil)
+
+	decoded := &Index{}
+	err = NewDecoder(file).Decode(decoded)
+	c.Assert(err, IsNil)
+
+	c.Assert(decoded.Version, Equals, uint32(2))
+	c.Assert(decoded.Entries, HasLen, 13)
+
+	type stagedFile struct {
+		Stage Stage
+		Hash  string
+	}
+
+	want := []stagedFile{
+		{AncestorMode, "880cd14280f4b9b6ed3986d6671f907d7cc2a198"},
+		{OurMode, "d499a1a0b79b7d87a35155afd0c1cce78b37a91c"},
+		{TheirMode, "14f8e368114f561c38e134f6e68ea6fea12d77ed"},
+	}
+
+	// staged files
+	for i, entry := range decoded.Entries[4:7] {
+		c.Assert(entry.Stage, Equals, want[i].Stage)
+		c.Assert(entry.CreatedAt.Unix(), Equals, int64(0))
+		c.Assert(entry.CreatedAt.Nanosecond(), Equals, 0)
+		c.Assert(entry.ModifiedAt.Unix(), Equals, int64(0))
+		c.Assert(entry.ModifiedAt.Nanosecond(), Equals, 0)
+		c.Assert(entry.Dev, Equals, uint32(0))
+		c.Assert(entry.Inode, Equals, uint32(0))
+		c.Assert(entry.UID, Equals, uint32(0))
+		c.Assert(entry.GID, Equals, uint32(0))
+		c.Assert(entry.Size, Equals, uint32(0))
+		c.Assert(entry.Hash.String(), Equals, want[i].Hash)
+		c.Assert(entry.Name, Equals, "go/example.go")
+	}
+}
+
+// TestDecodeExtendedV3 checks that the extended flags of a version 3 index
+// are decoded.
+func (s *IdxfileSuite) TestDecodeExtendedV3(c *C) {
+	file, err := fixtures.Basic().ByTag("intent-to-add").One().DotGit().Open("index")
+	c.Assert(err, IsNil)
+
+	decoded := &Index{}
+	err = NewDecoder(file).Decode(decoded)
+	c.Assert(err, IsNil)
+
+	c.Assert(decoded.Version, Equals, uint32(3))
+	c.Assert(decoded.Entries, HasLen, 11)
+
+	c.Assert(decoded.Entries[6].Name, Equals, "intent-to-add")
+	c.Assert(decoded.Entries[6].IntentToAdd, Equals, true)
+	c.Assert(decoded.Entries[6].SkipWorktree, Equals, false)
+}
+
+// TestDecodeResolveUndo checks the paths and recorded stages of the resolve
+// undo extension.
+func (s *IdxfileSuite) TestDecodeResolveUndo(c *C) {
+	file, err := fixtures.Basic().ByTag("resolve-undo").One().DotGit().Open("index")
+	c.Assert(err, IsNil)
+
+	decoded := &Index{}
+	err = NewDecoder(file).Decode(decoded)
+	c.Assert(err, IsNil)
+
+	c.Assert(decoded.Version, Equals, uint32(2))
+	c.Assert(decoded.Entries, HasLen, 8)
+
+	undo := decoded.ResolveUndo
+	c.Assert(undo.Entries, HasLen, 2)
+
+	c.Assert(undo.Entries[0].Path, Equals, "go/example.go")
+	c.Assert(undo.Entries[0].Stages, HasLen, 3)
+	c.Assert(undo.Entries[0].Stages[AncestorMode], Not(Equals), plumbing.ZeroHash)
+	c.Assert(undo.Entries[0].Stages[OurMode], Not(Equals), plumbing.ZeroHash)
+	c.Assert(undo.Entries[0].Stages[TheirMode], Not(Equals), plumbing.ZeroHash)
+
+	c.Assert(undo.Entries[1].Path, Equals, "haskal/haskal.hs")
+	c.Assert(undo.Entries[1].Stages, HasLen, 2)
+	c.Assert(undo.Entries[1].Stages[OurMode], Not(Equals), plumbing.ZeroHash)
+	c.Assert(undo.Entries[1].Stages[TheirMode], Not(Equals), plumbing.ZeroHash)
+}
+
+// TestDecodeV4 checks that prefix-compressed names and extended flags of a
+// version 4 index are decoded.
+func (s *IdxfileSuite) TestDecodeV4(c *C) {
+	file, err := fixtures.Basic().ByTag("index-v4").One().DotGit().Open("index")
+	c.Assert(err, IsNil)
+
+	decoded := &Index{}
+	err = NewDecoder(file).Decode(decoded)
+	c.Assert(err, IsNil)
+
+	c.Assert(decoded.Version, Equals, uint32(4))
+	c.Assert(decoded.Entries, HasLen, 11)
+
+	wantNames := []string{
+		".gitignore", "CHANGELOG", "LICENSE", "binary.jpg", "go/example.go",
+		"haskal/haskal.hs", "intent-to-add", "json/long.json",
+		"json/short.json", "php/crappy.php", "vendor/foo.go",
+	}
+
+	for i, entry := range decoded.Entries {
+		c.Assert(entry.Name, Equals, wantNames[i])
+	}
+
+	c.Assert(decoded.Entries[6].Name, Equals, "intent-to-add")
+	c.Assert(decoded.Entries[6].IntentToAdd, Equals, true)
+	c.Assert(decoded.Entries[6].SkipWorktree, Equals, false)
+}
diff --git a/plumbing/format/index/doc.go b/plumbing/format/index/doc.go
new file mode 100644
index 0000000..3a72606
--- /dev/null
+++ b/plumbing/format/index/doc.go
@@ -0,0 +1,302 @@
+// Package index implements an encoder/decoder of index format files
+package index
+
+/*
+Git index format
+================
+
+== The Git index file has the following format
+
+ All binary numbers are in network byte order. Version 2 is described
+ here unless stated otherwise.
+
+ - A 12-byte header consisting of
+
+ 4-byte signature:
+ The signature is { 'D', 'I', 'R', 'C' } (stands for "dircache")
+
+ 4-byte version number:
+ The current supported versions are 2, 3 and 4.
+
+ 32-bit number of index entries.
+
+ - A number of sorted index entries (see below).
+
+ - Extensions
+
+ Extensions are identified by signature. Optional extensions can
+ be ignored if Git does not understand them.
+
+ Git currently supports cached tree and resolve undo extensions.
+
+ 4-byte extension signature. If the first byte is 'A'..'Z' the
+ extension is optional and can be ignored.
+
+ 32-bit size of the extension
+
+ Extension data
+
+ - 160-bit SHA-1 over the content of the index file before this
+ checksum.
+
+== Index entry
+
+ Index entries are sorted in ascending order on the name field,
+ interpreted as a string of unsigned bytes (i.e. memcmp() order, no
+ localization, no special casing of directory separator '/'). Entries
+ with the same name are sorted by their stage field.
+
+ 32-bit ctime seconds, the last time a file's metadata changed
+ this is stat(2) data
+
+ 32-bit ctime nanosecond fractions
+ this is stat(2) data
+
+ 32-bit mtime seconds, the last time a file's data changed
+ this is stat(2) data
+
+ 32-bit mtime nanosecond fractions
+ this is stat(2) data
+
+ 32-bit dev
+ this is stat(2) data
+
+ 32-bit ino
+ this is stat(2) data
+
+ 32-bit mode, split into (high to low bits)
+
+ 4-bit object type
+ valid values in binary are 1000 (regular file), 1010 (symbolic link)
+ and 1110 (gitlink)
+
+ 3-bit unused
+
+ 9-bit unix permission. Only 0755 and 0644 are valid for regular files.
+ Symbolic links and gitlinks have value 0 in this field.
+
+ 32-bit uid
+ this is stat(2) data
+
+ 32-bit gid
+ this is stat(2) data
+
+ 32-bit file size
+ This is the on-disk size from stat(2), truncated to 32-bit.
+
+ 160-bit SHA-1 for the represented object
+
+ A 16-bit 'flags' field split into (high to low bits)
+
+ 1-bit assume-valid flag
+
+ 1-bit extended flag (must be zero in version 2)
+
+ 2-bit stage (during merge)
+
+ 12-bit name length if the length is less than 0xFFF; otherwise 0xFFF
+ is stored in this field.
+
+ (Version 3 or later) A 16-bit field, only applicable if the
+ "extended flag" above is 1, split into (high to low bits).
+
+ 1-bit reserved for future
+
+ 1-bit skip-worktree flag (used by sparse checkout)
+
+ 1-bit intent-to-add flag (used by "git add -N")
+
+ 13-bit unused, must be zero
+
+ Entry path name (variable length) relative to top level directory
+ (without leading slash). '/' is used as path separator. The special
+ path components ".", ".." and ".git" (without quotes) are disallowed.
+ Trailing slash is also disallowed.
+
+ The exact encoding is undefined, but the '.' and '/' characters
+ are encoded in 7-bit ASCII and the encoding cannot contain a NUL
+ byte (iow, this is a UNIX pathname).
+
+ (Version 4) In version 4, the entry path name is prefix-compressed
+ relative to the path name for the previous entry (the very first
+ entry is encoded as if the path name for the previous entry is an
+ empty string). At the beginning of an entry, an integer N in the
+ variable width encoding (the same encoding as the offset is encoded
+ for OFS_DELTA pack entries; see pack-format.txt) is stored, followed
+ by a NUL-terminated string S. Removing N bytes from the end of the
+ path name for the previous entry, and replacing it with the string S
+ yields the path name for this entry.
+
+ 1-8 nul bytes as necessary to pad the entry to a multiple of eight bytes
+ while keeping the name NUL-terminated.
+
+ (Version 4) In version 4, the padding after the pathname does not
+ exist.
+
+ Interpretation of index entries in split index mode is completely
+ different. See below for details.
+
+== Extensions
+
+=== Cached tree
+
+ Cached tree extension contains pre-computed hashes for trees that can
+ be derived from the index. It helps speed up tree object generation
+ from index for a new commit.
+
+ When a path is updated in index, the path must be invalidated and
+ removed from tree cache.
+
+ The signature for this extension is { 'T', 'R', 'E', 'E' }.
+
+ A series of entries fill the entire extension; each of which
+ consists of:
+
+ - NUL-terminated path component (relative to its parent directory);
+
+ - ASCII decimal number of entries in the index that is covered by the
+ tree this entry represents (entry_count);
+
+ - A space (ASCII 32);
+
+ - ASCII decimal number that represents the number of subtrees this
+ tree has;
+
+ - A newline (ASCII 10); and
+
+ - 160-bit object name for the object that would result from writing
+ this span of index as a tree.
+
+ An entry can be in an invalidated state and is represented by having
+ a negative number in the entry_count field. In this case, there is no
+ object name and the next entry starts immediately after the newline.
+ When writing an invalid entry, -1 should always be used as entry_count.
+
+ The entries are written out in the top-down, depth-first order. The
+ first entry represents the root level of the repository, followed by the
+ first subtree--let's call this A--of the root level (with its name
+ relative to the root level), followed by the first subtree of A (with
+ its name relative to A), ...
+
+=== Resolve undo
+
+ A conflict is represented in the index as a set of higher stage entries.
+ When a conflict is resolved (e.g. with "git add path"), these higher
+ stage entries will be removed and a stage-0 entry with proper resolution
+ is added.
+
+ When these higher stage entries are removed, they are saved in the
+ resolve undo extension, so that conflicts can be recreated (e.g. with
+ "git checkout -m"), in case users want to redo a conflict resolution
+ from scratch.
+
+ The signature for this extension is { 'R', 'E', 'U', 'C' }.
+
+ A series of entries fill the entire extension; each of which
+ consists of:
+
+ - NUL-terminated pathname the entry describes (relative to the root of
+ the repository, i.e. full pathname);
+
+ - Three NUL-terminated ASCII octal numbers, entry mode of entries in
+ stage 1 to 3 (a missing stage is represented by "0" in this field);
+ and
+
+ - At most three 160-bit object names of the entry in stages from 1 to 3
+ (nothing is written for a missing stage).
+
+=== Split index
+
+ In split index mode, the majority of index entries could be stored
+ in a separate file. This extension records the changes to be made on
+ top of that to produce the final index.
+
+ The signature for this extension is { 'l', 'i', 'n', 'k' }.
+
+ The extension consists of:
+
+ - 160-bit SHA-1 of the shared index file. The shared index file path
+ is $GIT_DIR/sharedindex.<SHA-1>. If all 160 bits are zero, the
+ index does not require a shared index file.
+
+ - An ewah-encoded delete bitmap, each bit represents an entry in the
+ shared index. If a bit is set, its corresponding entry in the
+ shared index will be removed from the final index. Note, because
+ a delete operation changes index entry positions, but we do need
+ original positions in replace phase, it's best to just mark
+ entries for removal, then do a mass deletion after replacement.
+
+ - An ewah-encoded replace bitmap, each bit represents an entry in
+ the shared index. If a bit is set, its corresponding entry in the
+ shared index will be replaced with an entry in this index
+ file. All replaced entries are stored in sorted order in this
+ index. The first "1" bit in the replace bitmap corresponds to the
+ first index entry, the second "1" bit to the second entry and so
+ on. Replaced entries may have empty path names to save space.
+
+ The remaining index entries after replaced ones will be added to the
+ final index. These added entries are also sorted by entry name then
+ stage.
+
+== Untracked cache
+
+ Untracked cache saves the untracked file list and necessary data to
+ verify the cache. The signature for this extension is { 'U', 'N',
+ 'T', 'R' }.
+
+ The extension starts with
+
+ - A sequence of NUL-terminated strings, preceded by the size of the
+ sequence in variable width encoding. Each string describes the
+ environment where the cache can be used.
+
+ - Stat data of $GIT_DIR/info/exclude. See "Index entry" section from
+ ctime field until "file size".
+
+ - Stat data of core.excludesfile
+
+ - 32-bit dir_flags (see struct dir_struct)
+
+ - 160-bit SHA-1 of $GIT_DIR/info/exclude. Null SHA-1 means the file
+ does not exist.
+
+ - 160-bit SHA-1 of core.excludesfile. Null SHA-1 means the file does
+ not exist.
+
+ - NUL-terminated string of per-dir exclude file name. This usually
+ is ".gitignore".
+
+ - The number of following directory blocks, variable width
+ encoding. If this number is zero, the extension ends here with a
+ following NUL.
+
+ - A number of directory blocks in depth-first-search order, each
+ consists of
+
+ - The number of untracked entries, variable width encoding.
+
+ - The number of sub-directory blocks, variable width encoding.
+
+ - The directory name terminated by NUL.
+
+ - A number of untracked file/dir names terminated by NUL.
+
+The remaining data of each directory block is grouped by type:
+
+ - An ewah bitmap, the n-th bit marks whether the n-th directory has
+ valid untracked cache entries.
+
+ - An ewah bitmap, the n-th bit records "check-only" bit of
+ read_directory_recursive() for the n-th directory.
+
+ - An ewah bitmap, the n-th bit indicates whether SHA-1 and stat data
+ is valid for the n-th directory and exists in the next data.
+
+ - An array of stat data. The n-th data corresponds with the n-th
+ "one" bit in the previous ewah bitmap.
+
+ - An array of SHA-1. The n-th SHA-1 corresponds with the n-th "one" bit
+ in the previous ewah bitmap.
+
+ - One NUL.
+*/
diff --git a/plumbing/format/index/encoder.go b/plumbing/format/index/encoder.go
new file mode 100644
index 0000000..94fbc68
--- /dev/null
+++ b/plumbing/format/index/encoder.go
@@ -0,0 +1,141 @@
+package index
+
+import (
+ "bytes"
+ "crypto/sha1"
+ "errors"
+ "hash"
+ "io"
+ "time"
+
+ "gopkg.in/src-d/go-git.v4/utils/binary"
+)
+
+var (
+ // EncodeVersionSupported is the only index format version that Encode is
+ // able to write.
+ EncodeVersionSupported uint32 = 2
+
+ // ErrInvalidTimestamp is returned by Encode if an Entry of the Index has a
+ // negative timestamp.
+ ErrInvalidTimestamp = errors.New("negative timestamps are not allowed")
+)
+
+// An Encoder writes an Index to an output stream.
+type Encoder struct {
+ // w is the output; NewEncoder wraps it so that every write also feeds hash.
+ w io.Writer
+ // hash accumulates the SHA-1 of everything written, used as the footer.
+ hash hash.Hash
+}
+
+// NewEncoder returns an Encoder writing to w. Everything written is also fed
+// to a SHA-1 hash so the checksum footer can be produced.
+func NewEncoder(w io.Writer) *Encoder {
+	sum := sha1.New()
+
+	return &Encoder{
+		w:    io.MultiWriter(w, sum),
+		hash: sum,
+	}
+}
+
+// Encode writes idx to the encoder output as header, entries and checksum
+// footer. It returns ErrUnsupportedVersion unless idx.Version equals
+// EncodeVersionSupported.
+func (e *Encoder) Encode(idx *Index) error {
+	// TODO: support versions v3 and v4
+	// TODO: support extensions
+	if idx.Version != EncodeVersionSupported {
+		return ErrUnsupportedVersion
+	}
+
+	err := e.encodeHeader(idx)
+	if err == nil {
+		err = e.encodeEntries(idx)
+	}
+
+	if err != nil {
+		return err
+	}
+
+	return e.encodeFooter()
+}
+
+// encodeHeader writes the signature, the version and the number of entries.
+func (e *Encoder) encodeHeader(idx *Index) error {
+	count := uint32(len(idx.Entries))
+
+	return binary.Write(e.w, indexSignature, idx.Version, count)
+}
+
+// encodeEntries writes every entry of idx followed by its alignment padding.
+func (e *Encoder) encodeEntries(idx *Index) error {
+	for i := range idx.Entries {
+		entry := &idx.Entries[i]
+
+		if err := e.encodeEntry(entry); err != nil {
+			return err
+		}
+
+		if err := e.padEntry(entryHeaderLength + len(entry.Name)); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// encodeEntry writes the fixed-size header of entry followed by its name.
+// Entries that need the extended flags (IntentToAdd or SkipWorktree) are
+// rejected with ErrUnsupportedVersion.
+func (e *Encoder) encodeEntry(entry *Entry) error {
+	if entry.IntentToAdd || entry.SkipWorktree {
+		return ErrUnsupportedVersion
+	}
+
+	ctimeSec, ctimeNsec, err := e.timeToUint32(&entry.CreatedAt)
+	if err != nil {
+		return err
+	}
+
+	mtimeSec, mtimeNsec, err := e.timeToUint32(&entry.ModifiedAt)
+	if err != nil {
+		return err
+	}
+
+	// The low 12 bits hold the name length, saturated at nameMask.
+	nameLen := len(entry.Name)
+	if nameLen >= nameMask {
+		nameLen = nameMask
+	}
+
+	flags := uint16(entry.Stage&0x3) << 12
+	flags |= uint16(nameLen)
+
+	err = binary.Write(e.w,
+		ctimeSec, ctimeNsec,
+		mtimeSec, mtimeNsec,
+		entry.Dev,
+		entry.Inode,
+		entry.Mode,
+		entry.UID,
+		entry.GID,
+		entry.Size,
+		entry.Hash[:],
+		flags,
+	)
+	if err != nil {
+		return err
+	}
+
+	return binary.Write(e.w, []byte(entry.Name))
+}
+
+// timeToUint32 splits t into 32-bit seconds and nanoseconds. The zero time
+// encodes as (0, 0); a time whose Unix or UnixNano value is negative yields
+// ErrInvalidTimestamp.
+func (e *Encoder) timeToUint32(t *time.Time) (uint32, uint32, error) {
+	switch {
+	case t.IsZero():
+		return 0, 0, nil
+	case t.Unix() < 0, t.UnixNano() < 0:
+		return 0, 0, ErrInvalidTimestamp
+	}
+
+	seconds := uint32(t.Unix())
+	nanos := uint32(t.Nanosecond())
+
+	return seconds, nanos, nil
+}
+
+// padEntry writes the NUL bytes that bring an entry of wrote bytes up to the
+// next multiple of eight; a full eight bytes are written when wrote is
+// already aligned.
+func (e *Encoder) padEntry(wrote int) error {
+	// A freshly made byte slice is already filled with zeroes.
+	padding := make([]byte, 8-wrote%8)
+
+	_, err := e.w.Write(padding)
+	return err
+}
+
+// encodeFooter writes the SHA-1 of everything written so far.
+func (e *Encoder) encodeFooter() error {
+	checksum := e.hash.Sum(nil)
+
+	return binary.Write(e.w, checksum)
+}
diff --git a/plumbing/format/index/encoder_test.go b/plumbing/format/index/encoder_test.go
new file mode 100644
index 0000000..6770985
--- /dev/null
+++ b/plumbing/format/index/encoder_test.go
@@ -0,0 +1,78 @@
+package index
+
+import (
+ "bytes"
+ "strings"
+ "time"
+
+ . "gopkg.in/check.v1"
+ "gopkg.in/src-d/go-git.v4/plumbing"
+)
+
+// TestEncode encodes an index, decodes it again and expects the result to be
+// deeply equal to the input.
+func (s *IdxfileSuite) TestEncode(c *C) {
+	// Use a fixed time.Unix value rather than time.Now(): a time.Now() value
+	// carries a monotonic clock reading that does not survive the round trip,
+	// which would make DeepEquals fail even though the instants match.
+	timestamp := time.Unix(1480626693, 498593596)
+
+	idx := &Index{
+		Version: 2,
+		Entries: []Entry{{
+			CreatedAt:  timestamp,
+			ModifiedAt: timestamp,
+			Dev:        4242,
+			Inode:      424242,
+			UID:        84,
+			GID:        8484,
+			Size:       42,
+			Stage:      TheirMode,
+			Hash:       plumbing.NewHash("e25b29c8946e0e192fae2edc1dabf7be71e8ecf3"),
+			Name:       "foo",
+		}, {
+			CreatedAt:  timestamp,
+			ModifiedAt: timestamp,
+			Name:       strings.Repeat(" ", 20),
+			Size:       82,
+		}},
+	}
+
+	buf := bytes.NewBuffer(nil)
+	e := NewEncoder(buf)
+	err := e.Encode(idx)
+	c.Assert(err, IsNil)
+
+	output := &Index{}
+	d := NewDecoder(buf)
+	err = d.Decode(output)
+	c.Assert(err, IsNil)
+
+	c.Assert(idx, DeepEquals, output)
+}
+
+// TestEncodeUnsuportedVersion checks that a version 3 index is rejected.
+func (s *IdxfileSuite) TestEncodeUnsuportedVersion(c *C) {
+	var out bytes.Buffer
+
+	err := NewEncoder(&out).Encode(&Index{Version: 3})
+	c.Assert(err, Equals, ErrUnsupportedVersion)
+}
+
+// TestEncodeWithIntentToAddUnsuportedVersion checks that an entry flagged
+// IntentToAdd is rejected.
+func (s *IdxfileSuite) TestEncodeWithIntentToAddUnsuportedVersion(c *C) {
+	input := &Index{
+		Version: 2,
+		Entries: []Entry{{IntentToAdd: true}},
+	}
+
+	var out bytes.Buffer
+	err := NewEncoder(&out).Encode(input)
+	c.Assert(err, Equals, ErrUnsupportedVersion)
+}
+
+// TestEncodeWithSkipWorktreeUnsuportedVersion checks that an entry flagged
+// SkipWorktree is rejected.
+func (s *IdxfileSuite) TestEncodeWithSkipWorktreeUnsuportedVersion(c *C) {
+	input := &Index{
+		Version: 2,
+		Entries: []Entry{{SkipWorktree: true}},
+	}
+
+	var out bytes.Buffer
+	err := NewEncoder(&out).Encode(input)
+	c.Assert(err, Equals, ErrUnsupportedVersion)
+}
diff --git a/plumbing/format/index/index.go b/plumbing/format/index/index.go
new file mode 100644
index 0000000..4a3c798
--- /dev/null
+++ b/plumbing/format/index/index.go
@@ -0,0 +1,108 @@
+package index
+
+import (
+ "errors"
+ "os"
+ "time"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+)
+
+var (
+	// ErrUnsupportedVersion is returned by Decode and Encode when the index
+	// file version is not supported.
+	ErrUnsupportedVersion = errors.New("unsupported version")
+
+	// Signatures of the index file and of the extensions understood by the
+	// decoder.
+	indexSignature          = []byte{'D', 'I', 'R', 'C'}
+	treeExtSignature        = []byte{'T', 'R', 'E', 'E'}
+	resolveUndoExtSignature = []byte{'R', 'E', 'U', 'C'}
+)
+
+// Stage is the merge stage of an index entry, as stored in the 2-bit stage
+// field of the entry flags.
+type Stage int
+
+const (
+	// Merged is the default stage, fully merged. It is stage 0, which is also
+	// the zero value of Stage.
+	Merged Stage = 0
+	// AncestorMode is the base revision
+	AncestorMode Stage = 1
+	// OurMode is the first tree revision, ours
+	OurMode Stage = 2
+	// TheirMode is the second tree revision, theirs
+	TheirMode Stage = 3
+)
+
+// Index contains the information about which objects are currently checked out
+// in the worktree, having information about the working files. Changes in
+// worktree are detected using this Index. The Index is also used during merges
+type Index struct {
+ // Version is the index format version read from or written to the header.
+ Version uint32
+ // Entries are the index entries, in file order.
+ Entries []Entry
+ // Cache holds the cached tree extension; the decoder leaves it nil when
+ // the extension is absent.
+ Cache *Tree
+ // ResolveUndo holds the resolve undo extension; the decoder leaves it nil
+ // when the extension is absent.
+ ResolveUndo *ResolveUndo
+}
+
+// Entry represents a single file (or stage of a file) in the cache. An entry
+// represents exactly one stage of a file. If a file path is unmerged then
+// multiple Entry instances may appear for the same path name.
+type Entry struct {
+ // Hash is the SHA1 of the represented file
+ Hash plumbing.Hash
+ // Name is the Entry path name relative to top level directory
+ Name string
+ // CreatedAt is filled from the first timestamp of the entry, which the
+ // format description in doc.go calls ctime (last metadata change).
+ CreatedAt time.Time
+ // ModifiedAt is filled from the second timestamp of the entry, the mtime
+ // (last time the file's data changed).
+ ModifiedAt time.Time
+ // Dev and Inode of the tracked path
+ Dev, Inode uint32
+ // Mode of the path
+ Mode os.FileMode
+ // UID and GID, userid and group id of the owner
+ UID, GID uint32
+ // Size is the length in bytes for regular files
+ Size uint32
+ // Stage defines which merge stage this entry represents
+ // https://git-scm.com/book/en/v2/Git-Tools-Advanced-Merging
+ Stage Stage
+ // SkipWorktree used in sparse checkouts
+ // https://git-scm.com/docs/git-read-tree#_sparse_checkout
+ SkipWorktree bool
+ // IntentToAdd record only the fact that the path will be added later
+ // https://git-scm.com/docs/git-add ("git add -N")
+ IntentToAdd bool
+}
+
+// Tree contains pre-computed hashes for trees that can be derived from the
+// index. It helps speed up tree object generation from index for a new commit.
+type Tree struct {
+ // Entries are the cached tree entries in the order they were decoded.
+ Entries []TreeEntry
+}
+
+// TreeEntry is one entry of a cached Tree.
+type TreeEntry struct {
+ // Path component (relative to its parent directory)
+ Path string
+ // Entries is the number of entries in the index that is covered by the tree
+ // this entry represents
+ Entries int
+ // Trees is the number of subtrees this tree has
+ Trees int
+ // Hash object name for the object that would result from writing this span
+ // of index as a tree.
+ Hash plumbing.Hash
+}
+
+// ResolveUndo holds the resolve undo extension. When a conflict is resolved
+// (e.g. with "git add path"), the higher stage entries are removed and a
+// stage-0 entry with the proper resolution is added. The removed higher stage
+// entries are saved in the resolve undo extension.
+type ResolveUndo struct {
+ // Entries has one element per path recorded in the extension.
+ Entries []ResolveUndoEntry
+}
+
+// ResolveUndoEntry contains the information about a conflict that has been
+// resolved.
+type ResolveUndoEntry struct {
+ // Path is the pathname the entry describes.
+ Path string
+ // Stages maps each stage recorded for the path to its object name; stages
+ // that were missing have no key.
+ Stages map[Stage]plumbing.Hash
+}