diff options
author | Máximo Cuadros <mcuadros@gmail.com> | 2016-11-08 23:46:38 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-11-08 23:46:38 +0100 |
commit | ac095bb12c4d29722b60ba9f20590fa7cfa6bc7d (patch) | |
tree | 223f36f336ba3414b1e45cac8af6c4744a5d7ef6 /plumbing/format/index | |
parent | e523701393598f4fa241dd407af9ff8925507a1a (diff) | |
download | go-git-ac095bb12c4d29722b60ba9f20590fa7cfa6bc7d.tar.gz |
new plumbing package (#118)
* plumbing: now core was renamed to plumbing, and formats and clients moved inside
Diffstat (limited to 'plumbing/format/index')
-rw-r--r-- | plumbing/format/index/decoder.go | 446 | ||||
-rw-r--r-- | plumbing/format/index/decoder_test.go | 196 | ||||
-rw-r--r-- | plumbing/format/index/doc.go | 302 | ||||
-rw-r--r-- | plumbing/format/index/encoder.go | 141 | ||||
-rw-r--r-- | plumbing/format/index/encoder_test.go | 78 | ||||
-rw-r--r-- | plumbing/format/index/index.go | 108 |
6 files changed, 1271 insertions, 0 deletions
diff --git a/plumbing/format/index/decoder.go b/plumbing/format/index/decoder.go new file mode 100644 index 0000000..9069c9e --- /dev/null +++ b/plumbing/format/index/decoder.go @@ -0,0 +1,446 @@ +package index + +import ( + "bytes" + "crypto/sha1" + "errors" + "hash" + "io" + "io/ioutil" + "strconv" + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/utils/binary" +) + +var ( + // DecodeVersionSupported is the range of supported index versions + DecodeVersionSupported = struct{ Min, Max uint32 }{Min: 2, Max: 4} + + // ErrMalformedSignature is returned by Decode when the index header file is + // malformed + ErrMalformedSignature = errors.New("malformed index signature file") + // ErrInvalidChecksum is returned by Decode if the SHA1 hash missmatch with + // the read content + ErrInvalidChecksum = errors.New("invalid checksum") + + errUnknownExtension = errors.New("unknown extension") +) + +const ( + entryHeaderLength = 62 + entryExtended = 0x4000 + entryValid = 0x8000 + nameMask = 0xfff + intentToAddMask = 1 << 13 + skipWorkTreeMask = 1 << 14 +) + +// A Decoder reads and decodes idx files from an input stream. +type Decoder struct { + r io.Reader + hash hash.Hash + lastEntry *Entry +} + +// NewDecoder returns a new decoder that reads from r. +func NewDecoder(r io.Reader) *Decoder { + h := sha1.New() + return &Decoder{ + r: io.TeeReader(r, h), + hash: h, + } +} + +// Decode reads the whole index object from its input and stores it in the +// value pointed to by idx. 
+func (d *Decoder) Decode(idx *Index) error { + var err error + idx.Version, err = validateHeader(d.r) + if err != nil { + return err + } + + entryCount, err := binary.ReadUint32(d.r) + if err != nil { + return err + } + + if err := d.readEntries(idx, int(entryCount)); err != nil { + return err + } + + return d.readExtensions(idx) +} + +func (d *Decoder) readEntries(idx *Index, count int) error { + for i := 0; i < count; i++ { + e, err := d.readEntry(idx) + if err != nil { + return err + } + + d.lastEntry = e + idx.Entries = append(idx.Entries, *e) + } + + return nil +} + +func (d *Decoder) readEntry(idx *Index) (*Entry, error) { + e := &Entry{} + + var msec, mnsec, sec, nsec uint32 + var flags uint16 + + flow := []interface{}{ + &sec, &nsec, + &msec, &mnsec, + &e.Dev, + &e.Inode, + &e.Mode, + &e.UID, + &e.GID, + &e.Size, + &e.Hash, + &flags, + } + + if err := binary.Read(d.r, flow...); err != nil { + return nil, err + } + + read := entryHeaderLength + e.CreatedAt = time.Unix(int64(sec), int64(nsec)) + e.ModifiedAt = time.Unix(int64(msec), int64(mnsec)) + e.Stage = Stage(flags>>12) & 0x3 + + if flags&entryExtended != 0 { + extended, err := binary.ReadUint16(d.r) + if err != nil { + return nil, err + } + + read += 2 + e.IntentToAdd = extended&intentToAddMask != 0 + e.SkipWorktree = extended&skipWorkTreeMask != 0 + } + + if err := d.readEntryName(idx, e, flags); err != nil { + return nil, err + } + + return e, d.padEntry(idx, e, read) +} + +func (d *Decoder) readEntryName(idx *Index, e *Entry, flags uint16) error { + var name string + var err error + + switch idx.Version { + case 2, 3: + len := flags & nameMask + name, err = d.doReadEntryName(len) + case 4: + name, err = d.doReadEntryNameV4() + default: + return ErrUnsupportedVersion + } + + if err != nil { + return err + } + + e.Name = name + return nil +} + +func (d *Decoder) doReadEntryNameV4() (string, error) { + l, err := binary.ReadVariableWidthInt(d.r) + if err != nil { + return "", err + } + + var base string 
+ if d.lastEntry != nil { + base = d.lastEntry.Name[:len(d.lastEntry.Name)-int(l)] + } + + name, err := binary.ReadUntil(d.r, '\x00') + if err != nil { + return "", err + } + + return base + string(name), nil +} + +func (d *Decoder) doReadEntryName(len uint16) (string, error) { + name := make([]byte, len) + if err := binary.Read(d.r, &name); err != nil { + return "", err + } + + return string(name), nil +} + +// Index entries are padded out to the next 8 byte alignment +// for historical reasons related to how C Git read the files. +func (d *Decoder) padEntry(idx *Index, e *Entry, read int) error { + if idx.Version == 4 { + return nil + } + + entrySize := read + len(e.Name) + padLen := 8 - entrySize%8 + if _, err := io.CopyN(ioutil.Discard, d.r, int64(padLen)); err != nil { + return err + } + + return nil +} + +// TODO: support 'Split index' and 'Untracked cache' extensions, take in count +// that they are not supported by jgit or libgit +func (d *Decoder) readExtensions(idx *Index) error { + var expected []byte + var err error + + var header [4]byte + for { + expected = d.hash.Sum(nil) + + var n int + if n, err = io.ReadFull(d.r, header[:]); err != nil { + if n == 0 { + err = io.EOF + } + + break + } + + err = d.readExtension(idx, header[:]) + if err != nil { + break + } + } + + if err != errUnknownExtension { + return err + } + + return d.readChecksum(expected, header) +} + +func (d *Decoder) readExtension(idx *Index, header []byte) error { + switch { + case bytes.Equal(header, treeExtSignature): + r, err := d.getExtensionReader() + if err != nil { + return err + } + + idx.Cache = &Tree{} + d := &treeExtensionDecoder{r} + if err := d.Decode(idx.Cache); err != nil { + return err + } + case bytes.Equal(header, resolveUndoExtSignature): + r, err := d.getExtensionReader() + if err != nil { + return err + } + + idx.ResolveUndo = &ResolveUndo{} + d := &resolveUndoDecoder{r} + if err := d.Decode(idx.ResolveUndo); err != nil { + return err + } + default: + return 
errUnknownExtension + } + + return nil +} + +func (d *Decoder) getExtensionReader() (io.Reader, error) { + len, err := binary.ReadUint32(d.r) + if err != nil { + return nil, err + } + + return &io.LimitedReader{R: d.r, N: int64(len)}, nil +} + +func (d *Decoder) readChecksum(expected []byte, alreadyRead [4]byte) error { + var h plumbing.Hash + copy(h[:4], alreadyRead[:]) + + if err := binary.Read(d.r, h[4:]); err != nil { + return err + } + + if bytes.Compare(h[:], expected) != 0 { + return ErrInvalidChecksum + } + + return nil +} + +func validateHeader(r io.Reader) (version uint32, err error) { + var s = make([]byte, 4) + if _, err := io.ReadFull(r, s); err != nil { + return 0, err + } + + if !bytes.Equal(s, indexSignature) { + return 0, ErrMalformedSignature + } + + version, err = binary.ReadUint32(r) + if err != nil { + return 0, err + } + + if version < DecodeVersionSupported.Min || version > DecodeVersionSupported.Max { + return 0, ErrUnsupportedVersion + } + + return +} + +type treeExtensionDecoder struct { + r io.Reader +} + +func (d *treeExtensionDecoder) Decode(t *Tree) error { + for { + e, err := d.readEntry() + if err != nil { + if err == io.EOF { + return nil + } + + return err + } + + if e == nil { + continue + } + + t.Entries = append(t.Entries, *e) + } +} + +func (d *treeExtensionDecoder) readEntry() (*TreeEntry, error) { + e := &TreeEntry{} + + path, err := binary.ReadUntil(d.r, '\x00') + if err != nil { + return nil, err + } + + e.Path = string(path) + + count, err := binary.ReadUntil(d.r, ' ') + if err != nil { + return nil, err + } + + i, err := strconv.Atoi(string(count)) + if err != nil { + return nil, err + } + + // An entry can be in an invalidated state and is represented by having a + // negative number in the entry_count field. 
+ if i == -1 { + return nil, nil + } + + e.Entries = i + trees, err := binary.ReadUntil(d.r, '\n') + if err != nil { + return nil, err + } + + i, err = strconv.Atoi(string(trees)) + if err != nil { + return nil, err + } + + e.Trees = i + + if err := binary.Read(d.r, &e.Hash); err != nil { + return nil, err + } + + return e, nil +} + +type resolveUndoDecoder struct { + r io.Reader +} + +func (d *resolveUndoDecoder) Decode(ru *ResolveUndo) error { + for { + e, err := d.readEntry() + if err != nil { + if err == io.EOF { + return nil + } + + return err + } + + ru.Entries = append(ru.Entries, *e) + } +} + +func (d *resolveUndoDecoder) readEntry() (*ResolveUndoEntry, error) { + e := &ResolveUndoEntry{ + Stages: make(map[Stage]plumbing.Hash, 0), + } + + path, err := binary.ReadUntil(d.r, '\x00') + if err != nil { + return nil, err + } + + e.Path = string(path) + + for i := 0; i < 3; i++ { + if err := d.readStage(e, Stage(i+1)); err != nil { + return nil, err + } + } + + for s := range e.Stages { + var hash plumbing.Hash + if err := binary.Read(d.r, hash[:]); err != nil { + return nil, err + } + + e.Stages[s] = hash + } + + return e, nil +} + +func (d *resolveUndoDecoder) readStage(e *ResolveUndoEntry, s Stage) error { + ascii, err := binary.ReadUntil(d.r, '\x00') + if err != nil { + return err + } + + stage, err := strconv.ParseInt(string(ascii), 8, 64) + if err != nil { + return err + } + + if stage != 0 { + e.Stages[s] = plumbing.ZeroHash + } + + return nil +} diff --git a/plumbing/format/index/decoder_test.go b/plumbing/format/index/decoder_test.go new file mode 100644 index 0000000..44ecb69 --- /dev/null +++ b/plumbing/format/index/decoder_test.go @@ -0,0 +1,196 @@ +package index + +import ( + "testing" + + . 
"gopkg.in/check.v1" + "gopkg.in/src-d/go-git.v4/fixtures" + "gopkg.in/src-d/go-git.v4/plumbing" +) + +func Test(t *testing.T) { TestingT(t) } + +type IdxfileSuite struct { + fixtures.Suite +} + +var _ = Suite(&IdxfileSuite{}) + +func (s *IdxfileSuite) TestDecode(c *C) { + f, err := fixtures.Basic().One().DotGit().Open("index") + c.Assert(err, IsNil) + + idx := &Index{} + d := NewDecoder(f) + err = d.Decode(idx) + c.Assert(err, IsNil) + + c.Assert(idx.Version, Equals, uint32(2)) + c.Assert(idx.Entries, HasLen, 9) +} + +func (s *IdxfileSuite) TestDecodeEntries(c *C) { + f, err := fixtures.Basic().One().DotGit().Open("index") + c.Assert(err, IsNil) + + idx := &Index{} + d := NewDecoder(f) + err = d.Decode(idx) + c.Assert(err, IsNil) + + c.Assert(idx.Entries, HasLen, 9) + + e := idx.Entries[0] + c.Assert(e.CreatedAt.Unix(), Equals, int64(1473350251)) + c.Assert(e.CreatedAt.Nanosecond(), Equals, 12059307) + c.Assert(e.ModifiedAt.Unix(), Equals, int64(1473350251)) + c.Assert(e.ModifiedAt.Nanosecond(), Equals, 12059307) + c.Assert(e.Dev, Equals, uint32(38)) + c.Assert(e.Inode, Equals, uint32(1715795)) + c.Assert(e.UID, Equals, uint32(1000)) + c.Assert(e.GID, Equals, uint32(100)) + c.Assert(e.Size, Equals, uint32(189)) + c.Assert(e.Hash.String(), Equals, "32858aad3c383ed1ff0a0f9bdf231d54a00c9e88") + c.Assert(e.Name, Equals, ".gitignore") + c.Assert(e.Mode.String(), Equals, "-rw-r--r--") + + e = idx.Entries[1] + c.Assert(e.Name, Equals, "CHANGELOG") +} + +func (s *IdxfileSuite) TestDecodeCacheTree(c *C) { + f, err := fixtures.Basic().One().DotGit().Open("index") + c.Assert(err, IsNil) + + idx := &Index{} + d := NewDecoder(f) + err = d.Decode(idx) + c.Assert(err, IsNil) + + c.Assert(idx.Entries, HasLen, 9) + c.Assert(idx.Cache.Entries, HasLen, 5) + + for i, expected := range expectedEntries { + c.Assert(idx.Cache.Entries[i].Path, Equals, expected.Path) + c.Assert(idx.Cache.Entries[i].Entries, Equals, expected.Entries) + c.Assert(idx.Cache.Entries[i].Trees, Equals, 
expected.Trees) + c.Assert(idx.Cache.Entries[i].Hash.String(), Equals, expected.Hash.String()) + } + +} + +var expectedEntries = []TreeEntry{ + {Path: "", Entries: 9, Trees: 4, Hash: plumbing.NewHash("a8d315b2b1c615d43042c3a62402b8a54288cf5c")}, + {Path: "go", Entries: 1, Trees: 0, Hash: plumbing.NewHash("a39771a7651f97faf5c72e08224d857fc35133db")}, + {Path: "php", Entries: 1, Trees: 0, Hash: plumbing.NewHash("586af567d0bb5e771e49bdd9434f5e0fb76d25fa")}, + {Path: "json", Entries: 2, Trees: 0, Hash: plumbing.NewHash("5a877e6a906a2743ad6e45d99c1793642aaf8eda")}, + {Path: "vendor", Entries: 1, Trees: 0, Hash: plumbing.NewHash("cf4aa3b38974fb7d81f367c0830f7d78d65ab86b")}, +} + +func (s *IdxfileSuite) TestDecodeMergeConflict(c *C) { + f, err := fixtures.Basic().ByTag("merge-conflict").One().DotGit().Open("index") + c.Assert(err, IsNil) + + idx := &Index{} + d := NewDecoder(f) + err = d.Decode(idx) + c.Assert(err, IsNil) + + c.Assert(idx.Version, Equals, uint32(2)) + c.Assert(idx.Entries, HasLen, 13) + + expected := []struct { + Stage Stage + Hash string + }{ + {AncestorMode, "880cd14280f4b9b6ed3986d6671f907d7cc2a198"}, + {OurMode, "d499a1a0b79b7d87a35155afd0c1cce78b37a91c"}, + {TheirMode, "14f8e368114f561c38e134f6e68ea6fea12d77ed"}, + } + + // stagged files + for i, e := range idx.Entries[4:7] { + c.Assert(e.Stage, Equals, expected[i].Stage) + c.Assert(e.CreatedAt.Unix(), Equals, int64(0)) + c.Assert(e.CreatedAt.Nanosecond(), Equals, 0) + c.Assert(e.ModifiedAt.Unix(), Equals, int64(0)) + c.Assert(e.ModifiedAt.Nanosecond(), Equals, 0) + c.Assert(e.Dev, Equals, uint32(0)) + c.Assert(e.Inode, Equals, uint32(0)) + c.Assert(e.UID, Equals, uint32(0)) + c.Assert(e.GID, Equals, uint32(0)) + c.Assert(e.Size, Equals, uint32(0)) + c.Assert(e.Hash.String(), Equals, expected[i].Hash) + c.Assert(e.Name, Equals, "go/example.go") + } + +} + +func (s *IdxfileSuite) TestDecodeExtendedV3(c *C) { + f, err := fixtures.Basic().ByTag("intent-to-add").One().DotGit().Open("index") + 
c.Assert(err, IsNil) + + idx := &Index{} + d := NewDecoder(f) + err = d.Decode(idx) + c.Assert(err, IsNil) + + c.Assert(idx.Version, Equals, uint32(3)) + c.Assert(idx.Entries, HasLen, 11) + + c.Assert(idx.Entries[6].Name, Equals, "intent-to-add") + c.Assert(idx.Entries[6].IntentToAdd, Equals, true) + c.Assert(idx.Entries[6].SkipWorktree, Equals, false) +} + +func (s *IdxfileSuite) TestDecodeResolveUndo(c *C) { + f, err := fixtures.Basic().ByTag("resolve-undo").One().DotGit().Open("index") + c.Assert(err, IsNil) + + idx := &Index{} + d := NewDecoder(f) + err = d.Decode(idx) + c.Assert(err, IsNil) + + c.Assert(idx.Version, Equals, uint32(2)) + c.Assert(idx.Entries, HasLen, 8) + + ru := idx.ResolveUndo + c.Assert(ru.Entries, HasLen, 2) + c.Assert(ru.Entries[0].Path, Equals, "go/example.go") + c.Assert(ru.Entries[0].Stages, HasLen, 3) + c.Assert(ru.Entries[0].Stages[AncestorMode], Not(Equals), plumbing.ZeroHash) + c.Assert(ru.Entries[0].Stages[OurMode], Not(Equals), plumbing.ZeroHash) + c.Assert(ru.Entries[0].Stages[TheirMode], Not(Equals), plumbing.ZeroHash) + c.Assert(ru.Entries[1].Path, Equals, "haskal/haskal.hs") + c.Assert(ru.Entries[1].Stages, HasLen, 2) + c.Assert(ru.Entries[1].Stages[OurMode], Not(Equals), plumbing.ZeroHash) + c.Assert(ru.Entries[1].Stages[TheirMode], Not(Equals), plumbing.ZeroHash) +} + +func (s *IdxfileSuite) TestDecodeV4(c *C) { + f, err := fixtures.Basic().ByTag("index-v4").One().DotGit().Open("index") + c.Assert(err, IsNil) + + idx := &Index{} + d := NewDecoder(f) + err = d.Decode(idx) + c.Assert(err, IsNil) + + c.Assert(idx.Version, Equals, uint32(4)) + c.Assert(idx.Entries, HasLen, 11) + + names := []string{ + ".gitignore", "CHANGELOG", "LICENSE", "binary.jpg", "go/example.go", + "haskal/haskal.hs", "intent-to-add", "json/long.json", + "json/short.json", "php/crappy.php", "vendor/foo.go", + } + + for i, e := range idx.Entries { + c.Assert(e.Name, Equals, names[i]) + } + + c.Assert(idx.Entries[6].Name, Equals, "intent-to-add") + 
c.Assert(idx.Entries[6].IntentToAdd, Equals, true) + c.Assert(idx.Entries[6].SkipWorktree, Equals, false) +} diff --git a/plumbing/format/index/doc.go b/plumbing/format/index/doc.go new file mode 100644 index 0000000..3a72606 --- /dev/null +++ b/plumbing/format/index/doc.go @@ -0,0 +1,302 @@ +// Package index implements a encoder/decoder of index format files +package index + +/* +Git index format +================ + +== The Git index file has the following format + + All binary numbers are in network byte order. Version 2 is described + here unless stated otherwise. + + - A 12-byte header consisting of + + 4-byte signature: + The signature is { 'D', 'I', 'R', 'C' } (stands for "dircache") + + 4-byte version number: + The current supported versions are 2, 3 and 4. + + 32-bit number of index entries. + + - A number of sorted index entries (see below). + + - Extensions + + Extensions are identified by signature. Optional extensions can + be ignored if Git does not understand them. + + Git currently supports cached tree and resolve undo extensions. + + 4-byte extension signature. If the first byte is 'A'..'Z' the + extension is optional and can be ignored. + + 32-bit size of the extension + + Extension data + + - 160-bit SHA-1 over the content of the index file before this + checksum. + +== Index entry + + Index entries are sorted in ascending order on the name field, + interpreted as a string of unsigned bytes (i.e. memcmp() order, no + localization, no special casing of directory separator '/'). Entries + with the same name are sorted by their stage field. 
+ + 32-bit ctime seconds, the last time a file's metadata changed + this is stat(2) data + + 32-bit ctime nanosecond fractions + this is stat(2) data + + 32-bit mtime seconds, the last time a file's data changed + this is stat(2) data + + 32-bit mtime nanosecond fractions + this is stat(2) data + + 32-bit dev + this is stat(2) data + + 32-bit ino + this is stat(2) data + + 32-bit mode, split into (high to low bits) + + 4-bit object type + valid values in binary are 1000 (regular file), 1010 (symbolic link) + and 1110 (gitlink) + + 3-bit unused + + 9-bit unix permission. Only 0755 and 0644 are valid for regular files. + Symbolic links and gitlinks have value 0 in this field. + + 32-bit uid + this is stat(2) data + + 32-bit gid + this is stat(2) data + + 32-bit file size + This is the on-disk size from stat(2), truncated to 32-bit. + + 160-bit SHA-1 for the represented object + + A 16-bit 'flags' field split into (high to low bits) + + 1-bit assume-valid flag + + 1-bit extended flag (must be zero in version 2) + + 2-bit stage (during merge) + + 12-bit name length if the length is less than 0xFFF; otherwise 0xFFF + is stored in this field. + + (Version 3 or later) A 16-bit field, only applicable if the + "extended flag" above is 1, split into (high to low bits). + + 1-bit reserved for future + + 1-bit skip-worktree flag (used by sparse checkout) + + 1-bit intent-to-add flag (used by "git add -N") + + 13-bit unused, must be zero + + Entry path name (variable length) relative to top level directory + (without leading slash). '/' is used as path separator. The special + path components ".", ".." and ".git" (without quotes) are disallowed. + Trailing slash is also disallowed. + + The exact encoding is undefined, but the '.' and '/' characters + are encoded in 7-bit ASCII and the encoding cannot contain a NUL + byte (iow, this is a UNIX pathname). 
+ + (Version 4) In version 4, the entry path name is prefix-compressed + relative to the path name for the previous entry (the very first + entry is encoded as if the path name for the previous entry is an + empty string). At the beginning of an entry, an integer N in the + variable width encoding (the same encoding as the offset is encoded + for OFS_DELTA pack entries; see pack-format.txt) is stored, followed + by a NUL-terminated string S. Removing N bytes from the end of the + path name for the previous entry, and replacing it with the string S + yields the path name for this entry. + + 1-8 nul bytes as necessary to pad the entry to a multiple of eight bytes + while keeping the name NUL-terminated. + + (Version 4) In version 4, the padding after the pathname does not + exist. + + Interpretation of index entries in split index mode is completely + different. See below for details. + +== Extensions + +=== Cached tree + + Cached tree extension contains pre-computed hashes for trees that can + be derived from the index. It helps speed up tree object generation + from index for a new commit. + + When a path is updated in index, the path must be invalidated and + removed from tree cache. + + The signature for this extension is { 'T', 'R', 'E', 'E' }. + + A series of entries fill the entire extension; each of which + consists of: + + - NUL-terminated path component (relative to its parent directory); + + - ASCII decimal number of entries in the index that is covered by the + tree this entry represents (entry_count); + + - A space (ASCII 32); + + - ASCII decimal number that represents the number of subtrees this + tree has; + + - A newline (ASCII 10); and + + - 160-bit object name for the object that would result from writing + this span of index as a tree. + + An entry can be in an invalidated state and is represented by having + a negative number in the entry_count field. In this case, there is no + object name and the next entry starts immediately after the newline. 
+ When writing an invalid entry, -1 should always be used as entry_count. + + The entries are written out in the top-down, depth-first order. The + first entry represents the root level of the repository, followed by the + first subtree--let's call this A--of the root level (with its name + relative to the root level), followed by the first subtree of A (with + its name relative to A), ... + +=== Resolve undo + + A conflict is represented in the index as a set of higher stage entries. + When a conflict is resolved (e.g. with "git add path"), these higher + stage entries will be removed and a stage-0 entry with proper resolution + is added. + + When these higher stage entries are removed, they are saved in the + resolve undo extension, so that conflicts can be recreated (e.g. with + "git checkout -m"), in case users want to redo a conflict resolution + from scratch. + + The signature for this extension is { 'R', 'E', 'U', 'C' }. + + A series of entries fill the entire extension; each of which + consists of: + + - NUL-terminated pathname the entry describes (relative to the root of + the repository, i.e. full pathname); + + - Three NUL-terminated ASCII octal numbers, entry mode of entries in + stage 1 to 3 (a missing stage is represented by "0" in this field); + and + + - At most three 160-bit object names of the entry in stages from 1 to 3 + (nothing is written for a missing stage). + +=== Split index + + In split index mode, the majority of index entries could be stored + in a separate file. This extension records the changes to be made on + top of that to produce the final index. + + The signature for this extension is { 'l', 'i', 'n', 'k' }. + + The extension consists of: + + - 160-bit SHA-1 of the shared index file. The shared index file path + is $GIT_DIR/sharedindex.<SHA-1>. If all 160 bits are zero, the + index does not require a shared index file. + + - An ewah-encoded delete bitmap, each bit represents an entry in the + shared index. 
If a bit is set, its corresponding entry in the + shared index will be removed from the final index. Note, because + a delete operation changes index entry positions, but we do need + original positions in replace phase, it's best to just mark + entries for removal, then do a mass deletion after replacement. + + - An ewah-encoded replace bitmap, each bit represents an entry in + the shared index. If a bit is set, its corresponding entry in the + shared index will be replaced with an entry in this index + file. All replaced entries are stored in sorted order in this + index. The first "1" bit in the replace bitmap corresponds to the + first index entry, the second "1" bit to the second entry and so + on. Replaced entries may have empty path names to save space. + + The remaining index entries after replaced ones will be added to the + final index. These added entries are also sorted by entry name then + stage. + +== Untracked cache + + Untracked cache saves the untracked file list and necessary data to + verify the cache. The signature for this extension is { 'U', 'N', + 'T', 'R' }. + + The extension starts with + + - A sequence of NUL-terminated strings, preceded by the size of the + sequence in variable width encoding. Each string describes the + environment where the cache can be used. + + - Stat data of $GIT_DIR/info/exclude. See "Index entry" section from + ctime field until "file size". + + - Stat data of core.excludesfile + + - 32-bit dir_flags (see struct dir_struct) + + - 160-bit SHA-1 of $GIT_DIR/info/exclude. Null SHA-1 means the file + does not exist. + + - 160-bit SHA-1 of core.excludesfile. Null SHA-1 means the file does + not exist. + + - NUL-terminated string of per-dir exclude file name. This usually + is ".gitignore". + + - The number of following directory blocks, variable width + encoding. If this number is zero, the extension ends here with a + following NUL. 
+ + - A number of directory blocks in depth-first-search order, each + consists of + + - The number of untracked entries, variable width encoding. + + - The number of sub-directory blocks, variable width encoding. + + - The directory name terminated by NUL. + + - A number of untracked file/dir names terminated by NUL. + +The remaining data of each directory block is grouped by type: + + - An ewah bitmap, the n-th bit marks whether the n-th directory has + valid untracked cache entries. + + - An ewah bitmap, the n-th bit records "check-only" bit of + read_directory_recursive() for the n-th directory. + + - An ewah bitmap, the n-th bit indicates whether SHA-1 and stat data + is valid for the n-th directory and exists in the next data. + + - An array of stat data. The n-th data corresponds with the n-th + "one" bit in the previous ewah bitmap. + + - An array of SHA-1. The n-th SHA-1 corresponds with the n-th "one" bit + in the previous ewah bitmap. + + - One NUL. +*/ diff --git a/plumbing/format/index/encoder.go b/plumbing/format/index/encoder.go new file mode 100644 index 0000000..94fbc68 --- /dev/null +++ b/plumbing/format/index/encoder.go @@ -0,0 +1,141 @@ +package index + +import ( + "bytes" + "crypto/sha1" + "errors" + "hash" + "io" + "time" + + "gopkg.in/src-d/go-git.v4/utils/binary" +) + +var ( + // EncodeVersionSupported is the range of supported index versions + EncodeVersionSupported uint32 = 2 + + // ErrInvalidTimestamp is returned by Encode if a Index with a Entry with + // negative timestamp values + ErrInvalidTimestamp = errors.New("negative timestamps are not allowed") +) + +// An Encoder writes an Index to an output stream. +type Encoder struct { + w io.Writer + hash hash.Hash +} + +// NewEncoder returns a new encoder that writes to w. +func NewEncoder(w io.Writer) *Encoder { + h := sha1.New() + mw := io.MultiWriter(w, h) + return &Encoder{mw, h} +} + +// Encode writes the Index to the stream of the encoder. 
+func (e *Encoder) Encode(idx *Index) error { + // TODO: support versions v3 and v4 + // TODO: support extensions + if idx.Version != EncodeVersionSupported { + return ErrUnsupportedVersion + } + + if err := e.encodeHeader(idx); err != nil { + return err + } + + if err := e.encodeEntries(idx); err != nil { + return err + } + + return e.encodeFooter() +} + +func (e *Encoder) encodeHeader(idx *Index) error { + return binary.Write(e.w, + indexSignature, + idx.Version, + uint32(len(idx.Entries)), + ) +} + +func (e *Encoder) encodeEntries(idx *Index) error { + for _, entry := range idx.Entries { + if err := e.encodeEntry(&entry); err != nil { + return err + } + + wrote := entryHeaderLength + len(entry.Name) + if err := e.padEntry(wrote); err != nil { + return err + } + } + + return nil +} + +func (e *Encoder) encodeEntry(entry *Entry) error { + if entry.IntentToAdd || entry.SkipWorktree { + return ErrUnsupportedVersion + } + + sec, nsec, err := e.timeToUint32(&entry.CreatedAt) + if err != nil { + return err + } + + msec, mnsec, err := e.timeToUint32(&entry.ModifiedAt) + if err != nil { + return err + } + + flags := uint16(entry.Stage&0x3) << 12 + if l := len(entry.Name); l < nameMask { + flags |= uint16(l) + } else { + flags |= nameMask + } + + flow := []interface{}{ + sec, nsec, + msec, mnsec, + entry.Dev, + entry.Inode, + entry.Mode, + entry.UID, + entry.GID, + entry.Size, + entry.Hash[:], + flags, + } + + if err := binary.Write(e.w, flow...); err != nil { + return err + } + + return binary.Write(e.w, []byte(entry.Name)) +} + +func (e *Encoder) timeToUint32(t *time.Time) (uint32, uint32, error) { + if t.IsZero() { + return 0, 0, nil + } + + if t.Unix() < 0 || t.UnixNano() < 0 { + return 0, 0, ErrInvalidTimestamp + } + + return uint32(t.Unix()), uint32(t.Nanosecond()), nil +} + +func (e *Encoder) padEntry(wrote int) error { + padLen := 8 - wrote%8 + + _, err := e.w.Write(bytes.Repeat([]byte{'\x00'}, padLen)) + return err +} + +func (e *Encoder) encodeFooter() error { + 
return binary.Write(e.w, e.hash.Sum(nil)) +} diff --git a/plumbing/format/index/encoder_test.go b/plumbing/format/index/encoder_test.go new file mode 100644 index 0000000..6770985 --- /dev/null +++ b/plumbing/format/index/encoder_test.go @@ -0,0 +1,78 @@ +package index + +import ( + "bytes" + "strings" + "time" + + . "gopkg.in/check.v1" + "gopkg.in/src-d/go-git.v4/plumbing" +) + +func (s *IdxfileSuite) TestEncode(c *C) { + idx := &Index{ + Version: 2, + Entries: []Entry{{ + CreatedAt: time.Now(), + ModifiedAt: time.Now(), + Dev: 4242, + Inode: 424242, + UID: 84, + GID: 8484, + Size: 42, + Stage: TheirMode, + Hash: plumbing.NewHash("e25b29c8946e0e192fae2edc1dabf7be71e8ecf3"), + Name: "foo", + }, { + CreatedAt: time.Now(), + ModifiedAt: time.Now(), + Name: strings.Repeat(" ", 20), + Size: 82, + }}, + } + + buf := bytes.NewBuffer(nil) + e := NewEncoder(buf) + err := e.Encode(idx) + c.Assert(err, IsNil) + + output := &Index{} + d := NewDecoder(buf) + err = d.Decode(output) + c.Assert(err, IsNil) + + c.Assert(idx, DeepEquals, output) +} + +func (s *IdxfileSuite) TestEncodeUnsuportedVersion(c *C) { + idx := &Index{Version: 3} + + buf := bytes.NewBuffer(nil) + e := NewEncoder(buf) + err := e.Encode(idx) + c.Assert(err, Equals, ErrUnsupportedVersion) +} + +func (s *IdxfileSuite) TestEncodeWithIntentToAddUnsuportedVersion(c *C) { + idx := &Index{ + Version: 2, + Entries: []Entry{{IntentToAdd: true}}, + } + + buf := bytes.NewBuffer(nil) + e := NewEncoder(buf) + err := e.Encode(idx) + c.Assert(err, Equals, ErrUnsupportedVersion) +} + +func (s *IdxfileSuite) TestEncodeWithSkipWorktreeUnsuportedVersion(c *C) { + idx := &Index{ + Version: 2, + Entries: []Entry{{SkipWorktree: true}}, + } + + buf := bytes.NewBuffer(nil) + e := NewEncoder(buf) + err := e.Encode(idx) + c.Assert(err, Equals, ErrUnsupportedVersion) +} diff --git a/plumbing/format/index/index.go b/plumbing/format/index/index.go new file mode 100644 index 0000000..4a3c798 --- /dev/null +++ 
b/plumbing/format/index/index.go @@ -0,0 +1,108 @@ +package index + +import ( + "errors" + "os" + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" +) + +var ( + // ErrUnsupportedVersion is returned by Decode when the idxindex file + // version is not supported. + ErrUnsupportedVersion = errors.New("Unsuported version") + + indexSignature = []byte{'D', 'I', 'R', 'C'} + treeExtSignature = []byte{'T', 'R', 'E', 'E'} + resolveUndoExtSignature = []byte{'R', 'E', 'U', 'C'} +) + +// Stage during merge +type Stage int + +const ( + // Merged is the default stage, fully merged + Merged Stage = 1 + // AncestorMode is the base revision + AncestorMode Stage = 1 + // OurMode is the first tree revision, ours + OurMode Stage = 2 + // TheirMode is the second tree revision, theirs + TheirMode Stage = 3 +) + +// Index contains the information about which objects are currently checked out +// in the worktree, having information about the working files. Changes in +// worktree are detected using this Index. The Index is also used during merges +type Index struct { + Version uint32 + Entries []Entry + Cache *Tree + ResolveUndo *ResolveUndo +} + +// Entry represents a single file (or stage of a file) in the cache. An entry +// represents exactly one stage of a file. If a file path is unmerged then +// multiple Entry instances may appear for the same path name. 
+type Entry struct { + // Hash is the SHA1 of the represented file + Hash plumbing.Hash + // Name is the Entry path name relative to top level directory + Name string + // CreatedAt time when the tracked path was created + CreatedAt time.Time + // ModifiedAt time when the tracked path was changed + ModifiedAt time.Time + // Dev and Inode of the tracked path + Dev, Inode uint32 + // Mode of the path + Mode os.FileMode + // UID and GID, userid and group id of the owner + UID, GID uint32 + // Size is the length in bytes for regular files + Size uint32 + // Stage on a merge is defines what stage is representing this entry + // https://git-scm.com/book/en/v2/Git-Tools-Advanced-Merging + Stage Stage + // SkipWorktree used in sparse checkouts + // https://git-scm.com/docs/git-read-tree#_sparse_checkout + SkipWorktree bool + // IntentToAdd record only the fact that the path will be added later + // https://git-scm.com/docs/git-add ("git add -N") + IntentToAdd bool +} + +// Tree contains pre-computed hashes for trees that can be derived from the +// index. It helps speed up tree object generation from index for a new commit. +type Tree struct { + Entries []TreeEntry +} + +// TreeEntry entry of a cached Tree +type TreeEntry struct { + // Path component (relative to its parent directory) + Path string + // Entries is the number of entries in the index that is covered by the tree + // this entry represents + Entries int + // Trees is the number that represents the number of subtrees this tree has + Trees int + // Hash object name for the object that would result from writing this span + // of index as a tree. + Hash plumbing.Hash +} + +// ResolveUndo when a conflict is resolved (e.g. with "git add path"), these +// higher stage entries will be removed and a stage-0 entry with proper +// resolution is added. 
When these higher stage entries are removed, they are +// saved in the resolve undo extension +type ResolveUndo struct { + Entries []ResolveUndoEntry +} + +// ResolveUndoEntry contains the information about a conflict when is resolved +type ResolveUndoEntry struct { + Path string + Stages map[Stage]plumbing.Hash +} |