aboutsummaryrefslogtreecommitdiffstats
path: root/plumbing/format/commitgraph
diff options
context:
space:
mode:
Diffstat (limited to 'plumbing/format/commitgraph')
-rw-r--r--plumbing/format/commitgraph/commitgraph.go35
-rw-r--r--plumbing/format/commitgraph/commitgraph_test.go132
-rw-r--r--plumbing/format/commitgraph/doc.go103
-rw-r--r--plumbing/format/commitgraph/encoder.go190
-rw-r--r--plumbing/format/commitgraph/file.go259
-rw-r--r--plumbing/format/commitgraph/memory.go72
6 files changed, 791 insertions, 0 deletions
diff --git a/plumbing/format/commitgraph/commitgraph.go b/plumbing/format/commitgraph/commitgraph.go
new file mode 100644
index 0000000..e43cd89
--- /dev/null
+++ b/plumbing/format/commitgraph/commitgraph.go
@@ -0,0 +1,35 @@
+package commitgraph
+
+import (
+ "time"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+)
+
+// CommitData is a reduced representation of Commit as presented in the commit graph
+// file. It is merely useful as an optimization for walking the commit graphs.
+type CommitData struct {
+ // TreeHash is the hash of the root tree of the commit.
+ TreeHash plumbing.Hash
+ // ParentIndexes are the indexes of the parent commits of the commit.
+ ParentIndexes []int
+ // ParentHashes are the hashes of the parent commits of the commit.
+ ParentHashes []plumbing.Hash
+ // Generation number is the pre-computed generation in the commit graph
+ // or zero if not available
+ Generation int
+ // When is the timestamp of the commit.
+ When time.Time
+}
+
+// Index represents a representation of commit graph that allows indexed
+// access to the nodes using commit object hash
+type Index interface {
+ // GetIndexByHash gets the index in the commit graph from commit hash, if available
+ GetIndexByHash(h plumbing.Hash) (int, error)
+ // GetNodeByIndex gets the commit node from the commit graph using index
+ // obtained from child node, if available
+ GetCommitDataByIndex(i int) (*CommitData, error)
+ // Hashes returns all the hashes that are available in the index
+ Hashes() []plumbing.Hash
+}
diff --git a/plumbing/format/commitgraph/commitgraph_test.go b/plumbing/format/commitgraph/commitgraph_test.go
new file mode 100644
index 0000000..0214f49
--- /dev/null
+++ b/plumbing/format/commitgraph/commitgraph_test.go
@@ -0,0 +1,132 @@
+package commitgraph_test
+
+import (
+ "io/ioutil"
+ "os"
+ "path"
+ "testing"
+
+ . "gopkg.in/check.v1"
+ fixtures "gopkg.in/src-d/go-git-fixtures.v3"
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph"
+)
+
+func Test(t *testing.T) { TestingT(t) }
+
+type CommitgraphSuite struct {
+ fixtures.Suite
+}
+
+var _ = Suite(&CommitgraphSuite{})
+
+func testDecodeHelper(c *C, path string) {
+ reader, err := os.Open(path)
+ c.Assert(err, IsNil)
+ defer reader.Close()
+ index, err := commitgraph.OpenFileIndex(reader)
+ c.Assert(err, IsNil)
+
+ // Root commit
+ nodeIndex, err := index.GetIndexByHash(plumbing.NewHash("347c91919944a68e9413581a1bc15519550a3afe"))
+ c.Assert(err, IsNil)
+ commitData, err := index.GetCommitDataByIndex(nodeIndex)
+ c.Assert(err, IsNil)
+ c.Assert(len(commitData.ParentIndexes), Equals, 0)
+ c.Assert(len(commitData.ParentHashes), Equals, 0)
+
+ // Regular commit
+ nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("e713b52d7e13807e87a002e812041f248db3f643"))
+ c.Assert(err, IsNil)
+ commitData, err = index.GetCommitDataByIndex(nodeIndex)
+ c.Assert(err, IsNil)
+ c.Assert(len(commitData.ParentIndexes), Equals, 1)
+ c.Assert(len(commitData.ParentHashes), Equals, 1)
+ c.Assert(commitData.ParentHashes[0].String(), Equals, "347c91919944a68e9413581a1bc15519550a3afe")
+
+ // Merge commit
+ nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("b29328491a0682c259bcce28741eac71f3499f7d"))
+ c.Assert(err, IsNil)
+ commitData, err = index.GetCommitDataByIndex(nodeIndex)
+ c.Assert(err, IsNil)
+ c.Assert(len(commitData.ParentIndexes), Equals, 2)
+ c.Assert(len(commitData.ParentHashes), Equals, 2)
+ c.Assert(commitData.ParentHashes[0].String(), Equals, "e713b52d7e13807e87a002e812041f248db3f643")
+ c.Assert(commitData.ParentHashes[1].String(), Equals, "03d2c021ff68954cf3ef0a36825e194a4b98f981")
+
+ // Octopus merge commit
+ nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("6f6c5d2be7852c782be1dd13e36496dd7ad39560"))
+ c.Assert(err, IsNil)
+ commitData, err = index.GetCommitDataByIndex(nodeIndex)
+ c.Assert(err, IsNil)
+ c.Assert(len(commitData.ParentIndexes), Equals, 3)
+ c.Assert(len(commitData.ParentHashes), Equals, 3)
+ c.Assert(commitData.ParentHashes[0].String(), Equals, "ce275064ad67d51e99f026084e20827901a8361c")
+ c.Assert(commitData.ParentHashes[1].String(), Equals, "bb13916df33ed23004c3ce9ed3b8487528e655c1")
+ c.Assert(commitData.ParentHashes[2].String(), Equals, "a45273fe2d63300e1962a9e26a6b15c276cd7082")
+
+ // Check all hashes
+ hashes := index.Hashes()
+ c.Assert(len(hashes), Equals, 11)
+ c.Assert(hashes[0].String(), Equals, "03d2c021ff68954cf3ef0a36825e194a4b98f981")
+ c.Assert(hashes[10].String(), Equals, "e713b52d7e13807e87a002e812041f248db3f643")
+}
+
+func (s *CommitgraphSuite) TestDecode(c *C) {
+ fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
+ dotgit := f.DotGit()
+ testDecodeHelper(c, path.Join(dotgit.Root(), "objects", "info", "commit-graph"))
+ })
+}
+
+func (s *CommitgraphSuite) TestReencode(c *C) {
+ fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
+ dotgit := f.DotGit()
+
+ reader, err := os.Open(path.Join(dotgit.Root(), "objects", "info", "commit-graph"))
+ c.Assert(err, IsNil)
+ defer reader.Close()
+ index, err := commitgraph.OpenFileIndex(reader)
+ c.Assert(err, IsNil)
+
+ writer, err := ioutil.TempFile(dotgit.Root(), "commit-graph")
+ c.Assert(err, IsNil)
+ tmpName := writer.Name()
+ defer os.Remove(tmpName)
+ encoder := commitgraph.NewEncoder(writer)
+ err = encoder.Encode(index)
+ c.Assert(err, IsNil)
+ writer.Close()
+
+ testDecodeHelper(c, tmpName)
+ })
+}
+
+func (s *CommitgraphSuite) TestReencodeInMemory(c *C) {
+ fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
+ dotgit := f.DotGit()
+
+ reader, err := os.Open(path.Join(dotgit.Root(), "objects", "info", "commit-graph"))
+ c.Assert(err, IsNil)
+ index, err := commitgraph.OpenFileIndex(reader)
+ c.Assert(err, IsNil)
+ memoryIndex := commitgraph.NewMemoryIndex()
+ for i, hash := range index.Hashes() {
+ commitData, err := index.GetCommitDataByIndex(i)
+ c.Assert(err, IsNil)
+ memoryIndex.Add(hash, commitData)
+ }
+ reader.Close()
+
+ writer, err := ioutil.TempFile(dotgit.Root(), "commit-graph")
+ c.Assert(err, IsNil)
+ tmpName := writer.Name()
+ defer os.Remove(tmpName)
+ encoder := commitgraph.NewEncoder(writer)
+ err = encoder.Encode(memoryIndex)
+ c.Assert(err, IsNil)
+ writer.Close()
+
+ testDecodeHelper(c, tmpName)
+ })
+}
diff --git a/plumbing/format/commitgraph/doc.go b/plumbing/format/commitgraph/doc.go
new file mode 100644
index 0000000..41cd8b1
--- /dev/null
+++ b/plumbing/format/commitgraph/doc.go
@@ -0,0 +1,103 @@
+// Package commitgraph implements encoding and decoding of commit-graph files.
+//
+// Git commit graph format
+// =======================
+//
+// The Git commit graph stores a list of commit OIDs and some associated
+// metadata, including:
+//
+// - The generation number of the commit. Commits with no parents have
+// generation number 1; commits with parents have generation number
+// one more than the maximum generation number of its parents. We
+// reserve zero as special, and can be used to mark a generation
+// number invalid or as "not computed".
+//
+// - The root tree OID.
+//
+// - The commit date.
+//
+// - The parents of the commit, stored using positional references within
+// the graph file.
+//
+// These positional references are stored as unsigned 32-bit integers
+// corresponding to the array position within the list of commit OIDs. Due
+// to some special constants we use to track parents, we can store at most
+// (1 << 30) + (1 << 29) + (1 << 28) - 1 (around 1.8 billion) commits.
+//
+// == Commit graph files have the following format:
+//
+// In order to allow extensions that add extra data to the graph, we organize
+// the body into "chunks" and provide a binary lookup table at the beginning
+// of the body. The header includes certain values, such as number of chunks
+// and hash type.
+//
+// All 4-byte numbers are in network order.
+//
+// HEADER:
+//
+// 4-byte signature:
+// The signature is: {'C', 'G', 'P', 'H'}
+//
+// 1-byte version number:
+// Currently, the only valid version is 1.
+//
+// 1-byte Hash Version (1 = SHA-1)
+// We infer the hash length (H) from this value.
+//
+// 1-byte number (C) of "chunks"
+//
+// 1-byte (reserved for later use)
+// Current clients should ignore this value.
+//
+// CHUNK LOOKUP:
+//
+// (C + 1) * 12 bytes listing the table of contents for the chunks:
+// First 4 bytes describe the chunk id. Value 0 is a terminating label.
+// Other 8 bytes provide the byte-offset in current file for chunk to
+// start. (Chunks are ordered contiguously in the file, so you can infer
+// the length using the next chunk position if necessary.) Each chunk
+// ID appears at most once.
+//
+// The remaining data in the body is described one chunk at a time, and
+// these chunks may be given in any order. Chunks are required unless
+// otherwise specified.
+//
+// CHUNK DATA:
+//
+// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes)
+// The ith entry, F[i], stores the number of OIDs with first
+// byte at most i. Thus F[255] stores the total
+// number of commits (N).
+//
+// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes)
+// The OIDs for all commits in the graph, sorted in ascending order.
+//
+// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes)
+// * The first H bytes are for the OID of the root tree.
+// * The next 8 bytes are for the positions of the first two parents
+// of the ith commit. Stores value 0x7000000 if no parent in that
+// position. If there are more than two parents, the second value
+// has its most-significant bit on and the other bits store an array
+// position into the Extra Edge List chunk.
+// * The next 8 bytes store the generation number of the commit and
+// the commit time in seconds since EPOCH. The generation number
+// uses the higher 30 bits of the first 4 bytes, while the commit
+// time uses the 32 bits of the second 4 bytes, along with the lowest
+// 2 bits of the lowest byte, storing the 33rd and 34th bit of the
+// commit time.
+//
+// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional]
+// This list of 4-byte values store the second through nth parents for
+// all octopus merges. The second parent value in the commit data stores
+// an array position within this list along with the most-significant bit
+// on. Starting at that array position, iterate through this list of commit
+// positions for the parents until reaching a value with the most-significant
+// bit on. The other bits correspond to the position of the last parent.
+//
+// TRAILER:
+//
+// H-byte HASH-checksum of all of the above.
+//
+// Source:
+// https://raw.githubusercontent.com/git/git/master/Documentation/technical/commit-graph-format.txt
+package commitgraph
diff --git a/plumbing/format/commitgraph/encoder.go b/plumbing/format/commitgraph/encoder.go
new file mode 100644
index 0000000..a06871c
--- /dev/null
+++ b/plumbing/format/commitgraph/encoder.go
@@ -0,0 +1,190 @@
+package commitgraph
+
+import (
+ "crypto/sha1"
+ "hash"
+ "io"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/utils/binary"
+)
+
+// Encoder writes MemoryIndex structs to an output stream.
+type Encoder struct {
+ io.Writer
+ hash hash.Hash
+}
+
+// NewEncoder returns a new stream encoder that writes to w.
+func NewEncoder(w io.Writer) *Encoder {
+ h := sha1.New()
+ mw := io.MultiWriter(w, h)
+ return &Encoder{mw, h}
+}
+
+// Encode writes an index into the commit-graph file
+func (e *Encoder) Encode(idx Index) error {
+ var err error
+
+ // Get all the hashes in the input index
+ hashes := idx.Hashes()
+
+ // Sort the inout and prepare helper structures we'll need for encoding
+ hashToIndex, fanout, extraEdgesCount := e.prepare(idx, hashes)
+
+ chunkSignatures := [][]byte{oidFanoutSignature, oidLookupSignature, commitDataSignature}
+ chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * 20, uint64(len(hashes)) * 36}
+ if extraEdgesCount > 0 {
+ chunkSignatures = append(chunkSignatures, extraEdgeListSignature)
+ chunkSizes = append(chunkSizes, uint64(extraEdgesCount)*4)
+ }
+
+ if err = e.encodeFileHeader(len(chunkSignatures)); err != nil {
+ return err
+ }
+ if err = e.encodeChunkHeaders(chunkSignatures, chunkSizes); err != nil {
+ return err
+ }
+ if err = e.encodeFanout(fanout); err != nil {
+ return err
+ }
+ if err = e.encodeOidLookup(hashes); err != nil {
+ return err
+ }
+ if extraEdges, err := e.encodeCommitData(hashes, hashToIndex, idx); err == nil {
+ if err = e.encodeExtraEdges(extraEdges); err != nil {
+ return err
+ }
+ }
+ if err != nil {
+ return err
+ }
+ return e.encodeChecksum()
+}
+
+func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (hashToIndex map[plumbing.Hash]uint32, fanout []uint32, extraEdgesCount uint32) {
+ // Sort the hashes and build our index
+ plumbing.HashesSort(hashes)
+ hashToIndex = make(map[plumbing.Hash]uint32)
+ fanout = make([]uint32, 256)
+ for i, hash := range hashes {
+ hashToIndex[hash] = uint32(i)
+ fanout[hash[0]]++
+ }
+
+ // Convert the fanout to cumulative values
+ for i := 1; i <= 0xff; i++ {
+ fanout[i] += fanout[i-1]
+ }
+
+ // Find out if we will need extra edge table
+ for i := 0; i < len(hashes); i++ {
+ v, _ := idx.GetCommitDataByIndex(i)
+ if len(v.ParentHashes) > 2 {
+ extraEdgesCount += uint32(len(v.ParentHashes) - 1)
+ break
+ }
+ }
+
+ return
+}
+
+func (e *Encoder) encodeFileHeader(chunkCount int) (err error) {
+ if _, err = e.Write(commitFileSignature); err == nil {
+ _, err = e.Write([]byte{1, 1, byte(chunkCount), 0})
+ }
+ return
+}
+
+func (e *Encoder) encodeChunkHeaders(chunkSignatures [][]byte, chunkSizes []uint64) (err error) {
+ // 8 bytes of file header, 12 bytes for each chunk header and 12 byte for terminator
+ offset := uint64(8 + len(chunkSignatures)*12 + 12)
+ for i, signature := range chunkSignatures {
+ if _, err = e.Write(signature); err == nil {
+ err = binary.WriteUint64(e, offset)
+ }
+ if err != nil {
+ return
+ }
+ offset += chunkSizes[i]
+ }
+ if _, err = e.Write(lastSignature); err == nil {
+ err = binary.WriteUint64(e, offset)
+ }
+ return
+}
+
+func (e *Encoder) encodeFanout(fanout []uint32) (err error) {
+ for i := 0; i <= 0xff; i++ {
+ if err = binary.WriteUint32(e, fanout[i]); err != nil {
+ return
+ }
+ }
+ return
+}
+
+func (e *Encoder) encodeOidLookup(hashes []plumbing.Hash) (err error) {
+ for _, hash := range hashes {
+ if _, err = e.Write(hash[:]); err != nil {
+ return err
+ }
+ }
+ return
+}
+
+func (e *Encoder) encodeCommitData(hashes []plumbing.Hash, hashToIndex map[plumbing.Hash]uint32, idx Index) (extraEdges []uint32, err error) {
+ for _, hash := range hashes {
+ origIndex, _ := idx.GetIndexByHash(hash)
+ commitData, _ := idx.GetCommitDataByIndex(origIndex)
+ if _, err = e.Write(commitData.TreeHash[:]); err != nil {
+ return
+ }
+
+ var parent1, parent2 uint32
+ if len(commitData.ParentHashes) == 0 {
+ parent1 = parentNone
+ parent2 = parentNone
+ } else if len(commitData.ParentHashes) == 1 {
+ parent1 = hashToIndex[commitData.ParentHashes[0]]
+ parent2 = parentNone
+ } else if len(commitData.ParentHashes) == 2 {
+ parent1 = hashToIndex[commitData.ParentHashes[0]]
+ parent2 = hashToIndex[commitData.ParentHashes[1]]
+ } else if len(commitData.ParentHashes) > 2 {
+ parent1 = hashToIndex[commitData.ParentHashes[0]]
+ parent2 = uint32(len(extraEdges)) | parentOctopusUsed
+ for _, parentHash := range commitData.ParentHashes[1:] {
+ extraEdges = append(extraEdges, hashToIndex[parentHash])
+ }
+ extraEdges[len(extraEdges)-1] |= parentLast
+ }
+
+ if err = binary.WriteUint32(e, parent1); err == nil {
+ err = binary.WriteUint32(e, parent2)
+ }
+ if err != nil {
+ return
+ }
+
+ unixTime := uint64(commitData.When.Unix())
+ unixTime |= uint64(commitData.Generation) << 34
+ if err = binary.WriteUint64(e, unixTime); err != nil {
+ return
+ }
+ }
+ return
+}
+
+func (e *Encoder) encodeExtraEdges(extraEdges []uint32) (err error) {
+ for _, parent := range extraEdges {
+ if err = binary.WriteUint32(e, parent); err != nil {
+ return
+ }
+ }
+ return
+}
+
+func (e *Encoder) encodeChecksum() error {
+ _, err := e.Write(e.hash.Sum(nil)[:20])
+ return err
+}
diff --git a/plumbing/format/commitgraph/file.go b/plumbing/format/commitgraph/file.go
new file mode 100644
index 0000000..175d279
--- /dev/null
+++ b/plumbing/format/commitgraph/file.go
@@ -0,0 +1,259 @@
+package commitgraph
+
+import (
+ "bytes"
+ encbin "encoding/binary"
+ "errors"
+ "io"
+ "time"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/utils/binary"
+)
+
+var (
+ // ErrUnsupportedVersion is returned by OpenFileIndex when the commit graph
+ // file version is not supported.
+ ErrUnsupportedVersion = errors.New("Unsupported version")
+ // ErrUnsupportedHash is returned by OpenFileIndex when the commit graph
+ // hash function is not supported. Currently only SHA-1 is defined and
+ // supported
+ ErrUnsupportedHash = errors.New("Unsupported hash algorithm")
+ // ErrMalformedCommitGraphFile is returned by OpenFileIndex when the commit
+ // graph file is corrupted.
+ ErrMalformedCommitGraphFile = errors.New("Malformed commit graph file")
+
+ commitFileSignature = []byte{'C', 'G', 'P', 'H'}
+ oidFanoutSignature = []byte{'O', 'I', 'D', 'F'}
+ oidLookupSignature = []byte{'O', 'I', 'D', 'L'}
+ commitDataSignature = []byte{'C', 'D', 'A', 'T'}
+ extraEdgeListSignature = []byte{'E', 'D', 'G', 'E'}
+ lastSignature = []byte{0, 0, 0, 0}
+
+ parentNone = uint32(0x70000000)
+ parentOctopusUsed = uint32(0x80000000)
+ parentOctopusMask = uint32(0x7fffffff)
+ parentLast = uint32(0x80000000)
+)
+
+type fileIndex struct {
+ reader io.ReaderAt
+ fanout [256]int
+ oidFanoutOffset int64
+ oidLookupOffset int64
+ commitDataOffset int64
+ extraEdgeListOffset int64
+}
+
+// OpenFileIndex opens a serialized commit graph file in the format described at
+// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt
+func OpenFileIndex(reader io.ReaderAt) (Index, error) {
+ fi := &fileIndex{reader: reader}
+
+ if err := fi.verifyFileHeader(); err != nil {
+ return nil, err
+ }
+ if err := fi.readChunkHeaders(); err != nil {
+ return nil, err
+ }
+ if err := fi.readFanout(); err != nil {
+ return nil, err
+ }
+
+ return fi, nil
+}
+
+func (fi *fileIndex) verifyFileHeader() error {
+ // Verify file signature
+ var signature = make([]byte, 4)
+ if _, err := fi.reader.ReadAt(signature, 0); err != nil {
+ return err
+ }
+ if !bytes.Equal(signature, commitFileSignature) {
+ return ErrMalformedCommitGraphFile
+ }
+
+ // Read and verify the file header
+ var header = make([]byte, 4)
+ if _, err := fi.reader.ReadAt(header, 4); err != nil {
+ return err
+ }
+ if header[0] != 1 {
+ return ErrUnsupportedVersion
+ }
+ if header[1] != 1 {
+ return ErrUnsupportedHash
+ }
+
+ return nil
+}
+
+func (fi *fileIndex) readChunkHeaders() error {
+ var chunkID = make([]byte, 4)
+ for i := 0; ; i++ {
+ chunkHeader := io.NewSectionReader(fi.reader, 8+(int64(i)*12), 12)
+ if _, err := io.ReadAtLeast(chunkHeader, chunkID, 4); err != nil {
+ return err
+ }
+ chunkOffset, err := binary.ReadUint64(chunkHeader)
+ if err != nil {
+ return err
+ }
+
+ if bytes.Equal(chunkID, oidFanoutSignature) {
+ fi.oidFanoutOffset = int64(chunkOffset)
+ } else if bytes.Equal(chunkID, oidLookupSignature) {
+ fi.oidLookupOffset = int64(chunkOffset)
+ } else if bytes.Equal(chunkID, commitDataSignature) {
+ fi.commitDataOffset = int64(chunkOffset)
+ } else if bytes.Equal(chunkID, extraEdgeListSignature) {
+ fi.extraEdgeListOffset = int64(chunkOffset)
+ } else if bytes.Equal(chunkID, lastSignature) {
+ break
+ }
+ }
+
+ if fi.oidFanoutOffset <= 0 || fi.oidLookupOffset <= 0 || fi.commitDataOffset <= 0 {
+ return ErrMalformedCommitGraphFile
+ }
+
+ return nil
+}
+
+func (fi *fileIndex) readFanout() error {
+ fanoutReader := io.NewSectionReader(fi.reader, fi.oidFanoutOffset, 256*4)
+ for i := 0; i < 256; i++ {
+ fanoutValue, err := binary.ReadUint32(fanoutReader)
+ if err != nil {
+ return err
+ }
+ if fanoutValue > 0x7fffffff {
+ return ErrMalformedCommitGraphFile
+ }
+ fi.fanout[i] = int(fanoutValue)
+ }
+ return nil
+}
+
+func (fi *fileIndex) GetIndexByHash(h plumbing.Hash) (int, error) {
+ var oid plumbing.Hash
+
+ // Find the hash in the oid lookup table
+ var low int
+ if h[0] == 0 {
+ low = 0
+ } else {
+ low = fi.fanout[h[0]-1]
+ }
+ high := fi.fanout[h[0]]
+ for low < high {
+ mid := (low + high) >> 1
+ offset := fi.oidLookupOffset + int64(mid)*20
+ if _, err := fi.reader.ReadAt(oid[:], offset); err != nil {
+ return 0, err
+ }
+ cmp := bytes.Compare(h[:], oid[:])
+ if cmp < 0 {
+ high = mid
+ } else if cmp == 0 {
+ return mid, nil
+ } else {
+ low = mid + 1
+ }
+ }
+
+ return 0, plumbing.ErrObjectNotFound
+}
+
+func (fi *fileIndex) GetCommitDataByIndex(idx int) (*CommitData, error) {
+ if idx >= fi.fanout[0xff] {
+ return nil, plumbing.ErrObjectNotFound
+ }
+
+ offset := fi.commitDataOffset + int64(idx)*36
+ commitDataReader := io.NewSectionReader(fi.reader, offset, 36)
+
+ treeHash, err := binary.ReadHash(commitDataReader)
+ if err != nil {
+ return nil, err
+ }
+ parent1, err := binary.ReadUint32(commitDataReader)
+ if err != nil {
+ return nil, err
+ }
+ parent2, err := binary.ReadUint32(commitDataReader)
+ if err != nil {
+ return nil, err
+ }
+ genAndTime, err := binary.ReadUint64(commitDataReader)
+ if err != nil {
+ return nil, err
+ }
+
+ var parentIndexes []int
+ if parent2&parentOctopusUsed == parentOctopusUsed {
+ // Octopus merge
+ parentIndexes = []int{int(parent1 & parentOctopusMask)}
+ offset := fi.extraEdgeListOffset + 4*int64(parent2&parentOctopusMask)
+ buf := make([]byte, 4)
+ for {
+ _, err := fi.reader.ReadAt(buf, offset)
+ if err != nil {
+ return nil, err
+ }
+
+ parent := encbin.BigEndian.Uint32(buf)
+ offset += 4
+ parentIndexes = append(parentIndexes, int(parent&parentOctopusMask))
+ if parent&parentLast == parentLast {
+ break
+ }
+ }
+ } else if parent2 != parentNone {
+ parentIndexes = []int{int(parent1 & parentOctopusMask), int(parent2 & parentOctopusMask)}
+ } else if parent1 != parentNone {
+ parentIndexes = []int{int(parent1 & parentOctopusMask)}
+ }
+
+ parentHashes, err := fi.getHashesFromIndexes(parentIndexes)
+ if err != nil {
+ return nil, err
+ }
+
+ return &CommitData{
+ TreeHash: treeHash,
+ ParentIndexes: parentIndexes,
+ ParentHashes: parentHashes,
+ Generation: int(genAndTime >> 34),
+ When: time.Unix(int64(genAndTime&0x3FFFFFFFF), 0),
+ }, nil
+}
+
+func (fi *fileIndex) getHashesFromIndexes(indexes []int) ([]plumbing.Hash, error) {
+ hashes := make([]plumbing.Hash, len(indexes))
+
+ for i, idx := range indexes {
+ if idx >= fi.fanout[0xff] {
+ return nil, ErrMalformedCommitGraphFile
+ }
+
+ offset := fi.oidLookupOffset + int64(idx)*20
+ if _, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil {
+ return nil, err
+ }
+ }
+
+ return hashes, nil
+}
+
+// Hashes returns all the hashes that are available in the index
+func (fi *fileIndex) Hashes() []plumbing.Hash {
+ hashes := make([]plumbing.Hash, fi.fanout[0xff])
+ for i := 0; i < int(fi.fanout[0xff]); i++ {
+ offset := fi.oidLookupOffset + int64(i)*20
+ if n, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil || n < 20 {
+ return nil
+ }
+ }
+ return hashes
+}
diff --git a/plumbing/format/commitgraph/memory.go b/plumbing/format/commitgraph/memory.go
new file mode 100644
index 0000000..a4a96e9
--- /dev/null
+++ b/plumbing/format/commitgraph/memory.go
@@ -0,0 +1,72 @@
+package commitgraph
+
+import (
+ "gopkg.in/src-d/go-git.v4/plumbing"
+)
+
+// MemoryIndex provides a way to build the commit-graph in memory
+// for later encoding to file.
+type MemoryIndex struct {
+ commitData []*CommitData
+ indexMap map[plumbing.Hash]int
+}
+
+// NewMemoryIndex creates in-memory commit graph representation
+func NewMemoryIndex() *MemoryIndex {
+ return &MemoryIndex{
+ indexMap: make(map[plumbing.Hash]int),
+ }
+}
+
+// GetIndexByHash gets the index in the commit graph from commit hash, if available
+func (mi *MemoryIndex) GetIndexByHash(h plumbing.Hash) (int, error) {
+ i, ok := mi.indexMap[h]
+ if ok {
+ return i, nil
+ }
+
+ return 0, plumbing.ErrObjectNotFound
+}
+
+// GetCommitDataByIndex gets the commit node from the commit graph using index
+// obtained from child node, if available
+func (mi *MemoryIndex) GetCommitDataByIndex(i int) (*CommitData, error) {
+ if int(i) >= len(mi.commitData) {
+ return nil, plumbing.ErrObjectNotFound
+ }
+
+ commitData := mi.commitData[i]
+
+ // Map parent hashes to parent indexes
+ if commitData.ParentIndexes == nil {
+ parentIndexes := make([]int, len(commitData.ParentHashes))
+ for i, parentHash := range commitData.ParentHashes {
+ var err error
+ if parentIndexes[i], err = mi.GetIndexByHash(parentHash); err != nil {
+ return nil, err
+ }
+ }
+ commitData.ParentIndexes = parentIndexes
+ }
+
+ return commitData, nil
+}
+
+// Hashes returns all the hashes that are available in the index
+func (mi *MemoryIndex) Hashes() []plumbing.Hash {
+ hashes := make([]plumbing.Hash, 0, len(mi.indexMap))
+ for k := range mi.indexMap {
+ hashes = append(hashes, k)
+ }
+ return hashes
+}
+
+// Add adds new node to the memory index
+func (mi *MemoryIndex) Add(hash plumbing.Hash, commitData *CommitData) {
+ // The parent indexes are calculated lazily in GetNodeByIndex
+ // which allows adding nodes out of order as long as all parents
+ // are eventually resolved
+ commitData.ParentIndexes = nil
+ mi.indexMap[hash] = len(mi.commitData)
+ mi.commitData = append(mi.commitData, commitData)
+}