aboutsummaryrefslogtreecommitdiffstats
path: root/plumbing/format/commitgraph
diff options
context:
space:
mode:
authorAndrew Thornton <art27@cantab.net>2023-09-30 11:41:23 +0100
committerAndrew Thornton <art27@cantab.net>2023-10-08 09:38:00 +0100
commit946bb8183643bdda90810fc48e450a008894b244 (patch)
treeee2718e1ac621c88716068842e4ae38ed69862e8 /plumbing/format/commitgraph
parent19fe126d8889134e6224717a756745eed9985e22 (diff)
downloadgo-git-946bb8183643bdda90810fc48e450a008894b244.tar.gz
plumbing: commitgraph, fix types and handle commit-graph-chains
Unfortunately the original variant makes some incorrect typing assumptions about commit-graphs which make handling graph chains difficult to do correctly. This creates a new subpackage and deprecates the old one. It then adds support commit graph chains. Signed-off-by: Andrew Thornton <art27@cantab.net>
Diffstat (limited to 'plumbing/format/commitgraph')
-rw-r--r--plumbing/format/commitgraph/commitgraph.go6
-rw-r--r--plumbing/format/commitgraph/doc.go113
-rw-r--r--plumbing/format/commitgraph/encoder.go9
-rw-r--r--plumbing/format/commitgraph/file.go6
-rw-r--r--plumbing/format/commitgraph/memory.go6
-rw-r--r--plumbing/format/commitgraph/v2/chain.go100
-rw-r--r--plumbing/format/commitgraph/v2/chain_test.go100
-rw-r--r--plumbing/format/commitgraph/v2/chunk.go48
-rw-r--r--plumbing/format/commitgraph/v2/commitgraph.go40
-rw-r--r--plumbing/format/commitgraph/v2/commitgraph_test.go165
-rw-r--r--plumbing/format/commitgraph/v2/doc.go106
-rw-r--r--plumbing/format/commitgraph/v2/encoder.go192
-rw-r--r--plumbing/format/commitgraph/v2/file.go338
-rw-r--r--plumbing/format/commitgraph/v2/memory.go91
14 files changed, 1265 insertions, 55 deletions
diff --git a/plumbing/format/commitgraph/commitgraph.go b/plumbing/format/commitgraph/commitgraph.go
index 3d59323..e772d26 100644
--- a/plumbing/format/commitgraph/commitgraph.go
+++ b/plumbing/format/commitgraph/commitgraph.go
@@ -8,6 +8,9 @@ import (
// CommitData is a reduced representation of Commit as presented in the commit graph
// file. It is merely useful as an optimization for walking the commit graphs.
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
type CommitData struct {
// TreeHash is the hash of the root tree of the commit.
TreeHash plumbing.Hash
@@ -24,6 +27,9 @@ type CommitData struct {
// Index represents a representation of commit graph that allows indexed
// access to the nodes using commit object hash
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
type Index interface {
// GetIndexByHash gets the index in the commit graph from commit hash, if available
GetIndexByHash(h plumbing.Hash) (int, error)
diff --git a/plumbing/format/commitgraph/doc.go b/plumbing/format/commitgraph/doc.go
index 41cd8b1..c320e18 100644
--- a/plumbing/format/commitgraph/doc.go
+++ b/plumbing/format/commitgraph/doc.go
@@ -1,23 +1,26 @@
// Package commitgraph implements encoding and decoding of commit-graph files.
//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
+//
// Git commit graph format
// =======================
//
// The Git commit graph stores a list of commit OIDs and some associated
// metadata, including:
//
-// - The generation number of the commit. Commits with no parents have
-// generation number 1; commits with parents have generation number
-// one more than the maximum generation number of its parents. We
-// reserve zero as special, and can be used to mark a generation
-// number invalid or as "not computed".
+// - The generation number of the commit. Commits with no parents have
+// generation number 1; commits with parents have generation number
+// one more than the maximum generation number of its parents. We
+// reserve zero as special, and can be used to mark a generation
+// number invalid or as "not computed".
//
// - The root tree OID.
//
// - The commit date.
//
-// - The parents of the commit, stored using positional references within
-// the graph file.
+// - The parents of the commit, stored using positional references within
+// the graph file.
//
// These positional references are stored as unsigned 32-bit integers
// corresponding to the array position within the list of commit OIDs. Due
@@ -35,68 +38,68 @@
//
// HEADER:
//
-// 4-byte signature:
-// The signature is: {'C', 'G', 'P', 'H'}
+// 4-byte signature:
+// The signature is: {'C', 'G', 'P', 'H'}
//
-// 1-byte version number:
-// Currently, the only valid version is 1.
+// 1-byte version number:
+// Currently, the only valid version is 1.
//
-// 1-byte Hash Version (1 = SHA-1)
-// We infer the hash length (H) from this value.
+// 1-byte Hash Version (1 = SHA-1)
+// We infer the hash length (H) from this value.
//
-// 1-byte number (C) of "chunks"
+// 1-byte number (C) of "chunks"
//
-// 1-byte (reserved for later use)
-// Current clients should ignore this value.
+// 1-byte (reserved for later use)
+// Current clients should ignore this value.
//
// CHUNK LOOKUP:
//
-// (C + 1) * 12 bytes listing the table of contents for the chunks:
-// First 4 bytes describe the chunk id. Value 0 is a terminating label.
-// Other 8 bytes provide the byte-offset in current file for chunk to
-// start. (Chunks are ordered contiguously in the file, so you can infer
-// the length using the next chunk position if necessary.) Each chunk
-// ID appears at most once.
+// (C + 1) * 12 bytes listing the table of contents for the chunks:
+// First 4 bytes describe the chunk id. Value 0 is a terminating label.
+// Other 8 bytes provide the byte-offset in current file for chunk to
+// start. (Chunks are ordered contiguously in the file, so you can infer
+// the length using the next chunk position if necessary.) Each chunk
+// ID appears at most once.
//
-// The remaining data in the body is described one chunk at a time, and
-// these chunks may be given in any order. Chunks are required unless
-// otherwise specified.
+// The remaining data in the body is described one chunk at a time, and
+// these chunks may be given in any order. Chunks are required unless
+// otherwise specified.
//
// CHUNK DATA:
//
-// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes)
-// The ith entry, F[i], stores the number of OIDs with first
-// byte at most i. Thus F[255] stores the total
-// number of commits (N).
-//
-// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes)
-// The OIDs for all commits in the graph, sorted in ascending order.
-//
-// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes)
-// * The first H bytes are for the OID of the root tree.
-// * The next 8 bytes are for the positions of the first two parents
-// of the ith commit. Stores value 0x7000000 if no parent in that
-// position. If there are more than two parents, the second value
-// has its most-significant bit on and the other bits store an array
-// position into the Extra Edge List chunk.
-// * The next 8 bytes store the generation number of the commit and
-// the commit time in seconds since EPOCH. The generation number
-// uses the higher 30 bits of the first 4 bytes, while the commit
-// time uses the 32 bits of the second 4 bytes, along with the lowest
-// 2 bits of the lowest byte, storing the 33rd and 34th bit of the
-// commit time.
-//
-// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional]
-// This list of 4-byte values store the second through nth parents for
-// all octopus merges. The second parent value in the commit data stores
-// an array position within this list along with the most-significant bit
-// on. Starting at that array position, iterate through this list of commit
-// positions for the parents until reaching a value with the most-significant
-// bit on. The other bits correspond to the position of the last parent.
+// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes)
+// The ith entry, F[i], stores the number of OIDs with first
+// byte at most i. Thus F[255] stores the total
+// number of commits (N).
+//
+// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes)
+// The OIDs for all commits in the graph, sorted in ascending order.
+//
+// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes)
+// * The first H bytes are for the OID of the root tree.
+// * The next 8 bytes are for the positions of the first two parents
+// of the ith commit. Stores value 0x7000000 if no parent in that
+// position. If there are more than two parents, the second value
+// has its most-significant bit on and the other bits store an array
+// position into the Extra Edge List chunk.
+// * The next 8 bytes store the generation number of the commit and
+// the commit time in seconds since EPOCH. The generation number
+// uses the higher 30 bits of the first 4 bytes, while the commit
+// time uses the 32 bits of the second 4 bytes, along with the lowest
+// 2 bits of the lowest byte, storing the 33rd and 34th bit of the
+// commit time.
+//
+// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional]
+// This list of 4-byte values store the second through nth parents for
+// all octopus merges. The second parent value in the commit data stores
+// an array position within this list along with the most-significant bit
+// on. Starting at that array position, iterate through this list of commit
+// positions for the parents until reaching a value with the most-significant
+// bit on. The other bits correspond to the position of the last parent.
//
// TRAILER:
//
-// H-byte HASH-checksum of all of the above.
+// H-byte HASH-checksum of all of the above.
//
// Source:
// https://raw.githubusercontent.com/git/git/master/Documentation/technical/commit-graph-format.txt
diff --git a/plumbing/format/commitgraph/encoder.go b/plumbing/format/commitgraph/encoder.go
index 674f52e..3176353 100644
--- a/plumbing/format/commitgraph/encoder.go
+++ b/plumbing/format/commitgraph/encoder.go
@@ -10,12 +10,18 @@ import (
)
// Encoder writes MemoryIndex structs to an output stream.
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
type Encoder struct {
io.Writer
hash hash.Hash
}
// NewEncoder returns a new stream encoder that writes to w.
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
func NewEncoder(w io.Writer) *Encoder {
h := hash.New(hash.CryptoType)
mw := io.MultiWriter(w, h)
@@ -23,6 +29,9 @@ func NewEncoder(w io.Writer) *Encoder {
}
// Encode writes an index into the commit-graph file
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
func (e *Encoder) Encode(idx Index) error {
// Get all the hashes in the input index
hashes := idx.Hashes()
diff --git a/plumbing/format/commitgraph/file.go b/plumbing/format/commitgraph/file.go
index 17c1c5d..ef8fb34 100644
--- a/plumbing/format/commitgraph/file.go
+++ b/plumbing/format/commitgraph/file.go
@@ -13,6 +13,9 @@ import (
"github.com/go-git/go-git/v5/utils/binary"
)
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
+
var (
// ErrUnsupportedVersion is returned by OpenFileIndex when the commit graph
// file version is not supported.
@@ -51,6 +54,9 @@ type fileIndex struct {
// OpenFileIndex opens a serialized commit graph file in the format described at
// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
func OpenFileIndex(reader io.ReaderAt) (Index, error) {
fi := &fileIndex{reader: reader}
diff --git a/plumbing/format/commitgraph/memory.go b/plumbing/format/commitgraph/memory.go
index b24ce36..06415e5 100644
--- a/plumbing/format/commitgraph/memory.go
+++ b/plumbing/format/commitgraph/memory.go
@@ -6,12 +6,18 @@ import (
// MemoryIndex provides a way to build the commit-graph in memory
// for later encoding to file.
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
type MemoryIndex struct {
commitData []*CommitData
indexMap map[plumbing.Hash]int
}
// NewMemoryIndex creates in-memory commit graph representation
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
func NewMemoryIndex() *MemoryIndex {
return &MemoryIndex{
indexMap: make(map[plumbing.Hash]int),
diff --git a/plumbing/format/commitgraph/v2/chain.go b/plumbing/format/commitgraph/v2/chain.go
new file mode 100644
index 0000000..8da60d0
--- /dev/null
+++ b/plumbing/format/commitgraph/v2/chain.go
@@ -0,0 +1,100 @@
+package v2
+
+import (
+ "bufio"
+ "io"
+ "path"
+
+ "github.com/go-git/go-billy/v5"
+ "github.com/go-git/go-git/v5/plumbing"
+)
+
+// OpenChainFile reads a commit chain file and returns a slice of the hashes within it
+//
+// Commit-Graph chains are described at https://git-scm.com/docs/commit-graph
+// and are new line separated list of graph file hashes, oldest to newest.
+//
+// This function simply reads the file and returns the hashes as a slice.
+func OpenChainFile(r io.Reader) ([]string, error) {
+ if r == nil {
+ return nil, io.ErrUnexpectedEOF
+ }
+ bufRd := bufio.NewReader(r)
+ chain := make([]string, 0, 8)
+ for {
+ line, err := bufRd.ReadSlice('\n')
+ if err != nil {
+ if err == io.EOF {
+ break
+ }
+ return nil, err
+ }
+
+ hashStr := string(line[:len(line)-1])
+ if !plumbing.IsHash(hashStr) {
+ return nil, ErrMalformedCommitGraphFile
+ }
+ chain = append(chain, hashStr)
+ }
+ return chain, nil
+}
+
+// OpenChainOrFileIndex expects a billy.Filesystem representing a .git directory.
+// It will first attempt to read a commit-graph index file, before trying to read a
+// commit-graph chain file and its index files. If neither are present, an error is returned.
+// Otherwise an Index will be returned.
+//
+// See: https://git-scm.com/docs/commit-graph
+func OpenChainOrFileIndex(fs billy.Filesystem) (Index, error) {
+ file, err := fs.Open(path.Join("objects", "info", "commit-graph"))
+ if err != nil {
+ // try to open a chain file
+ return OpenChainIndex(fs)
+ }
+
+ index, err := OpenFileIndex(file)
+ if err != nil {
+ // Ignore any file closing errors and return the error from OpenFileIndex instead
+ _ = file.Close()
+ return nil, err
+ }
+ return index, nil
+}
+
+// OpenChainIndex expects a billy.Filesystem representing a .git directory.
+// It will read a commit-graph chain file and return a coalesced index.
+// If the chain file or a graph in that chain is not present, an error is returned.
+//
+// See: https://git-scm.com/docs/commit-graph
+func OpenChainIndex(fs billy.Filesystem) (Index, error) {
+ chainFile, err := fs.Open(path.Join("objects", "info", "commit-graphs", "commit-graph-chain"))
+ if err != nil {
+ return nil, err
+ }
+
+ chain, err := OpenChainFile(chainFile)
+ _ = chainFile.Close()
+ if err != nil {
+ return nil, err
+ }
+
+ var index Index
+ for _, hash := range chain {
+
+ file, err := fs.Open(path.Join("objects", "info", "commit-graphs", "graph-"+hash+".graph"))
+ if err != nil {
+ // Ignore all other file closing errors and return the error from opening the last file in the graph
+ _ = index.Close()
+ return nil, err
+ }
+
+ index, err = OpenFileIndexWithParent(file, index)
+ if err != nil {
+ // Ignore file closing errors and return the error from OpenFileIndex instead
+ _ = index.Close()
+ return nil, err
+ }
+ }
+
+ return index, nil
+}
diff --git a/plumbing/format/commitgraph/v2/chain_test.go b/plumbing/format/commitgraph/v2/chain_test.go
new file mode 100644
index 0000000..32ffd69
--- /dev/null
+++ b/plumbing/format/commitgraph/v2/chain_test.go
@@ -0,0 +1,100 @@
+package v2_test
+
+import (
+ "bytes"
+ "crypto"
+ "strings"
+
+ commitgraph "github.com/go-git/go-git/v5/plumbing/format/commitgraph/v2"
+ "github.com/go-git/go-git/v5/plumbing/hash"
+
+ . "gopkg.in/check.v1"
+)
+
+func (s *CommitgraphSuite) TestOpenChainFile(c *C) {
+ sha1Data := []string{
+ "c336d16298a017486c4164c40f8acb28afe64e84",
+ "31eae7b619d166c366bf5df4991f04ba8cebea0a",
+ "b977a025ca21e3b5ca123d8093bd7917694f6da7",
+ "d2a38b4a5965d529566566640519d03d2bd10f6c",
+ "35b585759cbf29f8ec428ef89da20705d59f99ec",
+ "c2bbf9fe8009b22d0f390f3c8c3f13937067590f",
+ "fc9f0643b21cfe571046e27e0c4565f3a1ee96c8",
+ "c088fd6a7e1a38e9d5a9815265cb575bb08d08ff",
+ "5fddbeb678bd2c36c5e5c891ab8f2b143ced5baf",
+ "5d7303c49ac984a9fec60523f2d5297682e16646",
+ }
+
+ sha256Data := []string{
+ "b9efda7160f2647e0974ca623f8a8f8e25fb6944f1b8f78f4db1bf07932de8eb",
+ "7095c59f8bf46e12c21d2d9da344cfe383fae18d26f3ae4d4ab7b71e3d0ddfae",
+ "25a395cb62f7656294e40a001ee19fefcdf3013d265dfcf4b744cd2549891dec",
+ "7fbd564813a82227507d9dd70f1fd21fc1f180223cd3f42e0c3090c9a8b6a7d0",
+ "aa95db1db2df91bd7200a892dd1c03bc2704c4793400d016b3ca08c148b0f7c1",
+ "2176988184b570565dc33823a02f474ad59f667a0e971c86063a7fea64776a87",
+ "d0afc0e64171140eb7902110f807a1beaa38a603d4312fd4bd14a5db2784ba62",
+ "2822136f60bfc58bbd9d624cc19fbef9f0fc0efe2a61729242e1e5f9b77fa3d0",
+ "6f207b5c43463af96bc38c43b0bf45275fa327e656a8bba8e7fc55c5ab6870d8",
+ "6cf33782619b6ff0af9c081e46323f423f8b49bf3d043887c0549bef47d60f55",
+ "60ea0753d2d4e828983528294be3f57e2a3ba37df4f59e3236133c9e2b17afc5",
+ "6b3c9f4ba5092e0807774097953ec6e9f58e8371d775bd8738a0fa98d728ba3d",
+ "c97cab8564054e30515dbe67dda4e14638aabf17b3f042d18dc8461cd098b362",
+ "9f7ece76fd2c9dae08e75176347efffc1446ad74af66004dd34680edb205dfb5",
+ "23e7a7e481b00571b63c2a7d0432f9733dd85d18a9841a3d7b96743100da5824",
+ "e684b1253fa8eb6572f35bab2fd3b6efecabf8472ede43497cd9c171973cc341",
+ "8b9f04080b0c40f7ad2a6bb5e5296cd6c06e730dffce87a0375ae7bd0f85f86e",
+ "384a745f3b14edc89526a98b96b3247b2b548541c755aadee7664352ed7f12ae",
+ "b68c8a82cd5b839917e1058570a0408819b81d16dbab81db118cc8dfc3def044",
+ "fbaf04f1a401335be57e172f4326102c658d857fde6cf2bc987520d11fc99770",
+ "57acf2aa5ac736337b120c951536c8a2b2cb23a4f0f198e86f3433370fa63105",
+ "dd7fcba4c13b6ced0b6190cdb5861adcd08446a92d67f7ec0f02f9533e09bbb0",
+ "744ef481c9b13ebd3b6e43d7e9ba25f7c7a5c8e453e6f0d50f5d71aae1591689",
+ "2c573142f1edd52b64dcd42a9c3b0ca5c9c615f757d80d25bfb02ff3eb2257e2",
+ "ea65cc58ef8520cd0335de4318a0d3b3a1ac257b7e9f82e12483fa3bce6cc0cd",
+ "1dfa626ff1523b82e21a4c29476edcdc9a89842f3c7181f63a28cd4f46cc9923",
+ "aa1153e71af836121e6f6cc716cf64880c19221d8dc367ff42359de1b8ef30e9",
+ "a7c6ec6f6569e22d2fa6e8281639d27c59b633ea00ad8ef27a43171cc985fbda",
+ "627b706d63d2cfd5a388deeaa76655ef09146fe492ee17cb0043578cef9c2800",
+ "d40eaf091ef8357b734d1047a552436eaf057d99a0c6f2068b097c324099d360",
+ "87f0ef81641da4fd3438dcaae4819f0c92a0ade54e262b21f9ded4575ff3f234",
+ "3a00a29e08d29454b5197662f70ccab5699b0ce8c85af7fbf511b8915d97cfd0",
+ }
+
+ goodShas := sha1Data
+ badShas := sha256Data
+ if hash.CryptoType == crypto.SHA256 {
+ goodShas = sha256Data
+ badShas = sha1Data
+ }
+ chainData := strings.Join(goodShas, "\n") + "\n"
+
+ chainReader := strings.NewReader(chainData)
+
+ chain, err := commitgraph.OpenChainFile(chainReader)
+ c.Assert(err, IsNil)
+ c.Assert(goodShas, DeepEquals, chain)
+
+ // Test with bad shas
+ chainData = strings.Join(badShas, "\n") + "\n"
+
+ chainReader = strings.NewReader(chainData)
+
+ chain, err = commitgraph.OpenChainFile(chainReader)
+ c.Assert(err, Equals, commitgraph.ErrMalformedCommitGraphFile)
+ c.Assert(chain, IsNil)
+
+ // Test with empty file
+ emptyChainReader := bytes.NewReader(nil)
+
+ chain, err = commitgraph.OpenChainFile(emptyChainReader)
+ c.Assert(err, IsNil)
+ c.Assert(chain, DeepEquals, []string{})
+
+ // Test with file containing only newlines
+ newlineChainData := []byte("\n\n\n")
+ newlineChainReader := bytes.NewReader(newlineChainData)
+
+ chain, err = commitgraph.OpenChainFile(newlineChainReader)
+ c.Assert(err, Equals, commitgraph.ErrMalformedCommitGraphFile)
+ c.Assert(chain, IsNil)
+}
diff --git a/plumbing/format/commitgraph/v2/chunk.go b/plumbing/format/commitgraph/v2/chunk.go
new file mode 100644
index 0000000..ab24320
--- /dev/null
+++ b/plumbing/format/commitgraph/v2/chunk.go
@@ -0,0 +1,48 @@
+package v2
+
+import "bytes"
+
+const (
+ chunkSigLen = 4 // Length of a chunk signature
+ chunkSigOffset = 4 // Offset of each chunk signature in chunkSignatures
+)
+
+// chunkSignatures contains the coalesced byte signatures for each chunk type.
+// The order of the signatures must match the order of the ChunkType constants.
+// (When adding new chunk types you must avoid introducing ambiguity, and you may need to add padding separators to this list or reorder these signatures.)
+// (i.e. it would not be possible to add a new chunk type with the signature "IDFO" without some reordering or the addition of separators.)
+var chunkSignatures = []byte("OIDFOIDLCDATGDA2GDO2EDGEBIDXBDATBASE\000\000\000\000")
+
+// ChunkType represents the type of a chunk in the commit graph file.
+type ChunkType int
+
+const (
+ OIDFanoutChunk ChunkType = iota // "OIDF"
+ OIDLookupChunk // "OIDL"
+ CommitDataChunk // "CDAT"
+ GenerationDataChunk // "GDA2"
+ GenerationDataOverflowChunk // "GDO2"
+ ExtraEdgeListChunk // "EDGE"
+ BloomFilterIndexChunk // "BIDX"
+ BloomFilterDataChunk // "BDAT"
+ BaseGraphsListChunk // "BASE"
+ ZeroChunk // "\000\000\000\000"
+)
+
+// Signature returns the byte signature for the chunk type.
+func (ct ChunkType) Signature() []byte {
+ if ct >= BaseGraphsListChunk || ct < 0 { // not a valid chunk type just return ZeroChunk
+ return chunkSignatures[ZeroChunk*chunkSigOffset : ZeroChunk*chunkSigOffset+chunkSigLen]
+ }
+
+ return chunkSignatures[ct*chunkSigOffset : ct*chunkSigOffset+chunkSigLen]
+}
+
+// ChunkTypeFromBytes returns the chunk type for the given byte signature.
+func ChunkTypeFromBytes(b []byte) (ChunkType, bool) {
+ idx := bytes.Index(chunkSignatures, b)
+ if idx == -1 || idx%chunkSigOffset != 0 { // not found, or not aligned at chunkSigOffset
+ return -1, false
+ }
+ return ChunkType(idx / chunkSigOffset), true
+}
diff --git a/plumbing/format/commitgraph/v2/commitgraph.go b/plumbing/format/commitgraph/v2/commitgraph.go
new file mode 100644
index 0000000..7c67b63
--- /dev/null
+++ b/plumbing/format/commitgraph/v2/commitgraph.go
@@ -0,0 +1,40 @@
+package v2
+
+import (
+ "io"
+ "time"
+
+ "github.com/go-git/go-git/v5/plumbing"
+)
+
+// CommitData is a reduced representation of Commit as presented in the commit graph
+// file. It is merely useful as an optimization for walking the commit graphs.
+type CommitData struct {
+ // TreeHash is the hash of the root tree of the commit.
+ TreeHash plumbing.Hash
+ // ParentIndexes are the indexes of the parent commits of the commit.
+ ParentIndexes []uint32
+ // ParentHashes are the hashes of the parent commits of the commit.
+ ParentHashes []plumbing.Hash
+ // Generation number is the pre-computed generation in the commit graph
+ // or zero if not available.
+ Generation uint64
+ // When is the timestamp of the commit.
+ When time.Time
+}
+
+// Index represents a representation of commit graph that allows indexed
+// access to the nodes using commit object hash
+type Index interface {
+ // GetIndexByHash gets the index in the commit graph from commit hash, if available
+ GetIndexByHash(h plumbing.Hash) (uint32, error)
+ // GetHashByIndex gets the hash given an index in the commit graph
+ GetHashByIndex(i uint32) (plumbing.Hash, error)
+ // GetNodeByIndex gets the commit node from the commit graph using index
+ // obtained from child node, if available
+ GetCommitDataByIndex(i uint32) (*CommitData, error)
+ // Hashes returns all the hashes that are available in the index
+ Hashes() []plumbing.Hash
+
+ io.Closer
+}
diff --git a/plumbing/format/commitgraph/v2/commitgraph_test.go b/plumbing/format/commitgraph/v2/commitgraph_test.go
new file mode 100644
index 0000000..69fdcd9
--- /dev/null
+++ b/plumbing/format/commitgraph/v2/commitgraph_test.go
@@ -0,0 +1,165 @@
+package v2_test
+
+import (
+ "os"
+ "testing"
+
+ "github.com/go-git/go-billy/v5"
+ "github.com/go-git/go-billy/v5/util"
+ "github.com/go-git/go-git/v5/plumbing"
+ commitgraph "github.com/go-git/go-git/v5/plumbing/format/commitgraph/v2"
+
+ fixtures "github.com/go-git/go-git-fixtures/v4"
+ . "gopkg.in/check.v1"
+)
+
+func Test(t *testing.T) { TestingT(t) }
+
+type CommitgraphSuite struct {
+ fixtures.Suite
+}
+
+var _ = Suite(&CommitgraphSuite{})
+
+func testReadIndex(c *C, fs billy.Filesystem, path string) commitgraph.Index {
+ reader, err := fs.Open(path)
+ c.Assert(err, IsNil)
+ index, err := commitgraph.OpenFileIndex(reader)
+ c.Assert(err, IsNil)
+ c.Assert(index, NotNil)
+ return index
+}
+
+func testDecodeHelper(c *C, index commitgraph.Index) {
+ // Root commit
+ nodeIndex, err := index.GetIndexByHash(plumbing.NewHash("347c91919944a68e9413581a1bc15519550a3afe"))
+ c.Assert(err, IsNil)
+ commitData, err := index.GetCommitDataByIndex(nodeIndex)
+ c.Assert(err, IsNil)
+ c.Assert(len(commitData.ParentIndexes), Equals, 0)
+ c.Assert(len(commitData.ParentHashes), Equals, 0)
+
+ // Regular commit
+ nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("e713b52d7e13807e87a002e812041f248db3f643"))
+ c.Assert(err, IsNil)
+ commitData, err = index.GetCommitDataByIndex(nodeIndex)
+ c.Assert(err, IsNil)
+ c.Assert(len(commitData.ParentIndexes), Equals, 1)
+ c.Assert(len(commitData.ParentHashes), Equals, 1)
+ c.Assert(commitData.ParentHashes[0].String(), Equals, "347c91919944a68e9413581a1bc15519550a3afe")
+
+ // Merge commit
+ nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("b29328491a0682c259bcce28741eac71f3499f7d"))
+ c.Assert(err, IsNil)
+ commitData, err = index.GetCommitDataByIndex(nodeIndex)
+ c.Assert(err, IsNil)
+ c.Assert(len(commitData.ParentIndexes), Equals, 2)
+ c.Assert(len(commitData.ParentHashes), Equals, 2)
+ c.Assert(commitData.ParentHashes[0].String(), Equals, "e713b52d7e13807e87a002e812041f248db3f643")
+ c.Assert(commitData.ParentHashes[1].String(), Equals, "03d2c021ff68954cf3ef0a36825e194a4b98f981")
+
+ // Octopus merge commit
+ nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("6f6c5d2be7852c782be1dd13e36496dd7ad39560"))
+ c.Assert(err, IsNil)
+ commitData, err = index.GetCommitDataByIndex(nodeIndex)
+ c.Assert(err, IsNil)
+ c.Assert(len(commitData.ParentIndexes), Equals, 3)
+ c.Assert(len(commitData.ParentHashes), Equals, 3)
+ c.Assert(commitData.ParentHashes[0].String(), Equals, "ce275064ad67d51e99f026084e20827901a8361c")
+ c.Assert(commitData.ParentHashes[1].String(), Equals, "bb13916df33ed23004c3ce9ed3b8487528e655c1")
+ c.Assert(commitData.ParentHashes[2].String(), Equals, "a45273fe2d63300e1962a9e26a6b15c276cd7082")
+
+ // Check all hashes
+ hashes := index.Hashes()
+ c.Assert(len(hashes), Equals, 11)
+ c.Assert(hashes[0].String(), Equals, "03d2c021ff68954cf3ef0a36825e194a4b98f981")
+ c.Assert(hashes[10].String(), Equals, "e713b52d7e13807e87a002e812041f248db3f643")
+}
+
+func (s *CommitgraphSuite) TestDecode(c *C) {
+ fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
+ dotgit := f.DotGit()
+ index := testReadIndex(c, dotgit, dotgit.Join("objects", "info", "commit-graph"))
+ defer index.Close()
+ testDecodeHelper(c, index)
+ })
+}
+
+func (s *CommitgraphSuite) TestDecodeChain(c *C) {
+ fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
+ dotgit := f.DotGit()
+ index, err := commitgraph.OpenChainOrFileIndex(dotgit)
+ c.Assert(err, IsNil)
+ defer index.Close()
+ testDecodeHelper(c, index)
+ })
+
+ fixtures.ByTag("commit-graph-chain").Test(c, func(f *fixtures.Fixture) {
+ dotgit := f.DotGit()
+ index, err := commitgraph.OpenChainOrFileIndex(dotgit)
+ c.Assert(err, IsNil)
+ defer index.Close()
+ testDecodeHelper(c, index)
+ })
+}
+
+func (s *CommitgraphSuite) TestReencode(c *C) {
+ fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
+ dotgit := f.DotGit()
+
+ reader, err := dotgit.Open(dotgit.Join("objects", "info", "commit-graph"))
+ c.Assert(err, IsNil)
+ defer reader.Close()
+ index, err := commitgraph.OpenFileIndex(reader)
+ c.Assert(err, IsNil)
+ defer index.Close()
+
+ writer, err := util.TempFile(dotgit, "", "commit-graph")
+ c.Assert(err, IsNil)
+ tmpName := writer.Name()
+ defer os.Remove(tmpName)
+
+ encoder := commitgraph.NewEncoder(writer)
+ err = encoder.Encode(index)
+ c.Assert(err, IsNil)
+ writer.Close()
+
+ tmpIndex := testReadIndex(c, dotgit, tmpName)
+ defer tmpIndex.Close()
+ testDecodeHelper(c, tmpIndex)
+ })
+}
+
+func (s *CommitgraphSuite) TestReencodeInMemory(c *C) {
+ fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
+ dotgit := f.DotGit()
+
+ reader, err := dotgit.Open(dotgit.Join("objects", "info", "commit-graph"))
+ c.Assert(err, IsNil)
+ index, err := commitgraph.OpenFileIndex(reader)
+ c.Assert(err, IsNil)
+
+ memoryIndex := commitgraph.NewMemoryIndex()
+ defer memoryIndex.Close()
+ for i, hash := range index.Hashes() {
+ commitData, err := index.GetCommitDataByIndex(uint32(i))
+ c.Assert(err, IsNil)
+ memoryIndex.Add(hash, commitData)
+ }
+ index.Close()
+
+ writer, err := util.TempFile(dotgit, "", "commit-graph")
+ c.Assert(err, IsNil)
+ tmpName := writer.Name()
+ defer os.Remove(tmpName)
+
+ encoder := commitgraph.NewEncoder(writer)
+ err = encoder.Encode(memoryIndex)
+ c.Assert(err, IsNil)
+ writer.Close()
+
+ tmpIndex := testReadIndex(c, dotgit, tmpName)
+ defer tmpIndex.Close()
+ testDecodeHelper(c, tmpIndex)
+ })
+}
diff --git a/plumbing/format/commitgraph/v2/doc.go b/plumbing/format/commitgraph/v2/doc.go
new file mode 100644
index 0000000..157621d
--- /dev/null
+++ b/plumbing/format/commitgraph/v2/doc.go
@@ -0,0 +1,106 @@
+// Package v2 implements encoding and decoding of commit-graph files.
+//
+// This package was created to work around the issues of the incorrect types in
+// the commitgraph package.
+//
+// Git commit graph format
+// =======================
+//
+// The Git commit graph stores a list of commit OIDs and some associated
+// metadata, including:
+//
+// - The generation number of the commit. Commits with no parents have
+// generation number 1; commits with parents have generation number
+// one more than the maximum generation number of its parents. We
+// reserve zero as special, and can be used to mark a generation
+// number invalid or as "not computed".
+//
+// - The root tree OID.
+//
+// - The commit date.
+//
+// - The parents of the commit, stored using positional references within
+// the graph file.
+//
+// These positional references are stored as unsigned 32-bit integers
+// corresponding to the array position within the list of commit OIDs. Due
+// to some special constants we use to track parents, we can store at most
+// (1 << 30) + (1 << 29) + (1 << 28) - 1 (around 1.8 billion) commits.
+//
+// == Commit graph files have the following format:
+//
+// In order to allow extensions that add extra data to the graph, we organize
+// the body into "chunks" and provide a binary lookup table at the beginning
+// of the body. The header includes certain values, such as number of chunks
+// and hash type.
+//
+// All 4-byte numbers are in network order.
+//
+// HEADER:
+//
+// 4-byte signature:
+// The signature is: {'C', 'G', 'P', 'H'}
+//
+// 1-byte version number:
+// Currently, the only valid version is 1.
+//
+// 1-byte Hash Version (1 = SHA-1)
+// We infer the hash length (H) from this value.
+//
+// 1-byte number (C) of "chunks"
+//
+// 1-byte (reserved for later use)
+// Current clients should ignore this value.
+//
+// CHUNK LOOKUP:
+//
+// (C + 1) * 12 bytes listing the table of contents for the chunks:
+// First 4 bytes describe the chunk id. Value 0 is a terminating label.
+// Other 8 bytes provide the byte-offset in current file for chunk to
+// start. (Chunks are ordered contiguously in the file, so you can infer
+// the length using the next chunk position if necessary.) Each chunk
+// ID appears at most once.
+//
+// The remaining data in the body is described one chunk at a time, and
+// these chunks may be given in any order. Chunks are required unless
+// otherwise specified.
+//
+// CHUNK DATA:
+//
+// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes)
+// The ith entry, F[i], stores the number of OIDs with first
+// byte at most i. Thus F[255] stores the total
+// number of commits (N).
+//
+// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes)
+// The OIDs for all commits in the graph, sorted in ascending order.
+//
+// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes)
+// * The first H bytes are for the OID of the root tree.
+// * The next 8 bytes are for the positions of the first two parents
+// of the ith commit. Stores value 0x7000000 if no parent in that
+// position. If there are more than two parents, the second value
+// has its most-significant bit on and the other bits store an array
+// position into the Extra Edge List chunk.
+// * The next 8 bytes store the generation number of the commit and
+// the commit time in seconds since EPOCH. The generation number
+// uses the higher 30 bits of the first 4 bytes, while the commit
+// time uses the 32 bits of the second 4 bytes, along with the lowest
+// 2 bits of the lowest byte, storing the 33rd and 34th bit of the
+// commit time.
+//
+// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional]
+// This list of 4-byte values store the second through nth parents for
+// all octopus merges. The second parent value in the commit data stores
+// an array position within this list along with the most-significant bit
+// on. Starting at that array position, iterate through this list of commit
+// positions for the parents until reaching a value with the most-significant
+// bit on. The other bits correspond to the position of the last parent.
+//
+// TRAILER:
+//
+// H-byte HASH-checksum of all of the above.
+//
+// Source:
+// https://raw.githubusercontent.com/git/git/master/Documentation/technical/commit-graph-format.txt
+package v2
diff --git a/plumbing/format/commitgraph/v2/encoder.go b/plumbing/format/commitgraph/v2/encoder.go
new file mode 100644
index 0000000..d1e41f8
--- /dev/null
+++ b/plumbing/format/commitgraph/v2/encoder.go
@@ -0,0 +1,192 @@
+package v2
+
+import (
+ "crypto"
+ "io"
+
+ "github.com/go-git/go-git/v5/plumbing"
+ "github.com/go-git/go-git/v5/plumbing/hash"
+ "github.com/go-git/go-git/v5/utils/binary"
+)
+
+// Encoder writes MemoryIndex structs to an output stream.
+type Encoder struct {
+ io.Writer
+ hash hash.Hash
+}
+
+// NewEncoder returns a new stream encoder that writes to w.
+func NewEncoder(w io.Writer) *Encoder {
+ h := hash.New(hash.CryptoType)
+ mw := io.MultiWriter(w, h)
+ return &Encoder{mw, h}
+}
+
+// Encode writes an index into the commit-graph file
+func (e *Encoder) Encode(idx Index) error {
+ // Get all the hashes in the input index
+ hashes := idx.Hashes()
+
+ // Sort the inout and prepare helper structures we'll need for encoding
+ hashToIndex, fanout, extraEdgesCount := e.prepare(idx, hashes)
+
+ chunkSignatures := [][]byte{OIDFanoutChunk.Signature(), OIDLookupChunk.Signature(), CommitDataChunk.Signature()}
+ chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * hash.Size, uint64(len(hashes)) * (hash.Size + commitDataSize)}
+ if extraEdgesCount > 0 {
+ chunkSignatures = append(chunkSignatures, ExtraEdgeListChunk.Signature())
+ chunkSizes = append(chunkSizes, uint64(extraEdgesCount)*4)
+ }
+
+ if err := e.encodeFileHeader(len(chunkSignatures)); err != nil {
+ return err
+ }
+ if err := e.encodeChunkHeaders(chunkSignatures, chunkSizes); err != nil {
+ return err
+ }
+ if err := e.encodeFanout(fanout); err != nil {
+ return err
+ }
+ if err := e.encodeOidLookup(hashes); err != nil {
+ return err
+ }
+ if extraEdges, err := e.encodeCommitData(hashes, hashToIndex, idx); err == nil {
+ if err = e.encodeExtraEdges(extraEdges); err != nil {
+ return err
+ }
+ } else {
+ return err
+ }
+
+ return e.encodeChecksum()
+}
+
+func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (hashToIndex map[plumbing.Hash]uint32, fanout []uint32, extraEdgesCount uint32) {
+ // Sort the hashes and build our index
+ plumbing.HashesSort(hashes)
+ hashToIndex = make(map[plumbing.Hash]uint32)
+ fanout = make([]uint32, 256)
+ for i, hash := range hashes {
+ hashToIndex[hash] = uint32(i)
+ fanout[hash[0]]++
+ }
+
+ // Convert the fanout to cumulative values
+ for i := 1; i <= 0xff; i++ {
+ fanout[i] += fanout[i-1]
+ }
+
+ // Find out if we will need extra edge table
+ for i := 0; i < len(hashes); i++ {
+ v, _ := idx.GetCommitDataByIndex(uint32(i))
+ if len(v.ParentHashes) > 2 {
+ extraEdgesCount += uint32(len(v.ParentHashes) - 1)
+ break
+ }
+ }
+
+ return
+}
+
+func (e *Encoder) encodeFileHeader(chunkCount int) (err error) {
+ if _, err = e.Write(commitFileSignature); err == nil {
+ version := byte(1)
+ if hash.CryptoType == crypto.SHA256 {
+ version = byte(2)
+ }
+ _, err = e.Write([]byte{1, version, byte(chunkCount), 0})
+ }
+ return
+}
+
+func (e *Encoder) encodeChunkHeaders(chunkSignatures [][]byte, chunkSizes []uint64) (err error) {
+ // 8 bytes of file header, 12 bytes for each chunk header and 12 byte for terminator
+ offset := uint64(8 + len(chunkSignatures)*12 + 12)
+ for i, signature := range chunkSignatures {
+ if _, err = e.Write(signature); err == nil {
+ err = binary.WriteUint64(e, offset)
+ }
+ if err != nil {
+ return
+ }
+ offset += chunkSizes[i]
+ }
+ if _, err = e.Write(ZeroChunk.Signature()); err == nil {
+ err = binary.WriteUint64(e, offset)
+ }
+ return
+}
+
+func (e *Encoder) encodeFanout(fanout []uint32) (err error) {
+ for i := 0; i <= 0xff; i++ {
+ if err = binary.WriteUint32(e, fanout[i]); err != nil {
+ return
+ }
+ }
+ return
+}
+
+func (e *Encoder) encodeOidLookup(hashes []plumbing.Hash) (err error) {
+ for _, hash := range hashes {
+ if _, err = e.Write(hash[:]); err != nil {
+ return err
+ }
+ }
+ return
+}
+
+func (e *Encoder) encodeCommitData(hashes []plumbing.Hash, hashToIndex map[plumbing.Hash]uint32, idx Index) (extraEdges []uint32, err error) {
+ for _, hash := range hashes {
+ origIndex, _ := idx.GetIndexByHash(hash)
+ commitData, _ := idx.GetCommitDataByIndex(origIndex)
+ if _, err = e.Write(commitData.TreeHash[:]); err != nil {
+ return
+ }
+
+ var parent1, parent2 uint32
+ if len(commitData.ParentHashes) == 0 {
+ parent1 = parentNone
+ parent2 = parentNone
+ } else if len(commitData.ParentHashes) == 1 {
+ parent1 = hashToIndex[commitData.ParentHashes[0]]
+ parent2 = parentNone
+ } else if len(commitData.ParentHashes) == 2 {
+ parent1 = hashToIndex[commitData.ParentHashes[0]]
+ parent2 = hashToIndex[commitData.ParentHashes[1]]
+ } else if len(commitData.ParentHashes) > 2 {
+ parent1 = hashToIndex[commitData.ParentHashes[0]]
+ parent2 = uint32(len(extraEdges)) | parentOctopusUsed
+ for _, parentHash := range commitData.ParentHashes[1:] {
+ extraEdges = append(extraEdges, hashToIndex[parentHash])
+ }
+ extraEdges[len(extraEdges)-1] |= parentLast
+ }
+
+ if err = binary.WriteUint32(e, parent1); err == nil {
+ err = binary.WriteUint32(e, parent2)
+ }
+ if err != nil {
+ return
+ }
+
+ unixTime := uint64(commitData.When.Unix())
+ unixTime |= uint64(commitData.Generation) << 34
+ if err = binary.WriteUint64(e, unixTime); err != nil {
+ return
+ }
+ }
+ return
+}
+
+func (e *Encoder) encodeExtraEdges(extraEdges []uint32) (err error) {
+ for _, parent := range extraEdges {
+ if err = binary.WriteUint32(e, parent); err != nil {
+ return
+ }
+ }
+ return
+}
+
+func (e *Encoder) encodeChecksum() error {
+ _, err := e.Write(e.hash.Sum(nil)[:hash.Size])
+ return err
+}
diff --git a/plumbing/format/commitgraph/v2/file.go b/plumbing/format/commitgraph/v2/file.go
new file mode 100644
index 0000000..69e0250
--- /dev/null
+++ b/plumbing/format/commitgraph/v2/file.go
@@ -0,0 +1,338 @@
+package v2
+
+import (
+ "bytes"
+ "crypto"
+ encbin "encoding/binary"
+ "errors"
+ "io"
+ "time"
+
+ "github.com/go-git/go-git/v5/plumbing"
+ "github.com/go-git/go-git/v5/plumbing/hash"
+ "github.com/go-git/go-git/v5/utils/binary"
+)
+
+var (
+ // ErrUnsupportedVersion is returned by OpenFileIndex when the commit graph
+ // file version is not supported.
+ ErrUnsupportedVersion = errors.New("unsupported version")
+ // ErrUnsupportedHash is returned by OpenFileIndex when the commit graph
+ // hash function is not supported. Currently only SHA-1 is defined and
+ // supported.
+ ErrUnsupportedHash = errors.New("unsupported hash algorithm")
+ // ErrMalformedCommitGraphFile is returned by OpenFileIndex when the commit
+ // graph file is corrupted.
+ ErrMalformedCommitGraphFile = errors.New("malformed commit graph file")
+
+ commitFileSignature = []byte{'C', 'G', 'P', 'H'}
+
+ parentNone = uint32(0x70000000)
+ parentOctopusUsed = uint32(0x80000000)
+ parentOctopusMask = uint32(0x7fffffff)
+ parentLast = uint32(0x80000000)
+)
+
+const (
+ commitDataSize = 16
+)
+
+type fileIndex struct {
+ reader ReaderAtCloser
+ fanout [256]uint32
+ offsets [9]int64
+ parent Index
+}
+
+// ReaderAtCloser is an interface that combines io.ReaderAt and io.Closer.
+type ReaderAtCloser interface {
+ io.ReaderAt
+ io.Closer
+}
+
+// OpenFileIndex opens a serialized commit graph file in the format described at
+// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt
+func OpenFileIndex(reader ReaderAtCloser) (Index, error) {
+ return OpenFileIndexWithParent(reader, nil)
+}
+
+// OpenFileIndexWithParent opens a serialized commit graph file in the format described at
+// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt
+func OpenFileIndexWithParent(reader ReaderAtCloser, parent Index) (Index, error) {
+ if reader == nil {
+ return nil, io.ErrUnexpectedEOF
+ }
+ fi := &fileIndex{reader: reader, parent: parent}
+
+ if err := fi.verifyFileHeader(); err != nil {
+ return nil, err
+ }
+ if err := fi.readChunkHeaders(); err != nil {
+ return nil, err
+ }
+ if err := fi.readFanout(); err != nil {
+ return nil, err
+ }
+
+ return fi, nil
+}
+
+// Close closes the underlying reader and the parent index if it exists.
+func (fi *fileIndex) Close() (err error) {
+ if fi.parent != nil {
+ defer func() {
+ parentErr := fi.parent.Close()
+ // only report the error from the parent if there is no error from the reader
+ if err == nil {
+ err = parentErr
+ }
+ }()
+ }
+ err = fi.reader.Close()
+ return
+}
+
+func (fi *fileIndex) verifyFileHeader() error {
+ // Verify file signature
+ signature := make([]byte, 4)
+ if _, err := fi.reader.ReadAt(signature, 0); err != nil {
+ return err
+ }
+ if !bytes.Equal(signature, commitFileSignature) {
+ return ErrMalformedCommitGraphFile
+ }
+
+ // Read and verify the file header
+ header := make([]byte, 4)
+ if _, err := fi.reader.ReadAt(header, 4); err != nil {
+ return err
+ }
+ if header[0] != 1 {
+ return ErrUnsupportedVersion
+ }
+ if !(hash.CryptoType == crypto.SHA1 && header[1] == 1) &&
+ !(hash.CryptoType == crypto.SHA256 && header[1] == 2) {
+ // Unknown hash type / unsupported hash type
+ return ErrUnsupportedHash
+ }
+
+ return nil
+}
+
+func (fi *fileIndex) readChunkHeaders() error {
+ chunkID := make([]byte, 4)
+ for i := 0; ; i++ {
+ chunkHeader := io.NewSectionReader(fi.reader, 8+(int64(i)*12), 12)
+ if _, err := io.ReadAtLeast(chunkHeader, chunkID, 4); err != nil {
+ return err
+ }
+ chunkOffset, err := binary.ReadUint64(chunkHeader)
+ if err != nil {
+ return err
+ }
+
+ chunkType, ok := ChunkTypeFromBytes(chunkID)
+ if !ok {
+ continue
+ }
+ if chunkType == ZeroChunk || int(chunkType) >= len(fi.offsets) {
+ break
+ }
+ fi.offsets[chunkType] = int64(chunkOffset)
+ }
+
+ if fi.offsets[OIDFanoutChunk] <= 0 || fi.offsets[OIDLookupChunk] <= 0 || fi.offsets[CommitDataChunk] <= 0 {
+ return ErrMalformedCommitGraphFile
+ }
+
+ return nil
+}
+
+func (fi *fileIndex) readFanout() error {
+ fanoutReader := io.NewSectionReader(fi.reader, fi.offsets[OIDFanoutChunk], 256*4)
+ for i := 0; i < 256; i++ {
+ fanoutValue, err := binary.ReadUint32(fanoutReader)
+ if err != nil {
+ return err
+ }
+ if fanoutValue > 0x7fffffff {
+ return ErrMalformedCommitGraphFile
+ }
+ fi.fanout[i] = fanoutValue
+ }
+ return nil
+}
+
+// GetIndexByHash looks up the provided hash in the commit-graph fanout and returns the index of the commit data for the given hash.
+func (fi *fileIndex) GetIndexByHash(h plumbing.Hash) (uint32, error) {
+ var oid plumbing.Hash
+
+ // Find the hash in the oid lookup table
+ var low uint32
+ if h[0] == 0 {
+ low = 0
+ } else {
+ low = fi.fanout[h[0]-1]
+ }
+ high := fi.fanout[h[0]]
+ for low < high {
+ mid := (low + high) >> 1
+ offset := fi.offsets[OIDLookupChunk] + int64(mid)*hash.Size
+ if _, err := fi.reader.ReadAt(oid[:], offset); err != nil {
+ return 0, err
+ }
+ cmp := bytes.Compare(h[:], oid[:])
+ if cmp < 0 {
+ high = mid
+ } else if cmp == 0 {
+ return mid, nil
+ } else {
+ low = mid + 1
+ }
+ }
+
+ if fi.parent != nil {
+ idx, err := fi.parent.GetIndexByHash(h)
+ if err != nil {
+ return 0, err
+ }
+ return idx + fi.fanout[0xff], nil
+ }
+
+ return 0, plumbing.ErrObjectNotFound
+}
+
+// GetCommitDataByIndex returns the commit data for the given index in the commit-graph.
+func (fi *fileIndex) GetCommitDataByIndex(idx uint32) (*CommitData, error) {
+ if idx >= fi.fanout[0xff] {
+ if fi.parent != nil {
+ data, err := fi.parent.GetCommitDataByIndex(idx - fi.fanout[0xff])
+ if err != nil {
+ return nil, err
+ }
+ for i := range data.ParentIndexes {
+ data.ParentIndexes[i] += fi.fanout[0xff]
+ }
+ return data, nil
+ }
+
+ return nil, plumbing.ErrObjectNotFound
+ }
+
+ offset := fi.offsets[CommitDataChunk] + int64(idx)*(hash.Size+commitDataSize)
+ commitDataReader := io.NewSectionReader(fi.reader, offset, hash.Size+commitDataSize)
+
+ treeHash, err := binary.ReadHash(commitDataReader)
+ if err != nil {
+ return nil, err
+ }
+ parent1, err := binary.ReadUint32(commitDataReader)
+ if err != nil {
+ return nil, err
+ }
+ parent2, err := binary.ReadUint32(commitDataReader)
+ if err != nil {
+ return nil, err
+ }
+ genAndTime, err := binary.ReadUint64(commitDataReader)
+ if err != nil {
+ return nil, err
+ }
+
+ var parentIndexes []uint32
+ if parent2&parentOctopusUsed == parentOctopusUsed {
+ // Octopus merge
+ parentIndexes = []uint32{parent1 & parentOctopusMask}
+ offset := fi.offsets[ExtraEdgeListChunk] + 4*int64(parent2&parentOctopusMask)
+ buf := make([]byte, 4)
+ for {
+ _, err := fi.reader.ReadAt(buf, offset)
+ if err != nil {
+ return nil, err
+ }
+
+ parent := encbin.BigEndian.Uint32(buf)
+ offset += 4
+ parentIndexes = append(parentIndexes, parent&parentOctopusMask)
+ if parent&parentLast == parentLast {
+ break
+ }
+ }
+ } else if parent2 != parentNone {
+ parentIndexes = []uint32{parent1 & parentOctopusMask, parent2 & parentOctopusMask}
+ } else if parent1 != parentNone {
+ parentIndexes = []uint32{parent1 & parentOctopusMask}
+ }
+
+ parentHashes, err := fi.getHashesFromIndexes(parentIndexes)
+ if err != nil {
+ return nil, err
+ }
+
+ return &CommitData{
+ TreeHash: treeHash,
+ ParentIndexes: parentIndexes,
+ ParentHashes: parentHashes,
+ Generation: genAndTime >> 34,
+ When: time.Unix(int64(genAndTime&0x3FFFFFFFF), 0),
+ }, nil
+}
+
+// GetHashByIndex looks up the hash for the given index in the commit-graph.
+func (fi *fileIndex) GetHashByIndex(idx uint32) (found plumbing.Hash, err error) {
+ if idx >= fi.fanout[0xff] {
+ if fi.parent != nil {
+ return fi.parent.GetHashByIndex(idx - fi.fanout[0xff])
+ }
+ return found, ErrMalformedCommitGraphFile
+ }
+
+ offset := fi.offsets[OIDLookupChunk] + int64(idx)*hash.Size
+ if _, err := fi.reader.ReadAt(found[:], offset); err != nil {
+ return found, err
+ }
+
+ return found, nil
+}
+
+func (fi *fileIndex) getHashesFromIndexes(indexes []uint32) ([]plumbing.Hash, error) {
+ hashes := make([]plumbing.Hash, len(indexes))
+
+ for i, idx := range indexes {
+ if idx >= fi.fanout[0xff] {
+ if fi.parent != nil {
+ hash, err := fi.parent.GetHashByIndex(idx - fi.fanout[0xff])
+ if err != nil {
+ return nil, err
+ }
+ hashes[i] = hash
+ continue
+ }
+
+ return nil, ErrMalformedCommitGraphFile
+ }
+
+ offset := fi.offsets[OIDLookupChunk] + int64(idx)*hash.Size
+ if _, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil {
+ return nil, err
+ }
+ }
+
+ return hashes, nil
+}
+
+// Hashes returns all the hashes that are available in the index.
+func (fi *fileIndex) Hashes() []plumbing.Hash {
+ hashes := make([]plumbing.Hash, fi.fanout[0xff])
+ for i := uint32(0); i < fi.fanout[0xff]; i++ {
+ offset := fi.offsets[OIDLookupChunk] + int64(i)*hash.Size
+ if n, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil || n < hash.Size {
+ return nil
+ }
+ }
+ if fi.parent != nil {
+ parentHashes := fi.parent.Hashes()
+ hashes = append(hashes, parentHashes...)
+ }
+ return hashes
+}
diff --git a/plumbing/format/commitgraph/v2/memory.go b/plumbing/format/commitgraph/v2/memory.go
new file mode 100644
index 0000000..ab7ddfa
--- /dev/null
+++ b/plumbing/format/commitgraph/v2/memory.go
@@ -0,0 +1,91 @@
+package v2
+
+import (
+ "github.com/go-git/go-git/v5/plumbing"
+)
+
+// MemoryIndex provides a way to build the commit-graph in memory
+// for later encoding to file.
+type MemoryIndex struct {
+ commitData []commitData
+ indexMap map[plumbing.Hash]uint32
+}
+
+type commitData struct {
+ Hash plumbing.Hash
+ *CommitData
+}
+
+// NewMemoryIndex creates in-memory commit graph representation
+func NewMemoryIndex() *MemoryIndex {
+ return &MemoryIndex{
+ indexMap: make(map[plumbing.Hash]uint32),
+ }
+}
+
+// GetIndexByHash gets the index in the commit graph from commit hash, if available
+func (mi *MemoryIndex) GetIndexByHash(h plumbing.Hash) (uint32, error) {
+ i, ok := mi.indexMap[h]
+ if ok {
+ return i, nil
+ }
+
+ return 0, plumbing.ErrObjectNotFound
+}
+
+// GetHashByIndex gets the hash given an index in the commit graph
+func (mi *MemoryIndex) GetHashByIndex(i uint32) (plumbing.Hash, error) {
+ if i >= uint32(len(mi.commitData)) {
+ return plumbing.ZeroHash, plumbing.ErrObjectNotFound
+ }
+
+ return mi.commitData[i].Hash, nil
+}
+
+// GetCommitDataByIndex gets the commit node from the commit graph using index
+// obtained from child node, if available
+func (mi *MemoryIndex) GetCommitDataByIndex(i uint32) (*CommitData, error) {
+ if i >= uint32(len(mi.commitData)) {
+ return nil, plumbing.ErrObjectNotFound
+ }
+
+ commitData := mi.commitData[i]
+
+ // Map parent hashes to parent indexes
+ if commitData.ParentIndexes == nil {
+ parentIndexes := make([]uint32, len(commitData.ParentHashes))
+ for i, parentHash := range commitData.ParentHashes {
+ var err error
+ if parentIndexes[i], err = mi.GetIndexByHash(parentHash); err != nil {
+ return nil, err
+ }
+ }
+ commitData.ParentIndexes = parentIndexes
+ }
+
+ return commitData.CommitData, nil
+}
+
+// Hashes returns all the hashes that are available in the index
+func (mi *MemoryIndex) Hashes() []plumbing.Hash {
+ hashes := make([]plumbing.Hash, 0, len(mi.indexMap))
+ for k := range mi.indexMap {
+ hashes = append(hashes, k)
+ }
+ return hashes
+}
+
+// Add adds new node to the memory index
+func (mi *MemoryIndex) Add(hash plumbing.Hash, data *CommitData) {
+ // The parent indexes are calculated lazily in GetNodeByIndex
+ // which allows adding nodes out of order as long as all parents
+ // are eventually resolved
+ data.ParentIndexes = nil
+ mi.indexMap[hash] = uint32(len(mi.commitData))
+ mi.commitData = append(mi.commitData, commitData{Hash: hash, CommitData: data})
+}
+
+// Close closes the index
+func (mi *MemoryIndex) Close() error {
+ return nil
+}