From 946bb8183643bdda90810fc48e450a008894b244 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sat, 30 Sep 2023 11:41:23 +0100 Subject: plumbing: commitgraph, fix types and handle commit-graph-chains Unfortunately the original variant makes some incorrect typing assumptions about commit-graphs which make handling graph chains difficult to do correctly. This creates a new subpackage and deprecates the old one. It then adds support commit graph chains. Signed-off-by: Andrew Thornton --- plumbing/format/commitgraph/v2/chain.go | 100 ++++++ plumbing/format/commitgraph/v2/chain_test.go | 100 ++++++ plumbing/format/commitgraph/v2/chunk.go | 48 +++ plumbing/format/commitgraph/v2/commitgraph.go | 40 +++ plumbing/format/commitgraph/v2/commitgraph_test.go | 165 ++++++++++ plumbing/format/commitgraph/v2/doc.go | 106 +++++++ plumbing/format/commitgraph/v2/encoder.go | 192 ++++++++++++ plumbing/format/commitgraph/v2/file.go | 338 +++++++++++++++++++++ plumbing/format/commitgraph/v2/memory.go | 91 ++++++ 9 files changed, 1180 insertions(+) create mode 100644 plumbing/format/commitgraph/v2/chain.go create mode 100644 plumbing/format/commitgraph/v2/chain_test.go create mode 100644 plumbing/format/commitgraph/v2/chunk.go create mode 100644 plumbing/format/commitgraph/v2/commitgraph.go create mode 100644 plumbing/format/commitgraph/v2/commitgraph_test.go create mode 100644 plumbing/format/commitgraph/v2/doc.go create mode 100644 plumbing/format/commitgraph/v2/encoder.go create mode 100644 plumbing/format/commitgraph/v2/file.go create mode 100644 plumbing/format/commitgraph/v2/memory.go (limited to 'plumbing/format/commitgraph/v2') diff --git a/plumbing/format/commitgraph/v2/chain.go b/plumbing/format/commitgraph/v2/chain.go new file mode 100644 index 0000000..8da60d0 --- /dev/null +++ b/plumbing/format/commitgraph/v2/chain.go @@ -0,0 +1,100 @@ +package v2 + +import ( + "bufio" + "io" + "path" + + "github.com/go-git/go-billy/v5" + "github.com/go-git/go-git/v5/plumbing" +) + +// OpenChainFile reads a commit chain file and returns a slice of the hashes within it +// +// Commit-Graph chains are described at https://git-scm.com/docs/commit-graph +// and are new line separated list of graph file hashes, oldest to newest. +// +// This function simply reads the file and returns the hashes as a slice. +func OpenChainFile(r io.Reader) ([]string, error) { + if r == nil { + return nil, io.ErrUnexpectedEOF + } + bufRd := bufio.NewReader(r) + chain := make([]string, 0, 8) + for { + line, err := bufRd.ReadSlice('\n') + if err != nil { + if err == io.EOF { + break + } + return nil, err + } + + hashStr := string(line[:len(line)-1]) + if !plumbing.IsHash(hashStr) { + return nil, ErrMalformedCommitGraphFile + } + chain = append(chain, hashStr) + } + return chain, nil +} + +// OpenChainOrFileIndex expects a billy.Filesystem representing a .git directory. +// It will first attempt to read a commit-graph index file, before trying to read a +// commit-graph chain file and its index files. If neither are present, an error is returned. +// Otherwise an Index will be returned. +// +// See: https://git-scm.com/docs/commit-graph +func OpenChainOrFileIndex(fs billy.Filesystem) (Index, error) { + file, err := fs.Open(path.Join("objects", "info", "commit-graph")) + if err != nil { + // try to open a chain file + return OpenChainIndex(fs) + } + + index, err := OpenFileIndex(file) + if err != nil { + // Ignore any file closing errors and return the error from OpenFileIndex instead + _ = file.Close() + return nil, err + } + return index, nil +} + +// OpenChainIndex expects a billy.Filesystem representing a .git directory. +// It will read a commit-graph chain file and return a coalesced index. +// If the chain file or a graph in that chain is not present, an error is returned. +// +// See: https://git-scm.com/docs/commit-graph +func OpenChainIndex(fs billy.Filesystem) (Index, error) { + chainFile, err := fs.Open(path.Join("objects", "info", "commit-graphs", "commit-graph-chain")) + if err != nil { + return nil, err + } + + chain, err := OpenChainFile(chainFile) + _ = chainFile.Close() + if err != nil { + return nil, err + } + + var index Index + for _, hash := range chain { + + file, err := fs.Open(path.Join("objects", "info", "commit-graphs", "graph-"+hash+".graph")) + if err != nil { + // Ignore all other file closing errors and return the error from opening the last file in the graph + _ = index.Close() + return nil, err + } + + index, err = OpenFileIndexWithParent(file, index) + if err != nil { + // Ignore file closing errors and return the error from OpenFileIndex instead + _ = index.Close() + return nil, err + } + } + + return index, nil +} diff --git a/plumbing/format/commitgraph/v2/chain_test.go b/plumbing/format/commitgraph/v2/chain_test.go new file mode 100644 index 0000000..32ffd69 --- /dev/null +++ b/plumbing/format/commitgraph/v2/chain_test.go @@ -0,0 +1,100 @@ +package v2_test + +import ( + "bytes" + "crypto" + "strings" + + commitgraph "github.com/go-git/go-git/v5/plumbing/format/commitgraph/v2" + "github.com/go-git/go-git/v5/plumbing/hash" + + . "gopkg.in/check.v1" +) + +func (s *CommitgraphSuite) TestOpenChainFile(c *C) { + sha1Data := []string{ + "c336d16298a017486c4164c40f8acb28afe64e84", + "31eae7b619d166c366bf5df4991f04ba8cebea0a", + "b977a025ca21e3b5ca123d8093bd7917694f6da7", + "d2a38b4a5965d529566566640519d03d2bd10f6c", + "35b585759cbf29f8ec428ef89da20705d59f99ec", + "c2bbf9fe8009b22d0f390f3c8c3f13937067590f", + "fc9f0643b21cfe571046e27e0c4565f3a1ee96c8", + "c088fd6a7e1a38e9d5a9815265cb575bb08d08ff", + "5fddbeb678bd2c36c5e5c891ab8f2b143ced5baf", + "5d7303c49ac984a9fec60523f2d5297682e16646", + } + + sha256Data := []string{ + "b9efda7160f2647e0974ca623f8a8f8e25fb6944f1b8f78f4db1bf07932de8eb", + "7095c59f8bf46e12c21d2d9da344cfe383fae18d26f3ae4d4ab7b71e3d0ddfae", + "25a395cb62f7656294e40a001ee19fefcdf3013d265dfcf4b744cd2549891dec", + "7fbd564813a82227507d9dd70f1fd21fc1f180223cd3f42e0c3090c9a8b6a7d0", + "aa95db1db2df91bd7200a892dd1c03bc2704c4793400d016b3ca08c148b0f7c1", + "2176988184b570565dc33823a02f474ad59f667a0e971c86063a7fea64776a87", + "d0afc0e64171140eb7902110f807a1beaa38a603d4312fd4bd14a5db2784ba62", + "2822136f60bfc58bbd9d624cc19fbef9f0fc0efe2a61729242e1e5f9b77fa3d0", + "6f207b5c43463af96bc38c43b0bf45275fa327e656a8bba8e7fc55c5ab6870d8", + "6cf33782619b6ff0af9c081e46323f423f8b49bf3d043887c0549bef47d60f55", + "60ea0753d2d4e828983528294be3f57e2a3ba37df4f59e3236133c9e2b17afc5", + "6b3c9f4ba5092e0807774097953ec6e9f58e8371d775bd8738a0fa98d728ba3d", + "c97cab8564054e30515dbe67dda4e14638aabf17b3f042d18dc8461cd098b362", + "9f7ece76fd2c9dae08e75176347efffc1446ad74af66004dd34680edb205dfb5", + "23e7a7e481b00571b63c2a7d0432f9733dd85d18a9841a3d7b96743100da5824", + "e684b1253fa8eb6572f35bab2fd3b6efecabf8472ede43497cd9c171973cc341", + "8b9f04080b0c40f7ad2a6bb5e5296cd6c06e730dffce87a0375ae7bd0f85f86e", + "384a745f3b14edc89526a98b96b3247b2b548541c755aadee7664352ed7f12ae", + "b68c8a82cd5b839917e1058570a0408819b81d16dbab81db118cc8dfc3def044", + "fbaf04f1a401335be57e172f4326102c658d857fde6cf2bc987520d11fc99770", + "57acf2aa5ac736337b120c951536c8a2b2cb23a4f0f198e86f3433370fa63105", + "dd7fcba4c13b6ced0b6190cdb5861adcd08446a92d67f7ec0f02f9533e09bbb0", + "744ef481c9b13ebd3b6e43d7e9ba25f7c7a5c8e453e6f0d50f5d71aae1591689", + "2c573142f1edd52b64dcd42a9c3b0ca5c9c615f757d80d25bfb02ff3eb2257e2", + "ea65cc58ef8520cd0335de4318a0d3b3a1ac257b7e9f82e12483fa3bce6cc0cd", + "1dfa626ff1523b82e21a4c29476edcdc9a89842f3c7181f63a28cd4f46cc9923", + "aa1153e71af836121e6f6cc716cf64880c19221d8dc367ff42359de1b8ef30e9", + "a7c6ec6f6569e22d2fa6e8281639d27c59b633ea00ad8ef27a43171cc985fbda", + "627b706d63d2cfd5a388deeaa76655ef09146fe492ee17cb0043578cef9c2800", + "d40eaf091ef8357b734d1047a552436eaf057d99a0c6f2068b097c324099d360", + "87f0ef81641da4fd3438dcaae4819f0c92a0ade54e262b21f9ded4575ff3f234", + "3a00a29e08d29454b5197662f70ccab5699b0ce8c85af7fbf511b8915d97cfd0", + } + + goodShas := sha1Data + badShas := sha256Data + if hash.CryptoType == crypto.SHA256 { + goodShas = sha256Data + badShas = sha1Data + } + chainData := strings.Join(goodShas, "\n") + "\n" + + chainReader := strings.NewReader(chainData) + + chain, err := commitgraph.OpenChainFile(chainReader) + c.Assert(err, IsNil) + c.Assert(goodShas, DeepEquals, chain) + + // Test with bad shas + chainData = strings.Join(badShas, "\n") + "\n" + + chainReader = strings.NewReader(chainData) + + chain, err = commitgraph.OpenChainFile(chainReader) + c.Assert(err, Equals, commitgraph.ErrMalformedCommitGraphFile) + c.Assert(chain, IsNil) + + // Test with empty file + emptyChainReader := bytes.NewReader(nil) + + chain, err = commitgraph.OpenChainFile(emptyChainReader) + c.Assert(err, IsNil) + c.Assert(chain, DeepEquals, []string{}) + + // Test with file containing only newlines + newlineChainData := []byte("\n\n\n") + newlineChainReader := bytes.NewReader(newlineChainData) + + chain, err = commitgraph.OpenChainFile(newlineChainReader) + c.Assert(err, Equals, commitgraph.ErrMalformedCommitGraphFile) + c.Assert(chain, IsNil) +} diff --git a/plumbing/format/commitgraph/v2/chunk.go b/plumbing/format/commitgraph/v2/chunk.go new file mode 100644 index 0000000..ab24320 --- /dev/null +++ b/plumbing/format/commitgraph/v2/chunk.go @@ -0,0 +1,48 @@ +package v2 + +import "bytes" + +const ( + chunkSigLen = 4 // Length of a chunk signature + chunkSigOffset = 4 // Offset of each chunk signature in chunkSignatures +) + +// chunkSignatures contains the coalesced byte signatures for each chunk type. +// The order of the signatures must match the order of the ChunkType constants. +// (When adding new chunk types you must avoid introducing ambiguity, and you may need to add padding separators to this list or reorder these signatures.) +// (i.e. it would not be possible to add a new chunk type with the signature "IDFO" without some reordering or the addition of separators.) +var chunkSignatures = []byte("OIDFOIDLCDATGDA2GDO2EDGEBIDXBDATBASE\000\000\000\000") + +// ChunkType represents the type of a chunk in the commit graph file. +type ChunkType int + +const ( + OIDFanoutChunk ChunkType = iota // "OIDF" + OIDLookupChunk // "OIDL" + CommitDataChunk // "CDAT" + GenerationDataChunk // "GDA2" + GenerationDataOverflowChunk // "GDO2" + ExtraEdgeListChunk // "EDGE" + BloomFilterIndexChunk // "BIDX" + BloomFilterDataChunk // "BDAT" + BaseGraphsListChunk // "BASE" + ZeroChunk // "\000\000\000\000" +) + +// Signature returns the byte signature for the chunk type. +func (ct ChunkType) Signature() []byte { + if ct >= BaseGraphsListChunk || ct < 0 { // not a valid chunk type just return ZeroChunk + return chunkSignatures[ZeroChunk*chunkSigOffset : ZeroChunk*chunkSigOffset+chunkSigLen] + } + + return chunkSignatures[ct*chunkSigOffset : ct*chunkSigOffset+chunkSigLen] +} + +// ChunkTypeFromBytes returns the chunk type for the given byte signature. +func ChunkTypeFromBytes(b []byte) (ChunkType, bool) { + idx := bytes.Index(chunkSignatures, b) + if idx == -1 || idx%chunkSigOffset != 0 { // not found, or not aligned at chunkSigOffset + return -1, false + } + return ChunkType(idx / chunkSigOffset), true +} diff --git a/plumbing/format/commitgraph/v2/commitgraph.go b/plumbing/format/commitgraph/v2/commitgraph.go new file mode 100644 index 0000000..7c67b63 --- /dev/null +++ b/plumbing/format/commitgraph/v2/commitgraph.go @@ -0,0 +1,40 @@ +package v2 + +import ( + "io" + "time" + + "github.com/go-git/go-git/v5/plumbing" +) + +// CommitData is a reduced representation of Commit as presented in the commit graph +// file. It is merely useful as an optimization for walking the commit graphs. +type CommitData struct { + // TreeHash is the hash of the root tree of the commit. + TreeHash plumbing.Hash + // ParentIndexes are the indexes of the parent commits of the commit. + ParentIndexes []uint32 + // ParentHashes are the hashes of the parent commits of the commit. + ParentHashes []plumbing.Hash + // Generation number is the pre-computed generation in the commit graph + // or zero if not available. + Generation uint64 + // When is the timestamp of the commit. + When time.Time +} + +// Index represents a representation of commit graph that allows indexed +// access to the nodes using commit object hash +type Index interface { + // GetIndexByHash gets the index in the commit graph from commit hash, if available + GetIndexByHash(h plumbing.Hash) (uint32, error) + // GetHashByIndex gets the hash given an index in the commit graph + GetHashByIndex(i uint32) (plumbing.Hash, error) + // GetNodeByIndex gets the commit node from the commit graph using index + // obtained from child node, if available + GetCommitDataByIndex(i uint32) (*CommitData, error) + // Hashes returns all the hashes that are available in the index + Hashes() []plumbing.Hash + + io.Closer +} diff --git a/plumbing/format/commitgraph/v2/commitgraph_test.go b/plumbing/format/commitgraph/v2/commitgraph_test.go new file mode 100644 index 0000000..69fdcd9 --- /dev/null +++ b/plumbing/format/commitgraph/v2/commitgraph_test.go @@ -0,0 +1,165 @@ +package v2_test + +import ( + "os" + "testing" + + "github.com/go-git/go-billy/v5" + "github.com/go-git/go-billy/v5/util" + "github.com/go-git/go-git/v5/plumbing" + commitgraph "github.com/go-git/go-git/v5/plumbing/format/commitgraph/v2" + + fixtures "github.com/go-git/go-git-fixtures/v4" + . "gopkg.in/check.v1" +) + +func Test(t *testing.T) { TestingT(t) } + +type CommitgraphSuite struct { + fixtures.Suite +} + +var _ = Suite(&CommitgraphSuite{}) + +func testReadIndex(c *C, fs billy.Filesystem, path string) commitgraph.Index { + reader, err := fs.Open(path) + c.Assert(err, IsNil) + index, err := commitgraph.OpenFileIndex(reader) + c.Assert(err, IsNil) + c.Assert(index, NotNil) + return index +} + +func testDecodeHelper(c *C, index commitgraph.Index) { + // Root commit + nodeIndex, err := index.GetIndexByHash(plumbing.NewHash("347c91919944a68e9413581a1bc15519550a3afe")) + c.Assert(err, IsNil) + commitData, err := index.GetCommitDataByIndex(nodeIndex) + c.Assert(err, IsNil) + c.Assert(len(commitData.ParentIndexes), Equals, 0) + c.Assert(len(commitData.ParentHashes), Equals, 0) + + // Regular commit + nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("e713b52d7e13807e87a002e812041f248db3f643")) + c.Assert(err, IsNil) + commitData, err = index.GetCommitDataByIndex(nodeIndex) + c.Assert(err, IsNil) + c.Assert(len(commitData.ParentIndexes), Equals, 1) + c.Assert(len(commitData.ParentHashes), Equals, 1) + c.Assert(commitData.ParentHashes[0].String(), Equals, "347c91919944a68e9413581a1bc15519550a3afe") + + // Merge commit + nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("b29328491a0682c259bcce28741eac71f3499f7d")) + c.Assert(err, IsNil) + commitData, err = index.GetCommitDataByIndex(nodeIndex) + c.Assert(err, IsNil) + c.Assert(len(commitData.ParentIndexes), Equals, 2) + c.Assert(len(commitData.ParentHashes), Equals, 2) + c.Assert(commitData.ParentHashes[0].String(), Equals, "e713b52d7e13807e87a002e812041f248db3f643") + c.Assert(commitData.ParentHashes[1].String(), Equals, "03d2c021ff68954cf3ef0a36825e194a4b98f981") + + // Octopus merge commit + nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("6f6c5d2be7852c782be1dd13e36496dd7ad39560")) + c.Assert(err, IsNil) + commitData, err = index.GetCommitDataByIndex(nodeIndex) + c.Assert(err, IsNil) + c.Assert(len(commitData.ParentIndexes), Equals, 3) + c.Assert(len(commitData.ParentHashes), Equals, 3) + c.Assert(commitData.ParentHashes[0].String(), Equals, "ce275064ad67d51e99f026084e20827901a8361c") + c.Assert(commitData.ParentHashes[1].String(), Equals, "bb13916df33ed23004c3ce9ed3b8487528e655c1") + c.Assert(commitData.ParentHashes[2].String(), Equals, "a45273fe2d63300e1962a9e26a6b15c276cd7082") + + // Check all hashes + hashes := index.Hashes() + c.Assert(len(hashes), Equals, 11) + c.Assert(hashes[0].String(), Equals, "03d2c021ff68954cf3ef0a36825e194a4b98f981") + c.Assert(hashes[10].String(), Equals, "e713b52d7e13807e87a002e812041f248db3f643") +} + +func (s *CommitgraphSuite) TestDecode(c *C) { + fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) { + dotgit := f.DotGit() + index := testReadIndex(c, dotgit, dotgit.Join("objects", "info", "commit-graph")) + defer index.Close() + testDecodeHelper(c, index) + }) +} + +func (s *CommitgraphSuite) TestDecodeChain(c *C) { + fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) { + dotgit := f.DotGit() + index, err := commitgraph.OpenChainOrFileIndex(dotgit) + c.Assert(err, IsNil) + defer index.Close() + testDecodeHelper(c, index) + }) + + fixtures.ByTag("commit-graph-chain").Test(c, func(f *fixtures.Fixture) { + dotgit := f.DotGit() + index, err := commitgraph.OpenChainOrFileIndex(dotgit) + c.Assert(err, IsNil) + defer index.Close() + testDecodeHelper(c, index) + }) +} + +func (s *CommitgraphSuite) TestReencode(c *C) { + fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) { + dotgit := f.DotGit() + + reader, err := dotgit.Open(dotgit.Join("objects", "info", "commit-graph")) + c.Assert(err, IsNil) + defer reader.Close() + index, err := commitgraph.OpenFileIndex(reader) + c.Assert(err, IsNil) + defer index.Close() + + writer, err := util.TempFile(dotgit, "", "commit-graph") + c.Assert(err, IsNil) + tmpName := writer.Name() + defer os.Remove(tmpName) + + encoder := commitgraph.NewEncoder(writer) + err = encoder.Encode(index) + c.Assert(err, IsNil) + writer.Close() + + tmpIndex := testReadIndex(c, dotgit, tmpName) + defer tmpIndex.Close() + testDecodeHelper(c, tmpIndex) + }) +} + +func (s *CommitgraphSuite) TestReencodeInMemory(c *C) { + fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) { + dotgit := f.DotGit() + + reader, err := dotgit.Open(dotgit.Join("objects", "info", "commit-graph")) + c.Assert(err, IsNil) + index, err := commitgraph.OpenFileIndex(reader) + c.Assert(err, IsNil) + + memoryIndex := commitgraph.NewMemoryIndex() + defer memoryIndex.Close() + for i, hash := range index.Hashes() { + commitData, err := index.GetCommitDataByIndex(uint32(i)) + c.Assert(err, IsNil) + memoryIndex.Add(hash, commitData) + } + index.Close() + + writer, err := util.TempFile(dotgit, "", "commit-graph") + c.Assert(err, IsNil) + tmpName := writer.Name() + defer os.Remove(tmpName) + + encoder := commitgraph.NewEncoder(writer) + err = encoder.Encode(memoryIndex) + c.Assert(err, IsNil) + writer.Close() + + tmpIndex := testReadIndex(c, dotgit, tmpName) + defer tmpIndex.Close() + testDecodeHelper(c, tmpIndex) + }) +} diff --git a/plumbing/format/commitgraph/v2/doc.go b/plumbing/format/commitgraph/v2/doc.go new file mode 100644 index 0000000..157621d --- /dev/null +++ b/plumbing/format/commitgraph/v2/doc.go @@ -0,0 +1,106 @@ +// Package v2 implements encoding and decoding of commit-graph files. +// +// This package was created to work around the issues of the incorrect types in +// the commitgraph package. +// +// Git commit graph format +// ======================= +// +// The Git commit graph stores a list of commit OIDs and some associated +// metadata, including: +// +// - The generation number of the commit. Commits with no parents have +// generation number 1; commits with parents have generation number +// one more than the maximum generation number of its parents. We +// reserve zero as special, and can be used to mark a generation +// number invalid or as "not computed". +// +// - The root tree OID. +// +// - The commit date. +// +// - The parents of the commit, stored using positional references within +// the graph file. +// +// These positional references are stored as unsigned 32-bit integers +// corresponding to the array position within the list of commit OIDs. Due +// to some special constants we use to track parents, we can store at most +// (1 << 30) + (1 << 29) + (1 << 28) - 1 (around 1.8 billion) commits. +// +// == Commit graph files have the following format: +// +// In order to allow extensions that add extra data to the graph, we organize +// the body into "chunks" and provide a binary lookup table at the beginning +// of the body. The header includes certain values, such as number of chunks +// and hash type. +// +// All 4-byte numbers are in network order. +// +// HEADER: +// +// 4-byte signature: +// The signature is: {'C', 'G', 'P', 'H'} +// +// 1-byte version number: +// Currently, the only valid version is 1. +// +// 1-byte Hash Version (1 = SHA-1) +// We infer the hash length (H) from this value. +// +// 1-byte number (C) of "chunks" +// +// 1-byte (reserved for later use) +// Current clients should ignore this value. +// +// CHUNK LOOKUP: +// +// (C + 1) * 12 bytes listing the table of contents for the chunks: +// First 4 bytes describe the chunk id. Value 0 is a terminating label. +// Other 8 bytes provide the byte-offset in current file for chunk to +// start. (Chunks are ordered contiguously in the file, so you can infer +// the length using the next chunk position if necessary.) Each chunk +// ID appears at most once. +// +// The remaining data in the body is described one chunk at a time, and +// these chunks may be given in any order. Chunks are required unless +// otherwise specified. +// +// CHUNK DATA: +// +// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes) +// The ith entry, F[i], stores the number of OIDs with first +// byte at most i. Thus F[255] stores the total +// number of commits (N). +// +// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes) +// The OIDs for all commits in the graph, sorted in ascending order. +// +// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes) +// * The first H bytes are for the OID of the root tree. +// * The next 8 bytes are for the positions of the first two parents +// of the ith commit. Stores value 0x7000000 if no parent in that +// position. If there are more than two parents, the second value +// has its most-significant bit on and the other bits store an array +// position into the Extra Edge List chunk. +// * The next 8 bytes store the generation number of the commit and +// the commit time in seconds since EPOCH. The generation number +// uses the higher 30 bits of the first 4 bytes, while the commit +// time uses the 32 bits of the second 4 bytes, along with the lowest +// 2 bits of the lowest byte, storing the 33rd and 34th bit of the +// commit time. +// +// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional] +// This list of 4-byte values store the second through nth parents for +// all octopus merges. The second parent value in the commit data stores +// an array position within this list along with the most-significant bit +// on. Starting at that array position, iterate through this list of commit +// positions for the parents until reaching a value with the most-significant +// bit on. The other bits correspond to the position of the last parent. +// +// TRAILER: +// +// H-byte HASH-checksum of all of the above. +// +// Source: +// https://raw.githubusercontent.com/git/git/master/Documentation/technical/commit-graph-format.txt +package v2 diff --git a/plumbing/format/commitgraph/v2/encoder.go b/plumbing/format/commitgraph/v2/encoder.go new file mode 100644 index 0000000..d1e41f8 --- /dev/null +++ b/plumbing/format/commitgraph/v2/encoder.go @@ -0,0 +1,192 @@ +package v2 + +import ( + "crypto" + "io" + + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/hash" + "github.com/go-git/go-git/v5/utils/binary" +) + +// Encoder writes MemoryIndex structs to an output stream. +type Encoder struct { + io.Writer + hash hash.Hash +} + +// NewEncoder returns a new stream encoder that writes to w. +func NewEncoder(w io.Writer) *Encoder { + h := hash.New(hash.CryptoType) + mw := io.MultiWriter(w, h) + return &Encoder{mw, h} +} + +// Encode writes an index into the commit-graph file +func (e *Encoder) Encode(idx Index) error { + // Get all the hashes in the input index + hashes := idx.Hashes() + + // Sort the inout and prepare helper structures we'll need for encoding + hashToIndex, fanout, extraEdgesCount := e.prepare(idx, hashes) + + chunkSignatures := [][]byte{OIDFanoutChunk.Signature(), OIDLookupChunk.Signature(), CommitDataChunk.Signature()} + chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * hash.Size, uint64(len(hashes)) * (hash.Size + commitDataSize)} + if extraEdgesCount > 0 { + chunkSignatures = append(chunkSignatures, ExtraEdgeListChunk.Signature()) + chunkSizes = append(chunkSizes, uint64(extraEdgesCount)*4) + } + + if err := e.encodeFileHeader(len(chunkSignatures)); err != nil { + return err + } + if err := e.encodeChunkHeaders(chunkSignatures, chunkSizes); err != nil { + return err + } + if err := e.encodeFanout(fanout); err != nil { + return err + } + if err := e.encodeOidLookup(hashes); err != nil { + return err + } + if extraEdges, err := e.encodeCommitData(hashes, hashToIndex, idx); err == nil { + if err = e.encodeExtraEdges(extraEdges); err != nil { + return err + } + } else { + return err + } + + return e.encodeChecksum() +} + +func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (hashToIndex map[plumbing.Hash]uint32, fanout []uint32, extraEdgesCount uint32) { + // Sort the hashes and build our index + plumbing.HashesSort(hashes) + hashToIndex = make(map[plumbing.Hash]uint32) + fanout = make([]uint32, 256) + for i, hash := range hashes { + hashToIndex[hash] = uint32(i) + fanout[hash[0]]++ + } + + // Convert the fanout to cumulative values + for i := 1; i <= 0xff; i++ { + fanout[i] += fanout[i-1] + } + + // Find out if we will need extra edge table + for i := 0; i < len(hashes); i++ { + v, _ := idx.GetCommitDataByIndex(uint32(i)) + if len(v.ParentHashes) > 2 { + extraEdgesCount += uint32(len(v.ParentHashes) - 1) + break + } + } + + return +} + +func (e *Encoder) encodeFileHeader(chunkCount int) (err error) { + if _, err = e.Write(commitFileSignature); err == nil { + version := byte(1) + if hash.CryptoType == crypto.SHA256 { + version = byte(2) + } + _, err = e.Write([]byte{1, version, byte(chunkCount), 0}) + } + return +} + +func (e *Encoder) encodeChunkHeaders(chunkSignatures [][]byte, chunkSizes []uint64) (err error) { + // 8 bytes of file header, 12 bytes for each chunk header and 12 byte for terminator + offset := uint64(8 + len(chunkSignatures)*12 + 12) + for i, signature := range chunkSignatures { + if _, err = e.Write(signature); err == nil { + err = binary.WriteUint64(e, offset) + } + if err != nil { + return + } + offset += chunkSizes[i] + } + if _, err = e.Write(ZeroChunk.Signature()); err == nil { + err = binary.WriteUint64(e, offset) + } + return +} + +func (e *Encoder) encodeFanout(fanout []uint32) (err error) { + for i := 0; i <= 0xff; i++ { + if err = binary.WriteUint32(e, fanout[i]); err != nil { + return + } + } + return +} + +func (e *Encoder) encodeOidLookup(hashes []plumbing.Hash) (err error) { + for _, hash := range hashes { + if _, err = e.Write(hash[:]); err != nil { + return err + } + } + return +} + +func (e *Encoder) encodeCommitData(hashes []plumbing.Hash, hashToIndex map[plumbing.Hash]uint32, idx Index) (extraEdges []uint32, err error) { + for _, hash := range hashes { + origIndex, _ := idx.GetIndexByHash(hash) + commitData, _ := idx.GetCommitDataByIndex(origIndex) + if _, err = e.Write(commitData.TreeHash[:]); err != nil { + return + } + + var parent1, parent2 uint32 + if len(commitData.ParentHashes) == 0 { + parent1 = parentNone + parent2 = parentNone + } else if len(commitData.ParentHashes) == 1 { + parent1 = hashToIndex[commitData.ParentHashes[0]] + parent2 = parentNone + } else if len(commitData.ParentHashes) == 2 { + parent1 = hashToIndex[commitData.ParentHashes[0]] + parent2 = hashToIndex[commitData.ParentHashes[1]] + } else if len(commitData.ParentHashes) > 2 { + parent1 = hashToIndex[commitData.ParentHashes[0]] + parent2 = uint32(len(extraEdges)) | parentOctopusUsed + for _, parentHash := range commitData.ParentHashes[1:] { + extraEdges = append(extraEdges, hashToIndex[parentHash]) + } + extraEdges[len(extraEdges)-1] |= parentLast + } + + if err = binary.WriteUint32(e, parent1); err == nil { + err = binary.WriteUint32(e, parent2) + } + if err != nil { + return + } + + unixTime := uint64(commitData.When.Unix()) + unixTime |= uint64(commitData.Generation) << 34 + if err = binary.WriteUint64(e, unixTime); err != nil { + return + } + } + return +} + +func (e *Encoder) encodeExtraEdges(extraEdges []uint32) (err error) { + for _, parent := range extraEdges { + if err = binary.WriteUint32(e, parent); err != nil { + return + } + } + return +} + +func (e *Encoder) encodeChecksum() error { + _, err := e.Write(e.hash.Sum(nil)[:hash.Size]) + return err +} diff --git a/plumbing/format/commitgraph/v2/file.go b/plumbing/format/commitgraph/v2/file.go new file mode 100644 index 0000000..69e0250 --- /dev/null +++ b/plumbing/format/commitgraph/v2/file.go @@ -0,0 +1,338 @@ +package v2 + +import ( + "bytes" + "crypto" + encbin "encoding/binary" + "errors" + "io" + "time" + + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/hash" + "github.com/go-git/go-git/v5/utils/binary" +) + +var ( + // ErrUnsupportedVersion is returned by OpenFileIndex when the commit graph + // file version is not supported. + ErrUnsupportedVersion = errors.New("unsupported version") + // ErrUnsupportedHash is returned by OpenFileIndex when the commit graph + // hash function is not supported. Currently only SHA-1 is defined and + // supported. + ErrUnsupportedHash = errors.New("unsupported hash algorithm") + // ErrMalformedCommitGraphFile is returned by OpenFileIndex when the commit + // graph file is corrupted. + ErrMalformedCommitGraphFile = errors.New("malformed commit graph file") + + commitFileSignature = []byte{'C', 'G', 'P', 'H'} + + parentNone = uint32(0x70000000) + parentOctopusUsed = uint32(0x80000000) + parentOctopusMask = uint32(0x7fffffff) + parentLast = uint32(0x80000000) +) + +const ( + commitDataSize = 16 +) + +type fileIndex struct { + reader ReaderAtCloser + fanout [256]uint32 + offsets [9]int64 + parent Index +} + +// ReaderAtCloser is an interface that combines io.ReaderAt and io.Closer. +type ReaderAtCloser interface { + io.ReaderAt + io.Closer +} + +// OpenFileIndex opens a serialized commit graph file in the format described at +// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt +func OpenFileIndex(reader ReaderAtCloser) (Index, error) { + return OpenFileIndexWithParent(reader, nil) +} + +// OpenFileIndexWithParent opens a serialized commit graph file in the format described at +// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt +func OpenFileIndexWithParent(reader ReaderAtCloser, parent Index) (Index, error) { + if reader == nil { + return nil, io.ErrUnexpectedEOF + } + fi := &fileIndex{reader: reader, parent: parent} + + if err := fi.verifyFileHeader(); err != nil { + return nil, err + } + if err := fi.readChunkHeaders(); err != nil { + return nil, err + } + if err := fi.readFanout(); err != nil { + return nil, err + } + + return fi, nil +} + +// Close closes the underlying reader and the parent index if it exists. +func (fi *fileIndex) Close() (err error) { + if fi.parent != nil { + defer func() { + parentErr := fi.parent.Close() + // only report the error from the parent if there is no error from the reader + if err == nil { + err = parentErr + } + }() + } + err = fi.reader.Close() + return +} + +func (fi *fileIndex) verifyFileHeader() error { + // Verify file signature + signature := make([]byte, 4) + if _, err := fi.reader.ReadAt(signature, 0); err != nil { + return err + } + if !bytes.Equal(signature, commitFileSignature) { + return ErrMalformedCommitGraphFile + } + + // Read and verify the file header + header := make([]byte, 4) + if _, err := fi.reader.ReadAt(header, 4); err != nil { + return err + } + if header[0] != 1 { + return ErrUnsupportedVersion + } + if !(hash.CryptoType == crypto.SHA1 && header[1] == 1) && + !(hash.CryptoType == crypto.SHA256 && header[1] == 2) { + // Unknown hash type / unsupported hash type + return ErrUnsupportedHash + } + + return nil +} + +func (fi *fileIndex) readChunkHeaders() error { + chunkID := make([]byte, 4) + for i := 0; ; i++ { + chunkHeader := io.NewSectionReader(fi.reader, 8+(int64(i)*12), 12) + if _, err := io.ReadAtLeast(chunkHeader, chunkID, 4); err != nil { + return err + } + chunkOffset, err := binary.ReadUint64(chunkHeader) + if err != nil { + return err + } + + chunkType, ok := ChunkTypeFromBytes(chunkID) + if !ok { + continue + } + if chunkType == ZeroChunk || int(chunkType) >= len(fi.offsets) { + break + } + fi.offsets[chunkType] = int64(chunkOffset) + } + + if fi.offsets[OIDFanoutChunk] <= 0 || fi.offsets[OIDLookupChunk] <= 0 || fi.offsets[CommitDataChunk] <= 0 { + return ErrMalformedCommitGraphFile + } + + return nil +} + +func (fi *fileIndex) readFanout() error { + fanoutReader := io.NewSectionReader(fi.reader, fi.offsets[OIDFanoutChunk], 256*4) + for i := 0; i < 256; i++ { + fanoutValue, err := binary.ReadUint32(fanoutReader) + if err != nil { + return err + } + if fanoutValue > 0x7fffffff { + return ErrMalformedCommitGraphFile + } + fi.fanout[i] = fanoutValue + } + return nil +} + +// GetIndexByHash looks up the provided hash in the commit-graph fanout and returns the index of the commit data for the given hash. +func (fi *fileIndex) GetIndexByHash(h plumbing.Hash) (uint32, error) { + var oid plumbing.Hash + + // Find the hash in the oid lookup table + var low uint32 + if h[0] == 0 { + low = 0 + } else { + low = fi.fanout[h[0]-1] + } + high := fi.fanout[h[0]] + for low < high { + mid := (low + high) >> 1 + offset := fi.offsets[OIDLookupChunk] + int64(mid)*hash.Size + if _, err := fi.reader.ReadAt(oid[:], offset); err != nil { + return 0, err + } + cmp := bytes.Compare(h[:], oid[:]) + if cmp < 0 { + high = mid + } else if cmp == 0 { + return mid, nil + } else { + low = mid + 1 + } + } + + if fi.parent != nil { + idx, err := fi.parent.GetIndexByHash(h) + if err != nil { + return 0, err + } + return idx + fi.fanout[0xff], nil + } + + return 0, plumbing.ErrObjectNotFound +} + +// GetCommitDataByIndex returns the commit data for the given index in the commit-graph. +func (fi *fileIndex) GetCommitDataByIndex(idx uint32) (*CommitData, error) { + if idx >= fi.fanout[0xff] { + if fi.parent != nil { + data, err := fi.parent.GetCommitDataByIndex(idx - fi.fanout[0xff]) + if err != nil { + return nil, err + } + for i := range data.ParentIndexes { + data.ParentIndexes[i] += fi.fanout[0xff] + } + return data, nil + } + + return nil, plumbing.ErrObjectNotFound + } + + offset := fi.offsets[CommitDataChunk] + int64(idx)*(hash.Size+commitDataSize) + commitDataReader := io.NewSectionReader(fi.reader, offset, hash.Size+commitDataSize) + + treeHash, err := binary.ReadHash(commitDataReader) + if err != nil { + return nil, err + } + parent1, err := binary.ReadUint32(commitDataReader) + if err != nil { + return nil, err + } + parent2, err := binary.ReadUint32(commitDataReader) + if err != nil { + return nil, err + } + genAndTime, err := binary.ReadUint64(commitDataReader) + if err != nil { + return nil, err + } + + var parentIndexes []uint32 + if parent2&parentOctopusUsed == parentOctopusUsed { + // Octopus merge + parentIndexes = []uint32{parent1 & parentOctopusMask} + offset := fi.offsets[ExtraEdgeListChunk] + 4*int64(parent2&parentOctopusMask) + buf := make([]byte, 4) + for { + _, err := fi.reader.ReadAt(buf, offset) + if err != nil { + return nil, err + } + + parent := encbin.BigEndian.Uint32(buf) + offset += 4 + parentIndexes = append(parentIndexes, parent&parentOctopusMask) + if parent&parentLast == parentLast { + break + } + } + } else if parent2 != parentNone { + parentIndexes = []uint32{parent1 & parentOctopusMask, parent2 & parentOctopusMask} + } else if parent1 != parentNone { + parentIndexes = []uint32{parent1 & parentOctopusMask} + } + + parentHashes, err := fi.getHashesFromIndexes(parentIndexes) + if err != nil { + return nil, err + } + + return &CommitData{ + TreeHash: treeHash, + ParentIndexes: parentIndexes, + ParentHashes: parentHashes, + Generation: genAndTime >> 34, + When: time.Unix(int64(genAndTime&0x3FFFFFFFF), 0), + }, nil +} + +// GetHashByIndex looks up the hash for the given index in the commit-graph. +func (fi *fileIndex) GetHashByIndex(idx uint32) (found plumbing.Hash, err error) { + if idx >= fi.fanout[0xff] { + if fi.parent != nil { + return fi.parent.GetHashByIndex(idx - fi.fanout[0xff]) + } + return found, ErrMalformedCommitGraphFile + } + + offset := fi.offsets[OIDLookupChunk] + int64(idx)*hash.Size + if _, err := fi.reader.ReadAt(found[:], offset); err != nil { + return found, err + } + + return found, nil +} + +func (fi *fileIndex) getHashesFromIndexes(indexes []uint32) ([]plumbing.Hash, error) { + hashes := make([]plumbing.Hash, len(indexes)) + + for i, idx := range indexes { + if idx >= fi.fanout[0xff] { + if fi.parent != nil { + hash, err := fi.parent.GetHashByIndex(idx - fi.fanout[0xff]) + if err != nil { + return nil, err + } + hashes[i] = hash + continue + } + + return nil, ErrMalformedCommitGraphFile + } + + offset := fi.offsets[OIDLookupChunk] + int64(idx)*hash.Size + if _, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil { + return nil, err + } + } + + return hashes, nil +} + +// Hashes returns all the hashes that are available in the index. +func (fi *fileIndex) Hashes() []plumbing.Hash { + hashes := make([]plumbing.Hash, fi.fanout[0xff]) + for i := uint32(0); i < fi.fanout[0xff]; i++ { + offset := fi.offsets[OIDLookupChunk] + int64(i)*hash.Size + if n, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil || n < hash.Size { + return nil + } + } + if fi.parent != nil { + parentHashes := fi.parent.Hashes() + hashes = append(hashes, parentHashes...) + } + return hashes +} diff --git a/plumbing/format/commitgraph/v2/memory.go b/plumbing/format/commitgraph/v2/memory.go new file mode 100644 index 0000000..ab7ddfa --- /dev/null +++ b/plumbing/format/commitgraph/v2/memory.go @@ -0,0 +1,91 @@ +package v2 + +import ( + "github.com/go-git/go-git/v5/plumbing" +) + +// MemoryIndex provides a way to build the commit-graph in memory +// for later encoding to file. +type MemoryIndex struct { + commitData []commitData + indexMap map[plumbing.Hash]uint32 +} + +type commitData struct { + Hash plumbing.Hash + *CommitData +} + +// NewMemoryIndex creates in-memory commit graph representation +func NewMemoryIndex() *MemoryIndex { + return &MemoryIndex{ + indexMap: make(map[plumbing.Hash]uint32), + } +} + +// GetIndexByHash gets the index in the commit graph from commit hash, if available +func (mi *MemoryIndex) GetIndexByHash(h plumbing.Hash) (uint32, error) { + i, ok := mi.indexMap[h] + if ok { + return i, nil + } + + return 0, plumbing.ErrObjectNotFound +} + +// GetHashByIndex gets the hash given an index in the commit graph +func (mi *MemoryIndex) GetHashByIndex(i uint32) (plumbing.Hash, error) { + if i >= uint32(len(mi.commitData)) { + return plumbing.ZeroHash, plumbing.ErrObjectNotFound + } + + return mi.commitData[i].Hash, nil +} + +// GetCommitDataByIndex gets the commit node from the commit graph using index +// obtained from child node, if available +func (mi *MemoryIndex) GetCommitDataByIndex(i uint32) (*CommitData, error) { + if i >= uint32(len(mi.commitData)) { + return nil, plumbing.ErrObjectNotFound + } + + commitData := mi.commitData[i] + + // Map parent hashes to parent indexes + if commitData.ParentIndexes == nil { + parentIndexes := make([]uint32, len(commitData.ParentHashes)) + for i, parentHash := range commitData.ParentHashes { + var err error + if parentIndexes[i], err = mi.GetIndexByHash(parentHash); err != nil { + return nil, err + } + } + commitData.ParentIndexes = parentIndexes + } + + return commitData.CommitData, nil +} + +// Hashes returns all the hashes that are available in the index +func (mi *MemoryIndex) Hashes() []plumbing.Hash { + hashes := make([]plumbing.Hash, 0, len(mi.indexMap)) + for k := range mi.indexMap { + hashes = append(hashes, k) + } + return hashes +} + +// Add adds new node to the memory index +func (mi *MemoryIndex) Add(hash plumbing.Hash, data *CommitData) { + // The parent indexes are calculated lazily in GetNodeByIndex + // which allows adding nodes out of order as long as all parents + // are eventually resolved + data.ParentIndexes = nil + mi.indexMap[hash] = uint32(len(mi.commitData)) + mi.commitData = append(mi.commitData, commitData{Hash: hash, CommitData: data}) +} + +// Close closes the index +func (mi *MemoryIndex) Close() error { + return nil +} -- cgit