From 946bb8183643bdda90810fc48e450a008894b244 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sat, 30 Sep 2023 11:41:23 +0100 Subject: plumbing: commitgraph, fix types and handle commit-graph-chains Unfortunately the original variant makes some incorrect typing assumptions about commit-graphs which make handling graph chains difficult to do correctly. This creates a new subpackage and deprecates the old one. It then adds support commit graph chains. Signed-off-by: Andrew Thornton --- go.mod | 4 +- go.sum | 16 +- plumbing/format/commitgraph/commitgraph.go | 6 + plumbing/format/commitgraph/doc.go | 113 +++---- plumbing/format/commitgraph/encoder.go | 9 + plumbing/format/commitgraph/file.go | 6 + plumbing/format/commitgraph/memory.go | 6 + plumbing/format/commitgraph/v2/chain.go | 100 ++++++ plumbing/format/commitgraph/v2/chain_test.go | 100 ++++++ plumbing/format/commitgraph/v2/chunk.go | 48 +++ plumbing/format/commitgraph/v2/commitgraph.go | 40 +++ plumbing/format/commitgraph/v2/commitgraph_test.go | 165 ++++++++++ plumbing/format/commitgraph/v2/doc.go | 106 +++++++ plumbing/format/commitgraph/v2/encoder.go | 192 ++++++++++++ plumbing/format/commitgraph/v2/file.go | 338 +++++++++++++++++++++ plumbing/format/commitgraph/v2/memory.go | 91 ++++++ plumbing/object/commitgraph/commitnode_graph.go | 264 ++++++++-------- plumbing/object/commitgraph/commitnode_object.go | 180 +++++------ plumbing/object/commitgraph/commitnode_test.go | 9 +- .../object/commitgraph/commitnode_walker_ctime.go | 210 ++++++------- 20 files changed, 1607 insertions(+), 396 deletions(-) create mode 100644 plumbing/format/commitgraph/v2/chain.go create mode 100644 plumbing/format/commitgraph/v2/chain_test.go create mode 100644 plumbing/format/commitgraph/v2/chunk.go create mode 100644 plumbing/format/commitgraph/v2/commitgraph.go create mode 100644 plumbing/format/commitgraph/v2/commitgraph_test.go create mode 100644 plumbing/format/commitgraph/v2/doc.go create mode 100644 plumbing/format/commitgraph/v2/encoder.go create mode 100644 plumbing/format/commitgraph/v2/file.go create mode 100644 plumbing/format/commitgraph/v2/memory.go diff --git a/go.mod b/go.mod index 8de45b2..9df2020 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/gliderlabs/ssh v0.3.5 github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 github.com/go-git/go-billy/v5 v5.5.0 - github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20230305113008-0c11038e723f + github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231007200033-41cf6f1b6389 github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da github.com/google/go-cmp v0.5.9 github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 @@ -25,7 +25,7 @@ require ( github.com/xanzy/ssh-agent v0.3.3 golang.org/x/crypto v0.13.0 golang.org/x/net v0.15.0 - golang.org/x/sys v0.12.0 + golang.org/x/sys v0.13.0 golang.org/x/text v0.13.0 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c ) diff --git a/go.sum b/go.sum index 8deb90e..7c62b10 100644 --- a/go.sum +++ b/go.sum @@ -30,11 +30,10 @@ github.com/gliderlabs/ssh v0.3.5 h1:OcaySEmAQJgyYcArR+gGGTHCyE7nvhEMTlYY+Dp8CpY= github.com/gliderlabs/ssh v0.3.5/go.mod h1:8XB4KraRrX39qHhT6yxPsHedjA08I/uBVwj4xC+/+z4= github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI= github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376/go.mod h1:an3vInlBmSxCcxctByoQdvwPiA7DTK7jaaFDBTtu0ic= -github.com/go-git/go-billy/v5 v5.3.1/go.mod h1:pmpqyWchKfYfrkb/UVH4otLvyi/5gJlGI4Hb3ZqZ3W0= github.com/go-git/go-billy/v5 v5.5.0 h1:yEY4yhzCDuMGSv83oGxiBotRzhwhNr8VZyphhiu+mTU= github.com/go-git/go-billy/v5 v5.5.0/go.mod h1:hmexnoNsr2SJU1Ju67OaNz5ASJY3+sHgFRpCtpDCKow= -github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20230305113008-0c11038e723f h1:Pz0DHeFij3XFhoBRGUDPzSJ+w2UcK5/0JvF8DRI58r8= -github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20230305113008-0c11038e723f/go.mod h1:8LHG1a3SRW71ettAD/jW13h8c6AqjVSeL11RAdgaqpo= +github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231007200033-41cf6f1b6389 h1:AlfdJ8f+G+4a4fXeHmAlKfyR3Yup4sVGCXlh+e+TrE8= +github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231007200033-41cf6f1b6389/go.mod h1:1OCfN199q1Jm3HZlxleg+Dw/mwps2Wbk9frAWm+4FII= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= @@ -55,7 +54,6 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/matryer/is v1.2.0 h1:92UTHpy8CDwaJ08GqLDzhhuixiBUUD1p3AU6PHddz4A= github.com/matryer/is v1.2.0/go.mod h1:2fLPjFQM9rhQ15aVEtbuwhJinnOqrmgXPNdZsdwlWXA= -github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/onsi/gomega v1.27.10 h1:naR28SdDFlqrG6kScpT8VWpu1xWY5nJRCF3XaYyBjhI= github.com/pjbgf/sha1cd v0.3.0 h1:4D5XXmUUBUl/xQ6IjCkEAbqXskkq/4O7LmGn0AqMDs4= github.com/pjbgf/sha1cd v0.3.0/go.mod h1:nZ1rrWOcGJ5uZgEEVL1VUM9iRQiZvWdbZjkKyFzPPsI= @@ -76,8 +74,7 @@ github.com/skeema/knownhosts v1.2.0/go.mod h1:g4fPeYpque7P0xefxtGzV81ihjC8sX2Iqp github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM= github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= @@ -109,7 +106,6 @@ golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -124,8 +120,8 @@ golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o= -golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.0.0-20220722155259-a9ba230a4035/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -151,13 +147,11 @@ golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME= gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/plumbing/format/commitgraph/commitgraph.go b/plumbing/format/commitgraph/commitgraph.go index 3d59323..e772d26 100644 --- a/plumbing/format/commitgraph/commitgraph.go +++ b/plumbing/format/commitgraph/commitgraph.go @@ -8,6 +8,9 @@ import ( // CommitData is a reduced representation of Commit as presented in the commit graph // file. It is merely useful as an optimization for walking the commit graphs. +// +// Deprecated: This package uses the wrong types for Generation and Index in CommitData. +// Use the v2 package instead. type CommitData struct { // TreeHash is the hash of the root tree of the commit. TreeHash plumbing.Hash @@ -24,6 +27,9 @@ type CommitData struct { // Index represents a representation of commit graph that allows indexed // access to the nodes using commit object hash +// +// Deprecated: This package uses the wrong types for Generation and Index in CommitData. +// Use the v2 package instead. type Index interface { // GetIndexByHash gets the index in the commit graph from commit hash, if available GetIndexByHash(h plumbing.Hash) (int, error) diff --git a/plumbing/format/commitgraph/doc.go b/plumbing/format/commitgraph/doc.go index 41cd8b1..c320e18 100644 --- a/plumbing/format/commitgraph/doc.go +++ b/plumbing/format/commitgraph/doc.go @@ -1,23 +1,26 @@ // Package commitgraph implements encoding and decoding of commit-graph files. // +// Deprecated: This package uses the wrong types for Generation and Index in CommitData. +// Use the v2 package instead. +// // Git commit graph format // ======================= // // The Git commit graph stores a list of commit OIDs and some associated // metadata, including: // -// - The generation number of the commit. Commits with no parents have -// generation number 1; commits with parents have generation number -// one more than the maximum generation number of its parents. We -// reserve zero as special, and can be used to mark a generation -// number invalid or as "not computed". +// - The generation number of the commit. Commits with no parents have +// generation number 1; commits with parents have generation number +// one more than the maximum generation number of its parents. We +// reserve zero as special, and can be used to mark a generation +// number invalid or as "not computed". // // - The root tree OID. // // - The commit date. // -// - The parents of the commit, stored using positional references within -// the graph file. +// - The parents of the commit, stored using positional references within +// the graph file. // // These positional references are stored as unsigned 32-bit integers // corresponding to the array position within the list of commit OIDs. Due @@ -35,68 +38,68 @@ // // HEADER: // -// 4-byte signature: -// The signature is: {'C', 'G', 'P', 'H'} +// 4-byte signature: +// The signature is: {'C', 'G', 'P', 'H'} // -// 1-byte version number: -// Currently, the only valid version is 1. +// 1-byte version number: +// Currently, the only valid version is 1. // -// 1-byte Hash Version (1 = SHA-1) -// We infer the hash length (H) from this value. +// 1-byte Hash Version (1 = SHA-1) +// We infer the hash length (H) from this value. // -// 1-byte number (C) of "chunks" +// 1-byte number (C) of "chunks" // -// 1-byte (reserved for later use) -// Current clients should ignore this value. +// 1-byte (reserved for later use) +// Current clients should ignore this value. // // CHUNK LOOKUP: // -// (C + 1) * 12 bytes listing the table of contents for the chunks: -// First 4 bytes describe the chunk id. Value 0 is a terminating label. -// Other 8 bytes provide the byte-offset in current file for chunk to -// start. (Chunks are ordered contiguously in the file, so you can infer -// the length using the next chunk position if necessary.) Each chunk -// ID appears at most once. +// (C + 1) * 12 bytes listing the table of contents for the chunks: +// First 4 bytes describe the chunk id. Value 0 is a terminating label. +// Other 8 bytes provide the byte-offset in current file for chunk to +// start. (Chunks are ordered contiguously in the file, so you can infer +// the length using the next chunk position if necessary.) Each chunk +// ID appears at most once. // -// The remaining data in the body is described one chunk at a time, and -// these chunks may be given in any order. Chunks are required unless -// otherwise specified. +// The remaining data in the body is described one chunk at a time, and +// these chunks may be given in any order. Chunks are required unless +// otherwise specified. // // CHUNK DATA: // -// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes) -// The ith entry, F[i], stores the number of OIDs with first -// byte at most i. Thus F[255] stores the total -// number of commits (N). -// -// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes) -// The OIDs for all commits in the graph, sorted in ascending order. -// -// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes) -// * The first H bytes are for the OID of the root tree. -// * The next 8 bytes are for the positions of the first two parents -// of the ith commit. Stores value 0x7000000 if no parent in that -// position. If there are more than two parents, the second value -// has its most-significant bit on and the other bits store an array -// position into the Extra Edge List chunk. -// * The next 8 bytes store the generation number of the commit and -// the commit time in seconds since EPOCH. The generation number -// uses the higher 30 bits of the first 4 bytes, while the commit -// time uses the 32 bits of the second 4 bytes, along with the lowest -// 2 bits of the lowest byte, storing the 33rd and 34th bit of the -// commit time. -// -// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional] -// This list of 4-byte values store the second through nth parents for -// all octopus merges. The second parent value in the commit data stores -// an array position within this list along with the most-significant bit -// on. Starting at that array position, iterate through this list of commit -// positions for the parents until reaching a value with the most-significant -// bit on. The other bits correspond to the position of the last parent. +// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes) +// The ith entry, F[i], stores the number of OIDs with first +// byte at most i. Thus F[255] stores the total +// number of commits (N). +// +// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes) +// The OIDs for all commits in the graph, sorted in ascending order. +// +// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes) +// * The first H bytes are for the OID of the root tree. +// * The next 8 bytes are for the positions of the first two parents +// of the ith commit. Stores value 0x7000000 if no parent in that +// position. If there are more than two parents, the second value +// has its most-significant bit on and the other bits store an array +// position into the Extra Edge List chunk. +// * The next 8 bytes store the generation number of the commit and +// the commit time in seconds since EPOCH. The generation number +// uses the higher 30 bits of the first 4 bytes, while the commit +// time uses the 32 bits of the second 4 bytes, along with the lowest +// 2 bits of the lowest byte, storing the 33rd and 34th bit of the +// commit time. +// +// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional] +// This list of 4-byte values store the second through nth parents for +// all octopus merges. The second parent value in the commit data stores +// an array position within this list along with the most-significant bit +// on. Starting at that array position, iterate through this list of commit +// positions for the parents until reaching a value with the most-significant +// bit on. The other bits correspond to the position of the last parent. // // TRAILER: // -// H-byte HASH-checksum of all of the above. +// H-byte HASH-checksum of all of the above. // // Source: // https://raw.githubusercontent.com/git/git/master/Documentation/technical/commit-graph-format.txt diff --git a/plumbing/format/commitgraph/encoder.go b/plumbing/format/commitgraph/encoder.go index 674f52e..3176353 100644 --- a/plumbing/format/commitgraph/encoder.go +++ b/plumbing/format/commitgraph/encoder.go @@ -10,12 +10,18 @@ import ( ) // Encoder writes MemoryIndex structs to an output stream. +// +// Deprecated: This package uses the wrong types for Generation and Index in CommitData. +// Use the v2 package instead. type Encoder struct { io.Writer hash hash.Hash } // NewEncoder returns a new stream encoder that writes to w. +// +// Deprecated: This package uses the wrong types for Generation and Index in CommitData. +// Use the v2 package instead. func NewEncoder(w io.Writer) *Encoder { h := hash.New(hash.CryptoType) mw := io.MultiWriter(w, h) @@ -23,6 +29,9 @@ func NewEncoder(w io.Writer) *Encoder { } // Encode writes an index into the commit-graph file +// +// Deprecated: This package uses the wrong types for Generation and Index in CommitData. +// Use the v2 package instead. func (e *Encoder) Encode(idx Index) error { // Get all the hashes in the input index hashes := idx.Hashes() diff --git a/plumbing/format/commitgraph/file.go b/plumbing/format/commitgraph/file.go index 17c1c5d..ef8fb34 100644 --- a/plumbing/format/commitgraph/file.go +++ b/plumbing/format/commitgraph/file.go @@ -13,6 +13,9 @@ import ( "github.com/go-git/go-git/v5/utils/binary" ) +// Deprecated: This package uses the wrong types for Generation and Index in CommitData. +// Use the v2 package instead. + var ( // ErrUnsupportedVersion is returned by OpenFileIndex when the commit graph // file version is not supported. @@ -51,6 +54,9 @@ type fileIndex struct { // OpenFileIndex opens a serialized commit graph file in the format described at // https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt +// +// Deprecated: This package uses the wrong types for Generation and Index in CommitData. +// Use the v2 package instead. func OpenFileIndex(reader io.ReaderAt) (Index, error) { fi := &fileIndex{reader: reader} diff --git a/plumbing/format/commitgraph/memory.go b/plumbing/format/commitgraph/memory.go index b24ce36..06415e5 100644 --- a/plumbing/format/commitgraph/memory.go +++ b/plumbing/format/commitgraph/memory.go @@ -6,12 +6,18 @@ import ( // MemoryIndex provides a way to build the commit-graph in memory // for later encoding to file. +// +// Deprecated: This package uses the wrong types for Generation and Index in CommitData. +// Use the v2 package instead. type MemoryIndex struct { commitData []*CommitData indexMap map[plumbing.Hash]int } // NewMemoryIndex creates in-memory commit graph representation +// +// Deprecated: This package uses the wrong types for Generation and Index in CommitData. +// Use the v2 package instead. func NewMemoryIndex() *MemoryIndex { return &MemoryIndex{ indexMap: make(map[plumbing.Hash]int), diff --git a/plumbing/format/commitgraph/v2/chain.go b/plumbing/format/commitgraph/v2/chain.go new file mode 100644 index 0000000..8da60d0 --- /dev/null +++ b/plumbing/format/commitgraph/v2/chain.go @@ -0,0 +1,100 @@ +package v2 + +import ( + "bufio" + "io" + "path" + + "github.com/go-git/go-billy/v5" + "github.com/go-git/go-git/v5/plumbing" +) + +// OpenChainFile reads a commit chain file and returns a slice of the hashes within it +// +// Commit-Graph chains are described at https://git-scm.com/docs/commit-graph +// and are new line separated list of graph file hashes, oldest to newest. +// +// This function simply reads the file and returns the hashes as a slice. +func OpenChainFile(r io.Reader) ([]string, error) { + if r == nil { + return nil, io.ErrUnexpectedEOF + } + bufRd := bufio.NewReader(r) + chain := make([]string, 0, 8) + for { + line, err := bufRd.ReadSlice('\n') + if err != nil { + if err == io.EOF { + break + } + return nil, err + } + + hashStr := string(line[:len(line)-1]) + if !plumbing.IsHash(hashStr) { + return nil, ErrMalformedCommitGraphFile + } + chain = append(chain, hashStr) + } + return chain, nil +} + +// OpenChainOrFileIndex expects a billy.Filesystem representing a .git directory. +// It will first attempt to read a commit-graph index file, before trying to read a +// commit-graph chain file and its index files. If neither are present, an error is returned. +// Otherwise an Index will be returned. +// +// See: https://git-scm.com/docs/commit-graph +func OpenChainOrFileIndex(fs billy.Filesystem) (Index, error) { + file, err := fs.Open(path.Join("objects", "info", "commit-graph")) + if err != nil { + // try to open a chain file + return OpenChainIndex(fs) + } + + index, err := OpenFileIndex(file) + if err != nil { + // Ignore any file closing errors and return the error from OpenFileIndex instead + _ = file.Close() + return nil, err + } + return index, nil +} + +// OpenChainIndex expects a billy.Filesystem representing a .git directory. +// It will read a commit-graph chain file and return a coalesced index. +// If the chain file or a graph in that chain is not present, an error is returned. +// +// See: https://git-scm.com/docs/commit-graph +func OpenChainIndex(fs billy.Filesystem) (Index, error) { + chainFile, err := fs.Open(path.Join("objects", "info", "commit-graphs", "commit-graph-chain")) + if err != nil { + return nil, err + } + + chain, err := OpenChainFile(chainFile) + _ = chainFile.Close() + if err != nil { + return nil, err + } + + var index Index + for _, hash := range chain { + + file, err := fs.Open(path.Join("objects", "info", "commit-graphs", "graph-"+hash+".graph")) + if err != nil { + // Ignore all other file closing errors and return the error from opening the last file in the graph + _ = index.Close() + return nil, err + } + + index, err = OpenFileIndexWithParent(file, index) + if err != nil { + // Ignore file closing errors and return the error from OpenFileIndex instead + _ = index.Close() + return nil, err + } + } + + return index, nil +} diff --git a/plumbing/format/commitgraph/v2/chain_test.go b/plumbing/format/commitgraph/v2/chain_test.go new file mode 100644 index 0000000..32ffd69 --- /dev/null +++ b/plumbing/format/commitgraph/v2/chain_test.go @@ -0,0 +1,100 @@ +package v2_test + +import ( + "bytes" + "crypto" + "strings" + + commitgraph "github.com/go-git/go-git/v5/plumbing/format/commitgraph/v2" + "github.com/go-git/go-git/v5/plumbing/hash" + + . "gopkg.in/check.v1" +) + +func (s *CommitgraphSuite) TestOpenChainFile(c *C) { + sha1Data := []string{ + "c336d16298a017486c4164c40f8acb28afe64e84", + "31eae7b619d166c366bf5df4991f04ba8cebea0a", + "b977a025ca21e3b5ca123d8093bd7917694f6da7", + "d2a38b4a5965d529566566640519d03d2bd10f6c", + "35b585759cbf29f8ec428ef89da20705d59f99ec", + "c2bbf9fe8009b22d0f390f3c8c3f13937067590f", + "fc9f0643b21cfe571046e27e0c4565f3a1ee96c8", + "c088fd6a7e1a38e9d5a9815265cb575bb08d08ff", + "5fddbeb678bd2c36c5e5c891ab8f2b143ced5baf", + "5d7303c49ac984a9fec60523f2d5297682e16646", + } + + sha256Data := []string{ + "b9efda7160f2647e0974ca623f8a8f8e25fb6944f1b8f78f4db1bf07932de8eb", + "7095c59f8bf46e12c21d2d9da344cfe383fae18d26f3ae4d4ab7b71e3d0ddfae", + "25a395cb62f7656294e40a001ee19fefcdf3013d265dfcf4b744cd2549891dec", + "7fbd564813a82227507d9dd70f1fd21fc1f180223cd3f42e0c3090c9a8b6a7d0", + "aa95db1db2df91bd7200a892dd1c03bc2704c4793400d016b3ca08c148b0f7c1", + "2176988184b570565dc33823a02f474ad59f667a0e971c86063a7fea64776a87", + "d0afc0e64171140eb7902110f807a1beaa38a603d4312fd4bd14a5db2784ba62", + "2822136f60bfc58bbd9d624cc19fbef9f0fc0efe2a61729242e1e5f9b77fa3d0", + "6f207b5c43463af96bc38c43b0bf45275fa327e656a8bba8e7fc55c5ab6870d8", + "6cf33782619b6ff0af9c081e46323f423f8b49bf3d043887c0549bef47d60f55", + "60ea0753d2d4e828983528294be3f57e2a3ba37df4f59e3236133c9e2b17afc5", + "6b3c9f4ba5092e0807774097953ec6e9f58e8371d775bd8738a0fa98d728ba3d", + "c97cab8564054e30515dbe67dda4e14638aabf17b3f042d18dc8461cd098b362", + "9f7ece76fd2c9dae08e75176347efffc1446ad74af66004dd34680edb205dfb5", + "23e7a7e481b00571b63c2a7d0432f9733dd85d18a9841a3d7b96743100da5824", + "e684b1253fa8eb6572f35bab2fd3b6efecabf8472ede43497cd9c171973cc341", + "8b9f04080b0c40f7ad2a6bb5e5296cd6c06e730dffce87a0375ae7bd0f85f86e", + "384a745f3b14edc89526a98b96b3247b2b548541c755aadee7664352ed7f12ae", + "b68c8a82cd5b839917e1058570a0408819b81d16dbab81db118cc8dfc3def044", + "fbaf04f1a401335be57e172f4326102c658d857fde6cf2bc987520d11fc99770", + "57acf2aa5ac736337b120c951536c8a2b2cb23a4f0f198e86f3433370fa63105", + "dd7fcba4c13b6ced0b6190cdb5861adcd08446a92d67f7ec0f02f9533e09bbb0", + "744ef481c9b13ebd3b6e43d7e9ba25f7c7a5c8e453e6f0d50f5d71aae1591689", + "2c573142f1edd52b64dcd42a9c3b0ca5c9c615f757d80d25bfb02ff3eb2257e2", + "ea65cc58ef8520cd0335de4318a0d3b3a1ac257b7e9f82e12483fa3bce6cc0cd", + "1dfa626ff1523b82e21a4c29476edcdc9a89842f3c7181f63a28cd4f46cc9923", + "aa1153e71af836121e6f6cc716cf64880c19221d8dc367ff42359de1b8ef30e9", + "a7c6ec6f6569e22d2fa6e8281639d27c59b633ea00ad8ef27a43171cc985fbda", + "627b706d63d2cfd5a388deeaa76655ef09146fe492ee17cb0043578cef9c2800", + "d40eaf091ef8357b734d1047a552436eaf057d99a0c6f2068b097c324099d360", + "87f0ef81641da4fd3438dcaae4819f0c92a0ade54e262b21f9ded4575ff3f234", + "3a00a29e08d29454b5197662f70ccab5699b0ce8c85af7fbf511b8915d97cfd0", + } + + goodShas := sha1Data + badShas := sha256Data + if hash.CryptoType == crypto.SHA256 { + goodShas = sha256Data + badShas = sha1Data + } + chainData := strings.Join(goodShas, "\n") + "\n" + + chainReader := strings.NewReader(chainData) + + chain, err := commitgraph.OpenChainFile(chainReader) + c.Assert(err, IsNil) + c.Assert(goodShas, DeepEquals, chain) + + // Test with bad shas + chainData = strings.Join(badShas, "\n") + "\n" + + chainReader = strings.NewReader(chainData) + + chain, err = commitgraph.OpenChainFile(chainReader) + c.Assert(err, Equals, commitgraph.ErrMalformedCommitGraphFile) + c.Assert(chain, IsNil) + + // Test with empty file + emptyChainReader := bytes.NewReader(nil) + + chain, err = commitgraph.OpenChainFile(emptyChainReader) + c.Assert(err, IsNil) + c.Assert(chain, DeepEquals, []string{}) + + // Test with file containing only newlines + newlineChainData := []byte("\n\n\n") + newlineChainReader := bytes.NewReader(newlineChainData) + + chain, err = commitgraph.OpenChainFile(newlineChainReader) + c.Assert(err, Equals, commitgraph.ErrMalformedCommitGraphFile) + c.Assert(chain, IsNil) +} diff --git a/plumbing/format/commitgraph/v2/chunk.go b/plumbing/format/commitgraph/v2/chunk.go new file mode 100644 index 0000000..ab24320 --- /dev/null +++ b/plumbing/format/commitgraph/v2/chunk.go @@ -0,0 +1,48 @@ +package v2 + +import "bytes" + +const ( + chunkSigLen = 4 // Length of a chunk signature + chunkSigOffset = 4 // Offset of each chunk signature in chunkSignatures +) + +// chunkSignatures contains the coalesced byte signatures for each chunk type. +// The order of the signatures must match the order of the ChunkType constants. +// (When adding new chunk types you must avoid introducing ambiguity, and you may need to add padding separators to this list or reorder these signatures.) +// (i.e. it would not be possible to add a new chunk type with the signature "IDFO" without some reordering or the addition of separators.) +var chunkSignatures = []byte("OIDFOIDLCDATGDA2GDO2EDGEBIDXBDATBASE\000\000\000\000") + +// ChunkType represents the type of a chunk in the commit graph file. +type ChunkType int + +const ( + OIDFanoutChunk ChunkType = iota // "OIDF" + OIDLookupChunk // "OIDL" + CommitDataChunk // "CDAT" + GenerationDataChunk // "GDA2" + GenerationDataOverflowChunk // "GDO2" + ExtraEdgeListChunk // "EDGE" + BloomFilterIndexChunk // "BIDX" + BloomFilterDataChunk // "BDAT" + BaseGraphsListChunk // "BASE" + ZeroChunk // "\000\000\000\000" +) + +// Signature returns the byte signature for the chunk type. +func (ct ChunkType) Signature() []byte { + if ct >= BaseGraphsListChunk || ct < 0 { // not a valid chunk type just return ZeroChunk + return chunkSignatures[ZeroChunk*chunkSigOffset : ZeroChunk*chunkSigOffset+chunkSigLen] + } + + return chunkSignatures[ct*chunkSigOffset : ct*chunkSigOffset+chunkSigLen] +} + +// ChunkTypeFromBytes returns the chunk type for the given byte signature. +func ChunkTypeFromBytes(b []byte) (ChunkType, bool) { + idx := bytes.Index(chunkSignatures, b) + if idx == -1 || idx%chunkSigOffset != 0 { // not found, or not aligned at chunkSigOffset + return -1, false + } + return ChunkType(idx / chunkSigOffset), true +} diff --git a/plumbing/format/commitgraph/v2/commitgraph.go b/plumbing/format/commitgraph/v2/commitgraph.go new file mode 100644 index 0000000..7c67b63 --- /dev/null +++ b/plumbing/format/commitgraph/v2/commitgraph.go @@ -0,0 +1,40 @@ +package v2 + +import ( + "io" + "time" + + "github.com/go-git/go-git/v5/plumbing" +) + +// CommitData is a reduced representation of Commit as presented in the commit graph +// file. It is merely useful as an optimization for walking the commit graphs. +type CommitData struct { + // TreeHash is the hash of the root tree of the commit. + TreeHash plumbing.Hash + // ParentIndexes are the indexes of the parent commits of the commit. + ParentIndexes []uint32 + // ParentHashes are the hashes of the parent commits of the commit. + ParentHashes []plumbing.Hash + // Generation number is the pre-computed generation in the commit graph + // or zero if not available. + Generation uint64 + // When is the timestamp of the commit. + When time.Time +} + +// Index represents a representation of commit graph that allows indexed +// access to the nodes using commit object hash +type Index interface { + // GetIndexByHash gets the index in the commit graph from commit hash, if available + GetIndexByHash(h plumbing.Hash) (uint32, error) + // GetHashByIndex gets the hash given an index in the commit graph + GetHashByIndex(i uint32) (plumbing.Hash, error) + // GetNodeByIndex gets the commit node from the commit graph using index + // obtained from child node, if available + GetCommitDataByIndex(i uint32) (*CommitData, error) + // Hashes returns all the hashes that are available in the index + Hashes() []plumbing.Hash + + io.Closer +} diff --git a/plumbing/format/commitgraph/v2/commitgraph_test.go b/plumbing/format/commitgraph/v2/commitgraph_test.go new file mode 100644 index 0000000..69fdcd9 --- /dev/null +++ b/plumbing/format/commitgraph/v2/commitgraph_test.go @@ -0,0 +1,165 @@ +package v2_test + +import ( + "os" + "testing" + + "github.com/go-git/go-billy/v5" + "github.com/go-git/go-billy/v5/util" + "github.com/go-git/go-git/v5/plumbing" + commitgraph "github.com/go-git/go-git/v5/plumbing/format/commitgraph/v2" + + fixtures "github.com/go-git/go-git-fixtures/v4" + . "gopkg.in/check.v1" +) + +func Test(t *testing.T) { TestingT(t) } + +type CommitgraphSuite struct { + fixtures.Suite +} + +var _ = Suite(&CommitgraphSuite{}) + +func testReadIndex(c *C, fs billy.Filesystem, path string) commitgraph.Index { + reader, err := fs.Open(path) + c.Assert(err, IsNil) + index, err := commitgraph.OpenFileIndex(reader) + c.Assert(err, IsNil) + c.Assert(index, NotNil) + return index +} + +func testDecodeHelper(c *C, index commitgraph.Index) { + // Root commit + nodeIndex, err := index.GetIndexByHash(plumbing.NewHash("347c91919944a68e9413581a1bc15519550a3afe")) + c.Assert(err, IsNil) + commitData, err := index.GetCommitDataByIndex(nodeIndex) + c.Assert(err, IsNil) + c.Assert(len(commitData.ParentIndexes), Equals, 0) + c.Assert(len(commitData.ParentHashes), Equals, 0) + + // Regular commit + nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("e713b52d7e13807e87a002e812041f248db3f643")) + c.Assert(err, IsNil) + commitData, err = index.GetCommitDataByIndex(nodeIndex) + c.Assert(err, IsNil) + c.Assert(len(commitData.ParentIndexes), Equals, 1) + c.Assert(len(commitData.ParentHashes), Equals, 1) + c.Assert(commitData.ParentHashes[0].String(), Equals, "347c91919944a68e9413581a1bc15519550a3afe") + + // Merge commit + nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("b29328491a0682c259bcce28741eac71f3499f7d")) + c.Assert(err, IsNil) + commitData, err = index.GetCommitDataByIndex(nodeIndex) + c.Assert(err, IsNil) + c.Assert(len(commitData.ParentIndexes), Equals, 2) + c.Assert(len(commitData.ParentHashes), Equals, 2) + c.Assert(commitData.ParentHashes[0].String(), Equals, "e713b52d7e13807e87a002e812041f248db3f643") + c.Assert(commitData.ParentHashes[1].String(), Equals, "03d2c021ff68954cf3ef0a36825e194a4b98f981") + + // Octopus merge commit + nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("6f6c5d2be7852c782be1dd13e36496dd7ad39560")) + c.Assert(err, IsNil) + commitData, err = index.GetCommitDataByIndex(nodeIndex) + c.Assert(err, IsNil) + c.Assert(len(commitData.ParentIndexes), Equals, 3) + c.Assert(len(commitData.ParentHashes), Equals, 3) + c.Assert(commitData.ParentHashes[0].String(), Equals, "ce275064ad67d51e99f026084e20827901a8361c") + c.Assert(commitData.ParentHashes[1].String(), Equals, "bb13916df33ed23004c3ce9ed3b8487528e655c1") + c.Assert(commitData.ParentHashes[2].String(), Equals, "a45273fe2d63300e1962a9e26a6b15c276cd7082") + + // Check all hashes + hashes := index.Hashes() + c.Assert(len(hashes), Equals, 11) + c.Assert(hashes[0].String(), Equals, "03d2c021ff68954cf3ef0a36825e194a4b98f981") + c.Assert(hashes[10].String(), Equals, "e713b52d7e13807e87a002e812041f248db3f643") +} + +func (s *CommitgraphSuite) TestDecode(c *C) { + fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) { + dotgit := f.DotGit() + index := testReadIndex(c, dotgit, dotgit.Join("objects", "info", "commit-graph")) + defer index.Close() + testDecodeHelper(c, index) + }) +} + +func (s *CommitgraphSuite) TestDecodeChain(c *C) { + fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) { + dotgit := f.DotGit() + index, err := commitgraph.OpenChainOrFileIndex(dotgit) + c.Assert(err, IsNil) + defer index.Close() + testDecodeHelper(c, index) + }) + + fixtures.ByTag("commit-graph-chain").Test(c, func(f *fixtures.Fixture) { + dotgit := f.DotGit() + index, err := commitgraph.OpenChainOrFileIndex(dotgit) + c.Assert(err, IsNil) + defer index.Close() + testDecodeHelper(c, index) + }) +} + +func (s *CommitgraphSuite) TestReencode(c *C) { + fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) { + dotgit := f.DotGit() + + reader, err := dotgit.Open(dotgit.Join("objects", "info", "commit-graph")) + c.Assert(err, IsNil) + defer reader.Close() + index, err := commitgraph.OpenFileIndex(reader) + c.Assert(err, IsNil) + defer index.Close() + + writer, err := util.TempFile(dotgit, "", "commit-graph") + c.Assert(err, IsNil) + tmpName := writer.Name() + defer os.Remove(tmpName) + + encoder := commitgraph.NewEncoder(writer) + err = encoder.Encode(index) + c.Assert(err, IsNil) + writer.Close() + + tmpIndex := testReadIndex(c, dotgit, tmpName) + defer tmpIndex.Close() + testDecodeHelper(c, tmpIndex) + }) +} + +func (s *CommitgraphSuite) TestReencodeInMemory(c *C) { + fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) { + dotgit := f.DotGit() + + reader, err := dotgit.Open(dotgit.Join("objects", "info", "commit-graph")) + c.Assert(err, IsNil) + index, err := commitgraph.OpenFileIndex(reader) + c.Assert(err, IsNil) + + memoryIndex := commitgraph.NewMemoryIndex() + defer memoryIndex.Close() + for i, hash := range index.Hashes() { + commitData, err := index.GetCommitDataByIndex(uint32(i)) + c.Assert(err, IsNil) + memoryIndex.Add(hash, commitData) + } + index.Close() + + writer, err := util.TempFile(dotgit, "", "commit-graph") + c.Assert(err, IsNil) + tmpName := writer.Name() + defer os.Remove(tmpName) + + encoder := commitgraph.NewEncoder(writer) + err = encoder.Encode(memoryIndex) + c.Assert(err, IsNil) + writer.Close() + + tmpIndex := testReadIndex(c, dotgit, tmpName) + defer tmpIndex.Close() + testDecodeHelper(c, tmpIndex) + }) +} diff --git a/plumbing/format/commitgraph/v2/doc.go b/plumbing/format/commitgraph/v2/doc.go new file mode 100644 index 0000000..157621d --- /dev/null +++ b/plumbing/format/commitgraph/v2/doc.go @@ -0,0 +1,106 @@ +// Package v2 implements encoding and decoding of commit-graph files. +// +// This package was created to work around the issues of the incorrect types in +// the commitgraph package. +// +// Git commit graph format +// ======================= +// +// The Git commit graph stores a list of commit OIDs and some associated +// metadata, including: +// +// - The generation number of the commit. Commits with no parents have +// generation number 1; commits with parents have generation number +// one more than the maximum generation number of its parents. We +// reserve zero as special, and can be used to mark a generation +// number invalid or as "not computed". +// +// - The root tree OID. +// +// - The commit date. +// +// - The parents of the commit, stored using positional references within +// the graph file. +// +// These positional references are stored as unsigned 32-bit integers +// corresponding to the array position within the list of commit OIDs. Due +// to some special constants we use to track parents, we can store at most +// (1 << 30) + (1 << 29) + (1 << 28) - 1 (around 1.8 billion) commits. +// +// == Commit graph files have the following format: +// +// In order to allow extensions that add extra data to the graph, we organize +// the body into "chunks" and provide a binary lookup table at the beginning +// of the body. The header includes certain values, such as number of chunks +// and hash type. +// +// All 4-byte numbers are in network order. +// +// HEADER: +// +// 4-byte signature: +// The signature is: {'C', 'G', 'P', 'H'} +// +// 1-byte version number: +// Currently, the only valid version is 1. +// +// 1-byte Hash Version (1 = SHA-1) +// We infer the hash length (H) from this value. +// +// 1-byte number (C) of "chunks" +// +// 1-byte (reserved for later use) +// Current clients should ignore this value. +// +// CHUNK LOOKUP: +// +// (C + 1) * 12 bytes listing the table of contents for the chunks: +// First 4 bytes describe the chunk id. Value 0 is a terminating label. +// Other 8 bytes provide the byte-offset in current file for chunk to +// start. (Chunks are ordered contiguously in the file, so you can infer +// the length using the next chunk position if necessary.) Each chunk +// ID appears at most once. +// +// The remaining data in the body is described one chunk at a time, and +// these chunks may be given in any order. Chunks are required unless +// otherwise specified. +// +// CHUNK DATA: +// +// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes) +// The ith entry, F[i], stores the number of OIDs with first +// byte at most i. Thus F[255] stores the total +// number of commits (N). +// +// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes) +// The OIDs for all commits in the graph, sorted in ascending order. +// +// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes) +// * The first H bytes are for the OID of the root tree. +// * The next 8 bytes are for the positions of the first two parents +// of the ith commit. Stores value 0x7000000 if no parent in that +// position. If there are more than two parents, the second value +// has its most-significant bit on and the other bits store an array +// position into the Extra Edge List chunk. +// * The next 8 bytes store the generation number of the commit and +// the commit time in seconds since EPOCH. The generation number +// uses the higher 30 bits of the first 4 bytes, while the commit +// time uses the 32 bits of the second 4 bytes, along with the lowest +// 2 bits of the lowest byte, storing the 33rd and 34th bit of the +// commit time. +// +// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional] +// This list of 4-byte values store the second through nth parents for +// all octopus merges. The second parent value in the commit data stores +// an array position within this list along with the most-significant bit +// on. Starting at that array position, iterate through this list of commit +// positions for the parents until reaching a value with the most-significant +// bit on. The other bits correspond to the position of the last parent. +// +// TRAILER: +// +// H-byte HASH-checksum of all of the above. +// +// Source: +// https://raw.githubusercontent.com/git/git/master/Documentation/technical/commit-graph-format.txt +package v2 diff --git a/plumbing/format/commitgraph/v2/encoder.go b/plumbing/format/commitgraph/v2/encoder.go new file mode 100644 index 0000000..d1e41f8 --- /dev/null +++ b/plumbing/format/commitgraph/v2/encoder.go @@ -0,0 +1,192 @@ +package v2 + +import ( + "crypto" + "io" + + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/hash" + "github.com/go-git/go-git/v5/utils/binary" +) + +// Encoder writes MemoryIndex structs to an output stream. +type Encoder struct { + io.Writer + hash hash.Hash +} + +// NewEncoder returns a new stream encoder that writes to w. +func NewEncoder(w io.Writer) *Encoder { + h := hash.New(hash.CryptoType) + mw := io.MultiWriter(w, h) + return &Encoder{mw, h} +} + +// Encode writes an index into the commit-graph file +func (e *Encoder) Encode(idx Index) error { + // Get all the hashes in the input index + hashes := idx.Hashes() + + // Sort the inout and prepare helper structures we'll need for encoding + hashToIndex, fanout, extraEdgesCount := e.prepare(idx, hashes) + + chunkSignatures := [][]byte{OIDFanoutChunk.Signature(), OIDLookupChunk.Signature(), CommitDataChunk.Signature()} + chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * hash.Size, uint64(len(hashes)) * (hash.Size + commitDataSize)} + if extraEdgesCount > 0 { + chunkSignatures = append(chunkSignatures, ExtraEdgeListChunk.Signature()) + chunkSizes = append(chunkSizes, uint64(extraEdgesCount)*4) + } + + if err := e.encodeFileHeader(len(chunkSignatures)); err != nil { + return err + } + if err := e.encodeChunkHeaders(chunkSignatures, chunkSizes); err != nil { + return err + } + if err := e.encodeFanout(fanout); err != nil { + return err + } + if err := e.encodeOidLookup(hashes); err != nil { + return err + } + if extraEdges, err := e.encodeCommitData(hashes, hashToIndex, idx); err == nil { + if err = e.encodeExtraEdges(extraEdges); err != nil { + return err + } + } else { + return err + } + + return e.encodeChecksum() +} + +func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (hashToIndex map[plumbing.Hash]uint32, fanout []uint32, extraEdgesCount uint32) { + // Sort the hashes and build our index + plumbing.HashesSort(hashes) + hashToIndex = make(map[plumbing.Hash]uint32) + fanout = make([]uint32, 256) + for i, hash := range hashes { + hashToIndex[hash] = uint32(i) + fanout[hash[0]]++ + } + + // Convert the fanout to cumulative values + for i := 1; i <= 0xff; i++ { + fanout[i] += fanout[i-1] + } + + // Find out if we will need extra edge table + for i := 0; i < len(hashes); i++ { + v, _ := idx.GetCommitDataByIndex(uint32(i)) + if len(v.ParentHashes) > 2 { + extraEdgesCount += uint32(len(v.ParentHashes) - 1) + break + } + } + + return +} + +func (e *Encoder) encodeFileHeader(chunkCount int) (err error) { + if _, err = e.Write(commitFileSignature); err == nil { + version := byte(1) + if hash.CryptoType == crypto.SHA256 { + version = byte(2) + } + _, err = e.Write([]byte{1, version, byte(chunkCount), 0}) + } + return +} + +func (e *Encoder) encodeChunkHeaders(chunkSignatures [][]byte, chunkSizes []uint64) (err error) { + // 8 bytes of file header, 12 bytes for each chunk header and 12 byte for terminator + offset := uint64(8 + len(chunkSignatures)*12 + 12) + for i, signature := range chunkSignatures { + if _, err = e.Write(signature); err == nil { + err = binary.WriteUint64(e, offset) + } + if err != nil { + return + } + offset += chunkSizes[i] + } + if _, err = e.Write(ZeroChunk.Signature()); err == nil { + err = binary.WriteUint64(e, offset) + } + return +} + +func (e *Encoder) encodeFanout(fanout []uint32) (err error) { + for i := 0; i <= 0xff; i++ { + if err = binary.WriteUint32(e, fanout[i]); err != nil { + return + } + } + return +} + +func (e *Encoder) encodeOidLookup(hashes []plumbing.Hash) (err error) { + for _, hash := range hashes { + if _, err = e.Write(hash[:]); err != nil { + return err + } + } + return +} + +func (e *Encoder) encodeCommitData(hashes []plumbing.Hash, hashToIndex map[plumbing.Hash]uint32, idx Index) (extraEdges []uint32, err error) { + for _, hash := range hashes { + origIndex, _ := idx.GetIndexByHash(hash) + commitData, _ := idx.GetCommitDataByIndex(origIndex) + if _, err = e.Write(commitData.TreeHash[:]); err != nil { + return + } + + var parent1, parent2 uint32 + if len(commitData.ParentHashes) == 0 { + parent1 = parentNone + parent2 = parentNone + } else if len(commitData.ParentHashes) == 1 { + parent1 = hashToIndex[commitData.ParentHashes[0]] + parent2 = parentNone + } else if len(commitData.ParentHashes) == 2 { + parent1 = hashToIndex[commitData.ParentHashes[0]] + parent2 = hashToIndex[commitData.ParentHashes[1]] + } else if len(commitData.ParentHashes) > 2 { + parent1 = hashToIndex[commitData.ParentHashes[0]] + parent2 = uint32(len(extraEdges)) | parentOctopusUsed + for _, parentHash := range commitData.ParentHashes[1:] { + extraEdges = append(extraEdges, hashToIndex[parentHash]) + } + extraEdges[len(extraEdges)-1] |= parentLast + } + + if err = binary.WriteUint32(e, parent1); err == nil { + err = binary.WriteUint32(e, parent2) + } + if err != nil { + return + } + + unixTime := uint64(commitData.When.Unix()) + unixTime |= uint64(commitData.Generation) << 34 + if err = binary.WriteUint64(e, unixTime); err != nil { + return + } + } + return +} + +func (e *Encoder) encodeExtraEdges(extraEdges []uint32) (err error) { + for _, parent := range extraEdges { + if err = binary.WriteUint32(e, parent); err != nil { + return + } + } + return +} + +func (e *Encoder) encodeChecksum() error { + _, err := e.Write(e.hash.Sum(nil)[:hash.Size]) + return err +} diff --git a/plumbing/format/commitgraph/v2/file.go b/plumbing/format/commitgraph/v2/file.go new file mode 100644 index 0000000..69e0250 --- /dev/null +++ b/plumbing/format/commitgraph/v2/file.go @@ -0,0 +1,338 @@ +package v2 + +import ( + "bytes" + "crypto" + encbin "encoding/binary" + "errors" + "io" + "time" + + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/hash" + "github.com/go-git/go-git/v5/utils/binary" +) + +var ( + // ErrUnsupportedVersion is returned by OpenFileIndex when the commit graph + // file version is not supported. + ErrUnsupportedVersion = errors.New("unsupported version") + // ErrUnsupportedHash is returned by OpenFileIndex when the commit graph + // hash function is not supported. Currently only SHA-1 is defined and + // supported. + ErrUnsupportedHash = errors.New("unsupported hash algorithm") + // ErrMalformedCommitGraphFile is returned by OpenFileIndex when the commit + // graph file is corrupted. + ErrMalformedCommitGraphFile = errors.New("malformed commit graph file") + + commitFileSignature = []byte{'C', 'G', 'P', 'H'} + + parentNone = uint32(0x70000000) + parentOctopusUsed = uint32(0x80000000) + parentOctopusMask = uint32(0x7fffffff) + parentLast = uint32(0x80000000) +) + +const ( + commitDataSize = 16 +) + +type fileIndex struct { + reader ReaderAtCloser + fanout [256]uint32 + offsets [9]int64 + parent Index +} + +// ReaderAtCloser is an interface that combines io.ReaderAt and io.Closer. +type ReaderAtCloser interface { + io.ReaderAt + io.Closer +} + +// OpenFileIndex opens a serialized commit graph file in the format described at +// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt +func OpenFileIndex(reader ReaderAtCloser) (Index, error) { + return OpenFileIndexWithParent(reader, nil) +} + +// OpenFileIndexWithParent opens a serialized commit graph file in the format described at +// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt +func OpenFileIndexWithParent(reader ReaderAtCloser, parent Index) (Index, error) { + if reader == nil { + return nil, io.ErrUnexpectedEOF + } + fi := &fileIndex{reader: reader, parent: parent} + + if err := fi.verifyFileHeader(); err != nil { + return nil, err + } + if err := fi.readChunkHeaders(); err != nil { + return nil, err + } + if err := fi.readFanout(); err != nil { + return nil, err + } + + return fi, nil +} + +// Close closes the underlying reader and the parent index if it exists. +func (fi *fileIndex) Close() (err error) { + if fi.parent != nil { + defer func() { + parentErr := fi.parent.Close() + // only report the error from the parent if there is no error from the reader + if err == nil { + err = parentErr + } + }() + } + err = fi.reader.Close() + return +} + +func (fi *fileIndex) verifyFileHeader() error { + // Verify file signature + signature := make([]byte, 4) + if _, err := fi.reader.ReadAt(signature, 0); err != nil { + return err + } + if !bytes.Equal(signature, commitFileSignature) { + return ErrMalformedCommitGraphFile + } + + // Read and verify the file header + header := make([]byte, 4) + if _, err := fi.reader.ReadAt(header, 4); err != nil { + return err + } + if header[0] != 1 { + return ErrUnsupportedVersion + } + if !(hash.CryptoType == crypto.SHA1 && header[1] == 1) && + !(hash.CryptoType == crypto.SHA256 && header[1] == 2) { + // Unknown hash type / unsupported hash type + return ErrUnsupportedHash + } + + return nil +} + +func (fi *fileIndex) readChunkHeaders() error { + chunkID := make([]byte, 4) + for i := 0; ; i++ { + chunkHeader := io.NewSectionReader(fi.reader, 8+(int64(i)*12), 12) + if _, err := io.ReadAtLeast(chunkHeader, chunkID, 4); err != nil { + return err + } + chunkOffset, err := binary.ReadUint64(chunkHeader) + if err != nil { + return err + } + + chunkType, ok := ChunkTypeFromBytes(chunkID) + if !ok { + continue + } + if chunkType == ZeroChunk || int(chunkType) >= len(fi.offsets) { + break + } + fi.offsets[chunkType] = int64(chunkOffset) + } + + if fi.offsets[OIDFanoutChunk] <= 0 || fi.offsets[OIDLookupChunk] <= 0 || fi.offsets[CommitDataChunk] <= 0 { + return ErrMalformedCommitGraphFile + } + + return nil +} + +func (fi *fileIndex) readFanout() error { + fanoutReader := io.NewSectionReader(fi.reader, fi.offsets[OIDFanoutChunk], 256*4) + for i := 0; i < 256; i++ { + fanoutValue, err := binary.ReadUint32(fanoutReader) + if err != nil { + return err + } + if fanoutValue > 0x7fffffff { + return ErrMalformedCommitGraphFile + } + fi.fanout[i] = fanoutValue + } + return nil +} + +// GetIndexByHash looks up the provided hash in the commit-graph fanout and returns the index of the commit data for the given hash. +func (fi *fileIndex) GetIndexByHash(h plumbing.Hash) (uint32, error) { + var oid plumbing.Hash + + // Find the hash in the oid lookup table + var low uint32 + if h[0] == 0 { + low = 0 + } else { + low = fi.fanout[h[0]-1] + } + high := fi.fanout[h[0]] + for low < high { + mid := (low + high) >> 1 + offset := fi.offsets[OIDLookupChunk] + int64(mid)*hash.Size + if _, err := fi.reader.ReadAt(oid[:], offset); err != nil { + return 0, err + } + cmp := bytes.Compare(h[:], oid[:]) + if cmp < 0 { + high = mid + } else if cmp == 0 { + return mid, nil + } else { + low = mid + 1 + } + } + + if fi.parent != nil { + idx, err := fi.parent.GetIndexByHash(h) + if err != nil { + return 0, err + } + return idx + fi.fanout[0xff], nil + } + + return 0, plumbing.ErrObjectNotFound +} + +// GetCommitDataByIndex returns the commit data for the given index in the commit-graph. +func (fi *fileIndex) GetCommitDataByIndex(idx uint32) (*CommitData, error) { + if idx >= fi.fanout[0xff] { + if fi.parent != nil { + data, err := fi.parent.GetCommitDataByIndex(idx - fi.fanout[0xff]) + if err != nil { + return nil, err + } + for i := range data.ParentIndexes { + data.ParentIndexes[i] += fi.fanout[0xff] + } + return data, nil + } + + return nil, plumbing.ErrObjectNotFound + } + + offset := fi.offsets[CommitDataChunk] + int64(idx)*(hash.Size+commitDataSize) + commitDataReader := io.NewSectionReader(fi.reader, offset, hash.Size+commitDataSize) + + treeHash, err := binary.ReadHash(commitDataReader) + if err != nil { + return nil, err + } + parent1, err := binary.ReadUint32(commitDataReader) + if err != nil { + return nil, err + } + parent2, err := binary.ReadUint32(commitDataReader) + if err != nil { + return nil, err + } + genAndTime, err := binary.ReadUint64(commitDataReader) + if err != nil { + return nil, err + } + + var parentIndexes []uint32 + if parent2&parentOctopusUsed == parentOctopusUsed { + // Octopus merge + parentIndexes = []uint32{parent1 & parentOctopusMask} + offset := fi.offsets[ExtraEdgeListChunk] + 4*int64(parent2&parentOctopusMask) + buf := make([]byte, 4) + for { + _, err := fi.reader.ReadAt(buf, offset) + if err != nil { + return nil, err + } + + parent := encbin.BigEndian.Uint32(buf) + offset += 4 + parentIndexes = append(parentIndexes, parent&parentOctopusMask) + if parent&parentLast == parentLast { + break + } + } + } else if parent2 != parentNone { + parentIndexes = []uint32{parent1 & parentOctopusMask, parent2 & parentOctopusMask} + } else if parent1 != parentNone { + parentIndexes = []uint32{parent1 & parentOctopusMask} + } + + parentHashes, err := fi.getHashesFromIndexes(parentIndexes) + if err != nil { + return nil, err + } + + return &CommitData{ + TreeHash: treeHash, + ParentIndexes: parentIndexes, + ParentHashes: parentHashes, + Generation: genAndTime >> 34, + When: time.Unix(int64(genAndTime&0x3FFFFFFFF), 0), + }, nil +} + +// GetHashByIndex looks up the hash for the given index in the commit-graph. +func (fi *fileIndex) GetHashByIndex(idx uint32) (found plumbing.Hash, err error) { + if idx >= fi.fanout[0xff] { + if fi.parent != nil { + return fi.parent.GetHashByIndex(idx - fi.fanout[0xff]) + } + return found, ErrMalformedCommitGraphFile + } + + offset := fi.offsets[OIDLookupChunk] + int64(idx)*hash.Size + if _, err := fi.reader.ReadAt(found[:], offset); err != nil { + return found, err + } + + return found, nil +} + +func (fi *fileIndex) getHashesFromIndexes(indexes []uint32) ([]plumbing.Hash, error) { + hashes := make([]plumbing.Hash, len(indexes)) + + for i, idx := range indexes { + if idx >= fi.fanout[0xff] { + if fi.parent != nil { + hash, err := fi.parent.GetHashByIndex(idx - fi.fanout[0xff]) + if err != nil { + return nil, err + } + hashes[i] = hash + continue + } + + return nil, ErrMalformedCommitGraphFile + } + + offset := fi.offsets[OIDLookupChunk] + int64(idx)*hash.Size + if _, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil { + return nil, err + } + } + + return hashes, nil +} + +// Hashes returns all the hashes that are available in the index. +func (fi *fileIndex) Hashes() []plumbing.Hash { + hashes := make([]plumbing.Hash, fi.fanout[0xff]) + for i := uint32(0); i < fi.fanout[0xff]; i++ { + offset := fi.offsets[OIDLookupChunk] + int64(i)*hash.Size + if n, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil || n < hash.Size { + return nil + } + } + if fi.parent != nil { + parentHashes := fi.parent.Hashes() + hashes = append(hashes, parentHashes...) + } + return hashes +} diff --git a/plumbing/format/commitgraph/v2/memory.go b/plumbing/format/commitgraph/v2/memory.go new file mode 100644 index 0000000..ab7ddfa --- /dev/null +++ b/plumbing/format/commitgraph/v2/memory.go @@ -0,0 +1,91 @@ +package v2 + +import ( + "github.com/go-git/go-git/v5/plumbing" +) + +// MemoryIndex provides a way to build the commit-graph in memory +// for later encoding to file. +type MemoryIndex struct { + commitData []commitData + indexMap map[plumbing.Hash]uint32 +} + +type commitData struct { + Hash plumbing.Hash + *CommitData +} + +// NewMemoryIndex creates in-memory commit graph representation +func NewMemoryIndex() *MemoryIndex { + return &MemoryIndex{ + indexMap: make(map[plumbing.Hash]uint32), + } +} + +// GetIndexByHash gets the index in the commit graph from commit hash, if available +func (mi *MemoryIndex) GetIndexByHash(h plumbing.Hash) (uint32, error) { + i, ok := mi.indexMap[h] + if ok { + return i, nil + } + + return 0, plumbing.ErrObjectNotFound +} + +// GetHashByIndex gets the hash given an index in the commit graph +func (mi *MemoryIndex) GetHashByIndex(i uint32) (plumbing.Hash, error) { + if i >= uint32(len(mi.commitData)) { + return plumbing.ZeroHash, plumbing.ErrObjectNotFound + } + + return mi.commitData[i].Hash, nil +} + +// GetCommitDataByIndex gets the commit node from the commit graph using index +// obtained from child node, if available +func (mi *MemoryIndex) GetCommitDataByIndex(i uint32) (*CommitData, error) { + if i >= uint32(len(mi.commitData)) { + return nil, plumbing.ErrObjectNotFound + } + + commitData := mi.commitData[i] + + // Map parent hashes to parent indexes + if commitData.ParentIndexes == nil { + parentIndexes := make([]uint32, len(commitData.ParentHashes)) + for i, parentHash := range commitData.ParentHashes { + var err error + if parentIndexes[i], err = mi.GetIndexByHash(parentHash); err != nil { + return nil, err + } + } + commitData.ParentIndexes = parentIndexes + } + + return commitData.CommitData, nil +} + +// Hashes returns all the hashes that are available in the index +func (mi *MemoryIndex) Hashes() []plumbing.Hash { + hashes := make([]plumbing.Hash, 0, len(mi.indexMap)) + for k := range mi.indexMap { + hashes = append(hashes, k) + } + return hashes +} + +// Add adds new node to the memory index +func (mi *MemoryIndex) Add(hash plumbing.Hash, data *CommitData) { + // The parent indexes are calculated lazily in GetNodeByIndex + // which allows adding nodes out of order as long as all parents + // are eventually resolved + data.ParentIndexes = nil + mi.indexMap[hash] = uint32(len(mi.commitData)) + mi.commitData = append(mi.commitData, commitData{Hash: hash, CommitData: data}) +} + +// Close closes the index +func (mi *MemoryIndex) Close() error { + return nil +} diff --git a/plumbing/object/commitgraph/commitnode_graph.go b/plumbing/object/commitgraph/commitnode_graph.go index 8e5d4e3..252b518 100644 --- a/plumbing/object/commitgraph/commitnode_graph.go +++ b/plumbing/object/commitgraph/commitnode_graph.go @@ -1,131 +1,133 @@ -package commitgraph - -import ( - "fmt" - "time" - - "github.com/go-git/go-git/v5/plumbing" - "github.com/go-git/go-git/v5/plumbing/format/commitgraph" - "github.com/go-git/go-git/v5/plumbing/object" - "github.com/go-git/go-git/v5/plumbing/storer" -) - -// graphCommitNode is a reduced representation of Commit as presented in the commit -// graph file (commitgraph.Node). It is merely useful as an optimization for walking -// the commit graphs. -// -// graphCommitNode implements the CommitNode interface. -type graphCommitNode struct { - // Hash for the Commit object - hash plumbing.Hash - // Index of the node in the commit graph file - index int - - commitData *commitgraph.CommitData - gci *graphCommitNodeIndex -} - -// graphCommitNodeIndex is an index that can load CommitNode objects from both the commit -// graph files and the object store. -// -// graphCommitNodeIndex implements the CommitNodeIndex interface -type graphCommitNodeIndex struct { - commitGraph commitgraph.Index - s storer.EncodedObjectStorer -} - -// NewGraphCommitNodeIndex returns CommitNodeIndex implementation that uses commit-graph -// files as backing storage and falls back to object storage when necessary -func NewGraphCommitNodeIndex(commitGraph commitgraph.Index, s storer.EncodedObjectStorer) CommitNodeIndex { - return &graphCommitNodeIndex{commitGraph, s} -} - -func (gci *graphCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { - // Check the commit graph first - parentIndex, err := gci.commitGraph.GetIndexByHash(hash) - if err == nil { - parent, err := gci.commitGraph.GetCommitDataByIndex(parentIndex) - if err != nil { - return nil, err - } - - return &graphCommitNode{ - hash: hash, - index: parentIndex, - commitData: parent, - gci: gci, - }, nil - } - - // Fallback to loading full commit object - commit, err := object.GetCommit(gci.s, hash) - if err != nil { - return nil, err - } - - return &objectCommitNode{ - nodeIndex: gci, - commit: commit, - }, nil -} - -func (c *graphCommitNode) ID() plumbing.Hash { - return c.hash -} - -func (c *graphCommitNode) Tree() (*object.Tree, error) { - return object.GetTree(c.gci.s, c.commitData.TreeHash) -} - -func (c *graphCommitNode) CommitTime() time.Time { - return c.commitData.When -} - -func (c *graphCommitNode) NumParents() int { - return len(c.commitData.ParentIndexes) -} - -func (c *graphCommitNode) ParentNodes() CommitNodeIter { - return newParentgraphCommitNodeIter(c) -} - -func (c *graphCommitNode) ParentNode(i int) (CommitNode, error) { - if i < 0 || i >= len(c.commitData.ParentIndexes) { - return nil, object.ErrParentNotFound - } - - parent, err := c.gci.commitGraph.GetCommitDataByIndex(c.commitData.ParentIndexes[i]) - if err != nil { - return nil, err - } - - return &graphCommitNode{ - hash: c.commitData.ParentHashes[i], - index: c.commitData.ParentIndexes[i], - commitData: parent, - gci: c.gci, - }, nil -} - -func (c *graphCommitNode) ParentHashes() []plumbing.Hash { - return c.commitData.ParentHashes -} - -func (c *graphCommitNode) Generation() uint64 { - // If the commit-graph file was generated with older Git version that - // set the generation to zero for every commit the generation assumption - // is still valid. It is just less useful. - return uint64(c.commitData.Generation) -} - -func (c *graphCommitNode) Commit() (*object.Commit, error) { - return object.GetCommit(c.gci.s, c.hash) -} - -func (c *graphCommitNode) String() string { - return fmt.Sprintf( - "%s %s\nDate: %s", - plumbing.CommitObject, c.ID(), - c.CommitTime().Format(object.DateFormat), - ) -} +package commitgraph + +import ( + "fmt" + "time" + + "github.com/go-git/go-git/v5/plumbing" + commitgraph "github.com/go-git/go-git/v5/plumbing/format/commitgraph/v2" + "github.com/go-git/go-git/v5/plumbing/object" + "github.com/go-git/go-git/v5/plumbing/storer" +) + +// graphCommitNode is a reduced representation of Commit as presented in the commit +// graph file (commitgraph.Node). It is merely useful as an optimization for walking +// the commit graphs. +// +// graphCommitNode implements the CommitNode interface. +type graphCommitNode struct { + // Hash for the Commit object + hash plumbing.Hash + // Index of the node in the commit graph file + index uint32 + + commitData *commitgraph.CommitData + gci *graphCommitNodeIndex +} + +// graphCommitNodeIndex is an index that can load CommitNode objects from both the commit +// graph files and the object store. +// +// graphCommitNodeIndex implements the CommitNodeIndex interface +type graphCommitNodeIndex struct { + commitGraph commitgraph.Index + s storer.EncodedObjectStorer +} + +// NewGraphCommitNodeIndex returns CommitNodeIndex implementation that uses commit-graph +// files as backing storage and falls back to object storage when necessary +func NewGraphCommitNodeIndex(commitGraph commitgraph.Index, s storer.EncodedObjectStorer) CommitNodeIndex { + return &graphCommitNodeIndex{commitGraph, s} +} + +func (gci *graphCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { + if gci.commitGraph != nil { + // Check the commit graph first + parentIndex, err := gci.commitGraph.GetIndexByHash(hash) + if err == nil { + parent, err := gci.commitGraph.GetCommitDataByIndex(parentIndex) + if err != nil { + return nil, err + } + + return &graphCommitNode{ + hash: hash, + index: parentIndex, + commitData: parent, + gci: gci, + }, nil + } + } + + // Fallback to loading full commit object + commit, err := object.GetCommit(gci.s, hash) + if err != nil { + return nil, err + } + + return &objectCommitNode{ + nodeIndex: gci, + commit: commit, + }, nil +} + +func (c *graphCommitNode) ID() plumbing.Hash { + return c.hash +} + +func (c *graphCommitNode) Tree() (*object.Tree, error) { + return object.GetTree(c.gci.s, c.commitData.TreeHash) +} + +func (c *graphCommitNode) CommitTime() time.Time { + return c.commitData.When +} + +func (c *graphCommitNode) NumParents() int { + return len(c.commitData.ParentIndexes) +} + +func (c *graphCommitNode) ParentNodes() CommitNodeIter { + return newParentgraphCommitNodeIter(c) +} + +func (c *graphCommitNode) ParentNode(i int) (CommitNode, error) { + if i < 0 || i >= len(c.commitData.ParentIndexes) { + return nil, object.ErrParentNotFound + } + + parent, err := c.gci.commitGraph.GetCommitDataByIndex(c.commitData.ParentIndexes[i]) + if err != nil { + return nil, err + } + + return &graphCommitNode{ + hash: c.commitData.ParentHashes[i], + index: c.commitData.ParentIndexes[i], + commitData: parent, + gci: c.gci, + }, nil +} + +func (c *graphCommitNode) ParentHashes() []plumbing.Hash { + return c.commitData.ParentHashes +} + +func (c *graphCommitNode) Generation() uint64 { + // If the commit-graph file was generated with older Git version that + // set the generation to zero for every commit the generation assumption + // is still valid. It is just less useful. + return c.commitData.Generation +} + +func (c *graphCommitNode) Commit() (*object.Commit, error) { + return object.GetCommit(c.gci.s, c.hash) +} + +func (c *graphCommitNode) String() string { + return fmt.Sprintf( + "%s %s\nDate: %s", + plumbing.CommitObject, c.ID(), + c.CommitTime().Format(object.DateFormat), + ) +} diff --git a/plumbing/object/commitgraph/commitnode_object.go b/plumbing/object/commitgraph/commitnode_object.go index bdf8cb7..1bd37e3 100644 --- a/plumbing/object/commitgraph/commitnode_object.go +++ b/plumbing/object/commitgraph/commitnode_object.go @@ -1,90 +1,90 @@ -package commitgraph - -import ( - "math" - "time" - - "github.com/go-git/go-git/v5/plumbing" - "github.com/go-git/go-git/v5/plumbing/object" - "github.com/go-git/go-git/v5/plumbing/storer" -) - -// objectCommitNode is a representation of Commit as presented in the GIT object format. -// -// objectCommitNode implements the CommitNode interface. -type objectCommitNode struct { - nodeIndex CommitNodeIndex - commit *object.Commit -} - -// NewObjectCommitNodeIndex returns CommitNodeIndex implementation that uses -// only object storage to load the nodes -func NewObjectCommitNodeIndex(s storer.EncodedObjectStorer) CommitNodeIndex { - return &objectCommitNodeIndex{s} -} - -func (oci *objectCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { - commit, err := object.GetCommit(oci.s, hash) - if err != nil { - return nil, err - } - - return &objectCommitNode{ - nodeIndex: oci, - commit: commit, - }, nil -} - -// objectCommitNodeIndex is an index that can load CommitNode objects only from the -// object store. -// -// objectCommitNodeIndex implements the CommitNodeIndex interface -type objectCommitNodeIndex struct { - s storer.EncodedObjectStorer -} - -func (c *objectCommitNode) CommitTime() time.Time { - return c.commit.Committer.When -} - -func (c *objectCommitNode) ID() plumbing.Hash { - return c.commit.ID() -} - -func (c *objectCommitNode) Tree() (*object.Tree, error) { - return c.commit.Tree() -} - -func (c *objectCommitNode) NumParents() int { - return c.commit.NumParents() -} - -func (c *objectCommitNode) ParentNodes() CommitNodeIter { - return newParentgraphCommitNodeIter(c) -} - -func (c *objectCommitNode) ParentNode(i int) (CommitNode, error) { - if i < 0 || i >= len(c.commit.ParentHashes) { - return nil, object.ErrParentNotFound - } - - // Note: It's necessary to go through CommitNodeIndex here to ensure - // that if the commit-graph file covers only part of the history we - // start using it when that part is reached. - return c.nodeIndex.Get(c.commit.ParentHashes[i]) -} - -func (c *objectCommitNode) ParentHashes() []plumbing.Hash { - return c.commit.ParentHashes -} - -func (c *objectCommitNode) Generation() uint64 { - // Commit nodes representing objects outside of the commit graph can never - // be reached by objects from the commit-graph thus we return the highest - // possible value. - return math.MaxUint64 -} - -func (c *objectCommitNode) Commit() (*object.Commit, error) { - return c.commit, nil -} +package commitgraph + +import ( + "math" + "time" + + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/object" + "github.com/go-git/go-git/v5/plumbing/storer" +) + +// objectCommitNode is a representation of Commit as presented in the GIT object format. +// +// objectCommitNode implements the CommitNode interface. +type objectCommitNode struct { + nodeIndex CommitNodeIndex + commit *object.Commit +} + +// NewObjectCommitNodeIndex returns CommitNodeIndex implementation that uses +// only object storage to load the nodes +func NewObjectCommitNodeIndex(s storer.EncodedObjectStorer) CommitNodeIndex { + return &objectCommitNodeIndex{s} +} + +func (oci *objectCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { + commit, err := object.GetCommit(oci.s, hash) + if err != nil { + return nil, err + } + + return &objectCommitNode{ + nodeIndex: oci, + commit: commit, + }, nil +} + +// objectCommitNodeIndex is an index that can load CommitNode objects only from the +// object store. +// +// objectCommitNodeIndex implements the CommitNodeIndex interface +type objectCommitNodeIndex struct { + s storer.EncodedObjectStorer +} + +func (c *objectCommitNode) CommitTime() time.Time { + return c.commit.Committer.When +} + +func (c *objectCommitNode) ID() plumbing.Hash { + return c.commit.ID() +} + +func (c *objectCommitNode) Tree() (*object.Tree, error) { + return c.commit.Tree() +} + +func (c *objectCommitNode) NumParents() int { + return c.commit.NumParents() +} + +func (c *objectCommitNode) ParentNodes() CommitNodeIter { + return newParentgraphCommitNodeIter(c) +} + +func (c *objectCommitNode) ParentNode(i int) (CommitNode, error) { + if i < 0 || i >= len(c.commit.ParentHashes) { + return nil, object.ErrParentNotFound + } + + // Note: It's necessary to go through CommitNodeIndex here to ensure + // that if the commit-graph file covers only part of the history we + // start using it when that part is reached. + return c.nodeIndex.Get(c.commit.ParentHashes[i]) +} + +func (c *objectCommitNode) ParentHashes() []plumbing.Hash { + return c.commit.ParentHashes +} + +func (c *objectCommitNode) Generation() uint64 { + // Commit nodes representing objects outside of the commit graph can never + // be reached by objects from the commit-graph thus we return the highest + // possible value. + return math.MaxUint64 +} + +func (c *objectCommitNode) Commit() (*object.Commit, error) { + return c.commit, nil +} diff --git a/plumbing/object/commitgraph/commitnode_test.go b/plumbing/object/commitgraph/commitnode_test.go index 91fb211..441ff6f 100644 --- a/plumbing/object/commitgraph/commitnode_test.go +++ b/plumbing/object/commitgraph/commitnode_test.go @@ -6,7 +6,7 @@ import ( "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/cache" - "github.com/go-git/go-git/v5/plumbing/format/commitgraph" + commitgraph "github.com/go-git/go-git/v5/plumbing/format/commitgraph/v2" "github.com/go-git/go-git/v5/plumbing/format/packfile" "github.com/go-git/go-git/v5/storage/filesystem" @@ -115,6 +115,7 @@ func (s *CommitNodeSuite) TestCommitGraph(c *C) { defer reader.Close() index, err := commitgraph.OpenFileIndex(reader) c.Assert(err, IsNil) + defer index.Close() nodeIndex := NewGraphCommitNodeIndex(index, storer) testWalker(c, nodeIndex) @@ -132,10 +133,14 @@ func (s *CommitNodeSuite) TestMixedGraph(c *C) { defer reader.Close() fileIndex, err := commitgraph.OpenFileIndex(reader) c.Assert(err, IsNil) + defer fileIndex.Close() + memoryIndex := commitgraph.NewMemoryIndex() + defer memoryIndex.Close() + for i, hash := range fileIndex.Hashes() { if hash.String() != "b9d69064b190e7aedccf84731ca1d917871f8a1c" { - node, err := fileIndex.GetCommitDataByIndex(i) + node, err := fileIndex.GetCommitDataByIndex(uint32(i)) c.Assert(err, IsNil) memoryIndex.Add(hash, node) } diff --git a/plumbing/object/commitgraph/commitnode_walker_ctime.go b/plumbing/object/commitgraph/commitnode_walker_ctime.go index 281f10b..c26873c 100644 --- a/plumbing/object/commitgraph/commitnode_walker_ctime.go +++ b/plumbing/object/commitgraph/commitnode_walker_ctime.go @@ -1,105 +1,105 @@ -package commitgraph - -import ( - "io" - - "github.com/go-git/go-git/v5/plumbing" - "github.com/go-git/go-git/v5/plumbing/storer" - - "github.com/emirpasic/gods/trees/binaryheap" -) - -type commitNodeIteratorByCTime struct { - heap *binaryheap.Heap - seenExternal map[plumbing.Hash]bool - seen map[plumbing.Hash]bool -} - -// NewCommitNodeIterCTime returns a CommitNodeIter that walks the commit history, -// starting at the given commit and visiting its parents while preserving Committer Time order. -// this appears to be the closest order to `git log` -// The given callback will be called for each visited commit. Each commit will -// be visited only once. If the callback returns an error, walking will stop -// and will return the error. Other errors might be returned if the history -// cannot be traversed (e.g. missing objects). Ignore allows to skip some -// commits from being iterated. -func NewCommitNodeIterCTime( - c CommitNode, - seenExternal map[plumbing.Hash]bool, - ignore []plumbing.Hash, -) CommitNodeIter { - seen := make(map[plumbing.Hash]bool) - for _, h := range ignore { - seen[h] = true - } - - heap := binaryheap.NewWith(func(a, b interface{}) int { - if a.(CommitNode).CommitTime().Before(b.(CommitNode).CommitTime()) { - return 1 - } - return -1 - }) - - heap.Push(c) - - return &commitNodeIteratorByCTime{ - heap: heap, - seenExternal: seenExternal, - seen: seen, - } -} - -func (w *commitNodeIteratorByCTime) Next() (CommitNode, error) { - var c CommitNode - for { - cIn, ok := w.heap.Pop() - if !ok { - return nil, io.EOF - } - c = cIn.(CommitNode) - cID := c.ID() - - if w.seen[cID] || w.seenExternal[cID] { - continue - } - - w.seen[cID] = true - - for i, h := range c.ParentHashes() { - if w.seen[h] || w.seenExternal[h] { - continue - } - pc, err := c.ParentNode(i) - if err != nil { - return nil, err - } - w.heap.Push(pc) - } - - return c, nil - } -} - -func (w *commitNodeIteratorByCTime) ForEach(cb func(CommitNode) error) error { - for { - c, err := w.Next() - if err == io.EOF { - break - } - if err != nil { - return err - } - - err = cb(c) - if err == storer.ErrStop { - break - } - if err != nil { - return err - } - } - - return nil -} - -func (w *commitNodeIteratorByCTime) Close() {} +package commitgraph + +import ( + "io" + + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/storer" + + "github.com/emirpasic/gods/trees/binaryheap" +) + +type commitNodeIteratorByCTime struct { + heap *binaryheap.Heap + seenExternal map[plumbing.Hash]bool + seen map[plumbing.Hash]bool +} + +// NewCommitNodeIterCTime returns a CommitNodeIter that walks the commit history, +// starting at the given commit and visiting its parents while preserving Committer Time order. +// this appears to be the closest order to `git log` +// The given callback will be called for each visited commit. Each commit will +// be visited only once. If the callback returns an error, walking will stop +// and will return the error. Other errors might be returned if the history +// cannot be traversed (e.g. missing objects). Ignore allows to skip some +// commits from being iterated. +func NewCommitNodeIterCTime( + c CommitNode, + seenExternal map[plumbing.Hash]bool, + ignore []plumbing.Hash, +) CommitNodeIter { + seen := make(map[plumbing.Hash]bool) + for _, h := range ignore { + seen[h] = true + } + + heap := binaryheap.NewWith(func(a, b interface{}) int { + if a.(CommitNode).CommitTime().Before(b.(CommitNode).CommitTime()) { + return 1 + } + return -1 + }) + + heap.Push(c) + + return &commitNodeIteratorByCTime{ + heap: heap, + seenExternal: seenExternal, + seen: seen, + } +} + +func (w *commitNodeIteratorByCTime) Next() (CommitNode, error) { + var c CommitNode + for { + cIn, ok := w.heap.Pop() + if !ok { + return nil, io.EOF + } + c = cIn.(CommitNode) + cID := c.ID() + + if w.seen[cID] || w.seenExternal[cID] { + continue + } + + w.seen[cID] = true + + for i, h := range c.ParentHashes() { + if w.seen[h] || w.seenExternal[h] { + continue + } + pc, err := c.ParentNode(i) + if err != nil { + return nil, err + } + w.heap.Push(pc) + } + + return c, nil + } +} + +func (w *commitNodeIteratorByCTime) ForEach(cb func(CommitNode) error) error { + for { + c, err := w.Next() + if err == io.EOF { + break + } + if err != nil { + return err + } + + err = cb(c) + if err == storer.ErrStop { + break + } + if err != nil { + return err + } + } + + return nil +} + +func (w *commitNodeIteratorByCTime) Close() {} -- cgit