Diffstat (limited to 'plumbing/format')
21 files changed, 1853 insertions, 231 deletions
diff --git a/plumbing/format/commitgraph/commitgraph.go b/plumbing/format/commitgraph/commitgraph.go
index 3d59323..e772d26 100644
--- a/plumbing/format/commitgraph/commitgraph.go
+++ b/plumbing/format/commitgraph/commitgraph.go
@@ -8,6 +8,9 @@ import (
// CommitData is a reduced representation of Commit as presented in the commit graph
// file. It is merely useful as an optimization for walking the commit graphs.
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
type CommitData struct {
// TreeHash is the hash of the root tree of the commit.
TreeHash plumbing.Hash
@@ -24,6 +27,9 @@ type CommitData struct {
// Index represents a representation of commit graph that allows indexed
// access to the nodes using commit object hash
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
type Index interface {
// GetIndexByHash gets the index in the commit graph from commit hash, if available
GetIndexByHash(h plumbing.Hash) (int, error)
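
Since every exported symbol above now carries the same deprecation notice, a brief illustration of the replacement may help. This is a minimal sketch, assuming a commit-graph file at the conventional path; the path and the panics are illustrative only. The key differences are that v2 indexes use uint32 positions and are io.Closers:

package main

import (
	"fmt"
	"os"

	"github.com/go-git/go-git/v5/plumbing"
	commitgraph "github.com/go-git/go-git/v5/plumbing/format/commitgraph/v2"
)

func main() {
	f, err := os.Open(".git/objects/info/commit-graph") // hypothetical path
	if err != nil {
		panic(err)
	}
	idx, err := commitgraph.OpenFileIndex(f) // v2 takes a ReaderAtCloser
	if err != nil {
		f.Close()
		panic(err)
	}
	defer idx.Close() // closes the underlying file too

	// Example hash taken from the test fixtures further down this diff.
	h := plumbing.NewHash("347c91919944a68e9413581a1bc15519550a3afe")
	pos, err := idx.GetIndexByHash(h) // uint32 in v2, int in the old package
	if err == nil {
		fmt.Println("commit found at graph position", pos)
	}
}
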
diff --git a/plumbing/format/commitgraph/doc.go b/plumbing/format/commitgraph/doc.go
index 41cd8b1..c320e18 100644
--- a/plumbing/format/commitgraph/doc.go
+++ b/plumbing/format/commitgraph/doc.go
@@ -1,23 +1,26 @@
// Package commitgraph implements encoding and decoding of commit-graph files.
//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
+//
// Git commit graph format
// =======================
//
// The Git commit graph stores a list of commit OIDs and some associated
// metadata, including:
//
-// - The generation number of the commit. Commits with no parents have
-// generation number 1; commits with parents have generation number
-// one more than the maximum generation number of its parents. We
-// reserve zero as special, and can be used to mark a generation
-// number invalid or as "not computed".
+// - The generation number of the commit. Commits with no parents have
+// generation number 1; commits with parents have generation number
+// one more than the maximum generation number of its parents. We
+// reserve zero as special, and can be used to mark a generation
+// number invalid or as "not computed".
//
// - The root tree OID.
//
// - The commit date.
//
-// - The parents of the commit, stored using positional references within
-// the graph file.
+// - The parents of the commit, stored using positional references within
+// the graph file.
//
// These positional references are stored as unsigned 32-bit integers
// corresponding to the array position within the list of commit OIDs. Due
@@ -35,68 +38,68 @@
//
// HEADER:
//
-// 4-byte signature:
-// The signature is: {'C', 'G', 'P', 'H'}
+// 4-byte signature:
+// The signature is: {'C', 'G', 'P', 'H'}
//
-// 1-byte version number:
-// Currently, the only valid version is 1.
+// 1-byte version number:
+// Currently, the only valid version is 1.
//
-// 1-byte Hash Version (1 = SHA-1)
-// We infer the hash length (H) from this value.
+// 1-byte Hash Version (1 = SHA-1)
+// We infer the hash length (H) from this value.
//
-// 1-byte number (C) of "chunks"
+// 1-byte number (C) of "chunks"
//
-// 1-byte (reserved for later use)
-// Current clients should ignore this value.
+// 1-byte (reserved for later use)
+// Current clients should ignore this value.
//
// CHUNK LOOKUP:
//
-// (C + 1) * 12 bytes listing the table of contents for the chunks:
-// First 4 bytes describe the chunk id. Value 0 is a terminating label.
-// Other 8 bytes provide the byte-offset in current file for chunk to
-// start. (Chunks are ordered contiguously in the file, so you can infer
-// the length using the next chunk position if necessary.) Each chunk
-// ID appears at most once.
+// (C + 1) * 12 bytes listing the table of contents for the chunks:
+// First 4 bytes describe the chunk id. Value 0 is a terminating label.
+// Other 8 bytes provide the byte-offset in current file for chunk to
+// start. (Chunks are ordered contiguously in the file, so you can infer
+// the length using the next chunk position if necessary.) Each chunk
+// ID appears at most once.
//
-// The remaining data in the body is described one chunk at a time, and
-// these chunks may be given in any order. Chunks are required unless
-// otherwise specified.
+// The remaining data in the body is described one chunk at a time, and
+// these chunks may be given in any order. Chunks are required unless
+// otherwise specified.
//
// CHUNK DATA:
//
-// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes)
-// The ith entry, F[i], stores the number of OIDs with first
-// byte at most i. Thus F[255] stores the total
-// number of commits (N).
-//
-// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes)
-// The OIDs for all commits in the graph, sorted in ascending order.
-//
-// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes)
-// * The first H bytes are for the OID of the root tree.
-// * The next 8 bytes are for the positions of the first two parents
-// of the ith commit. Stores value 0x70000000 if no parent in that
-// position. If there are more than two parents, the second value
-// has its most-significant bit on and the other bits store an array
-// position into the Extra Edge List chunk.
-// * The next 8 bytes store the generation number of the commit and
-// the commit time in seconds since EPOCH. The generation number
-// uses the higher 30 bits of the first 4 bytes, while the commit
-// time uses the 32 bits of the second 4 bytes, along with the lowest
-// 2 bits of the lowest byte, storing the 33rd and 34th bit of the
-// commit time.
-//
-// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional]
-// This list of 4-byte values stores the second through nth parents for
-// all octopus merges. The second parent value in the commit data stores
-// an array position within this list along with the most-significant bit
-// on. Starting at that array position, iterate through this list of commit
-// positions for the parents until reaching a value with the most-significant
-// bit on. The other bits correspond to the position of the last parent.
+// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes)
+// The ith entry, F[i], stores the number of OIDs with first
+// byte at most i. Thus F[255] stores the total
+// number of commits (N).
+//
+// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes)
+// The OIDs for all commits in the graph, sorted in ascending order.
+//
+// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes)
+// * The first H bytes are for the OID of the root tree.
+// * The next 8 bytes are for the positions of the first two parents
+// of the ith commit. Stores value 0x70000000 if no parent in that
+// position. If there are more than two parents, the second value
+// has its most-significant bit on and the other bits store an array
+// position into the Extra Edge List chunk.
+// * The next 8 bytes store the generation number of the commit and
+// the commit time in seconds since EPOCH. The generation number
+// uses the higher 30 bits of the first 4 bytes, while the commit
+// time uses the 32 bits of the second 4 bytes, along with the lowest
+// 2 bits of the lowest byte, storing the 33rd and 34th bit of the
+// commit time.
+//
+// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional]
+// This list of 4-byte values stores the second through nth parents for
+// all octopus merges. The second parent value in the commit data stores
+// an array position within this list along with the most-significant bit
+// on. Starting at that array position, iterate through this list of commit
+// positions for the parents until reaching a value with the most-significant
+// bit on. The other bits correspond to the position of the last parent.
//
// TRAILER:
//
-// H-byte HASH-checksum of all of the above.
+// H-byte HASH-checksum of all of the above.
//
// Source:
// https://raw.githubusercontent.com/git/git/master/Documentation/technical/commit-graph-format.txt
diff --git a/plumbing/format/commitgraph/encoder.go b/plumbing/format/commitgraph/encoder.go
index f61025b..3176353 100644
--- a/plumbing/format/commitgraph/encoder.go
+++ b/plumbing/format/commitgraph/encoder.go
@@ -1,6 +1,7 @@
package commitgraph
import (
+ "crypto"
"io"
"github.com/go-git/go-git/v5/plumbing"
@@ -9,12 +10,18 @@ import (
)
// Encoder writes MemoryIndex structs to an output stream.
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
type Encoder struct {
io.Writer
hash hash.Hash
}
// NewEncoder returns a new stream encoder that writes to w.
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
func NewEncoder(w io.Writer) *Encoder {
h := hash.New(hash.CryptoType)
mw := io.MultiWriter(w, h)
@@ -22,6 +29,9 @@ func NewEncoder(w io.Writer) *Encoder {
}
// Encode writes an index into the commit-graph file
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
func (e *Encoder) Encode(idx Index) error {
// Get all the hashes in the input index
hashes := idx.Hashes()
@@ -30,7 +40,7 @@ func (e *Encoder) Encode(idx Index) error {
hashToIndex, fanout, extraEdgesCount := e.prepare(idx, hashes)
chunkSignatures := [][]byte{oidFanoutSignature, oidLookupSignature, commitDataSignature}
- chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * hash.Size, uint64(len(hashes)) * 36}
+ chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * hash.Size, uint64(len(hashes)) * (hash.Size + commitDataSize)}
if extraEdgesCount > 0 {
chunkSignatures = append(chunkSignatures, extraEdgeListSignature)
chunkSizes = append(chunkSizes, uint64(extraEdgesCount)*4)
@@ -88,7 +98,7 @@ func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (hashToIndex map[pl
func (e *Encoder) encodeFileHeader(chunkCount int) (err error) {
if _, err = e.Write(commitFileSignature); err == nil {
- _, err = e.Write([]byte{1, 1, byte(chunkCount), 0})
+ version := byte(1)
+ if hash.CryptoType == crypto.SHA256 {
+ version = byte(2)
+ }
+ _, err = e.Write([]byte{1, version, byte(chunkCount), 0})
}
return
}
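
Two fixes land in this encoder: the Commit Data chunk size is now derived as N * (hash.Size + commitDataSize) rather than a hard-coded 36 bytes, and the header's hash-version byte becomes 2 when the library is built for SHA-256. A small self-contained sketch of both computations, mirroring the constants in this diff rather than introducing new API:

package main

import (
	"crypto"
	"fmt"
)

// commitDataSize mirrors the constant introduced in file.go below:
// two 4-byte parent positions plus an 8-byte generation/time word.
const commitDataSize = 16

func hashVersion(ct crypto.Hash) byte {
	if ct == crypto.SHA256 {
		return 2 // SHA-256 commit graphs use hash version 2
	}
	return 1 // SHA-1
}

func commitDataChunkSize(commits, hashSize int) uint64 {
	// N * (H + 16): root tree OID + parent positions + generation/commit time.
	return uint64(commits) * uint64(hashSize+commitDataSize)
}

func main() {
	fmt.Println(hashVersion(crypto.SHA1), commitDataChunkSize(11, 20))   // 1 396
	fmt.Println(hashVersion(crypto.SHA256), commitDataChunkSize(11, 32)) // 2 528
}
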
diff --git a/plumbing/format/commitgraph/file.go b/plumbing/format/commitgraph/file.go
index 1d25238..ef8fb34 100644
--- a/plumbing/format/commitgraph/file.go
+++ b/plumbing/format/commitgraph/file.go
@@ -2,15 +2,20 @@ package commitgraph
import (
"bytes"
+ "crypto"
encbin "encoding/binary"
"errors"
"io"
"time"
"github.com/go-git/go-git/v5/plumbing"
+ "github.com/go-git/go-git/v5/plumbing/hash"
"github.com/go-git/go-git/v5/utils/binary"
)
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
+
var (
// ErrUnsupportedVersion is returned by OpenFileIndex when the commit graph
// file version is not supported.
@@ -36,6 +41,8 @@ var (
parentLast = uint32(0x80000000)
)
+const commitDataSize = 16
+
type fileIndex struct {
reader io.ReaderAt
fanout [256]int
@@ -47,6 +54,9 @@ type fileIndex struct {
// OpenFileIndex opens a serialized commit graph file in the format described at
// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
func OpenFileIndex(reader io.ReaderAt) (Index, error) {
fi := &fileIndex{reader: reader}
@@ -65,7 +75,7 @@ func OpenFileIndex(reader io.ReaderAt) (Index, error) {
func (fi *fileIndex) verifyFileHeader() error {
// Verify file signature
- var signature = make([]byte, 4)
+ signature := make([]byte, 4)
if _, err := fi.reader.ReadAt(signature, 0); err != nil {
return err
}
@@ -74,22 +84,31 @@ func (fi *fileIndex) verifyFileHeader() error {
}
// Read and verify the file header
- var header = make([]byte, 4)
+ header := make([]byte, 4)
if _, err := fi.reader.ReadAt(header, 4); err != nil {
return err
}
if header[0] != 1 {
return ErrUnsupportedVersion
}
- if header[1] != 1 {
- return ErrUnsupportedHash
+ if hash.CryptoType == crypto.SHA1 {
+ if header[1] != 1 {
+ return ErrUnsupportedVersion
+ }
+ } else if hash.CryptoType == crypto.SHA256 {
+ if header[1] != 2 {
+ return ErrUnsupportedVersion
+ }
+ } else {
+ // Unknown hash type
+ return ErrUnsupportedVersion
}
return nil
}
func (fi *fileIndex) readChunkHeaders() error {
- var chunkID = make([]byte, 4)
+ chunkID := make([]byte, 4)
for i := 0; ; i++ {
chunkHeader := io.NewSectionReader(fi.reader, 8+(int64(i)*12), 12)
if _, err := io.ReadAtLeast(chunkHeader, chunkID, 4); err != nil {
@@ -148,7 +167,7 @@ func (fi *fileIndex) GetIndexByHash(h plumbing.Hash) (int, error) {
high := fi.fanout[h[0]]
for low < high {
mid := (low + high) >> 1
- offset := fi.oidLookupOffset + int64(mid)*20
+ offset := fi.oidLookupOffset + int64(mid)*hash.Size
if _, err := fi.reader.ReadAt(oid[:], offset); err != nil {
return 0, err
}
@@ -170,8 +189,8 @@ func (fi *fileIndex) GetCommitDataByIndex(idx int) (*CommitData, error) {
return nil, plumbing.ErrObjectNotFound
}
- offset := fi.commitDataOffset + int64(idx)*36
- commitDataReader := io.NewSectionReader(fi.reader, offset, 36)
+ offset := fi.commitDataOffset + int64(idx)*(hash.Size+commitDataSize)
+ commitDataReader := io.NewSectionReader(fi.reader, offset, hash.Size+commitDataSize)
treeHash, err := binary.ReadHash(commitDataReader)
if err != nil {
@@ -237,7 +256,7 @@ func (fi *fileIndex) getHashesFromIndexes(indexes []int) ([]plumbing.Hash, error
return nil, ErrMalformedCommitGraphFile
}
- offset := fi.oidLookupOffset + int64(idx)*20
+ offset := fi.oidLookupOffset + int64(idx)*hash.Size
if _, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil {
return nil, err
}
@@ -250,8 +269,8 @@ func (fi *fileIndex) getHashesFromIndexes(indexes []int) ([]plumbing.Hash, error
func (fi *fileIndex) Hashes() []plumbing.Hash {
hashes := make([]plumbing.Hash, fi.fanout[0xff])
for i := 0; i < fi.fanout[0xff]; i++ {
- offset := fi.oidLookupOffset + int64(i)*20
- if n, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil || n < 20 {
+ offset := fi.oidLookupOffset + int64(i)*hash.Size
+ if n, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil || n < hash.Size {
return nil
}
}
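
The offsets above also expose the shape of the lookup that GetIndexByHash performs: the fanout table narrows a binary search to the OIDs sharing the hash's first byte. The following is an illustrative in-memory model of that search, not the package's API:

package main

import (
	"bytes"
	"fmt"
)

// findIndex models GetIndexByHash: fanout[b] counts OIDs whose first
// byte is <= b, so candidates for h live in [fanout[h[0]-1], fanout[h[0]]).
func findIndex(oids [][]byte, fanout [256]int, h []byte) (int, bool) {
	low := 0
	if h[0] > 0 {
		low = fanout[h[0]-1]
	}
	high := fanout[h[0]]
	for low < high {
		mid := (low + high) >> 1
		switch cmp := bytes.Compare(h, oids[mid]); {
		case cmp < 0:
			high = mid
		case cmp > 0:
			low = mid + 1
		default:
			return mid, true
		}
	}
	return 0, false
}

func main() {
	// Three sorted two-byte "OIDs" and their cumulative fanout table.
	oids := [][]byte{{0x03, 0xd2}, {0x34, 0x7c}, {0xe7, 0x13}}
	var fanout [256]int
	for b := 0; b < 256; b++ {
		for _, o := range oids {
			if int(o[0]) <= b {
				fanout[b]++
			}
		}
	}
	fmt.Println(findIndex(oids, fanout, []byte{0x34, 0x7c})) // 1 true
}
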
diff --git a/plumbing/format/commitgraph/memory.go b/plumbing/format/commitgraph/memory.go
index b24ce36..06415e5 100644
--- a/plumbing/format/commitgraph/memory.go
+++ b/plumbing/format/commitgraph/memory.go
@@ -6,12 +6,18 @@ import (
// MemoryIndex provides a way to build the commit-graph in memory
// for later encoding to file.
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
type MemoryIndex struct {
commitData []*CommitData
indexMap map[plumbing.Hash]int
}
// NewMemoryIndex creates in-memory commit graph representation
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
func NewMemoryIndex() *MemoryIndex {
return &MemoryIndex{
indexMap: make(map[plumbing.Hash]int),
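
The next file, v2/chain.go, adds support for commit-graph chains: a chain file is a newline-separated list of graph-file hashes, oldest to newest. A hedged usage sketch of OpenChainFile; the two hashes are illustrative, borrowed from the test data further down:

package main

import (
	"fmt"
	"strings"

	commitgraph "github.com/go-git/go-git/v5/plumbing/format/commitgraph/v2"
)

func main() {
	// A chain file: one graph-file hash per line, oldest first.
	data := "c336d16298a017486c4164c40f8acb28afe64e84\n" +
		"31eae7b619d166c366bf5df4991f04ba8cebea0a\n"

	chain, err := commitgraph.OpenChainFile(strings.NewReader(data))
	if err != nil {
		panic(err)
	}
	// Each entry names objects/info/commit-graphs/graph-<hash>.graph.
	for _, h := range chain {
		fmt.Println("graph-" + h + ".graph")
	}
}
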
diff --git a/plumbing/format/commitgraph/v2/chain.go b/plumbing/format/commitgraph/v2/chain.go new file mode 100644 index 0000000..8da60d0 --- /dev/null +++ b/plumbing/format/commitgraph/v2/chain.go @@ -0,0 +1,100 @@ +package v2 + +import ( + "bufio" + "io" + "path" + + "github.com/go-git/go-billy/v5" + "github.com/go-git/go-git/v5/plumbing" +) + +// OpenChainFile reads a commit chain file and returns a slice of the hashes within it +// +// Commit-Graph chains are described at https://git-scm.com/docs/commit-graph +// and are new line separated list of graph file hashes, oldest to newest. +// +// This function simply reads the file and returns the hashes as a slice. +func OpenChainFile(r io.Reader) ([]string, error) { + if r == nil { + return nil, io.ErrUnexpectedEOF + } + bufRd := bufio.NewReader(r) + chain := make([]string, 0, 8) + for { + line, err := bufRd.ReadSlice('\n') + if err != nil { + if err == io.EOF { + break + } + return nil, err + } + + hashStr := string(line[:len(line)-1]) + if !plumbing.IsHash(hashStr) { + return nil, ErrMalformedCommitGraphFile + } + chain = append(chain, hashStr) + } + return chain, nil +} + +// OpenChainOrFileIndex expects a billy.Filesystem representing a .git directory. +// It will first attempt to read a commit-graph index file, before trying to read a +// commit-graph chain file and its index files. If neither are present, an error is returned. +// Otherwise an Index will be returned. +// +// See: https://git-scm.com/docs/commit-graph +func OpenChainOrFileIndex(fs billy.Filesystem) (Index, error) { + file, err := fs.Open(path.Join("objects", "info", "commit-graph")) + if err != nil { + // try to open a chain file + return OpenChainIndex(fs) + } + + index, err := OpenFileIndex(file) + if err != nil { + // Ignore any file closing errors and return the error from OpenFileIndex instead + _ = file.Close() + return nil, err + } + return index, nil +} + +// OpenChainIndex expects a billy.Filesystem representing a .git directory. +// It will read a commit-graph chain file and return a coalesced index. +// If the chain file or a graph in that chain is not present, an error is returned. +// +// See: https://git-scm.com/docs/commit-graph +func OpenChainIndex(fs billy.Filesystem) (Index, error) { + chainFile, err := fs.Open(path.Join("objects", "info", "commit-graphs", "commit-graph-chain")) + if err != nil { + return nil, err + } + + chain, err := OpenChainFile(chainFile) + _ = chainFile.Close() + if err != nil { + return nil, err + } + + var index Index + for _, hash := range chain { + + file, err := fs.Open(path.Join("objects", "info", "commit-graphs", "graph-"+hash+".graph")) + if err != nil { + // Ignore all other file closing errors and return the error from opening the last file in the graph + _ = index.Close() + return nil, err + } + + index, err = OpenFileIndexWithParent(file, index) + if err != nil { + // Ignore file closing errors and return the error from OpenFileIndex instead + _ = index.Close() + return nil, err + } + } + + return index, nil +} diff --git a/plumbing/format/commitgraph/v2/chain_test.go b/plumbing/format/commitgraph/v2/chain_test.go new file mode 100644 index 0000000..32ffd69 --- /dev/null +++ b/plumbing/format/commitgraph/v2/chain_test.go @@ -0,0 +1,100 @@ +package v2_test + +import ( + "bytes" + "crypto" + "strings" + + commitgraph "github.com/go-git/go-git/v5/plumbing/format/commitgraph/v2" + "github.com/go-git/go-git/v5/plumbing/hash" + + . 
"gopkg.in/check.v1" +) + +func (s *CommitgraphSuite) TestOpenChainFile(c *C) { + sha1Data := []string{ + "c336d16298a017486c4164c40f8acb28afe64e84", + "31eae7b619d166c366bf5df4991f04ba8cebea0a", + "b977a025ca21e3b5ca123d8093bd7917694f6da7", + "d2a38b4a5965d529566566640519d03d2bd10f6c", + "35b585759cbf29f8ec428ef89da20705d59f99ec", + "c2bbf9fe8009b22d0f390f3c8c3f13937067590f", + "fc9f0643b21cfe571046e27e0c4565f3a1ee96c8", + "c088fd6a7e1a38e9d5a9815265cb575bb08d08ff", + "5fddbeb678bd2c36c5e5c891ab8f2b143ced5baf", + "5d7303c49ac984a9fec60523f2d5297682e16646", + } + + sha256Data := []string{ + "b9efda7160f2647e0974ca623f8a8f8e25fb6944f1b8f78f4db1bf07932de8eb", + "7095c59f8bf46e12c21d2d9da344cfe383fae18d26f3ae4d4ab7b71e3d0ddfae", + "25a395cb62f7656294e40a001ee19fefcdf3013d265dfcf4b744cd2549891dec", + "7fbd564813a82227507d9dd70f1fd21fc1f180223cd3f42e0c3090c9a8b6a7d0", + "aa95db1db2df91bd7200a892dd1c03bc2704c4793400d016b3ca08c148b0f7c1", + "2176988184b570565dc33823a02f474ad59f667a0e971c86063a7fea64776a87", + "d0afc0e64171140eb7902110f807a1beaa38a603d4312fd4bd14a5db2784ba62", + "2822136f60bfc58bbd9d624cc19fbef9f0fc0efe2a61729242e1e5f9b77fa3d0", + "6f207b5c43463af96bc38c43b0bf45275fa327e656a8bba8e7fc55c5ab6870d8", + "6cf33782619b6ff0af9c081e46323f423f8b49bf3d043887c0549bef47d60f55", + "60ea0753d2d4e828983528294be3f57e2a3ba37df4f59e3236133c9e2b17afc5", + "6b3c9f4ba5092e0807774097953ec6e9f58e8371d775bd8738a0fa98d728ba3d", + "c97cab8564054e30515dbe67dda4e14638aabf17b3f042d18dc8461cd098b362", + "9f7ece76fd2c9dae08e75176347efffc1446ad74af66004dd34680edb205dfb5", + "23e7a7e481b00571b63c2a7d0432f9733dd85d18a9841a3d7b96743100da5824", + "e684b1253fa8eb6572f35bab2fd3b6efecabf8472ede43497cd9c171973cc341", + "8b9f04080b0c40f7ad2a6bb5e5296cd6c06e730dffce87a0375ae7bd0f85f86e", + "384a745f3b14edc89526a98b96b3247b2b548541c755aadee7664352ed7f12ae", + "b68c8a82cd5b839917e1058570a0408819b81d16dbab81db118cc8dfc3def044", + "fbaf04f1a401335be57e172f4326102c658d857fde6cf2bc987520d11fc99770", + "57acf2aa5ac736337b120c951536c8a2b2cb23a4f0f198e86f3433370fa63105", + "dd7fcba4c13b6ced0b6190cdb5861adcd08446a92d67f7ec0f02f9533e09bbb0", + "744ef481c9b13ebd3b6e43d7e9ba25f7c7a5c8e453e6f0d50f5d71aae1591689", + "2c573142f1edd52b64dcd42a9c3b0ca5c9c615f757d80d25bfb02ff3eb2257e2", + "ea65cc58ef8520cd0335de4318a0d3b3a1ac257b7e9f82e12483fa3bce6cc0cd", + "1dfa626ff1523b82e21a4c29476edcdc9a89842f3c7181f63a28cd4f46cc9923", + "aa1153e71af836121e6f6cc716cf64880c19221d8dc367ff42359de1b8ef30e9", + "a7c6ec6f6569e22d2fa6e8281639d27c59b633ea00ad8ef27a43171cc985fbda", + "627b706d63d2cfd5a388deeaa76655ef09146fe492ee17cb0043578cef9c2800", + "d40eaf091ef8357b734d1047a552436eaf057d99a0c6f2068b097c324099d360", + "87f0ef81641da4fd3438dcaae4819f0c92a0ade54e262b21f9ded4575ff3f234", + "3a00a29e08d29454b5197662f70ccab5699b0ce8c85af7fbf511b8915d97cfd0", + } + + goodShas := sha1Data + badShas := sha256Data + if hash.CryptoType == crypto.SHA256 { + goodShas = sha256Data + badShas = sha1Data + } + chainData := strings.Join(goodShas, "\n") + "\n" + + chainReader := strings.NewReader(chainData) + + chain, err := commitgraph.OpenChainFile(chainReader) + c.Assert(err, IsNil) + c.Assert(goodShas, DeepEquals, chain) + + // Test with bad shas + chainData = strings.Join(badShas, "\n") + "\n" + + chainReader = strings.NewReader(chainData) + + chain, err = commitgraph.OpenChainFile(chainReader) + c.Assert(err, Equals, commitgraph.ErrMalformedCommitGraphFile) + c.Assert(chain, IsNil) + + // Test with empty file + emptyChainReader := bytes.NewReader(nil) + + chain, err = 
commitgraph.OpenChainFile(emptyChainReader) + c.Assert(err, IsNil) + c.Assert(chain, DeepEquals, []string{}) + + // Test with file containing only newlines + newlineChainData := []byte("\n\n\n") + newlineChainReader := bytes.NewReader(newlineChainData) + + chain, err = commitgraph.OpenChainFile(newlineChainReader) + c.Assert(err, Equals, commitgraph.ErrMalformedCommitGraphFile) + c.Assert(chain, IsNil) +} diff --git a/plumbing/format/commitgraph/v2/chunk.go b/plumbing/format/commitgraph/v2/chunk.go new file mode 100644 index 0000000..11f4d31 --- /dev/null +++ b/plumbing/format/commitgraph/v2/chunk.go @@ -0,0 +1,49 @@ +package v2 + +import "bytes" + +const ( + szChunkSig = 4 // Length of a chunk signature + chunkSigOffset = 4 // Offset of each chunk signature in chunkSignatures +) + +// chunkSignatures contains the coalesced byte signatures for each chunk type. +// The order of the signatures must match the order of the ChunkType constants. +// (When adding new chunk types you must avoid introducing ambiguity, and you may need to add padding separators to this list or reorder these signatures.) +// (i.e. it would not be possible to add a new chunk type with the signature "IDFO" without some reordering or the addition of separators.) +var chunkSignatures = []byte("OIDFOIDLCDATGDA2GDO2EDGEBIDXBDATBASE\000\000\000\000") + +// ChunkType represents the type of a chunk in the commit graph file. +type ChunkType int + +const ( + OIDFanoutChunk ChunkType = iota // "OIDF" + OIDLookupChunk // "OIDL" + CommitDataChunk // "CDAT" + GenerationDataChunk // "GDA2" + GenerationDataOverflowChunk // "GDO2" + ExtraEdgeListChunk // "EDGE" + BloomFilterIndexChunk // "BIDX" + BloomFilterDataChunk // "BDAT" + BaseGraphsListChunk // "BASE" + ZeroChunk // "\000\000\000\000" +) +const lenChunks = int(ZeroChunk) // ZeroChunk is not a valid chunk type, but it is used to determine the length of the chunk type list. + +// Signature returns the byte signature for the chunk type. +func (ct ChunkType) Signature() []byte { + if ct >= BaseGraphsListChunk || ct < 0 { // not a valid chunk type just return ZeroChunk + return chunkSignatures[ZeroChunk*chunkSigOffset : ZeroChunk*chunkSigOffset+szChunkSig] + } + + return chunkSignatures[ct*chunkSigOffset : ct*chunkSigOffset+szChunkSig] +} + +// ChunkTypeFromBytes returns the chunk type for the given byte signature. +func ChunkTypeFromBytes(b []byte) (ChunkType, bool) { + idx := bytes.Index(chunkSignatures, b) + if idx == -1 || idx%chunkSigOffset != 0 { // not found, or not aligned at chunkSigOffset + return -1, false + } + return ChunkType(idx / chunkSigOffset), true +} diff --git a/plumbing/format/commitgraph/v2/commitgraph.go b/plumbing/format/commitgraph/v2/commitgraph.go new file mode 100644 index 0000000..9c89cd9 --- /dev/null +++ b/plumbing/format/commitgraph/v2/commitgraph.go @@ -0,0 +1,57 @@ +package v2 + +import ( + "io" + "math" + "time" + + "github.com/go-git/go-git/v5/plumbing" +) + +// CommitData is a reduced representation of Commit as presented in the commit graph +// file. It is merely useful as an optimization for walking the commit graphs. +type CommitData struct { + // TreeHash is the hash of the root tree of the commit. + TreeHash plumbing.Hash + // ParentIndexes are the indexes of the parent commits of the commit. + ParentIndexes []uint32 + // ParentHashes are the hashes of the parent commits of the commit. + ParentHashes []plumbing.Hash + // Generation number is the pre-computed generation in the commit graph + // or zero if not available. 
+ Generation uint64 + // GenerationV2 stores the corrected commit date for the commits + // It combines the contents of the GDA2 and GDO2 sections of the commit-graph + // with the commit time portion of the CDAT section. + GenerationV2 uint64 + // When is the timestamp of the commit. + When time.Time +} + +// GenerationV2Data returns the corrected commit date for the commits +func (c *CommitData) GenerationV2Data() uint64 { + if c.GenerationV2 == 0 || c.GenerationV2 == math.MaxUint64 { + return 0 + } + return c.GenerationV2 - uint64(c.When.Unix()) +} + +// Index represents a representation of commit graph that allows indexed +// access to the nodes using commit object hash +type Index interface { + // GetIndexByHash gets the index in the commit graph from commit hash, if available + GetIndexByHash(h plumbing.Hash) (uint32, error) + // GetHashByIndex gets the hash given an index in the commit graph + GetHashByIndex(i uint32) (plumbing.Hash, error) + // GetNodeByIndex gets the commit node from the commit graph using index + // obtained from child node, if available + GetCommitDataByIndex(i uint32) (*CommitData, error) + // Hashes returns all the hashes that are available in the index + Hashes() []plumbing.Hash + // HasGenerationV2 returns true if the commit graph has the corrected commit date data + HasGenerationV2() bool + // MaximumNumberOfHashes returns the maximum number of hashes within the index + MaximumNumberOfHashes() uint32 + + io.Closer +} diff --git a/plumbing/format/commitgraph/v2/commitgraph_test.go b/plumbing/format/commitgraph/v2/commitgraph_test.go new file mode 100644 index 0000000..1278405 --- /dev/null +++ b/plumbing/format/commitgraph/v2/commitgraph_test.go @@ -0,0 +1,200 @@ +package v2_test
+
+import (
+ "os"
+ "testing"
+
+ "github.com/go-git/go-billy/v5"
+ "github.com/go-git/go-billy/v5/util"
+ "github.com/go-git/go-git/v5/plumbing"
+ "github.com/go-git/go-git/v5/plumbing/cache"
+ commitgraph "github.com/go-git/go-git/v5/plumbing/format/commitgraph/v2"
+ "github.com/go-git/go-git/v5/plumbing/format/packfile"
+ "github.com/go-git/go-git/v5/plumbing/object"
+ "github.com/go-git/go-git/v5/storage/filesystem"
+
+ fixtures "github.com/go-git/go-git-fixtures/v4"
+ . "gopkg.in/check.v1"
+)
+
+func Test(t *testing.T) { TestingT(t) }
+
+type CommitgraphSuite struct {
+ fixtures.Suite
+}
+
+var _ = Suite(&CommitgraphSuite{})
+
+func testReadIndex(c *C, fs billy.Filesystem, path string) commitgraph.Index {
+ reader, err := fs.Open(path)
+ c.Assert(err, IsNil)
+ index, err := commitgraph.OpenFileIndex(reader)
+ c.Assert(err, IsNil)
+ c.Assert(index, NotNil)
+ return index
+}
+
+func testDecodeHelper(c *C, index commitgraph.Index) {
+ // Root commit
+ nodeIndex, err := index.GetIndexByHash(plumbing.NewHash("347c91919944a68e9413581a1bc15519550a3afe"))
+ c.Assert(err, IsNil)
+ commitData, err := index.GetCommitDataByIndex(nodeIndex)
+ c.Assert(err, IsNil)
+ c.Assert(len(commitData.ParentIndexes), Equals, 0)
+ c.Assert(len(commitData.ParentHashes), Equals, 0)
+
+ // Regular commit
+ nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("e713b52d7e13807e87a002e812041f248db3f643"))
+ c.Assert(err, IsNil)
+ commitData, err = index.GetCommitDataByIndex(nodeIndex)
+ c.Assert(err, IsNil)
+ c.Assert(len(commitData.ParentIndexes), Equals, 1)
+ c.Assert(len(commitData.ParentHashes), Equals, 1)
+ c.Assert(commitData.ParentHashes[0].String(), Equals, "347c91919944a68e9413581a1bc15519550a3afe")
+
+ // Merge commit
+ nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("b29328491a0682c259bcce28741eac71f3499f7d"))
+ c.Assert(err, IsNil)
+ commitData, err = index.GetCommitDataByIndex(nodeIndex)
+ c.Assert(err, IsNil)
+ c.Assert(len(commitData.ParentIndexes), Equals, 2)
+ c.Assert(len(commitData.ParentHashes), Equals, 2)
+ c.Assert(commitData.ParentHashes[0].String(), Equals, "e713b52d7e13807e87a002e812041f248db3f643")
+ c.Assert(commitData.ParentHashes[1].String(), Equals, "03d2c021ff68954cf3ef0a36825e194a4b98f981")
+
+ // Octopus merge commit
+ nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("6f6c5d2be7852c782be1dd13e36496dd7ad39560"))
+ c.Assert(err, IsNil)
+ commitData, err = index.GetCommitDataByIndex(nodeIndex)
+ c.Assert(err, IsNil)
+ c.Assert(len(commitData.ParentIndexes), Equals, 3)
+ c.Assert(len(commitData.ParentHashes), Equals, 3)
+ c.Assert(commitData.ParentHashes[0].String(), Equals, "ce275064ad67d51e99f026084e20827901a8361c")
+ c.Assert(commitData.ParentHashes[1].String(), Equals, "bb13916df33ed23004c3ce9ed3b8487528e655c1")
+ c.Assert(commitData.ParentHashes[2].String(), Equals, "a45273fe2d63300e1962a9e26a6b15c276cd7082")
+
+ // Check all hashes
+ hashes := index.Hashes()
+ c.Assert(len(hashes), Equals, 11)
+ c.Assert(hashes[0].String(), Equals, "03d2c021ff68954cf3ef0a36825e194a4b98f981")
+ c.Assert(hashes[10].String(), Equals, "e713b52d7e13807e87a002e812041f248db3f643")
+}
+
+func (s *CommitgraphSuite) TestDecodeMultiChain(c *C) {
+ fixtures.ByTag("commit-graph-chain-2").Test(c, func(f *fixtures.Fixture) {
+ dotgit := f.DotGit()
+ index, err := commitgraph.OpenChainOrFileIndex(dotgit)
+ c.Assert(err, IsNil)
+ defer index.Close()
+ storer := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault())
+ p := f.Packfile()
+ defer p.Close()
+ packfile.UpdateObjectStorage(storer, p)
+
+ for idx, hash := range index.Hashes() {
+ idx2, err := index.GetIndexByHash(hash)
+ c.Assert(err, IsNil)
+ c.Assert(idx2, Equals, uint32(idx))
+ hash2, err := index.GetHashByIndex(idx2)
+ c.Assert(err, IsNil)
+ c.Assert(hash2.String(), Equals, hash.String())
+
+ commitData, err := index.GetCommitDataByIndex(uint32(idx))
+ c.Assert(err, IsNil)
+ commit, err := object.GetCommit(storer, hash)
+ c.Assert(err, IsNil)
+
+ for i, parent := range commit.ParentHashes {
+ c.Assert(hash.String()+":"+parent.String(), Equals, hash.String()+":"+commitData.ParentHashes[i].String())
+ }
+ }
+ })
+}
+
+func (s *CommitgraphSuite) TestDecode(c *C) {
+ fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
+ dotgit := f.DotGit()
+ index := testReadIndex(c, dotgit, dotgit.Join("objects", "info", "commit-graph"))
+ defer index.Close()
+ testDecodeHelper(c, index)
+ })
+}
+
+func (s *CommitgraphSuite) TestDecodeChain(c *C) {
+ fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
+ dotgit := f.DotGit()
+ index, err := commitgraph.OpenChainOrFileIndex(dotgit)
+ c.Assert(err, IsNil)
+ defer index.Close()
+ testDecodeHelper(c, index)
+ })
+
+ fixtures.ByTag("commit-graph-chain").Test(c, func(f *fixtures.Fixture) {
+ dotgit := f.DotGit()
+ index, err := commitgraph.OpenChainOrFileIndex(dotgit)
+ c.Assert(err, IsNil)
+ defer index.Close()
+ testDecodeHelper(c, index)
+ })
+}
+
+func (s *CommitgraphSuite) TestReencode(c *C) {
+ fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
+ dotgit := f.DotGit()
+
+ reader, err := dotgit.Open(dotgit.Join("objects", "info", "commit-graph"))
+ c.Assert(err, IsNil)
+ defer reader.Close()
+ index, err := commitgraph.OpenFileIndex(reader)
+ c.Assert(err, IsNil)
+ defer index.Close()
+
+ writer, err := util.TempFile(dotgit, "", "commit-graph")
+ c.Assert(err, IsNil)
+ tmpName := writer.Name()
+ defer os.Remove(tmpName)
+
+ encoder := commitgraph.NewEncoder(writer)
+ err = encoder.Encode(index)
+ c.Assert(err, IsNil)
+ writer.Close()
+
+ tmpIndex := testReadIndex(c, dotgit, tmpName)
+ defer tmpIndex.Close()
+ testDecodeHelper(c, tmpIndex)
+ })
+}
+
+func (s *CommitgraphSuite) TestReencodeInMemory(c *C) {
+ fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
+ dotgit := f.DotGit()
+
+ reader, err := dotgit.Open(dotgit.Join("objects", "info", "commit-graph"))
+ c.Assert(err, IsNil)
+ index, err := commitgraph.OpenFileIndex(reader)
+ c.Assert(err, IsNil)
+
+ memoryIndex := commitgraph.NewMemoryIndex()
+ defer memoryIndex.Close()
+ for i, hash := range index.Hashes() {
+ commitData, err := index.GetCommitDataByIndex(uint32(i))
+ c.Assert(err, IsNil)
+ memoryIndex.Add(hash, commitData)
+ }
+ index.Close()
+
+ writer, err := util.TempFile(dotgit, "", "commit-graph")
+ c.Assert(err, IsNil)
+ tmpName := writer.Name()
+ defer os.Remove(tmpName)
+
+ encoder := commitgraph.NewEncoder(writer)
+ err = encoder.Encode(memoryIndex)
+ c.Assert(err, IsNil)
+ writer.Close()
+
+ tmpIndex := testReadIndex(c, dotgit, tmpName)
+ defer tmpIndex.Close()
+ testDecodeHelper(c, tmpIndex)
+ })
+}
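
The tests above cover both a single commit-graph file and chained graphs; in application code the natural entry point is OpenChainOrFileIndex, which falls back from the plain file to the chain. A sketch, assuming an on-disk .git directory exposed through go-billy's osfs (the path is illustrative):

package main

import (
	"fmt"

	"github.com/go-git/go-billy/v5/osfs"
	commitgraph "github.com/go-git/go-git/v5/plumbing/format/commitgraph/v2"
)

func main() {
	fs := osfs.New("/path/to/repo/.git") // hypothetical path
	idx, err := commitgraph.OpenChainOrFileIndex(fs)
	if err != nil {
		fmt.Println("no usable commit-graph:", err)
		return
	}
	defer idx.Close()
	fmt.Println("graph holds", idx.MaximumNumberOfHashes(), "commits")
}
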
diff --git a/plumbing/format/commitgraph/v2/doc.go b/plumbing/format/commitgraph/v2/doc.go new file mode 100644 index 0000000..157621d --- /dev/null +++ b/plumbing/format/commitgraph/v2/doc.go @@ -0,0 +1,106 @@ +// Package v2 implements encoding and decoding of commit-graph files. +// +// This package was created to work around the issues of the incorrect types in +// the commitgraph package. +// +// Git commit graph format +// ======================= +// +// The Git commit graph stores a list of commit OIDs and some associated +// metadata, including: +// +// - The generation number of the commit. Commits with no parents have +// generation number 1; commits with parents have generation number +// one more than the maximum generation number of its parents. We +// reserve zero as special, and can be used to mark a generation +// number invalid or as "not computed". +// +// - The root tree OID. +// +// - The commit date. +// +// - The parents of the commit, stored using positional references within +// the graph file. +// +// These positional references are stored as unsigned 32-bit integers +// corresponding to the array position within the list of commit OIDs. Due +// to some special constants we use to track parents, we can store at most +// (1 << 30) + (1 << 29) + (1 << 28) - 1 (around 1.8 billion) commits. +// +// == Commit graph files have the following format: +// +// In order to allow extensions that add extra data to the graph, we organize +// the body into "chunks" and provide a binary lookup table at the beginning +// of the body. The header includes certain values, such as number of chunks +// and hash type. +// +// All 4-byte numbers are in network order. +// +// HEADER: +// +// 4-byte signature: +// The signature is: {'C', 'G', 'P', 'H'} +// +// 1-byte version number: +// Currently, the only valid version is 1. +// +// 1-byte Hash Version (1 = SHA-1) +// We infer the hash length (H) from this value. +// +// 1-byte number (C) of "chunks" +// +// 1-byte (reserved for later use) +// Current clients should ignore this value. +// +// CHUNK LOOKUP: +// +// (C + 1) * 12 bytes listing the table of contents for the chunks: +// First 4 bytes describe the chunk id. Value 0 is a terminating label. +// Other 8 bytes provide the byte-offset in current file for chunk to +// start. (Chunks are ordered contiguously in the file, so you can infer +// the length using the next chunk position if necessary.) Each chunk +// ID appears at most once. +// +// The remaining data in the body is described one chunk at a time, and +// these chunks may be given in any order. Chunks are required unless +// otherwise specified. +// +// CHUNK DATA: +// +// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes) +// The ith entry, F[i], stores the number of OIDs with first +// byte at most i. Thus F[255] stores the total +// number of commits (N). +// +// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes) +// The OIDs for all commits in the graph, sorted in ascending order. +// +// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes) +// * The first H bytes are for the OID of the root tree. +// * The next 8 bytes are for the positions of the first two parents +// of the ith commit. Stores value 0x7000000 if no parent in that +// position. If there are more than two parents, the second value +// has its most-significant bit on and the other bits store an array +// position into the Extra Edge List chunk. 
+// * The next 8 bytes store the generation number of the commit and +// the commit time in seconds since EPOCH. The generation number +// uses the higher 30 bits of the first 4 bytes, while the commit +// time uses the 32 bits of the second 4 bytes, along with the lowest +// 2 bits of the lowest byte, storing the 33rd and 34th bit of the +// commit time. +// +// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional] +// This list of 4-byte values store the second through nth parents for +// all octopus merges. The second parent value in the commit data stores +// an array position within this list along with the most-significant bit +// on. Starting at that array position, iterate through this list of commit +// positions for the parents until reaching a value with the most-significant +// bit on. The other bits correspond to the position of the last parent. +// +// TRAILER: +// +// H-byte HASH-checksum of all of the above. +// +// Source: +// https://raw.githubusercontent.com/git/git/master/Documentation/technical/commit-graph-format.txt +package v2 diff --git a/plumbing/format/commitgraph/v2/encoder.go b/plumbing/format/commitgraph/v2/encoder.go new file mode 100644 index 0000000..b79bc77 --- /dev/null +++ b/plumbing/format/commitgraph/v2/encoder.go @@ -0,0 +1,250 @@ +package v2 + +import ( + "crypto" + "io" + "math" + + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/hash" + "github.com/go-git/go-git/v5/utils/binary" +) + +// Encoder writes MemoryIndex structs to an output stream. +type Encoder struct { + io.Writer + hash hash.Hash +} + +// NewEncoder returns a new stream encoder that writes to w. +func NewEncoder(w io.Writer) *Encoder { + h := hash.New(hash.CryptoType) + mw := io.MultiWriter(w, h) + return &Encoder{mw, h} +} + +// Encode writes an index into the commit-graph file +func (e *Encoder) Encode(idx Index) error { + // Get all the hashes in the input index + hashes := idx.Hashes() + + // Sort the inout and prepare helper structures we'll need for encoding + hashToIndex, fanout, extraEdgesCount, generationV2OverflowCount := e.prepare(idx, hashes) + + chunkSignatures := [][]byte{OIDFanoutChunk.Signature(), OIDLookupChunk.Signature(), CommitDataChunk.Signature()} + chunkSizes := []uint64{szUint32 * lenFanout, uint64(len(hashes)) * hash.Size, uint64(len(hashes)) * (hash.Size + szCommitData)} + if extraEdgesCount > 0 { + chunkSignatures = append(chunkSignatures, ExtraEdgeListChunk.Signature()) + chunkSizes = append(chunkSizes, uint64(extraEdgesCount)*szUint32) + } + if idx.HasGenerationV2() { + chunkSignatures = append(chunkSignatures, GenerationDataChunk.Signature()) + chunkSizes = append(chunkSizes, uint64(len(hashes))*szUint32) + if generationV2OverflowCount > 0 { + chunkSignatures = append(chunkSignatures, GenerationDataOverflowChunk.Signature()) + chunkSizes = append(chunkSizes, uint64(generationV2OverflowCount)*szUint64) + } + } + + if err := e.encodeFileHeader(len(chunkSignatures)); err != nil { + return err + } + if err := e.encodeChunkHeaders(chunkSignatures, chunkSizes); err != nil { + return err + } + if err := e.encodeFanout(fanout); err != nil { + return err + } + if err := e.encodeOidLookup(hashes); err != nil { + return err + } + + extraEdges, generationV2Data, err := e.encodeCommitData(hashes, hashToIndex, idx) + if err != nil { + return err + } + if err = e.encodeExtraEdges(extraEdges); err != nil { + return err + } + if idx.HasGenerationV2() { + overflows, err := e.encodeGenerationV2Data(generationV2Data) + if err != nil { + return 
err + } + if err = e.encodeGenerationV2Overflow(overflows); err != nil { + return err + } + } + + return e.encodeChecksum() +} + +func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (hashToIndex map[plumbing.Hash]uint32, fanout []uint32, extraEdgesCount uint32, generationV2OverflowCount uint32) { + // Sort the hashes and build our index + plumbing.HashesSort(hashes) + hashToIndex = make(map[plumbing.Hash]uint32) + fanout = make([]uint32, lenFanout) + for i, hash := range hashes { + hashToIndex[hash] = uint32(i) + fanout[hash[0]]++ + } + + // Convert the fanout to cumulative values + for i := 1; i < lenFanout; i++ { + fanout[i] += fanout[i-1] + } + + hasGenerationV2 := idx.HasGenerationV2() + + // Find out if we will need extra edge table + for i := 0; i < len(hashes); i++ { + v, _ := idx.GetCommitDataByIndex(uint32(i)) + if len(v.ParentHashes) > 2 { + extraEdgesCount += uint32(len(v.ParentHashes) - 1) + } + if hasGenerationV2 && v.GenerationV2Data() > math.MaxUint32 { + generationV2OverflowCount++ + } + } + + return +} + +func (e *Encoder) encodeFileHeader(chunkCount int) (err error) { + if _, err = e.Write(commitFileSignature); err == nil { + version := byte(1) + if hash.CryptoType == crypto.SHA256 { + version = byte(2) + } + _, err = e.Write([]byte{1, version, byte(chunkCount), 0}) + } + return +} + +func (e *Encoder) encodeChunkHeaders(chunkSignatures [][]byte, chunkSizes []uint64) (err error) { + // 8 bytes of file header, 12 bytes for each chunk header and 12 byte for terminator + offset := uint64(szSignature + szHeader + (len(chunkSignatures)+1)*(szChunkSig+szUint64)) + for i, signature := range chunkSignatures { + if _, err = e.Write(signature); err == nil { + err = binary.WriteUint64(e, offset) + } + if err != nil { + return + } + offset += chunkSizes[i] + } + if _, err = e.Write(ZeroChunk.Signature()); err == nil { + err = binary.WriteUint64(e, offset) + } + return +} + +func (e *Encoder) encodeFanout(fanout []uint32) (err error) { + for i := 0; i <= 0xff; i++ { + if err = binary.WriteUint32(e, fanout[i]); err != nil { + return + } + } + return +} + +func (e *Encoder) encodeOidLookup(hashes []plumbing.Hash) (err error) { + for _, hash := range hashes { + if _, err = e.Write(hash[:]); err != nil { + return err + } + } + return +} + +func (e *Encoder) encodeCommitData(hashes []plumbing.Hash, hashToIndex map[plumbing.Hash]uint32, idx Index) (extraEdges []uint32, generationV2Data []uint64, err error) { + if idx.HasGenerationV2() { + generationV2Data = make([]uint64, 0, len(hashes)) + } + for _, hash := range hashes { + origIndex, _ := idx.GetIndexByHash(hash) + commitData, _ := idx.GetCommitDataByIndex(origIndex) + if _, err = e.Write(commitData.TreeHash[:]); err != nil { + return + } + + var parent1, parent2 uint32 + if len(commitData.ParentHashes) == 0 { + parent1 = parentNone + parent2 = parentNone + } else if len(commitData.ParentHashes) == 1 { + parent1 = hashToIndex[commitData.ParentHashes[0]] + parent2 = parentNone + } else if len(commitData.ParentHashes) == 2 { + parent1 = hashToIndex[commitData.ParentHashes[0]] + parent2 = hashToIndex[commitData.ParentHashes[1]] + } else if len(commitData.ParentHashes) > 2 { + parent1 = hashToIndex[commitData.ParentHashes[0]] + parent2 = uint32(len(extraEdges)) | parentOctopusUsed + for _, parentHash := range commitData.ParentHashes[1:] { + extraEdges = append(extraEdges, hashToIndex[parentHash]) + } + extraEdges[len(extraEdges)-1] |= parentLast + } + + if err = binary.WriteUint32(e, parent1); err == nil { + err = 
binary.WriteUint32(e, parent2) + } + if err != nil { + return + } + + unixTime := uint64(commitData.When.Unix()) + unixTime |= uint64(commitData.Generation) << 34 + if err = binary.WriteUint64(e, unixTime); err != nil { + return + } + if generationV2Data != nil { + generationV2Data = append(generationV2Data, commitData.GenerationV2Data()) + } + } + return +} + +func (e *Encoder) encodeExtraEdges(extraEdges []uint32) (err error) { + for _, parent := range extraEdges { + if err = binary.WriteUint32(e, parent); err != nil { + return + } + } + return +} + +func (e *Encoder) encodeGenerationV2Data(generationV2Data []uint64) (overflows []uint64, err error) { + head := 0 + for _, data := range generationV2Data { + if data >= 0x80000000 { + // overflow + if err = binary.WriteUint32(e, uint32(head)|0x80000000); err != nil { + return nil, err + } + generationV2Data[head] = data + head++ + continue + } + if err = binary.WriteUint32(e, uint32(data)); err != nil { + return nil, err + } + } + + return generationV2Data[:head], nil +} + +func (e *Encoder) encodeGenerationV2Overflow(overflows []uint64) (err error) { + for _, overflow := range overflows { + if err = binary.WriteUint64(e, overflow); err != nil { + return + } + } + return +} + +func (e *Encoder) encodeChecksum() error { + _, err := e.Write(e.hash.Sum(nil)[:hash.Size]) + return err +} diff --git a/plumbing/format/commitgraph/v2/file.go b/plumbing/format/commitgraph/v2/file.go new file mode 100644 index 0000000..c5f61e4 --- /dev/null +++ b/plumbing/format/commitgraph/v2/file.go @@ -0,0 +1,412 @@ +package v2 + +import ( + "bytes" + "crypto" + encbin "encoding/binary" + "errors" + "io" + "time" + + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/hash" + "github.com/go-git/go-git/v5/utils/binary" +) + +var ( + // ErrUnsupportedVersion is returned by OpenFileIndex when the commit graph + // file version is not supported. + ErrUnsupportedVersion = errors.New("unsupported version") + // ErrUnsupportedHash is returned by OpenFileIndex when the commit graph + // hash function is not supported. Currently only SHA-1 is defined and + // supported. + ErrUnsupportedHash = errors.New("unsupported hash algorithm") + // ErrMalformedCommitGraphFile is returned by OpenFileIndex when the commit + // graph file is corrupted. + ErrMalformedCommitGraphFile = errors.New("malformed commit graph file") + + commitFileSignature = []byte{'C', 'G', 'P', 'H'} + + parentNone = uint32(0x70000000) + parentOctopusUsed = uint32(0x80000000) + parentOctopusMask = uint32(0x7fffffff) + parentLast = uint32(0x80000000) +) + +const ( + szUint32 = 4 + szUint64 = 8 + + szSignature = 4 + szHeader = 4 + szCommitData = 2*szUint32 + szUint64 + + lenFanout = 256 +) + +type fileIndex struct { + reader ReaderAtCloser + fanout [lenFanout]uint32 + offsets [lenChunks]int64 + parent Index + hasGenerationV2 bool + minimumNumberOfHashes uint32 +} + +// ReaderAtCloser is an interface that combines io.ReaderAt and io.Closer. 
+type ReaderAtCloser interface { + io.ReaderAt + io.Closer +} + +// OpenFileIndex opens a serialized commit graph file in the format described at +// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt +func OpenFileIndex(reader ReaderAtCloser) (Index, error) { + return OpenFileIndexWithParent(reader, nil) +} + +// OpenFileIndexWithParent opens a serialized commit graph file in the format described at +// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt +func OpenFileIndexWithParent(reader ReaderAtCloser, parent Index) (Index, error) { + if reader == nil { + return nil, io.ErrUnexpectedEOF + } + fi := &fileIndex{reader: reader, parent: parent} + + if err := fi.verifyFileHeader(); err != nil { + return nil, err + } + if err := fi.readChunkHeaders(); err != nil { + return nil, err + } + if err := fi.readFanout(); err != nil { + return nil, err + } + + fi.hasGenerationV2 = fi.offsets[GenerationDataChunk] > 0 + if fi.parent != nil { + fi.hasGenerationV2 = fi.hasGenerationV2 && fi.parent.HasGenerationV2() + } + + if fi.parent != nil { + fi.minimumNumberOfHashes = fi.parent.MaximumNumberOfHashes() + } + + return fi, nil +} + +// Close closes the underlying reader and the parent index if it exists. +func (fi *fileIndex) Close() (err error) { + if fi.parent != nil { + defer func() { + parentErr := fi.parent.Close() + // only report the error from the parent if there is no error from the reader + if err == nil { + err = parentErr + } + }() + } + err = fi.reader.Close() + return +} + +func (fi *fileIndex) verifyFileHeader() error { + // Verify file signature + signature := make([]byte, szSignature) + if _, err := fi.reader.ReadAt(signature, 0); err != nil { + return err + } + if !bytes.Equal(signature, commitFileSignature) { + return ErrMalformedCommitGraphFile + } + + // Read and verify the file header + header := make([]byte, szHeader) + if _, err := fi.reader.ReadAt(header, szHeader); err != nil { + return err + } + if header[0] != 1 { + return ErrUnsupportedVersion + } + if !(hash.CryptoType == crypto.SHA1 && header[1] == 1) && + !(hash.CryptoType == crypto.SHA256 && header[1] == 2) { + // Unknown hash type / unsupported hash type + return ErrUnsupportedHash + } + + return nil +} + +func (fi *fileIndex) readChunkHeaders() error { + // The chunk table is a list of 4-byte chunk signatures and uint64 offsets into the file + chunkID := make([]byte, szChunkSig) + for i := 0; ; i++ { + chunkHeader := io.NewSectionReader(fi.reader, szSignature+szHeader+(int64(i)*(szChunkSig+szUint64)), szChunkSig+szUint64) + if _, err := io.ReadAtLeast(chunkHeader, chunkID, szChunkSig); err != nil { + return err + } + chunkOffset, err := binary.ReadUint64(chunkHeader) + if err != nil { + return err + } + + chunkType, ok := ChunkTypeFromBytes(chunkID) + if !ok { + continue + } + if chunkType == ZeroChunk || int(chunkType) >= len(fi.offsets) { + break + } + fi.offsets[chunkType] = int64(chunkOffset) + } + + if fi.offsets[OIDFanoutChunk] <= 0 || fi.offsets[OIDLookupChunk] <= 0 || fi.offsets[CommitDataChunk] <= 0 { + return ErrMalformedCommitGraphFile + } + + return nil +} + +func (fi *fileIndex) readFanout() error { + // The Fanout table is a 256 entry table of the number (as uint32) of OIDs with first byte at most i. 
+ // Thus F[255] stores the total number of commits (N) + fanoutReader := io.NewSectionReader(fi.reader, fi.offsets[OIDFanoutChunk], lenFanout*szUint32) + for i := 0; i < 256; i++ { + fanoutValue, err := binary.ReadUint32(fanoutReader) + if err != nil { + return err + } + if fanoutValue > 0x7fffffff { + return ErrMalformedCommitGraphFile + } + fi.fanout[i] = fanoutValue + } + return nil +} + +// GetIndexByHash looks up the provided hash in the commit-graph fanout and returns the index of the commit data for the given hash. +func (fi *fileIndex) GetIndexByHash(h plumbing.Hash) (uint32, error) { + var oid plumbing.Hash + + // Find the hash in the oid lookup table + var low uint32 + if h[0] == 0 { + low = 0 + } else { + low = fi.fanout[h[0]-1] + } + high := fi.fanout[h[0]] + for low < high { + mid := (low + high) >> 1 + offset := fi.offsets[OIDLookupChunk] + int64(mid)*hash.Size + if _, err := fi.reader.ReadAt(oid[:], offset); err != nil { + return 0, err + } + cmp := bytes.Compare(h[:], oid[:]) + if cmp < 0 { + high = mid + } else if cmp == 0 { + return mid + fi.minimumNumberOfHashes, nil + } else { + low = mid + 1 + } + } + + if fi.parent != nil { + idx, err := fi.parent.GetIndexByHash(h) + if err != nil { + return 0, err + } + return idx, nil + } + + return 0, plumbing.ErrObjectNotFound +} + +// GetCommitDataByIndex returns the commit data for the given index in the commit-graph. +func (fi *fileIndex) GetCommitDataByIndex(idx uint32) (*CommitData, error) { + if idx < fi.minimumNumberOfHashes { + if fi.parent != nil { + data, err := fi.parent.GetCommitDataByIndex(idx) + if err != nil { + return nil, err + } + return data, nil + } + + return nil, plumbing.ErrObjectNotFound + } + idx -= fi.minimumNumberOfHashes + if idx >= fi.fanout[0xff] { + return nil, plumbing.ErrObjectNotFound + } + + offset := fi.offsets[CommitDataChunk] + int64(idx)*(hash.Size+szCommitData) + commitDataReader := io.NewSectionReader(fi.reader, offset, hash.Size+szCommitData) + + treeHash, err := binary.ReadHash(commitDataReader) + if err != nil { + return nil, err + } + parent1, err := binary.ReadUint32(commitDataReader) + if err != nil { + return nil, err + } + parent2, err := binary.ReadUint32(commitDataReader) + if err != nil { + return nil, err + } + genAndTime, err := binary.ReadUint64(commitDataReader) + if err != nil { + return nil, err + } + + var parentIndexes []uint32 + if parent2&parentOctopusUsed == parentOctopusUsed { + // Octopus merge - Look-up the extra parents from the extra edge list + // The extra edge list is a list of uint32s, each of which is an index into the Commit Data table, terminated by a index with the most significant bit on. 
+ parentIndexes = []uint32{parent1 & parentOctopusMask} + offset := fi.offsets[ExtraEdgeListChunk] + szUint32*int64(parent2&parentOctopusMask) + buf := make([]byte, szUint32) + for { + _, err := fi.reader.ReadAt(buf, offset) + if err != nil { + return nil, err + } + + parent := encbin.BigEndian.Uint32(buf) + offset += szUint32 + parentIndexes = append(parentIndexes, parent&parentOctopusMask) + if parent&parentLast == parentLast { + break + } + } + } else if parent2 != parentNone { + parentIndexes = []uint32{parent1 & parentOctopusMask, parent2 & parentOctopusMask} + } else if parent1 != parentNone { + parentIndexes = []uint32{parent1 & parentOctopusMask} + } + + parentHashes, err := fi.getHashesFromIndexes(parentIndexes) + if err != nil { + return nil, err + } + + generationV2 := uint64(0) + + if fi.hasGenerationV2 { + // set the GenerationV2 result to the commit time + generationV2 = uint64(genAndTime & 0x3FFFFFFFF) + + // Next read the generation (offset) data from the generation data chunk + offset := fi.offsets[GenerationDataChunk] + int64(idx)*szUint32 + buf := make([]byte, szUint32) + if _, err := fi.reader.ReadAt(buf, offset); err != nil { + return nil, err + } + genV2Data := encbin.BigEndian.Uint32(buf) + + // check if the data is an overflow that needs to be looked up in the overflow chunk + if genV2Data&0x80000000 > 0 { + // Overflow + offset := fi.offsets[GenerationDataOverflowChunk] + int64(genV2Data&0x7fffffff)*szUint64 + buf := make([]byte, 8) + if _, err := fi.reader.ReadAt(buf, offset); err != nil { + return nil, err + } + + generationV2 += encbin.BigEndian.Uint64(buf) + } else { + generationV2 += uint64(genV2Data) + } + } + + return &CommitData{ + TreeHash: treeHash, + ParentIndexes: parentIndexes, + ParentHashes: parentHashes, + Generation: genAndTime >> 34, + GenerationV2: generationV2, + When: time.Unix(int64(genAndTime&0x3FFFFFFFF), 0), + }, nil +} + +// GetHashByIndex looks up the hash for the given index in the commit-graph. +func (fi *fileIndex) GetHashByIndex(idx uint32) (found plumbing.Hash, err error) { + if idx < fi.minimumNumberOfHashes { + if fi.parent != nil { + return fi.parent.GetHashByIndex(idx) + } + return found, ErrMalformedCommitGraphFile + } + idx -= fi.minimumNumberOfHashes + if idx >= fi.fanout[0xff] { + return found, ErrMalformedCommitGraphFile + } + + offset := fi.offsets[OIDLookupChunk] + int64(idx)*hash.Size + if _, err := fi.reader.ReadAt(found[:], offset); err != nil { + return found, err + } + + return found, nil +} + +func (fi *fileIndex) getHashesFromIndexes(indexes []uint32) ([]plumbing.Hash, error) { + hashes := make([]plumbing.Hash, len(indexes)) + + for i, idx := range indexes { + if idx < fi.minimumNumberOfHashes { + if fi.parent != nil { + hash, err := fi.parent.GetHashByIndex(idx) + if err != nil { + return nil, err + } + hashes[i] = hash + continue + } + + return nil, ErrMalformedCommitGraphFile + } + + idx -= fi.minimumNumberOfHashes + if idx >= fi.fanout[0xff] { + return nil, ErrMalformedCommitGraphFile + } + + offset := fi.offsets[OIDLookupChunk] + int64(idx)*hash.Size + if _, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil { + return nil, err + } + } + + return hashes, nil +} + +// Hashes returns all the hashes that are available in the index. 
+
+// GetHashByIndex looks up the hash for the given index in the commit-graph.
+func (fi *fileIndex) GetHashByIndex(idx uint32) (found plumbing.Hash, err error) {
+    if idx < fi.minimumNumberOfHashes {
+        if fi.parent != nil {
+            return fi.parent.GetHashByIndex(idx)
+        }
+        return found, ErrMalformedCommitGraphFile
+    }
+    idx -= fi.minimumNumberOfHashes
+    if idx >= fi.fanout[0xff] {
+        return found, ErrMalformedCommitGraphFile
+    }
+
+    offset := fi.offsets[OIDLookupChunk] + int64(idx)*hash.Size
+    if _, err := fi.reader.ReadAt(found[:], offset); err != nil {
+        return found, err
+    }
+
+    return found, nil
+}
+
+func (fi *fileIndex) getHashesFromIndexes(indexes []uint32) ([]plumbing.Hash, error) {
+    hashes := make([]plumbing.Hash, len(indexes))
+
+    for i, idx := range indexes {
+        if idx < fi.minimumNumberOfHashes {
+            if fi.parent != nil {
+                hash, err := fi.parent.GetHashByIndex(idx)
+                if err != nil {
+                    return nil, err
+                }
+                hashes[i] = hash
+                continue
+            }
+
+            return nil, ErrMalformedCommitGraphFile
+        }
+
+        idx -= fi.minimumNumberOfHashes
+        if idx >= fi.fanout[0xff] {
+            return nil, ErrMalformedCommitGraphFile
+        }
+
+        offset := fi.offsets[OIDLookupChunk] + int64(idx)*hash.Size
+        if _, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil {
+            return nil, err
+        }
+    }
+
+    return hashes, nil
+}
+
+// Hashes returns all the hashes that are available in the index.
+func (fi *fileIndex) Hashes() []plumbing.Hash {
+    hashes := make([]plumbing.Hash, fi.fanout[0xff]+fi.minimumNumberOfHashes)
+    for i := uint32(0); i < fi.minimumNumberOfHashes; i++ {
+        hash, err := fi.parent.GetHashByIndex(i)
+        if err != nil {
+            return nil
+        }
+        hashes[i] = hash
+    }
+
+    for i := uint32(0); i < fi.fanout[0xff]; i++ {
+        offset := fi.offsets[OIDLookupChunk] + int64(i)*hash.Size
+        if n, err := fi.reader.ReadAt(hashes[i+fi.minimumNumberOfHashes][:], offset); err != nil || n < hash.Size {
+            return nil
+        }
+    }
+    return hashes
+}
+
+func (fi *fileIndex) HasGenerationV2() bool {
+    return fi.hasGenerationV2
+}
+
+func (fi *fileIndex) MaximumNumberOfHashes() uint32 {
+    return fi.minimumNumberOfHashes + fi.fanout[0xff]
+}
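That closes out the file-backed index. Note how every accessor funnels low indexes to fi.parent: in a split commit-graph chain each file owns the global index range [minimumNumberOfHashes, minimumNumberOfHashes+fanout[0xff]). A toy model of that translation, with simplified, illustrative types:

    package main

    import "fmt"

    // chainFile models one file in a commit-graph chain: it owns indexes
    // [base, base+count), where base is the hash count of all parent files
    // (minimumNumberOfHashes in the diff above) and count is fanout[0xff].
    type chainFile struct {
        parent *chainFile
        base   uint32
        count  uint32
    }

    // resolve walks down the chain until the global index falls inside a file.
    func (f *chainFile) resolve(idx uint32) (*chainFile, uint32, bool) {
        if idx < f.base {
            if f.parent != nil {
                return f.parent.resolve(idx)
            }
            return nil, 0, false
        }
        local := idx - f.base
        if local >= f.count {
            return nil, 0, false
        }
        return f, local, true
    }

    func main() {
        root := &chainFile{base: 0, count: 100}
        tip := &chainFile{parent: root, base: 100, count: 10}
        f, local, ok := tip.resolve(42)   // falls through to the root file
        fmt.Println(f == root, local, ok) // true 42 true
    }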
diff --git a/plumbing/format/commitgraph/v2/memory.go b/plumbing/format/commitgraph/v2/memory.go
new file mode 100644
index 0000000..8de0c5f
--- /dev/null
+++ b/plumbing/format/commitgraph/v2/memory.go
@@ -0,0 +1,107 @@
+package v2
+
+import (
+    "math"
+
+    "github.com/go-git/go-git/v5/plumbing"
+)
+
+// MemoryIndex provides a way to build the commit-graph in memory
+// for later encoding to file.
+type MemoryIndex struct {
+    commitData      []commitData
+    indexMap        map[plumbing.Hash]uint32
+    hasGenerationV2 bool
+}
+
+type commitData struct {
+    Hash plumbing.Hash
+    *CommitData
+}
+
+// NewMemoryIndex creates an in-memory commit-graph representation.
+func NewMemoryIndex() *MemoryIndex {
+    return &MemoryIndex{
+        indexMap:        make(map[plumbing.Hash]uint32),
+        hasGenerationV2: true,
+    }
+}
+
+// GetIndexByHash gets the index in the commit graph from commit hash, if available
+func (mi *MemoryIndex) GetIndexByHash(h plumbing.Hash) (uint32, error) {
+    i, ok := mi.indexMap[h]
+    if ok {
+        return i, nil
+    }
+
+    return 0, plumbing.ErrObjectNotFound
+}
+
+// GetHashByIndex gets the hash given an index in the commit graph
+func (mi *MemoryIndex) GetHashByIndex(i uint32) (plumbing.Hash, error) {
+    if i >= uint32(len(mi.commitData)) {
+        return plumbing.ZeroHash, plumbing.ErrObjectNotFound
+    }
+
+    return mi.commitData[i].Hash, nil
+}
+
+// GetCommitDataByIndex gets the commit node from the commit graph using index
+// obtained from child node, if available
+func (mi *MemoryIndex) GetCommitDataByIndex(i uint32) (*CommitData, error) {
+    if i >= uint32(len(mi.commitData)) {
+        return nil, plumbing.ErrObjectNotFound
+    }
+
+    commitData := mi.commitData[i]
+
+    // Map parent hashes to parent indexes
+    if commitData.ParentIndexes == nil {
+        parentIndexes := make([]uint32, len(commitData.ParentHashes))
+        for i, parentHash := range commitData.ParentHashes {
+            var err error
+            if parentIndexes[i], err = mi.GetIndexByHash(parentHash); err != nil {
+                return nil, err
+            }
+        }
+        commitData.ParentIndexes = parentIndexes
+    }
+
+    return commitData.CommitData, nil
+}
+
+// Hashes returns all the hashes that are available in the index
+func (mi *MemoryIndex) Hashes() []plumbing.Hash {
+    hashes := make([]plumbing.Hash, 0, len(mi.indexMap))
+    for k := range mi.indexMap {
+        hashes = append(hashes, k)
+    }
+    return hashes
+}
+
+// Add adds a new node to the memory index.
+func (mi *MemoryIndex) Add(hash plumbing.Hash, data *CommitData) {
+    // The parent indexes are calculated lazily in GetCommitDataByIndex,
+    // which allows adding nodes out of order as long as all parents
+    // are eventually resolved.
+    data.ParentIndexes = nil
+    mi.indexMap[hash] = uint32(len(mi.commitData))
+    mi.commitData = append(mi.commitData, commitData{Hash: hash, CommitData: data})
+    if data.GenerationV2 == math.MaxUint64 { // if GenerationV2 is not available, reset it to zero
+        data.GenerationV2 = 0
+    }
+    mi.hasGenerationV2 = mi.hasGenerationV2 && data.GenerationV2 != 0
+}
+
+func (mi *MemoryIndex) HasGenerationV2() bool {
+    return mi.hasGenerationV2
+}
+
+// Close closes the index
+func (mi *MemoryIndex) Close() error {
+    return nil
+}
+
+func (mi *MemoryIndex) MaximumNumberOfHashes() uint32 {
+    return uint32(len(mi.indexMap))
+}
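A short usage sketch of the new MemoryIndex, relying only on the API shown above (the hash values are made up):

    package main

    import (
        "fmt"

        "github.com/go-git/go-git/v5/plumbing"
        commitgraph "github.com/go-git/go-git/v5/plumbing/format/commitgraph/v2"
    )

    func main() {
        idx := commitgraph.NewMemoryIndex()
        parent := plumbing.NewHash("a5b8b09e2f8fab31ffb00dcac4b2a8f13523df40") // made-up hashes
        child := plumbing.NewHash("b029517f6300c2da0f4b651b8642506cd6aaf45d")

        // Nodes may be added before their parents; indexes resolve lazily.
        idx.Add(child, &commitgraph.CommitData{ParentHashes: []plumbing.Hash{parent}})
        idx.Add(parent, &commitgraph.CommitData{})

        i, _ := idx.GetIndexByHash(child)
        data, _ := idx.GetCommitDataByIndex(i)
        fmt.Println(i, data.ParentIndexes) // 0 [1]
    }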
diff --git a/plumbing/format/config/decoder_test.go b/plumbing/format/config/decoder_test.go
index 0a8e92c..6283f5e 100644
--- a/plumbing/format/config/decoder_test.go
+++ b/plumbing/format/config/decoder_test.go
@@ -2,6 +2,7 @@ package config
 
 import (
     "bytes"
+    "testing"
 
     . "gopkg.in/check.v1"
 )
@@ -91,3 +92,13 @@ func decodeFails(c *C, text string) {
     err := d.Decode(cfg)
     c.Assert(err, NotNil)
 }
+
+func FuzzDecoder(f *testing.F) {
+
+    f.Fuzz(func(t *testing.T, input []byte) {
+
+        d := NewDecoder(bytes.NewReader(input))
+        cfg := &Config{}
+        d.Decode(cfg)
+    })
+}
diff --git a/plumbing/format/packfile/delta_test.go b/plumbing/format/packfile/delta_test.go
index e8f5ea6..9417e55 100644
--- a/plumbing/format/packfile/delta_test.go
+++ b/plumbing/format/packfile/delta_test.go
@@ -4,6 +4,7 @@ import (
     "bytes"
     "io"
     "math/rand"
+    "testing"
 
     "github.com/go-git/go-git/v5/plumbing"
     . "gopkg.in/check.v1"
@@ -176,3 +177,14 @@ func (s *DeltaSuite) TestMaxCopySizeDeltaReader(c *C) {
     c.Assert(err, IsNil)
     c.Assert(result, DeepEquals, targetBuf)
 }
+
+func FuzzPatchDelta(f *testing.F) {
+
+    f.Fuzz(func(t *testing.T, input []byte) {
+
+        input_0 := input[:len(input)/2]
+        input_1 := input[len(input)/2:]
+
+        PatchDelta(input_0, input_1)
+    })
+}
diff --git a/plumbing/format/packfile/diff_delta.go b/plumbing/format/packfile/diff_delta.go
index 2c7a335..8898e58 100644
--- a/plumbing/format/packfile/diff_delta.go
+++ b/plumbing/format/packfile/diff_delta.go
@@ -17,8 +17,11 @@ const (
     s = 16
 
     // https://github.com/git/git/blob/f7466e94375b3be27f229c78873f0acf8301c0a5/diff-delta.c#L428
-    // Max size of a copy operation (64KB)
+    // Max size of a copy operation (64KB).
     maxCopySize = 64 * 1024
+
+    // Min size of a copy operation.
+    minCopySize = 4
 )
 
 // GetDelta returns an EncodedObject of type OFSDeltaObject. Base and Target object,
diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go
index edbc0e7..62f1d13 100644
--- a/plumbing/format/packfile/parser.go
+++ b/plumbing/format/packfile/parser.go
@@ -3,6 +3,7 @@ package packfile
 import (
     "bytes"
     "errors"
+    "fmt"
     "io"
 
     "github.com/go-git/go-git/v5/plumbing"
@@ -174,13 +175,25 @@ func (p *Parser) init() error {
     return nil
 }
 
+type objectHeaderWriter func(typ plumbing.ObjectType, sz int64) error
+
+type lazyObjectWriter interface {
+    // LazyWriter enables an object to be lazily written.
+    // It returns:
+    // - w: a writer to receive the object's content.
+    // - lwh: a func to write the object header.
+    // - err: any error from the initial writer creation process.
+    //
+    // Note that if the object header is not written BEFORE the writer
+    // is used, this will result in an invalid object.
+    LazyWriter() (w io.WriteCloser, lwh objectHeaderWriter, err error)
+}
+
 func (p *Parser) indexObjects() error {
     buf := sync.GetBytesBuffer()
     defer sync.PutBytesBuffer(buf)
 
     for i := uint32(0); i < p.count; i++ {
-        buf.Reset()
-
         oh, err := p.scanner.NextObjectHeader()
         if err != nil {
             return err
@@ -220,21 +233,60 @@ func (p *Parser) indexObjects() error {
             ota = newBaseObject(oh.Offset, oh.Length, t)
         }
 
-        buf.Grow(int(oh.Length))
-        _, crc, err := p.scanner.NextObject(buf)
+        hasher := plumbing.NewHasher(oh.Type, oh.Length)
+        writers := []io.Writer{hasher}
+        var obj *plumbing.MemoryObject
+
+        // Lazy writing is only available for non-delta objects.
+        if p.storage != nil && !delta {
+            // When a storage is set and supports lazy writing,
+            // use that instead of creating a memory object.
+            if low, ok := p.storage.(lazyObjectWriter); ok {
+                ow, lwh, err := low.LazyWriter()
+                if err != nil {
+                    return err
+                }
+
+                if err = lwh(oh.Type, oh.Length); err != nil {
+                    return err
+                }
+
+                defer ow.Close()
+                writers = append(writers, ow)
+            } else {
+                obj = new(plumbing.MemoryObject)
+                obj.SetSize(oh.Length)
+                obj.SetType(oh.Type)
+
+                writers = append(writers, obj)
+            }
+        }
+        if delta && !p.scanner.IsSeekable {
+            buf.Reset()
+            buf.Grow(int(oh.Length))
+            writers = append(writers, buf)
+        }
+
+        mw := io.MultiWriter(writers...)
+
+        _, crc, err := p.scanner.NextObject(mw)
         if err != nil {
             return err
         }
 
+        // Non-delta objects need to be added to the storage. This
+        // is only required when lazy writing is not supported.
+        if obj != nil {
+            if _, err := p.storage.SetEncodedObject(obj); err != nil {
+                return err
+            }
+        }
+
         ota.Crc32 = crc
         ota.Length = oh.Length
 
-        data := buf.Bytes()
         if !delta {
-            sha1, err := getSHA1(ota.Type, data)
-            if err != nil {
-                return err
-            }
+            sha1 := hasher.Sum()
 
             // Move children of placeholder parent into actual parent, in case this
             // was a non-external delta reference.
@@ -249,20 +301,8 @@ func (p *Parser) indexObjects() error {
             p.oiByHash[ota.SHA1] = ota
         }
 
-        if p.storage != nil && !delta {
-            obj := new(plumbing.MemoryObject)
-            obj.SetSize(oh.Length)
-            obj.SetType(oh.Type)
-            if _, err := obj.Write(data); err != nil {
-                return err
-            }
-
-            if _, err := p.storage.SetEncodedObject(obj); err != nil {
-                return err
-            }
-        }
-
         if delta && !p.scanner.IsSeekable {
+            data := buf.Bytes()
             p.deltas[oh.Offset] = make([]byte, len(data))
             copy(p.deltas[oh.Offset], data)
         }
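The lazyObjectWriter interface above lets a storage receive object content as it is inflated, instead of the parser buffering a whole MemoryObject first; the only contract is that the header callback runs before any content is written. A minimal sketch of a conforming implementation that renders a loose-object-style header (illustrative only; this is not the real filesystem storage):

    package main

    import (
        "bytes"
        "fmt"
        "io"

        "github.com/go-git/go-git/v5/plumbing"
    )

    // nopCloser wraps a writer so it satisfies io.WriteCloser.
    type nopCloser struct{ io.Writer }

    func (nopCloser) Close() error { return nil }

    // memStorage sketches a storage that supports lazy writing: the header
    // callback must run before any content is written to w.
    type memStorage struct{ buf bytes.Buffer }

    func (s *memStorage) LazyWriter() (w io.WriteCloser, lwh func(plumbing.ObjectType, int64) error, err error) {
        lwh = func(typ plumbing.ObjectType, sz int64) error {
            // Loose-object-style header: "<type> <size>\x00".
            _, err := fmt.Fprintf(&s.buf, "%s %d\x00", typ, sz)
            return err
        }
        return nopCloser{&s.buf}, lwh, nil
    }

    func main() {
        s := &memStorage{}
        w, writeHeader, _ := s.LazyWriter()
        content := []byte("hello\n")
        _ = writeHeader(plumbing.BlobObject, int64(len(content))) // header first
        w.Write(content)
        w.Close()
        fmt.Printf("%q\n", s.buf.String()) // "blob 6\x00hello\n"
    }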
@@ -280,23 +320,29 @@ func (p *Parser) resolveDeltas() error {
 
     for _, obj := range p.oi {
         buf.Reset()
+        buf.Grow(int(obj.Length))
         err := p.get(obj, buf)
         if err != nil {
             return err
         }
-        content := buf.Bytes()
 
         if err := p.onInflatedObjectHeader(obj.Type, obj.Length, obj.Offset); err != nil {
             return err
         }
 
-        if err := p.onInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, content); err != nil {
+        if err := p.onInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, nil); err != nil {
             return err
         }
 
         if !obj.IsDelta() && len(obj.Children) > 0 {
+            // Dealing with an io.ReaderAt object means we can
+            // create it once and reuse it across all children.
+            r := bytes.NewReader(buf.Bytes())
             for _, child := range obj.Children {
-                if err := p.resolveObject(io.Discard, child, content); err != nil {
+                // Even though we are discarding the output, we still need to read it
+                // so that the scanner can advance to the next object, and the SHA1
+                // can be calculated.
+                if err := p.resolveObject(io.Discard, child, r); err != nil {
                     return err
                 }
                 p.resolveExternalRef(child)
@@ -361,13 +407,13 @@ func (p *Parser) get(o *objectInfo, buf *bytes.Buffer) (err error) {
     if o.DiskType.IsDelta() {
         b := sync.GetBytesBuffer()
         defer sync.PutBytesBuffer(b)
+        buf.Grow(int(o.Length))
         err := p.get(o.Parent, b)
         if err != nil {
             return err
         }
 
-        base := b.Bytes()
-        err = p.resolveObject(buf, o, base)
+        err = p.resolveObject(buf, o, bytes.NewReader(b.Bytes()))
         if err != nil {
             return err
         }
@@ -378,6 +424,13 @@ func (p *Parser) get(o *objectInfo, buf *bytes.Buffer) (err error) {
         }
     }
 
+    // If the scanner is seekable, caching this data into
+    // memory by offset seems wasteful.
+    // There is a trade-off to be considered here in terms
+    // of execution time vs memory consumption.
+    //
+    // TODO: improve seekable execution time, so that we can
+    // skip this cache.
     if len(o.Children) > 0 {
         data := make([]byte, buf.Len())
         copy(data, buf.Bytes())
@@ -386,10 +439,25 @@ func (p *Parser) get(o *objectInfo, buf *bytes.Buffer) (err error) {
     return nil
 }
 
+// resolveObject resolves an object from base, using information
+// provided by o.
+//
+// This call has the side effect of changing field values
+// from the object info o:
+//   - Type: OFSDeltaObject may become the target type (e.g. Blob).
+//   - Size: The size may be updated with the target size.
+//   - Hash: Zero hashes will be calculated as part of the object
+//     resolution. Hence why this process can't be avoided even when w
+//     is io.Discard.
+//
+// base must be an io.ReaderAt, which is a requirement from
+// patchDeltaStream. The main reason is that resolving a delta
+// object may require jumping back and forth within base,
+// which is not supported by io.Reader.
 func (p *Parser) resolveObject(
     w io.Writer,
     o *objectInfo,
-    base []byte,
+    base io.ReaderAt,
 ) error {
     if !o.DiskType.IsDelta() {
         return nil
@@ -400,26 +468,46 @@ func (p *Parser) resolveObject(
     if err != nil {
         return err
     }
-    data := buf.Bytes()
 
-    data, err = applyPatchBase(o, data, base)
+    writers := []io.Writer{w}
+    var obj *plumbing.MemoryObject
+    var lwh objectHeaderWriter
+
+    if p.storage != nil {
+        if low, ok := p.storage.(lazyObjectWriter); ok {
+            ow, wh, err := low.LazyWriter()
+            if err != nil {
+                return err
+            }
+            lwh = wh
+
+            defer ow.Close()
+            writers = append(writers, ow)
+        } else {
+            obj = new(plumbing.MemoryObject)
+            ow, err := obj.Writer()
+            if err != nil {
+                return err
+            }
+
+            writers = append(writers, ow)
+        }
+    }
+
+    mw := io.MultiWriter(writers...)
+
+    err = applyPatchBase(o, base, buf, mw, lwh)
     if err != nil {
         return err
     }
 
-    if p.storage != nil {
-        obj := new(plumbing.MemoryObject)
-        obj.SetSize(o.Size())
+    if obj != nil {
         obj.SetType(o.Type)
-        if _, err := obj.Write(data); err != nil {
-            return err
-        }
-
+        obj.SetSize(o.Size()) // Size here is correct as it was populated by applyPatchBase.
         if _, err := p.storage.SetEncodedObject(obj); err != nil {
             return err
         }
     }
 
-    _, err = w.Write(data)
     return err
 }
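The io.ReaderAt requirement exists because copy-from-base delta commands may address any region of the base, in any order, including regions before the current position; a forward-only io.Reader cannot replay those. A toy illustration:

    package main

    import (
        "fmt"
        "strings"
    )

    func main() {
        base := strings.NewReader("the quick brown fox") // *strings.Reader is an io.ReaderAt

        // Delta copy commands may jump backwards within base, e.g. copy
        // (offset 10, len 5) and then (offset 4, len 5): fine with ReadAt,
        // impossible with a plain forward-only io.Reader.
        buf := make([]byte, 5)
        base.ReadAt(buf, 10)
        out := string(buf)
        base.ReadAt(buf, 4)
        out += string(buf)
        fmt.Println(out) // "brownquick"
    }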
@@ -443,24 +531,31 @@ func (p *Parser) readData(w io.Writer, o *objectInfo) error {
     return nil
 }
 
-func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) {
-    patched, err := PatchDelta(base, data)
-    if err != nil {
-        return nil, err
+// applyPatchBase applies the patch to target.
+//
+// Note that ota will be updated based on the description in resolveObject.
+func applyPatchBase(ota *objectInfo, base io.ReaderAt, delta io.Reader, target io.Writer, wh objectHeaderWriter) error {
+    if target == nil {
+        return fmt.Errorf("cannot apply patch against nil target")
     }
 
+    typ := ota.Type
     if ota.SHA1 == plumbing.ZeroHash {
-        ota.Type = ota.Parent.Type
-        sha1, err := getSHA1(ota.Type, patched)
-        if err != nil {
-            return nil, err
-        }
+        typ = ota.Parent.Type
+    }
+
+    sz, h, err := patchDeltaWriter(target, base, delta, typ, wh)
+    if err != nil {
+        return err
+    }
 
-        ota.SHA1 = sha1
-        ota.Length = int64(len(patched))
+    if ota.SHA1 == plumbing.ZeroHash {
+        ota.Type = typ
+        ota.Length = int64(sz)
+        ota.SHA1 = h
     }
 
-    return patched, nil
+    return nil
 }
 
 func getSHA1(t plumbing.ObjectType, data []byte) (plumbing.Hash, error) {
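With that, parser.go computes object ids on the fly: plumbing.NewHasher seeds the hash state with the "<type> <size>" object header, so streaming content through it yields the id without materialising the object. A quick check against a well-known blob id:

    package main

    import (
        "fmt"

        "github.com/go-git/go-git/v5/plumbing"
    )

    func main() {
        content := []byte("hello\n")

        // NewHasher pre-hashes the "<type> <size>\x00" header; streaming the
        // content afterwards produces the object id without buffering it.
        h := plumbing.NewHasher(plumbing.BlobObject, int64(len(content)))
        h.Write(content)
        fmt.Println(h.Sum()) // ce013625030ba8dba906f756967f9e9ca394464a
    }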
diff --git a/plumbing/format/packfile/patch_delta.go b/plumbing/format/packfile/patch_delta.go
index f00562d..960769c 100644
--- a/plumbing/format/packfile/patch_delta.go
+++ b/plumbing/format/packfile/patch_delta.go
@@ -4,6 +4,7 @@ import (
     "bufio"
     "bytes"
     "errors"
+    "fmt"
     "io"
     "math"
 
@@ -17,7 +18,33 @@ import (
 // and https://github.com/tarruda/node-git-core/blob/master/src/js/delta.js
 // for details about the delta format.
 
-const deltaSizeMin = 4
+var (
+    ErrInvalidDelta = errors.New("invalid delta")
+    ErrDeltaCmd     = errors.New("wrong delta command")
+)
+
+const (
+    payload      = 0x7f // 0111 1111
+    continuation = 0x80 // 1000 0000
+)
+
+type offset struct {
+    mask  byte
+    shift uint
+}
+
+var offsets = []offset{
+    {mask: 0x01, shift: 0},
+    {mask: 0x02, shift: 8},
+    {mask: 0x04, shift: 16},
+    {mask: 0x08, shift: 24},
+}
+
+var sizes = []offset{
+    {mask: 0x10, shift: 0},
+    {mask: 0x20, shift: 8},
+    {mask: 0x40, shift: 16},
+}
 
 // ApplyDelta writes to target the result of applying the modification deltas in delta to base.
 func ApplyDelta(target, base plumbing.EncodedObject, delta []byte) (err error) {
@@ -58,11 +85,6 @@ func ApplyDelta(target, base plumbing.EncodedObject, delta []byte) (err error) {
     return err
 }
 
-var (
-    ErrInvalidDelta = errors.New("invalid delta")
-    ErrDeltaCmd     = errors.New("wrong delta command")
-)
-
 // PatchDelta returns the result of applying the modification deltas in delta to src.
 // An error will be returned if delta is corrupted (ErrDeltaLen) or an action command
 // is not copy from source or copy from delta (ErrDeltaCmd).
@@ -120,7 +142,8 @@ func ReaderFromDelta(base plumbing.EncodedObject, deltaRC io.Reader) (io.ReadClo
             return
         }
 
-        if isCopyFromSrc(cmd) {
+        switch {
+        case isCopyFromSrc(cmd):
             offset, err := decodeOffsetByteReader(cmd, deltaBuf)
             if err != nil {
                 _ = dstWr.CloseWithError(err)
@@ -173,7 +196,8 @@ func ReaderFromDelta(base plumbing.EncodedObject, deltaRC io.Reader) (io.ReadClo
             }
             remainingTargetSz -= sz
             basePos += sz
-        } else if isCopyFromDelta(cmd) {
+
+        case isCopyFromDelta(cmd):
             sz := uint(cmd) // cmd is the size itself
             if invalidSize(sz, targetSz) {
                 _ = dstWr.CloseWithError(ErrInvalidDelta)
@@ -185,10 +209,12 @@ func ReaderFromDelta(base plumbing.EncodedObject, deltaRC io.Reader) (io.ReadClo
             }
 
             remainingTargetSz -= sz
-        } else {
+
+        default:
             _ = dstWr.CloseWithError(ErrDeltaCmd)
             return
         }
+
         if remainingTargetSz <= 0 {
             _ = dstWr.Close()
             return
@@ -200,7 +226,7 @@ func ReaderFromDelta(base plumbing.EncodedObject, deltaRC io.Reader) (io.ReadClo
 }
 
 func patchDelta(dst *bytes.Buffer, src, delta []byte) error {
-    if len(delta) < deltaSizeMin {
+    if len(delta) < minCopySize {
         return ErrInvalidDelta
     }
 
@@ -221,7 +247,9 @@ func patchDelta(dst *bytes.Buffer, src, delta []byte) error {
 
         cmd = delta[0]
         delta = delta[1:]
-        if isCopyFromSrc(cmd) {
+
+        switch {
+        case isCopyFromSrc(cmd):
             var offset, sz uint
             var err error
             offset, delta, err = decodeOffset(cmd, delta)
@@ -240,7 +268,8 @@ func patchDelta(dst *bytes.Buffer, src, delta []byte) error {
             }
             dst.Write(src[offset : offset+sz])
             remainingTargetSz -= sz
-        } else if isCopyFromDelta(cmd) {
+
+        case isCopyFromDelta(cmd):
             sz := uint(cmd) // cmd is the size itself
             if invalidSize(sz, targetSz) {
                 return ErrInvalidDelta
@@ -253,7 +282,8 @@ func patchDelta(dst *bytes.Buffer, src, delta []byte) error {
             dst.Write(delta[0:sz])
             remainingTargetSz -= sz
             delta = delta[sz:]
-        } else {
+
+        default:
             return ErrDeltaCmd
         }
 
@@ -265,6 +295,107 @@ func patchDelta(dst *bytes.Buffer, src, delta []byte) error {
     return nil
 }
 
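Before the streaming variant is introduced, it helps to recall the delta command encoding that the offsets and sizes tables above describe: a command byte with the continuation bit (0x80) set copies from the base, and each set low bit announces one more little-endian offset or size byte to read; a byte with that bit clear inlines that many literal bytes. A worked decode of a single copy command:

    package main

    import "fmt"

    func main() {
        // Command 0x91 = 1001 0001: MSB set -> copy from base;
        // mask 0x01 -> one offset byte follows; mask 0x10 -> one size byte follows.
        stream := []byte{0x91, 0x0A, 0x05} // copy 5 bytes from base offset 10

        cmd, rest := stream[0], stream[1:]
        var offset, size uint
        if cmd&0x01 != 0 { // offsets[0]: mask 0x01, shift 0
            offset |= uint(rest[0])
            rest = rest[1:]
        }
        // (masks 0x02/0x04/0x08 would add offset bytes shifted by 8/16/24)
        if cmd&0x10 != 0 { // sizes[0]: mask 0x10, shift 0
            size |= uint(rest[0])
            rest = rest[1:]
        }
        if size == 0 {
            size = 64 * 1024 // a zero size means maxCopySize (64KB)
        }
        fmt.Println(offset, size) // 10 5
    }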
+func patchDeltaWriter(dst io.Writer, base io.ReaderAt, delta io.Reader,
+    typ plumbing.ObjectType, writeHeader objectHeaderWriter) (uint, plumbing.Hash, error) {
+    deltaBuf := bufio.NewReaderSize(delta, 1024)
+    srcSz, err := decodeLEB128ByteReader(deltaBuf)
+    if err != nil {
+        if err == io.EOF {
+            return 0, plumbing.ZeroHash, ErrInvalidDelta
+        }
+        return 0, plumbing.ZeroHash, err
+    }
+
+    if r, ok := base.(*bytes.Reader); ok && srcSz != uint(r.Size()) {
+        return 0, plumbing.ZeroHash, ErrInvalidDelta
+    }
+
+    targetSz, err := decodeLEB128ByteReader(deltaBuf)
+    if err != nil {
+        if err == io.EOF {
+            return 0, plumbing.ZeroHash, ErrInvalidDelta
+        }
+        return 0, plumbing.ZeroHash, err
+    }
+
+    // If the header still needs to be written, the caller will provide
+    // an objectHeaderWriter. This seems to be the case when dealing
+    // with thin-packs.
+    if writeHeader != nil {
+        err = writeHeader(typ, int64(targetSz))
+        if err != nil {
+            return 0, plumbing.ZeroHash, fmt.Errorf("could not lazy write header: %w", err)
+        }
+    }
+
+    remainingTargetSz := targetSz
+
+    hasher := plumbing.NewHasher(typ, int64(targetSz))
+    mw := io.MultiWriter(dst, hasher)
+
+    bufp := sync.GetByteSlice()
+    defer sync.PutByteSlice(bufp)
+
+    sr := io.NewSectionReader(base, int64(0), int64(srcSz))
+    // Keep both the io.LimitedReader types, so we can reset N.
+    baselr := io.LimitReader(sr, 0).(*io.LimitedReader)
+    deltalr := io.LimitReader(deltaBuf, 0).(*io.LimitedReader)
+
+    for {
+        buf := *bufp
+        cmd, err := deltaBuf.ReadByte()
+        if err == io.EOF {
+            return 0, plumbing.ZeroHash, ErrInvalidDelta
+        }
+        if err != nil {
+            return 0, plumbing.ZeroHash, err
+        }
+
+        if isCopyFromSrc(cmd) {
+            offset, err := decodeOffsetByteReader(cmd, deltaBuf)
+            if err != nil {
+                return 0, plumbing.ZeroHash, err
+            }
+            sz, err := decodeSizeByteReader(cmd, deltaBuf)
+            if err != nil {
+                return 0, plumbing.ZeroHash, err
+            }
+
+            if invalidSize(sz, targetSz) ||
+                invalidOffsetSize(offset, sz, srcSz) {
+                return 0, plumbing.ZeroHash, ErrInvalidDelta
+            }
+
+            if _, err := sr.Seek(int64(offset), io.SeekStart); err != nil {
+                return 0, plumbing.ZeroHash, err
+            }
+            baselr.N = int64(sz)
+            if _, err := io.CopyBuffer(mw, baselr, buf); err != nil {
+                return 0, plumbing.ZeroHash, err
+            }
+            remainingTargetSz -= sz
+        } else if isCopyFromDelta(cmd) {
+            sz := uint(cmd) // cmd is the size itself
+            if invalidSize(sz, targetSz) {
+                return 0, plumbing.ZeroHash, ErrInvalidDelta
+            }
+            deltalr.N = int64(sz)
+            if _, err := io.CopyBuffer(mw, deltalr, buf); err != nil {
+                return 0, plumbing.ZeroHash, err
+            }
+
+            remainingTargetSz -= sz
+        } else {
+            return 0, plumbing.ZeroHash, ErrDeltaCmd
+        }
+        if remainingTargetSz <= 0 {
+            break
+        }
+    }
+
+    return targetSz, hasher.Sum(), nil
+}
+
 // Decodes a number encoded as an unsigned LEB128 at the start of some
 // binary data and returns the decoded number and the rest of the
 // stream.
@@ -306,48 +437,24 @@ func decodeLEB128ByteReader(input io.ByteReader) (uint, error) {
     return num, nil
 }
 
-const (
-    payload      = 0x7f // 0111 1111
-    continuation = 0x80 // 1000 0000
-)
-
 func isCopyFromSrc(cmd byte) bool {
-    return (cmd & 0x80) != 0
+    return (cmd & continuation) != 0
 }
 
 func isCopyFromDelta(cmd byte) bool {
-    return (cmd&0x80) == 0 && cmd != 0
+    return (cmd&continuation) == 0 && cmd != 0
 }
 
 func decodeOffsetByteReader(cmd byte, delta io.ByteReader) (uint, error) {
     var offset uint
-    if (cmd & 0x01) != 0 {
-        next, err := delta.ReadByte()
-        if err != nil {
-            return 0, err
-        }
-        offset = uint(next)
-    }
-    if (cmd & 0x02) != 0 {
-        next, err := delta.ReadByte()
-        if err != nil {
-            return 0, err
-        }
-        offset |= uint(next) << 8
-    }
-    if (cmd & 0x04) != 0 {
-        next, err := delta.ReadByte()
-        if err != nil {
-            return 0, err
-        }
-        offset |= uint(next) << 16
-    }
-    if (cmd & 0x08) != 0 {
-        next, err := delta.ReadByte()
-        if err != nil {
-            return 0, err
-        }
-        offset |= uint(next) << 24
-    }
+    for _, o := range offsets {
+        if (cmd & o.mask) != 0 {
+            next, err := delta.ReadByte()
+            if err != nil {
+                return 0, err
+            }
+            offset |= uint(next) << o.shift
+        }
+    }
 
     return offset, nil
 }
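The two size headers consumed at the top of patchDeltaWriter use unsigned LEB128: seven payload bits per byte, least-significant group first, with the continuation bit flagging further bytes. A minimal decoder sketch matching the payload/continuation constants above:

    package main

    import "fmt"

    const (
        payload      = 0x7f
        continuation = 0x80
    )

    // decodeLEB128 reads an unsigned LEB128 number from the front of data,
    // returning the value and the remaining bytes.
    func decodeLEB128(data []byte) (uint, []byte) {
        var num, shift uint
        for i, b := range data {
            num |= uint(b&payload) << shift
            if b&continuation == 0 {
                return num, data[i+1:]
            }
            shift += 7
        }
        return num, nil
    }

    func main() {
        // 0xE5 0x8E 0x26 encodes 624485 (the classic LEB128 example).
        n, rest := decodeLEB128([]byte{0xE5, 0x8E, 0x26, 0xFF})
        fmt.Println(n, rest) // 624485 [255]
    }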
@@ -355,33 +462,14 @@ func decodeOffset(cmd byte, delta []byte) (uint, []byte, error) {
     var offset uint
-    if (cmd & 0x01) != 0 {
-        if len(delta) == 0 {
-            return 0, nil, ErrInvalidDelta
-        }
-        offset = uint(delta[0])
-        delta = delta[1:]
-    }
-    if (cmd & 0x02) != 0 {
-        if len(delta) == 0 {
-            return 0, nil, ErrInvalidDelta
-        }
-        offset |= uint(delta[0]) << 8
-        delta = delta[1:]
-    }
-    if (cmd & 0x04) != 0 {
-        if len(delta) == 0 {
-            return 0, nil, ErrInvalidDelta
-        }
-        offset |= uint(delta[0]) << 16
-        delta = delta[1:]
-    }
-    if (cmd & 0x08) != 0 {
-        if len(delta) == 0 {
-            return 0, nil, ErrInvalidDelta
-        }
-        offset |= uint(delta[0]) << 24
-        delta = delta[1:]
-    }
+    for _, o := range offsets {
+        if (cmd & o.mask) != 0 {
+            if len(delta) == 0 {
+                return 0, nil, ErrInvalidDelta
+            }
+            offset |= uint(delta[0]) << o.shift
+            delta = delta[1:]
+        }
+    }
 
     return offset, delta, nil
@@ -389,29 +477,18 @@ func decodeSizeByteReader(cmd byte, delta io.ByteReader) (uint, error) {
     var sz uint
-    if (cmd & 0x10) != 0 {
-        next, err := delta.ReadByte()
-        if err != nil {
-            return 0, err
-        }
-        sz = uint(next)
-    }
-    if (cmd & 0x20) != 0 {
-        next, err := delta.ReadByte()
-        if err != nil {
-            return 0, err
-        }
-        sz |= uint(next) << 8
-    }
-    if (cmd & 0x40) != 0 {
-        next, err := delta.ReadByte()
-        if err != nil {
-            return 0, err
-        }
-        sz |= uint(next) << 16
-    }
+    for _, s := range sizes {
+        if (cmd & s.mask) != 0 {
+            next, err := delta.ReadByte()
+            if err != nil {
+                return 0, err
+            }
+            sz |= uint(next) << s.shift
+        }
+    }
+
     if sz == 0 {
-        sz = 0x10000
+        sz = maxCopySize
     }
 
     return sz, nil
@@ -419,29 +496,17 @@ func decodeSize(cmd byte, delta []byte) (uint, []byte, error) {
     var sz uint
-    if (cmd & 0x10) != 0 {
-        if len(delta) == 0 {
-            return 0, nil, ErrInvalidDelta
-        }
-        sz = uint(delta[0])
-        delta = delta[1:]
-    }
-    if (cmd & 0x20) != 0 {
-        if len(delta) == 0 {
-            return 0, nil, ErrInvalidDelta
-        }
-        sz |= uint(delta[0]) << 8
-        delta = delta[1:]
-    }
-    if (cmd & 0x40) != 0 {
-        if len(delta) == 0 {
-            return 0, nil, ErrInvalidDelta
-        }
-        sz |= uint(delta[0]) << 16
-        delta = delta[1:]
-    }
+    for _, s := range sizes {
+        if (cmd & s.mask) != 0 {
+            if len(delta) == 0 {
+                return 0, nil, ErrInvalidDelta
+            }
+            sz |= uint(delta[0]) << s.shift
+            delta = delta[1:]
+        }
+    }
 
     if sz == 0 {
-        sz = 0x10000
+        sz = maxCopySize
     }
 
     return sz, delta, nil
diff --git a/plumbing/format/pktline/encoder.go b/plumbing/format/pktline/encoder.go
index 6d40979..b6144fa 100644
--- a/plumbing/format/pktline/encoder.go
+++ b/plumbing/format/pktline/encoder.go
@@ -7,6 +7,8 @@ import (
     "errors"
     "fmt"
     "io"
+
+    "github.com/go-git/go-git/v5/utils/trace"
 )
 
 // An Encoder writes pkt-lines to an output stream.
@@ -43,6 +45,7 @@ func NewEncoder(w io.Writer) *Encoder {
 
 // Flush encodes a flush-pkt to the output stream.
 func (e *Encoder) Flush() error {
+    defer trace.Packet.Print("packet: > 0000")
     _, err := e.w.Write(FlushPkt)
     return err
 }
@@ -70,6 +73,7 @@ func (e *Encoder) encodeLine(p []byte) error {
     }
 
     n := len(p) + 4
+    defer trace.Packet.Printf("packet: > %04x %s", n, p)
     if _, err := e.w.Write(asciiHex16(n)); err != nil {
         return err
     }
diff --git a/plumbing/format/pktline/scanner.go b/plumbing/format/pktline/scanner.go
index 99aab46..5e85ed0 100644
--- a/plumbing/format/pktline/scanner.go
+++ b/plumbing/format/pktline/scanner.go
@@ -3,6 +3,8 @@ package pktline
 import (
     "errors"
     "io"
+
+    "github.com/go-git/go-git/v5/utils/trace"
 )
 
 const (
@@ -65,6 +67,7 @@ func (s *Scanner) Scan() bool {
         return false
     }
     s.payload = s.payload[:l]
+    trace.Packet.Printf("packet: < %04x %s", l, s.payload)
     return true
 }
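Finally, the pktline tracing added above mirrors Git's GIT_TRACE_PACKET output. A usage sketch; it assumes the utils/trace package exposes a SetTarget function to enable the Packet target (other names as used in the diff):

    package main

    import (
        "os"

        "github.com/go-git/go-git/v5/plumbing/format/pktline"
        "github.com/go-git/go-git/v5/utils/trace"
    )

    func main() {
        trace.SetTarget(trace.Packet) // enable packet tracing, similar to GIT_TRACE_PACKET

        e := pktline.NewEncoder(os.Stdout)
        e.EncodeString("hello\n") // logs "packet: > 000a hello" and writes "000ahello\n"
        e.Flush()                 // logs "packet: > 0000" and writes "0000"
    }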