diff options
Diffstat (limited to 'plumbing')
72 files changed, 4371 insertions, 943 deletions
diff --git a/plumbing/filemode/filemode.go b/plumbing/filemode/filemode.go index b848a97..ea1a457 100644 --- a/plumbing/filemode/filemode.go +++ b/plumbing/filemode/filemode.go @@ -133,7 +133,7 @@ func (m FileMode) IsMalformed() bool { m != Submodule } -// String returns the FileMode as a string in the standatd git format, +// String returns the FileMode as a string in the standard git format, // this is, an octal number padded with ceros to 7 digits. Malformed // modes are printed in that same format, for easier debugging. // diff --git a/plumbing/format/commitgraph/commitgraph.go b/plumbing/format/commitgraph/commitgraph.go index 3d59323..e772d26 100644 --- a/plumbing/format/commitgraph/commitgraph.go +++ b/plumbing/format/commitgraph/commitgraph.go @@ -8,6 +8,9 @@ import ( // CommitData is a reduced representation of Commit as presented in the commit graph
// file. It is merely useful as an optimization for walking the commit graphs.
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
type CommitData struct {
// TreeHash is the hash of the root tree of the commit.
TreeHash plumbing.Hash
@@ -24,6 +27,9 @@ type CommitData struct { // Index represents a representation of commit graph that allows indexed
// access to the nodes using commit object hash
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
type Index interface {
// GetIndexByHash gets the index in the commit graph from commit hash, if available
GetIndexByHash(h plumbing.Hash) (int, error)
diff --git a/plumbing/format/commitgraph/doc.go b/plumbing/format/commitgraph/doc.go index 41cd8b1..c320e18 100644 --- a/plumbing/format/commitgraph/doc.go +++ b/plumbing/format/commitgraph/doc.go @@ -1,23 +1,26 @@ // Package commitgraph implements encoding and decoding of commit-graph files. // +// Deprecated: This package uses the wrong types for Generation and Index in CommitData. +// Use the v2 package instead. +// // Git commit graph format // ======================= // // The Git commit graph stores a list of commit OIDs and some associated // metadata, including: // -// - The generation number of the commit. Commits with no parents have -// generation number 1; commits with parents have generation number -// one more than the maximum generation number of its parents. We -// reserve zero as special, and can be used to mark a generation -// number invalid or as "not computed". +// - The generation number of the commit. Commits with no parents have +// generation number 1; commits with parents have generation number +// one more than the maximum generation number of its parents. We +// reserve zero as special, and can be used to mark a generation +// number invalid or as "not computed". // // - The root tree OID. // // - The commit date. // -// - The parents of the commit, stored using positional references within -// the graph file. +// - The parents of the commit, stored using positional references within +// the graph file. // // These positional references are stored as unsigned 32-bit integers // corresponding to the array position within the list of commit OIDs. Due @@ -35,68 +38,68 @@ // // HEADER: // -// 4-byte signature: -// The signature is: {'C', 'G', 'P', 'H'} +// 4-byte signature: +// The signature is: {'C', 'G', 'P', 'H'} // -// 1-byte version number: -// Currently, the only valid version is 1. +// 1-byte version number: +// Currently, the only valid version is 1. // -// 1-byte Hash Version (1 = SHA-1) -// We infer the hash length (H) from this value. +// 1-byte Hash Version (1 = SHA-1) +// We infer the hash length (H) from this value. // -// 1-byte number (C) of "chunks" +// 1-byte number (C) of "chunks" // -// 1-byte (reserved for later use) -// Current clients should ignore this value. +// 1-byte (reserved for later use) +// Current clients should ignore this value. // // CHUNK LOOKUP: // -// (C + 1) * 12 bytes listing the table of contents for the chunks: -// First 4 bytes describe the chunk id. Value 0 is a terminating label. -// Other 8 bytes provide the byte-offset in current file for chunk to -// start. (Chunks are ordered contiguously in the file, so you can infer -// the length using the next chunk position if necessary.) Each chunk -// ID appears at most once. +// (C + 1) * 12 bytes listing the table of contents for the chunks: +// First 4 bytes describe the chunk id. Value 0 is a terminating label. +// Other 8 bytes provide the byte-offset in current file for chunk to +// start. (Chunks are ordered contiguously in the file, so you can infer +// the length using the next chunk position if necessary.) Each chunk +// ID appears at most once. // -// The remaining data in the body is described one chunk at a time, and -// these chunks may be given in any order. Chunks are required unless -// otherwise specified. +// The remaining data in the body is described one chunk at a time, and +// these chunks may be given in any order. Chunks are required unless +// otherwise specified. // // CHUNK DATA: // -// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes) -// The ith entry, F[i], stores the number of OIDs with first -// byte at most i. Thus F[255] stores the total -// number of commits (N). -// -// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes) -// The OIDs for all commits in the graph, sorted in ascending order. -// -// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes) -// * The first H bytes are for the OID of the root tree. -// * The next 8 bytes are for the positions of the first two parents -// of the ith commit. Stores value 0x7000000 if no parent in that -// position. If there are more than two parents, the second value -// has its most-significant bit on and the other bits store an array -// position into the Extra Edge List chunk. -// * The next 8 bytes store the generation number of the commit and -// the commit time in seconds since EPOCH. The generation number -// uses the higher 30 bits of the first 4 bytes, while the commit -// time uses the 32 bits of the second 4 bytes, along with the lowest -// 2 bits of the lowest byte, storing the 33rd and 34th bit of the -// commit time. -// -// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional] -// This list of 4-byte values store the second through nth parents for -// all octopus merges. The second parent value in the commit data stores -// an array position within this list along with the most-significant bit -// on. Starting at that array position, iterate through this list of commit -// positions for the parents until reaching a value with the most-significant -// bit on. The other bits correspond to the position of the last parent. +// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes) +// The ith entry, F[i], stores the number of OIDs with first +// byte at most i. Thus F[255] stores the total +// number of commits (N). +// +// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes) +// The OIDs for all commits in the graph, sorted in ascending order. +// +// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes) +// * The first H bytes are for the OID of the root tree. +// * The next 8 bytes are for the positions of the first two parents +// of the ith commit. Stores value 0x7000000 if no parent in that +// position. If there are more than two parents, the second value +// has its most-significant bit on and the other bits store an array +// position into the Extra Edge List chunk. +// * The next 8 bytes store the generation number of the commit and +// the commit time in seconds since EPOCH. The generation number +// uses the higher 30 bits of the first 4 bytes, while the commit +// time uses the 32 bits of the second 4 bytes, along with the lowest +// 2 bits of the lowest byte, storing the 33rd and 34th bit of the +// commit time. +// +// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional] +// This list of 4-byte values store the second through nth parents for +// all octopus merges. The second parent value in the commit data stores +// an array position within this list along with the most-significant bit +// on. Starting at that array position, iterate through this list of commit +// positions for the parents until reaching a value with the most-significant +// bit on. The other bits correspond to the position of the last parent. // // TRAILER: // -// H-byte HASH-checksum of all of the above. +// H-byte HASH-checksum of all of the above. // // Source: // https://raw.githubusercontent.com/git/git/master/Documentation/technical/commit-graph-format.txt diff --git a/plumbing/format/commitgraph/encoder.go b/plumbing/format/commitgraph/encoder.go index f61025b..3176353 100644 --- a/plumbing/format/commitgraph/encoder.go +++ b/plumbing/format/commitgraph/encoder.go @@ -1,6 +1,7 @@ package commitgraph
import (
+ "crypto"
"io"
"github.com/go-git/go-git/v5/plumbing"
@@ -9,12 +10,18 @@ import ( )
// Encoder writes MemoryIndex structs to an output stream.
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
type Encoder struct {
io.Writer
hash hash.Hash
}
// NewEncoder returns a new stream encoder that writes to w.
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
func NewEncoder(w io.Writer) *Encoder {
h := hash.New(hash.CryptoType)
mw := io.MultiWriter(w, h)
@@ -22,6 +29,9 @@ func NewEncoder(w io.Writer) *Encoder { }
// Encode writes an index into the commit-graph file
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
func (e *Encoder) Encode(idx Index) error {
// Get all the hashes in the input index
hashes := idx.Hashes()
@@ -30,7 +40,7 @@ func (e *Encoder) Encode(idx Index) error { hashToIndex, fanout, extraEdgesCount := e.prepare(idx, hashes)
chunkSignatures := [][]byte{oidFanoutSignature, oidLookupSignature, commitDataSignature}
- chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * hash.Size, uint64(len(hashes)) * 36}
+ chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * hash.Size, uint64(len(hashes)) * (hash.Size + commitDataSize)}
if extraEdgesCount > 0 {
chunkSignatures = append(chunkSignatures, extraEdgeListSignature)
chunkSizes = append(chunkSizes, uint64(extraEdgesCount)*4)
@@ -88,7 +98,11 @@ func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (hashToIndex map[pl func (e *Encoder) encodeFileHeader(chunkCount int) (err error) {
if _, err = e.Write(commitFileSignature); err == nil {
- _, err = e.Write([]byte{1, 1, byte(chunkCount), 0})
+ version := byte(1)
+ if hash.CryptoType == crypto.SHA256 {
+ version = byte(2)
+ }
+ _, err = e.Write([]byte{1, version, byte(chunkCount), 0})
}
return
}
diff --git a/plumbing/format/commitgraph/file.go b/plumbing/format/commitgraph/file.go index 1d25238..ef8fb34 100644 --- a/plumbing/format/commitgraph/file.go +++ b/plumbing/format/commitgraph/file.go @@ -2,15 +2,20 @@ package commitgraph import (
"bytes"
+ "crypto"
encbin "encoding/binary"
"errors"
"io"
"time"
"github.com/go-git/go-git/v5/plumbing"
+ "github.com/go-git/go-git/v5/plumbing/hash"
"github.com/go-git/go-git/v5/utils/binary"
)
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
+
var (
// ErrUnsupportedVersion is returned by OpenFileIndex when the commit graph
// file version is not supported.
@@ -36,6 +41,8 @@ var ( parentLast = uint32(0x80000000)
)
+const commitDataSize = 16
+
type fileIndex struct {
reader io.ReaderAt
fanout [256]int
@@ -47,6 +54,9 @@ type fileIndex struct { // OpenFileIndex opens a serialized commit graph file in the format described at
// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
func OpenFileIndex(reader io.ReaderAt) (Index, error) {
fi := &fileIndex{reader: reader}
@@ -65,7 +75,7 @@ func OpenFileIndex(reader io.ReaderAt) (Index, error) { func (fi *fileIndex) verifyFileHeader() error {
// Verify file signature
- var signature = make([]byte, 4)
+ signature := make([]byte, 4)
if _, err := fi.reader.ReadAt(signature, 0); err != nil {
return err
}
@@ -74,22 +84,31 @@ func (fi *fileIndex) verifyFileHeader() error { }
// Read and verify the file header
- var header = make([]byte, 4)
+ header := make([]byte, 4)
if _, err := fi.reader.ReadAt(header, 4); err != nil {
return err
}
if header[0] != 1 {
return ErrUnsupportedVersion
}
- if header[1] != 1 {
- return ErrUnsupportedHash
+ if hash.CryptoType == crypto.SHA1 {
+ if header[1] != 1 {
+ return ErrUnsupportedVersion
+ }
+ } else if hash.CryptoType == crypto.SHA256 {
+ if header[1] != 2 {
+ return ErrUnsupportedVersion
+ }
+ } else {
+ // Unknown hash type
+ return ErrUnsupportedVersion
}
return nil
}
func (fi *fileIndex) readChunkHeaders() error {
- var chunkID = make([]byte, 4)
+ chunkID := make([]byte, 4)
for i := 0; ; i++ {
chunkHeader := io.NewSectionReader(fi.reader, 8+(int64(i)*12), 12)
if _, err := io.ReadAtLeast(chunkHeader, chunkID, 4); err != nil {
@@ -148,7 +167,7 @@ func (fi *fileIndex) GetIndexByHash(h plumbing.Hash) (int, error) { high := fi.fanout[h[0]]
for low < high {
mid := (low + high) >> 1
- offset := fi.oidLookupOffset + int64(mid)*20
+ offset := fi.oidLookupOffset + int64(mid)*hash.Size
if _, err := fi.reader.ReadAt(oid[:], offset); err != nil {
return 0, err
}
@@ -170,8 +189,8 @@ func (fi *fileIndex) GetCommitDataByIndex(idx int) (*CommitData, error) { return nil, plumbing.ErrObjectNotFound
}
- offset := fi.commitDataOffset + int64(idx)*36
- commitDataReader := io.NewSectionReader(fi.reader, offset, 36)
+ offset := fi.commitDataOffset + int64(idx)*(hash.Size+commitDataSize)
+ commitDataReader := io.NewSectionReader(fi.reader, offset, hash.Size+commitDataSize)
treeHash, err := binary.ReadHash(commitDataReader)
if err != nil {
@@ -237,7 +256,7 @@ func (fi *fileIndex) getHashesFromIndexes(indexes []int) ([]plumbing.Hash, error return nil, ErrMalformedCommitGraphFile
}
- offset := fi.oidLookupOffset + int64(idx)*20
+ offset := fi.oidLookupOffset + int64(idx)*hash.Size
if _, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil {
return nil, err
}
@@ -250,8 +269,8 @@ func (fi *fileIndex) getHashesFromIndexes(indexes []int) ([]plumbing.Hash, error func (fi *fileIndex) Hashes() []plumbing.Hash {
hashes := make([]plumbing.Hash, fi.fanout[0xff])
for i := 0; i < fi.fanout[0xff]; i++ {
- offset := fi.oidLookupOffset + int64(i)*20
- if n, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil || n < 20 {
+ offset := fi.oidLookupOffset + int64(i)*hash.Size
+ if n, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil || n < hash.Size {
return nil
}
}
diff --git a/plumbing/format/commitgraph/memory.go b/plumbing/format/commitgraph/memory.go index b24ce36..06415e5 100644 --- a/plumbing/format/commitgraph/memory.go +++ b/plumbing/format/commitgraph/memory.go @@ -6,12 +6,18 @@ import ( // MemoryIndex provides a way to build the commit-graph in memory
// for later encoding to file.
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
type MemoryIndex struct {
commitData []*CommitData
indexMap map[plumbing.Hash]int
}
// NewMemoryIndex creates in-memory commit graph representation
+//
+// Deprecated: This package uses the wrong types for Generation and Index in CommitData.
+// Use the v2 package instead.
func NewMemoryIndex() *MemoryIndex {
return &MemoryIndex{
indexMap: make(map[plumbing.Hash]int),
diff --git a/plumbing/format/commitgraph/v2/chain.go b/plumbing/format/commitgraph/v2/chain.go new file mode 100644 index 0000000..8da60d0 --- /dev/null +++ b/plumbing/format/commitgraph/v2/chain.go @@ -0,0 +1,100 @@ +package v2 + +import ( + "bufio" + "io" + "path" + + "github.com/go-git/go-billy/v5" + "github.com/go-git/go-git/v5/plumbing" +) + +// OpenChainFile reads a commit chain file and returns a slice of the hashes within it +// +// Commit-Graph chains are described at https://git-scm.com/docs/commit-graph +// and are new line separated list of graph file hashes, oldest to newest. +// +// This function simply reads the file and returns the hashes as a slice. +func OpenChainFile(r io.Reader) ([]string, error) { + if r == nil { + return nil, io.ErrUnexpectedEOF + } + bufRd := bufio.NewReader(r) + chain := make([]string, 0, 8) + for { + line, err := bufRd.ReadSlice('\n') + if err != nil { + if err == io.EOF { + break + } + return nil, err + } + + hashStr := string(line[:len(line)-1]) + if !plumbing.IsHash(hashStr) { + return nil, ErrMalformedCommitGraphFile + } + chain = append(chain, hashStr) + } + return chain, nil +} + +// OpenChainOrFileIndex expects a billy.Filesystem representing a .git directory. +// It will first attempt to read a commit-graph index file, before trying to read a +// commit-graph chain file and its index files. If neither are present, an error is returned. +// Otherwise an Index will be returned. +// +// See: https://git-scm.com/docs/commit-graph +func OpenChainOrFileIndex(fs billy.Filesystem) (Index, error) { + file, err := fs.Open(path.Join("objects", "info", "commit-graph")) + if err != nil { + // try to open a chain file + return OpenChainIndex(fs) + } + + index, err := OpenFileIndex(file) + if err != nil { + // Ignore any file closing errors and return the error from OpenFileIndex instead + _ = file.Close() + return nil, err + } + return index, nil +} + +// OpenChainIndex expects a billy.Filesystem representing a .git directory. +// It will read a commit-graph chain file and return a coalesced index. +// If the chain file or a graph in that chain is not present, an error is returned. +// +// See: https://git-scm.com/docs/commit-graph +func OpenChainIndex(fs billy.Filesystem) (Index, error) { + chainFile, err := fs.Open(path.Join("objects", "info", "commit-graphs", "commit-graph-chain")) + if err != nil { + return nil, err + } + + chain, err := OpenChainFile(chainFile) + _ = chainFile.Close() + if err != nil { + return nil, err + } + + var index Index + for _, hash := range chain { + + file, err := fs.Open(path.Join("objects", "info", "commit-graphs", "graph-"+hash+".graph")) + if err != nil { + // Ignore all other file closing errors and return the error from opening the last file in the graph + _ = index.Close() + return nil, err + } + + index, err = OpenFileIndexWithParent(file, index) + if err != nil { + // Ignore file closing errors and return the error from OpenFileIndex instead + _ = index.Close() + return nil, err + } + } + + return index, nil +} diff --git a/plumbing/format/commitgraph/v2/chain_test.go b/plumbing/format/commitgraph/v2/chain_test.go new file mode 100644 index 0000000..32ffd69 --- /dev/null +++ b/plumbing/format/commitgraph/v2/chain_test.go @@ -0,0 +1,100 @@ +package v2_test + +import ( + "bytes" + "crypto" + "strings" + + commitgraph "github.com/go-git/go-git/v5/plumbing/format/commitgraph/v2" + "github.com/go-git/go-git/v5/plumbing/hash" + + . "gopkg.in/check.v1" +) + +func (s *CommitgraphSuite) TestOpenChainFile(c *C) { + sha1Data := []string{ + "c336d16298a017486c4164c40f8acb28afe64e84", + "31eae7b619d166c366bf5df4991f04ba8cebea0a", + "b977a025ca21e3b5ca123d8093bd7917694f6da7", + "d2a38b4a5965d529566566640519d03d2bd10f6c", + "35b585759cbf29f8ec428ef89da20705d59f99ec", + "c2bbf9fe8009b22d0f390f3c8c3f13937067590f", + "fc9f0643b21cfe571046e27e0c4565f3a1ee96c8", + "c088fd6a7e1a38e9d5a9815265cb575bb08d08ff", + "5fddbeb678bd2c36c5e5c891ab8f2b143ced5baf", + "5d7303c49ac984a9fec60523f2d5297682e16646", + } + + sha256Data := []string{ + "b9efda7160f2647e0974ca623f8a8f8e25fb6944f1b8f78f4db1bf07932de8eb", + "7095c59f8bf46e12c21d2d9da344cfe383fae18d26f3ae4d4ab7b71e3d0ddfae", + "25a395cb62f7656294e40a001ee19fefcdf3013d265dfcf4b744cd2549891dec", + "7fbd564813a82227507d9dd70f1fd21fc1f180223cd3f42e0c3090c9a8b6a7d0", + "aa95db1db2df91bd7200a892dd1c03bc2704c4793400d016b3ca08c148b0f7c1", + "2176988184b570565dc33823a02f474ad59f667a0e971c86063a7fea64776a87", + "d0afc0e64171140eb7902110f807a1beaa38a603d4312fd4bd14a5db2784ba62", + "2822136f60bfc58bbd9d624cc19fbef9f0fc0efe2a61729242e1e5f9b77fa3d0", + "6f207b5c43463af96bc38c43b0bf45275fa327e656a8bba8e7fc55c5ab6870d8", + "6cf33782619b6ff0af9c081e46323f423f8b49bf3d043887c0549bef47d60f55", + "60ea0753d2d4e828983528294be3f57e2a3ba37df4f59e3236133c9e2b17afc5", + "6b3c9f4ba5092e0807774097953ec6e9f58e8371d775bd8738a0fa98d728ba3d", + "c97cab8564054e30515dbe67dda4e14638aabf17b3f042d18dc8461cd098b362", + "9f7ece76fd2c9dae08e75176347efffc1446ad74af66004dd34680edb205dfb5", + "23e7a7e481b00571b63c2a7d0432f9733dd85d18a9841a3d7b96743100da5824", + "e684b1253fa8eb6572f35bab2fd3b6efecabf8472ede43497cd9c171973cc341", + "8b9f04080b0c40f7ad2a6bb5e5296cd6c06e730dffce87a0375ae7bd0f85f86e", + "384a745f3b14edc89526a98b96b3247b2b548541c755aadee7664352ed7f12ae", + "b68c8a82cd5b839917e1058570a0408819b81d16dbab81db118cc8dfc3def044", + "fbaf04f1a401335be57e172f4326102c658d857fde6cf2bc987520d11fc99770", + "57acf2aa5ac736337b120c951536c8a2b2cb23a4f0f198e86f3433370fa63105", + "dd7fcba4c13b6ced0b6190cdb5861adcd08446a92d67f7ec0f02f9533e09bbb0", + "744ef481c9b13ebd3b6e43d7e9ba25f7c7a5c8e453e6f0d50f5d71aae1591689", + "2c573142f1edd52b64dcd42a9c3b0ca5c9c615f757d80d25bfb02ff3eb2257e2", + "ea65cc58ef8520cd0335de4318a0d3b3a1ac257b7e9f82e12483fa3bce6cc0cd", + "1dfa626ff1523b82e21a4c29476edcdc9a89842f3c7181f63a28cd4f46cc9923", + "aa1153e71af836121e6f6cc716cf64880c19221d8dc367ff42359de1b8ef30e9", + "a7c6ec6f6569e22d2fa6e8281639d27c59b633ea00ad8ef27a43171cc985fbda", + "627b706d63d2cfd5a388deeaa76655ef09146fe492ee17cb0043578cef9c2800", + "d40eaf091ef8357b734d1047a552436eaf057d99a0c6f2068b097c324099d360", + "87f0ef81641da4fd3438dcaae4819f0c92a0ade54e262b21f9ded4575ff3f234", + "3a00a29e08d29454b5197662f70ccab5699b0ce8c85af7fbf511b8915d97cfd0", + } + + goodShas := sha1Data + badShas := sha256Data + if hash.CryptoType == crypto.SHA256 { + goodShas = sha256Data + badShas = sha1Data + } + chainData := strings.Join(goodShas, "\n") + "\n" + + chainReader := strings.NewReader(chainData) + + chain, err := commitgraph.OpenChainFile(chainReader) + c.Assert(err, IsNil) + c.Assert(goodShas, DeepEquals, chain) + + // Test with bad shas + chainData = strings.Join(badShas, "\n") + "\n" + + chainReader = strings.NewReader(chainData) + + chain, err = commitgraph.OpenChainFile(chainReader) + c.Assert(err, Equals, commitgraph.ErrMalformedCommitGraphFile) + c.Assert(chain, IsNil) + + // Test with empty file + emptyChainReader := bytes.NewReader(nil) + + chain, err = commitgraph.OpenChainFile(emptyChainReader) + c.Assert(err, IsNil) + c.Assert(chain, DeepEquals, []string{}) + + // Test with file containing only newlines + newlineChainData := []byte("\n\n\n") + newlineChainReader := bytes.NewReader(newlineChainData) + + chain, err = commitgraph.OpenChainFile(newlineChainReader) + c.Assert(err, Equals, commitgraph.ErrMalformedCommitGraphFile) + c.Assert(chain, IsNil) +} diff --git a/plumbing/format/commitgraph/v2/chunk.go b/plumbing/format/commitgraph/v2/chunk.go new file mode 100644 index 0000000..11f4d31 --- /dev/null +++ b/plumbing/format/commitgraph/v2/chunk.go @@ -0,0 +1,49 @@ +package v2 + +import "bytes" + +const ( + szChunkSig = 4 // Length of a chunk signature + chunkSigOffset = 4 // Offset of each chunk signature in chunkSignatures +) + +// chunkSignatures contains the coalesced byte signatures for each chunk type. +// The order of the signatures must match the order of the ChunkType constants. +// (When adding new chunk types you must avoid introducing ambiguity, and you may need to add padding separators to this list or reorder these signatures.) +// (i.e. it would not be possible to add a new chunk type with the signature "IDFO" without some reordering or the addition of separators.) +var chunkSignatures = []byte("OIDFOIDLCDATGDA2GDO2EDGEBIDXBDATBASE\000\000\000\000") + +// ChunkType represents the type of a chunk in the commit graph file. +type ChunkType int + +const ( + OIDFanoutChunk ChunkType = iota // "OIDF" + OIDLookupChunk // "OIDL" + CommitDataChunk // "CDAT" + GenerationDataChunk // "GDA2" + GenerationDataOverflowChunk // "GDO2" + ExtraEdgeListChunk // "EDGE" + BloomFilterIndexChunk // "BIDX" + BloomFilterDataChunk // "BDAT" + BaseGraphsListChunk // "BASE" + ZeroChunk // "\000\000\000\000" +) +const lenChunks = int(ZeroChunk) // ZeroChunk is not a valid chunk type, but it is used to determine the length of the chunk type list. + +// Signature returns the byte signature for the chunk type. +func (ct ChunkType) Signature() []byte { + if ct >= BaseGraphsListChunk || ct < 0 { // not a valid chunk type just return ZeroChunk + return chunkSignatures[ZeroChunk*chunkSigOffset : ZeroChunk*chunkSigOffset+szChunkSig] + } + + return chunkSignatures[ct*chunkSigOffset : ct*chunkSigOffset+szChunkSig] +} + +// ChunkTypeFromBytes returns the chunk type for the given byte signature. +func ChunkTypeFromBytes(b []byte) (ChunkType, bool) { + idx := bytes.Index(chunkSignatures, b) + if idx == -1 || idx%chunkSigOffset != 0 { // not found, or not aligned at chunkSigOffset + return -1, false + } + return ChunkType(idx / chunkSigOffset), true +} diff --git a/plumbing/format/commitgraph/v2/commitgraph.go b/plumbing/format/commitgraph/v2/commitgraph.go new file mode 100644 index 0000000..9c89cd9 --- /dev/null +++ b/plumbing/format/commitgraph/v2/commitgraph.go @@ -0,0 +1,57 @@ +package v2 + +import ( + "io" + "math" + "time" + + "github.com/go-git/go-git/v5/plumbing" +) + +// CommitData is a reduced representation of Commit as presented in the commit graph +// file. It is merely useful as an optimization for walking the commit graphs. +type CommitData struct { + // TreeHash is the hash of the root tree of the commit. + TreeHash plumbing.Hash + // ParentIndexes are the indexes of the parent commits of the commit. + ParentIndexes []uint32 + // ParentHashes are the hashes of the parent commits of the commit. + ParentHashes []plumbing.Hash + // Generation number is the pre-computed generation in the commit graph + // or zero if not available. + Generation uint64 + // GenerationV2 stores the corrected commit date for the commits + // It combines the contents of the GDA2 and GDO2 sections of the commit-graph + // with the commit time portion of the CDAT section. + GenerationV2 uint64 + // When is the timestamp of the commit. + When time.Time +} + +// GenerationV2Data returns the corrected commit date for the commits +func (c *CommitData) GenerationV2Data() uint64 { + if c.GenerationV2 == 0 || c.GenerationV2 == math.MaxUint64 { + return 0 + } + return c.GenerationV2 - uint64(c.When.Unix()) +} + +// Index represents a representation of commit graph that allows indexed +// access to the nodes using commit object hash +type Index interface { + // GetIndexByHash gets the index in the commit graph from commit hash, if available + GetIndexByHash(h plumbing.Hash) (uint32, error) + // GetHashByIndex gets the hash given an index in the commit graph + GetHashByIndex(i uint32) (plumbing.Hash, error) + // GetNodeByIndex gets the commit node from the commit graph using index + // obtained from child node, if available + GetCommitDataByIndex(i uint32) (*CommitData, error) + // Hashes returns all the hashes that are available in the index + Hashes() []plumbing.Hash + // HasGenerationV2 returns true if the commit graph has the corrected commit date data + HasGenerationV2() bool + // MaximumNumberOfHashes returns the maximum number of hashes within the index + MaximumNumberOfHashes() uint32 + + io.Closer +} diff --git a/plumbing/format/commitgraph/v2/commitgraph_test.go b/plumbing/format/commitgraph/v2/commitgraph_test.go new file mode 100644 index 0000000..1278405 --- /dev/null +++ b/plumbing/format/commitgraph/v2/commitgraph_test.go @@ -0,0 +1,200 @@ +package v2_test
+
+import (
+ "os"
+ "testing"
+
+ "github.com/go-git/go-billy/v5"
+ "github.com/go-git/go-billy/v5/util"
+ "github.com/go-git/go-git/v5/plumbing"
+ "github.com/go-git/go-git/v5/plumbing/cache"
+ commitgraph "github.com/go-git/go-git/v5/plumbing/format/commitgraph/v2"
+ "github.com/go-git/go-git/v5/plumbing/format/packfile"
+ "github.com/go-git/go-git/v5/plumbing/object"
+ "github.com/go-git/go-git/v5/storage/filesystem"
+
+ fixtures "github.com/go-git/go-git-fixtures/v4"
+ . "gopkg.in/check.v1"
+)
+
+func Test(t *testing.T) { TestingT(t) }
+
+type CommitgraphSuite struct {
+ fixtures.Suite
+}
+
+var _ = Suite(&CommitgraphSuite{})
+
+func testReadIndex(c *C, fs billy.Filesystem, path string) commitgraph.Index {
+ reader, err := fs.Open(path)
+ c.Assert(err, IsNil)
+ index, err := commitgraph.OpenFileIndex(reader)
+ c.Assert(err, IsNil)
+ c.Assert(index, NotNil)
+ return index
+}
+
+func testDecodeHelper(c *C, index commitgraph.Index) {
+ // Root commit
+ nodeIndex, err := index.GetIndexByHash(plumbing.NewHash("347c91919944a68e9413581a1bc15519550a3afe"))
+ c.Assert(err, IsNil)
+ commitData, err := index.GetCommitDataByIndex(nodeIndex)
+ c.Assert(err, IsNil)
+ c.Assert(len(commitData.ParentIndexes), Equals, 0)
+ c.Assert(len(commitData.ParentHashes), Equals, 0)
+
+ // Regular commit
+ nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("e713b52d7e13807e87a002e812041f248db3f643"))
+ c.Assert(err, IsNil)
+ commitData, err = index.GetCommitDataByIndex(nodeIndex)
+ c.Assert(err, IsNil)
+ c.Assert(len(commitData.ParentIndexes), Equals, 1)
+ c.Assert(len(commitData.ParentHashes), Equals, 1)
+ c.Assert(commitData.ParentHashes[0].String(), Equals, "347c91919944a68e9413581a1bc15519550a3afe")
+
+ // Merge commit
+ nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("b29328491a0682c259bcce28741eac71f3499f7d"))
+ c.Assert(err, IsNil)
+ commitData, err = index.GetCommitDataByIndex(nodeIndex)
+ c.Assert(err, IsNil)
+ c.Assert(len(commitData.ParentIndexes), Equals, 2)
+ c.Assert(len(commitData.ParentHashes), Equals, 2)
+ c.Assert(commitData.ParentHashes[0].String(), Equals, "e713b52d7e13807e87a002e812041f248db3f643")
+ c.Assert(commitData.ParentHashes[1].String(), Equals, "03d2c021ff68954cf3ef0a36825e194a4b98f981")
+
+ // Octopus merge commit
+ nodeIndex, err = index.GetIndexByHash(plumbing.NewHash("6f6c5d2be7852c782be1dd13e36496dd7ad39560"))
+ c.Assert(err, IsNil)
+ commitData, err = index.GetCommitDataByIndex(nodeIndex)
+ c.Assert(err, IsNil)
+ c.Assert(len(commitData.ParentIndexes), Equals, 3)
+ c.Assert(len(commitData.ParentHashes), Equals, 3)
+ c.Assert(commitData.ParentHashes[0].String(), Equals, "ce275064ad67d51e99f026084e20827901a8361c")
+ c.Assert(commitData.ParentHashes[1].String(), Equals, "bb13916df33ed23004c3ce9ed3b8487528e655c1")
+ c.Assert(commitData.ParentHashes[2].String(), Equals, "a45273fe2d63300e1962a9e26a6b15c276cd7082")
+
+ // Check all hashes
+ hashes := index.Hashes()
+ c.Assert(len(hashes), Equals, 11)
+ c.Assert(hashes[0].String(), Equals, "03d2c021ff68954cf3ef0a36825e194a4b98f981")
+ c.Assert(hashes[10].String(), Equals, "e713b52d7e13807e87a002e812041f248db3f643")
+}
+
+func (s *CommitgraphSuite) TestDecodeMultiChain(c *C) {
+ fixtures.ByTag("commit-graph-chain-2").Test(c, func(f *fixtures.Fixture) {
+ dotgit := f.DotGit()
+ index, err := commitgraph.OpenChainOrFileIndex(dotgit)
+ c.Assert(err, IsNil)
+ defer index.Close()
+ storer := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault())
+ p := f.Packfile()
+ defer p.Close()
+ packfile.UpdateObjectStorage(storer, p)
+
+ for idx, hash := range index.Hashes() {
+ idx2, err := index.GetIndexByHash(hash)
+ c.Assert(err, IsNil)
+ c.Assert(idx2, Equals, uint32(idx))
+ hash2, err := index.GetHashByIndex(idx2)
+ c.Assert(err, IsNil)
+ c.Assert(hash2.String(), Equals, hash.String())
+
+ commitData, err := index.GetCommitDataByIndex(uint32(idx))
+ c.Assert(err, IsNil)
+ commit, err := object.GetCommit(storer, hash)
+ c.Assert(err, IsNil)
+
+ for i, parent := range commit.ParentHashes {
+ c.Assert(hash.String()+":"+parent.String(), Equals, hash.String()+":"+commitData.ParentHashes[i].String())
+ }
+ }
+ })
+}
+
+func (s *CommitgraphSuite) TestDecode(c *C) {
+ fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
+ dotgit := f.DotGit()
+ index := testReadIndex(c, dotgit, dotgit.Join("objects", "info", "commit-graph"))
+ defer index.Close()
+ testDecodeHelper(c, index)
+ })
+}
+
+func (s *CommitgraphSuite) TestDecodeChain(c *C) {
+ fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
+ dotgit := f.DotGit()
+ index, err := commitgraph.OpenChainOrFileIndex(dotgit)
+ c.Assert(err, IsNil)
+ defer index.Close()
+ testDecodeHelper(c, index)
+ })
+
+ fixtures.ByTag("commit-graph-chain").Test(c, func(f *fixtures.Fixture) {
+ dotgit := f.DotGit()
+ index, err := commitgraph.OpenChainOrFileIndex(dotgit)
+ c.Assert(err, IsNil)
+ defer index.Close()
+ testDecodeHelper(c, index)
+ })
+}
+
+func (s *CommitgraphSuite) TestReencode(c *C) {
+ fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
+ dotgit := f.DotGit()
+
+ reader, err := dotgit.Open(dotgit.Join("objects", "info", "commit-graph"))
+ c.Assert(err, IsNil)
+ defer reader.Close()
+ index, err := commitgraph.OpenFileIndex(reader)
+ c.Assert(err, IsNil)
+ defer index.Close()
+
+ writer, err := util.TempFile(dotgit, "", "commit-graph")
+ c.Assert(err, IsNil)
+ tmpName := writer.Name()
+ defer os.Remove(tmpName)
+
+ encoder := commitgraph.NewEncoder(writer)
+ err = encoder.Encode(index)
+ c.Assert(err, IsNil)
+ writer.Close()
+
+ tmpIndex := testReadIndex(c, dotgit, tmpName)
+ defer tmpIndex.Close()
+ testDecodeHelper(c, tmpIndex)
+ })
+}
+
+func (s *CommitgraphSuite) TestReencodeInMemory(c *C) {
+ fixtures.ByTag("commit-graph").Test(c, func(f *fixtures.Fixture) {
+ dotgit := f.DotGit()
+
+ reader, err := dotgit.Open(dotgit.Join("objects", "info", "commit-graph"))
+ c.Assert(err, IsNil)
+ index, err := commitgraph.OpenFileIndex(reader)
+ c.Assert(err, IsNil)
+
+ memoryIndex := commitgraph.NewMemoryIndex()
+ defer memoryIndex.Close()
+ for i, hash := range index.Hashes() {
+ commitData, err := index.GetCommitDataByIndex(uint32(i))
+ c.Assert(err, IsNil)
+ memoryIndex.Add(hash, commitData)
+ }
+ index.Close()
+
+ writer, err := util.TempFile(dotgit, "", "commit-graph")
+ c.Assert(err, IsNil)
+ tmpName := writer.Name()
+ defer os.Remove(tmpName)
+
+ encoder := commitgraph.NewEncoder(writer)
+ err = encoder.Encode(memoryIndex)
+ c.Assert(err, IsNil)
+ writer.Close()
+
+ tmpIndex := testReadIndex(c, dotgit, tmpName)
+ defer tmpIndex.Close()
+ testDecodeHelper(c, tmpIndex)
+ })
+}
diff --git a/plumbing/format/commitgraph/v2/doc.go b/plumbing/format/commitgraph/v2/doc.go new file mode 100644 index 0000000..157621d --- /dev/null +++ b/plumbing/format/commitgraph/v2/doc.go @@ -0,0 +1,106 @@ +// Package v2 implements encoding and decoding of commit-graph files. +// +// This package was created to work around the issues of the incorrect types in +// the commitgraph package. +// +// Git commit graph format +// ======================= +// +// The Git commit graph stores a list of commit OIDs and some associated +// metadata, including: +// +// - The generation number of the commit. Commits with no parents have +// generation number 1; commits with parents have generation number +// one more than the maximum generation number of its parents. We +// reserve zero as special, and can be used to mark a generation +// number invalid or as "not computed". +// +// - The root tree OID. +// +// - The commit date. +// +// - The parents of the commit, stored using positional references within +// the graph file. +// +// These positional references are stored as unsigned 32-bit integers +// corresponding to the array position within the list of commit OIDs. Due +// to some special constants we use to track parents, we can store at most +// (1 << 30) + (1 << 29) + (1 << 28) - 1 (around 1.8 billion) commits. +// +// == Commit graph files have the following format: +// +// In order to allow extensions that add extra data to the graph, we organize +// the body into "chunks" and provide a binary lookup table at the beginning +// of the body. The header includes certain values, such as number of chunks +// and hash type. +// +// All 4-byte numbers are in network order. +// +// HEADER: +// +// 4-byte signature: +// The signature is: {'C', 'G', 'P', 'H'} +// +// 1-byte version number: +// Currently, the only valid version is 1. +// +// 1-byte Hash Version (1 = SHA-1) +// We infer the hash length (H) from this value. +// +// 1-byte number (C) of "chunks" +// +// 1-byte (reserved for later use) +// Current clients should ignore this value. +// +// CHUNK LOOKUP: +// +// (C + 1) * 12 bytes listing the table of contents for the chunks: +// First 4 bytes describe the chunk id. Value 0 is a terminating label. +// Other 8 bytes provide the byte-offset in current file for chunk to +// start. (Chunks are ordered contiguously in the file, so you can infer +// the length using the next chunk position if necessary.) Each chunk +// ID appears at most once. +// +// The remaining data in the body is described one chunk at a time, and +// these chunks may be given in any order. Chunks are required unless +// otherwise specified. +// +// CHUNK DATA: +// +// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes) +// The ith entry, F[i], stores the number of OIDs with first +// byte at most i. Thus F[255] stores the total +// number of commits (N). +// +// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes) +// The OIDs for all commits in the graph, sorted in ascending order. +// +// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes) +// * The first H bytes are for the OID of the root tree. +// * The next 8 bytes are for the positions of the first two parents +// of the ith commit. Stores value 0x7000000 if no parent in that +// position. If there are more than two parents, the second value +// has its most-significant bit on and the other bits store an array +// position into the Extra Edge List chunk. +// * The next 8 bytes store the generation number of the commit and +// the commit time in seconds since EPOCH. The generation number +// uses the higher 30 bits of the first 4 bytes, while the commit +// time uses the 32 bits of the second 4 bytes, along with the lowest +// 2 bits of the lowest byte, storing the 33rd and 34th bit of the +// commit time. +// +// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional] +// This list of 4-byte values store the second through nth parents for +// all octopus merges. The second parent value in the commit data stores +// an array position within this list along with the most-significant bit +// on. Starting at that array position, iterate through this list of commit +// positions for the parents until reaching a value with the most-significant +// bit on. The other bits correspond to the position of the last parent. +// +// TRAILER: +// +// H-byte HASH-checksum of all of the above. +// +// Source: +// https://raw.githubusercontent.com/git/git/master/Documentation/technical/commit-graph-format.txt +package v2 diff --git a/plumbing/format/commitgraph/v2/encoder.go b/plumbing/format/commitgraph/v2/encoder.go new file mode 100644 index 0000000..b79bc77 --- /dev/null +++ b/plumbing/format/commitgraph/v2/encoder.go @@ -0,0 +1,250 @@ +package v2 + +import ( + "crypto" + "io" + "math" + + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/hash" + "github.com/go-git/go-git/v5/utils/binary" +) + +// Encoder writes MemoryIndex structs to an output stream. +type Encoder struct { + io.Writer + hash hash.Hash +} + +// NewEncoder returns a new stream encoder that writes to w. +func NewEncoder(w io.Writer) *Encoder { + h := hash.New(hash.CryptoType) + mw := io.MultiWriter(w, h) + return &Encoder{mw, h} +} + +// Encode writes an index into the commit-graph file +func (e *Encoder) Encode(idx Index) error { + // Get all the hashes in the input index + hashes := idx.Hashes() + + // Sort the inout and prepare helper structures we'll need for encoding + hashToIndex, fanout, extraEdgesCount, generationV2OverflowCount := e.prepare(idx, hashes) + + chunkSignatures := [][]byte{OIDFanoutChunk.Signature(), OIDLookupChunk.Signature(), CommitDataChunk.Signature()} + chunkSizes := []uint64{szUint32 * lenFanout, uint64(len(hashes)) * hash.Size, uint64(len(hashes)) * (hash.Size + szCommitData)} + if extraEdgesCount > 0 { + chunkSignatures = append(chunkSignatures, ExtraEdgeListChunk.Signature()) + chunkSizes = append(chunkSizes, uint64(extraEdgesCount)*szUint32) + } + if idx.HasGenerationV2() { + chunkSignatures = append(chunkSignatures, GenerationDataChunk.Signature()) + chunkSizes = append(chunkSizes, uint64(len(hashes))*szUint32) + if generationV2OverflowCount > 0 { + chunkSignatures = append(chunkSignatures, GenerationDataOverflowChunk.Signature()) + chunkSizes = append(chunkSizes, uint64(generationV2OverflowCount)*szUint64) + } + } + + if err := e.encodeFileHeader(len(chunkSignatures)); err != nil { + return err + } + if err := e.encodeChunkHeaders(chunkSignatures, chunkSizes); err != nil { + return err + } + if err := e.encodeFanout(fanout); err != nil { + return err + } + if err := e.encodeOidLookup(hashes); err != nil { + return err + } + + extraEdges, generationV2Data, err := e.encodeCommitData(hashes, hashToIndex, idx) + if err != nil { + return err + } + if err = e.encodeExtraEdges(extraEdges); err != nil { + return err + } + if idx.HasGenerationV2() { + overflows, err := e.encodeGenerationV2Data(generationV2Data) + if err != nil { + return err + } + if err = e.encodeGenerationV2Overflow(overflows); err != nil { + return err + } + } + + return e.encodeChecksum() +} + +func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (hashToIndex map[plumbing.Hash]uint32, fanout []uint32, extraEdgesCount uint32, generationV2OverflowCount uint32) { + // Sort the hashes and build our index + plumbing.HashesSort(hashes) + hashToIndex = make(map[plumbing.Hash]uint32) + fanout = make([]uint32, lenFanout) + for i, hash := range hashes { + hashToIndex[hash] = uint32(i) + fanout[hash[0]]++ + } + + // Convert the fanout to cumulative values + for i := 1; i < lenFanout; i++ { + fanout[i] += fanout[i-1] + } + + hasGenerationV2 := idx.HasGenerationV2() + + // Find out if we will need extra edge table + for i := 0; i < len(hashes); i++ { + v, _ := idx.GetCommitDataByIndex(uint32(i)) + if len(v.ParentHashes) > 2 { + extraEdgesCount += uint32(len(v.ParentHashes) - 1) + } + if hasGenerationV2 && v.GenerationV2Data() > math.MaxUint32 { + generationV2OverflowCount++ + } + } + + return +} + +func (e *Encoder) encodeFileHeader(chunkCount int) (err error) { + if _, err = e.Write(commitFileSignature); err == nil { + version := byte(1) + if hash.CryptoType == crypto.SHA256 { + version = byte(2) + } + _, err = e.Write([]byte{1, version, byte(chunkCount), 0}) + } + return +} + +func (e *Encoder) encodeChunkHeaders(chunkSignatures [][]byte, chunkSizes []uint64) (err error) { + // 8 bytes of file header, 12 bytes for each chunk header and 12 byte for terminator + offset := uint64(szSignature + szHeader + (len(chunkSignatures)+1)*(szChunkSig+szUint64)) + for i, signature := range chunkSignatures { + if _, err = e.Write(signature); err == nil { + err = binary.WriteUint64(e, offset) + } + if err != nil { + return + } + offset += chunkSizes[i] + } + if _, err = e.Write(ZeroChunk.Signature()); err == nil { + err = binary.WriteUint64(e, offset) + } + return +} + +func (e *Encoder) encodeFanout(fanout []uint32) (err error) { + for i := 0; i <= 0xff; i++ { + if err = binary.WriteUint32(e, fanout[i]); err != nil { + return + } + } + return +} + +func (e *Encoder) encodeOidLookup(hashes []plumbing.Hash) (err error) { + for _, hash := range hashes { + if _, err = e.Write(hash[:]); err != nil { + return err + } + } + return +} + +func (e *Encoder) encodeCommitData(hashes []plumbing.Hash, hashToIndex map[plumbing.Hash]uint32, idx Index) (extraEdges []uint32, generationV2Data []uint64, err error) { + if idx.HasGenerationV2() { + generationV2Data = make([]uint64, 0, len(hashes)) + } + for _, hash := range hashes { + origIndex, _ := idx.GetIndexByHash(hash) + commitData, _ := idx.GetCommitDataByIndex(origIndex) + if _, err = e.Write(commitData.TreeHash[:]); err != nil { + return + } + + var parent1, parent2 uint32 + if len(commitData.ParentHashes) == 0 { + parent1 = parentNone + parent2 = parentNone + } else if len(commitData.ParentHashes) == 1 { + parent1 = hashToIndex[commitData.ParentHashes[0]] + parent2 = parentNone + } else if len(commitData.ParentHashes) == 2 { + parent1 = hashToIndex[commitData.ParentHashes[0]] + parent2 = hashToIndex[commitData.ParentHashes[1]] + } else if len(commitData.ParentHashes) > 2 { + parent1 = hashToIndex[commitData.ParentHashes[0]] + parent2 = uint32(len(extraEdges)) | parentOctopusUsed + for _, parentHash := range commitData.ParentHashes[1:] { + extraEdges = append(extraEdges, hashToIndex[parentHash]) + } + extraEdges[len(extraEdges)-1] |= parentLast + } + + if err = binary.WriteUint32(e, parent1); err == nil { + err = binary.WriteUint32(e, parent2) + } + if err != nil { + return + } + + unixTime := uint64(commitData.When.Unix()) + unixTime |= uint64(commitData.Generation) << 34 + if err = binary.WriteUint64(e, unixTime); err != nil { + return + } + if generationV2Data != nil { + generationV2Data = append(generationV2Data, commitData.GenerationV2Data()) + } + } + return +} + +func (e *Encoder) encodeExtraEdges(extraEdges []uint32) (err error) { + for _, parent := range extraEdges { + if err = binary.WriteUint32(e, parent); err != nil { + return + } + } + return +} + +func (e *Encoder) encodeGenerationV2Data(generationV2Data []uint64) (overflows []uint64, err error) { + head := 0 + for _, data := range generationV2Data { + if data >= 0x80000000 { + // overflow + if err = binary.WriteUint32(e, uint32(head)|0x80000000); err != nil { + return nil, err + } + generationV2Data[head] = data + head++ + continue + } + if err = binary.WriteUint32(e, uint32(data)); err != nil { + return nil, err + } + } + + return generationV2Data[:head], nil +} + +func (e *Encoder) encodeGenerationV2Overflow(overflows []uint64) (err error) { + for _, overflow := range overflows { + if err = binary.WriteUint64(e, overflow); err != nil { + return + } + } + return +} + +func (e *Encoder) encodeChecksum() error { + _, err := e.Write(e.hash.Sum(nil)[:hash.Size]) + return err +} diff --git a/plumbing/format/commitgraph/v2/file.go b/plumbing/format/commitgraph/v2/file.go new file mode 100644 index 0000000..c5f61e4 --- /dev/null +++ b/plumbing/format/commitgraph/v2/file.go @@ -0,0 +1,412 @@ +package v2 + +import ( + "bytes" + "crypto" + encbin "encoding/binary" + "errors" + "io" + "time" + + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/hash" + "github.com/go-git/go-git/v5/utils/binary" +) + +var ( + // ErrUnsupportedVersion is returned by OpenFileIndex when the commit graph + // file version is not supported. + ErrUnsupportedVersion = errors.New("unsupported version") + // ErrUnsupportedHash is returned by OpenFileIndex when the commit graph + // hash function is not supported. Currently only SHA-1 is defined and + // supported. + ErrUnsupportedHash = errors.New("unsupported hash algorithm") + // ErrMalformedCommitGraphFile is returned by OpenFileIndex when the commit + // graph file is corrupted. + ErrMalformedCommitGraphFile = errors.New("malformed commit graph file") + + commitFileSignature = []byte{'C', 'G', 'P', 'H'} + + parentNone = uint32(0x70000000) + parentOctopusUsed = uint32(0x80000000) + parentOctopusMask = uint32(0x7fffffff) + parentLast = uint32(0x80000000) +) + +const ( + szUint32 = 4 + szUint64 = 8 + + szSignature = 4 + szHeader = 4 + szCommitData = 2*szUint32 + szUint64 + + lenFanout = 256 +) + +type fileIndex struct { + reader ReaderAtCloser + fanout [lenFanout]uint32 + offsets [lenChunks]int64 + parent Index + hasGenerationV2 bool + minimumNumberOfHashes uint32 +} + +// ReaderAtCloser is an interface that combines io.ReaderAt and io.Closer. +type ReaderAtCloser interface { + io.ReaderAt + io.Closer +} + +// OpenFileIndex opens a serialized commit graph file in the format described at +// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt +func OpenFileIndex(reader ReaderAtCloser) (Index, error) { + return OpenFileIndexWithParent(reader, nil) +} + +// OpenFileIndexWithParent opens a serialized commit graph file in the format described at +// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt +func OpenFileIndexWithParent(reader ReaderAtCloser, parent Index) (Index, error) { + if reader == nil { + return nil, io.ErrUnexpectedEOF + } + fi := &fileIndex{reader: reader, parent: parent} + + if err := fi.verifyFileHeader(); err != nil { + return nil, err + } + if err := fi.readChunkHeaders(); err != nil { + return nil, err + } + if err := fi.readFanout(); err != nil { + return nil, err + } + + fi.hasGenerationV2 = fi.offsets[GenerationDataChunk] > 0 + if fi.parent != nil { + fi.hasGenerationV2 = fi.hasGenerationV2 && fi.parent.HasGenerationV2() + } + + if fi.parent != nil { + fi.minimumNumberOfHashes = fi.parent.MaximumNumberOfHashes() + } + + return fi, nil +} + +// Close closes the underlying reader and the parent index if it exists. +func (fi *fileIndex) Close() (err error) { + if fi.parent != nil { + defer func() { + parentErr := fi.parent.Close() + // only report the error from the parent if there is no error from the reader + if err == nil { + err = parentErr + } + }() + } + err = fi.reader.Close() + return +} + +func (fi *fileIndex) verifyFileHeader() error { + // Verify file signature + signature := make([]byte, szSignature) + if _, err := fi.reader.ReadAt(signature, 0); err != nil { + return err + } + if !bytes.Equal(signature, commitFileSignature) { + return ErrMalformedCommitGraphFile + } + + // Read and verify the file header + header := make([]byte, szHeader) + if _, err := fi.reader.ReadAt(header, szHeader); err != nil { + return err + } + if header[0] != 1 { + return ErrUnsupportedVersion + } + if !(hash.CryptoType == crypto.SHA1 && header[1] == 1) && + !(hash.CryptoType == crypto.SHA256 && header[1] == 2) { + // Unknown hash type / unsupported hash type + return ErrUnsupportedHash + } + + return nil +} + +func (fi *fileIndex) readChunkHeaders() error { + // The chunk table is a list of 4-byte chunk signatures and uint64 offsets into the file + chunkID := make([]byte, szChunkSig) + for i := 0; ; i++ { + chunkHeader := io.NewSectionReader(fi.reader, szSignature+szHeader+(int64(i)*(szChunkSig+szUint64)), szChunkSig+szUint64) + if _, err := io.ReadAtLeast(chunkHeader, chunkID, szChunkSig); err != nil { + return err + } + chunkOffset, err := binary.ReadUint64(chunkHeader) + if err != nil { + return err + } + + chunkType, ok := ChunkTypeFromBytes(chunkID) + if !ok { + continue + } + if chunkType == ZeroChunk || int(chunkType) >= len(fi.offsets) { + break + } + fi.offsets[chunkType] = int64(chunkOffset) + } + + if fi.offsets[OIDFanoutChunk] <= 0 || fi.offsets[OIDLookupChunk] <= 0 || fi.offsets[CommitDataChunk] <= 0 { + return ErrMalformedCommitGraphFile + } + + return nil +} + +func (fi *fileIndex) readFanout() error { + // The Fanout table is a 256 entry table of the number (as uint32) of OIDs with first byte at most i. + // Thus F[255] stores the total number of commits (N) + fanoutReader := io.NewSectionReader(fi.reader, fi.offsets[OIDFanoutChunk], lenFanout*szUint32) + for i := 0; i < 256; i++ { + fanoutValue, err := binary.ReadUint32(fanoutReader) + if err != nil { + return err + } + if fanoutValue > 0x7fffffff { + return ErrMalformedCommitGraphFile + } + fi.fanout[i] = fanoutValue + } + return nil +} + +// GetIndexByHash looks up the provided hash in the commit-graph fanout and returns the index of the commit data for the given hash. +func (fi *fileIndex) GetIndexByHash(h plumbing.Hash) (uint32, error) { + var oid plumbing.Hash + + // Find the hash in the oid lookup table + var low uint32 + if h[0] == 0 { + low = 0 + } else { + low = fi.fanout[h[0]-1] + } + high := fi.fanout[h[0]] + for low < high { + mid := (low + high) >> 1 + offset := fi.offsets[OIDLookupChunk] + int64(mid)*hash.Size + if _, err := fi.reader.ReadAt(oid[:], offset); err != nil { + return 0, err + } + cmp := bytes.Compare(h[:], oid[:]) + if cmp < 0 { + high = mid + } else if cmp == 0 { + return mid + fi.minimumNumberOfHashes, nil + } else { + low = mid + 1 + } + } + + if fi.parent != nil { + idx, err := fi.parent.GetIndexByHash(h) + if err != nil { + return 0, err + } + return idx, nil + } + + return 0, plumbing.ErrObjectNotFound +} + +// GetCommitDataByIndex returns the commit data for the given index in the commit-graph. +func (fi *fileIndex) GetCommitDataByIndex(idx uint32) (*CommitData, error) { + if idx < fi.minimumNumberOfHashes { + if fi.parent != nil { + data, err := fi.parent.GetCommitDataByIndex(idx) + if err != nil { + return nil, err + } + return data, nil + } + + return nil, plumbing.ErrObjectNotFound + } + idx -= fi.minimumNumberOfHashes + if idx >= fi.fanout[0xff] { + return nil, plumbing.ErrObjectNotFound + } + + offset := fi.offsets[CommitDataChunk] + int64(idx)*(hash.Size+szCommitData) + commitDataReader := io.NewSectionReader(fi.reader, offset, hash.Size+szCommitData) + + treeHash, err := binary.ReadHash(commitDataReader) + if err != nil { + return nil, err + } + parent1, err := binary.ReadUint32(commitDataReader) + if err != nil { + return nil, err + } + parent2, err := binary.ReadUint32(commitDataReader) + if err != nil { + return nil, err + } + genAndTime, err := binary.ReadUint64(commitDataReader) + if err != nil { + return nil, err + } + + var parentIndexes []uint32 + if parent2&parentOctopusUsed == parentOctopusUsed { + // Octopus merge - Look-up the extra parents from the extra edge list + // The extra edge list is a list of uint32s, each of which is an index into the Commit Data table, terminated by a index with the most significant bit on. + parentIndexes = []uint32{parent1 & parentOctopusMask} + offset := fi.offsets[ExtraEdgeListChunk] + szUint32*int64(parent2&parentOctopusMask) + buf := make([]byte, szUint32) + for { + _, err := fi.reader.ReadAt(buf, offset) + if err != nil { + return nil, err + } + + parent := encbin.BigEndian.Uint32(buf) + offset += szUint32 + parentIndexes = append(parentIndexes, parent&parentOctopusMask) + if parent&parentLast == parentLast { + break + } + } + } else if parent2 != parentNone { + parentIndexes = []uint32{parent1 & parentOctopusMask, parent2 & parentOctopusMask} + } else if parent1 != parentNone { + parentIndexes = []uint32{parent1 & parentOctopusMask} + } + + parentHashes, err := fi.getHashesFromIndexes(parentIndexes) + if err != nil { + return nil, err + } + + generationV2 := uint64(0) + + if fi.hasGenerationV2 { + // set the GenerationV2 result to the commit time + generationV2 = uint64(genAndTime & 0x3FFFFFFFF) + + // Next read the generation (offset) data from the generation data chunk + offset := fi.offsets[GenerationDataChunk] + int64(idx)*szUint32 + buf := make([]byte, szUint32) + if _, err := fi.reader.ReadAt(buf, offset); err != nil { + return nil, err + } + genV2Data := encbin.BigEndian.Uint32(buf) + + // check if the data is an overflow that needs to be looked up in the overflow chunk + if genV2Data&0x80000000 > 0 { + // Overflow + offset := fi.offsets[GenerationDataOverflowChunk] + int64(genV2Data&0x7fffffff)*szUint64 + buf := make([]byte, 8) + if _, err := fi.reader.ReadAt(buf, offset); err != nil { + return nil, err + } + + generationV2 += encbin.BigEndian.Uint64(buf) + } else { + generationV2 += uint64(genV2Data) + } + } + + return &CommitData{ + TreeHash: treeHash, + ParentIndexes: parentIndexes, + ParentHashes: parentHashes, + Generation: genAndTime >> 34, + GenerationV2: generationV2, + When: time.Unix(int64(genAndTime&0x3FFFFFFFF), 0), + }, nil +} + +// GetHashByIndex looks up the hash for the given index in the commit-graph. +func (fi *fileIndex) GetHashByIndex(idx uint32) (found plumbing.Hash, err error) { + if idx < fi.minimumNumberOfHashes { + if fi.parent != nil { + return fi.parent.GetHashByIndex(idx) + } + return found, ErrMalformedCommitGraphFile + } + idx -= fi.minimumNumberOfHashes + if idx >= fi.fanout[0xff] { + return found, ErrMalformedCommitGraphFile + } + + offset := fi.offsets[OIDLookupChunk] + int64(idx)*hash.Size + if _, err := fi.reader.ReadAt(found[:], offset); err != nil { + return found, err + } + + return found, nil +} + +func (fi *fileIndex) getHashesFromIndexes(indexes []uint32) ([]plumbing.Hash, error) { + hashes := make([]plumbing.Hash, len(indexes)) + + for i, idx := range indexes { + if idx < fi.minimumNumberOfHashes { + if fi.parent != nil { + hash, err := fi.parent.GetHashByIndex(idx) + if err != nil { + return nil, err + } + hashes[i] = hash + continue + } + + return nil, ErrMalformedCommitGraphFile + } + + idx -= fi.minimumNumberOfHashes + if idx >= fi.fanout[0xff] { + return nil, ErrMalformedCommitGraphFile + } + + offset := fi.offsets[OIDLookupChunk] + int64(idx)*hash.Size + if _, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil { + return nil, err + } + } + + return hashes, nil +} + +// Hashes returns all the hashes that are available in the index. +func (fi *fileIndex) Hashes() []plumbing.Hash { + hashes := make([]plumbing.Hash, fi.fanout[0xff]+fi.minimumNumberOfHashes) + for i := uint32(0); i < fi.minimumNumberOfHashes; i++ { + hash, err := fi.parent.GetHashByIndex(i) + if err != nil { + return nil + } + hashes[i] = hash + } + + for i := uint32(0); i < fi.fanout[0xff]; i++ { + offset := fi.offsets[OIDLookupChunk] + int64(i)*hash.Size + if n, err := fi.reader.ReadAt(hashes[i+fi.minimumNumberOfHashes][:], offset); err != nil || n < hash.Size { + return nil + } + } + return hashes +} + +func (fi *fileIndex) HasGenerationV2() bool { + return fi.hasGenerationV2 +} + +func (fi *fileIndex) MaximumNumberOfHashes() uint32 { + return fi.minimumNumberOfHashes + fi.fanout[0xff] +} diff --git a/plumbing/format/commitgraph/v2/memory.go b/plumbing/format/commitgraph/v2/memory.go new file mode 100644 index 0000000..8de0c5f --- /dev/null +++ b/plumbing/format/commitgraph/v2/memory.go @@ -0,0 +1,107 @@ +package v2 + +import ( + "math" + + "github.com/go-git/go-git/v5/plumbing" +) + +// MemoryIndex provides a way to build the commit-graph in memory +// for later encoding to file. +type MemoryIndex struct { + commitData []commitData + indexMap map[plumbing.Hash]uint32 + hasGenerationV2 bool +} + +type commitData struct { + Hash plumbing.Hash + *CommitData +} + +// NewMemoryIndex creates in-memory commit graph representation +func NewMemoryIndex() *MemoryIndex { + return &MemoryIndex{ + indexMap: make(map[plumbing.Hash]uint32), + hasGenerationV2: true, + } +} + +// GetIndexByHash gets the index in the commit graph from commit hash, if available +func (mi *MemoryIndex) GetIndexByHash(h plumbing.Hash) (uint32, error) { + i, ok := mi.indexMap[h] + if ok { + return i, nil + } + + return 0, plumbing.ErrObjectNotFound +} + +// GetHashByIndex gets the hash given an index in the commit graph +func (mi *MemoryIndex) GetHashByIndex(i uint32) (plumbing.Hash, error) { + if i >= uint32(len(mi.commitData)) { + return plumbing.ZeroHash, plumbing.ErrObjectNotFound + } + + return mi.commitData[i].Hash, nil +} + +// GetCommitDataByIndex gets the commit node from the commit graph using index +// obtained from child node, if available +func (mi *MemoryIndex) GetCommitDataByIndex(i uint32) (*CommitData, error) { + if i >= uint32(len(mi.commitData)) { + return nil, plumbing.ErrObjectNotFound + } + + commitData := mi.commitData[i] + + // Map parent hashes to parent indexes + if commitData.ParentIndexes == nil { + parentIndexes := make([]uint32, len(commitData.ParentHashes)) + for i, parentHash := range commitData.ParentHashes { + var err error + if parentIndexes[i], err = mi.GetIndexByHash(parentHash); err != nil { + return nil, err + } + } + commitData.ParentIndexes = parentIndexes + } + + return commitData.CommitData, nil +} + +// Hashes returns all the hashes that are available in the index +func (mi *MemoryIndex) Hashes() []plumbing.Hash { + hashes := make([]plumbing.Hash, 0, len(mi.indexMap)) + for k := range mi.indexMap { + hashes = append(hashes, k) + } + return hashes +} + +// Add adds new node to the memory index +func (mi *MemoryIndex) Add(hash plumbing.Hash, data *CommitData) { + // The parent indexes are calculated lazily in GetNodeByIndex + // which allows adding nodes out of order as long as all parents + // are eventually resolved + data.ParentIndexes = nil + mi.indexMap[hash] = uint32(len(mi.commitData)) + mi.commitData = append(mi.commitData, commitData{Hash: hash, CommitData: data}) + if data.GenerationV2 == math.MaxUint64 { // if GenerationV2 is not available reset it to zero + data.GenerationV2 = 0 + } + mi.hasGenerationV2 = mi.hasGenerationV2 && data.GenerationV2 != 0 +} + +func (mi *MemoryIndex) HasGenerationV2() bool { + return mi.hasGenerationV2 +} + +// Close closes the index +func (mi *MemoryIndex) Close() error { + return nil +} + +func (mi *MemoryIndex) MaximumNumberOfHashes() uint32 { + return uint32(len(mi.indexMap)) +} diff --git a/plumbing/format/config/decoder_test.go b/plumbing/format/config/decoder_test.go index 0a8e92c..6283f5e 100644 --- a/plumbing/format/config/decoder_test.go +++ b/plumbing/format/config/decoder_test.go @@ -2,6 +2,7 @@ package config import ( "bytes" + "testing" . "gopkg.in/check.v1" ) @@ -91,3 +92,13 @@ func decodeFails(c *C, text string) { err := d.Decode(cfg) c.Assert(err, NotNil) } + +func FuzzDecoder(f *testing.F) { + + f.Fuzz(func(t *testing.T, input []byte) { + + d := NewDecoder(bytes.NewReader(input)) + cfg := &Config{} + d.Decode(cfg) + }) +} diff --git a/plumbing/format/packfile/delta_test.go b/plumbing/format/packfile/delta_test.go index e8f5ea6..9417e55 100644 --- a/plumbing/format/packfile/delta_test.go +++ b/plumbing/format/packfile/delta_test.go @@ -4,6 +4,7 @@ import ( "bytes" "io" "math/rand" + "testing" "github.com/go-git/go-git/v5/plumbing" . "gopkg.in/check.v1" @@ -176,3 +177,14 @@ func (s *DeltaSuite) TestMaxCopySizeDeltaReader(c *C) { c.Assert(err, IsNil) c.Assert(result, DeepEquals, targetBuf) } + +func FuzzPatchDelta(f *testing.F) { + + f.Fuzz(func(t *testing.T, input []byte) { + + input_0 := input[:len(input)/2] + input_1 := input[len(input)/2:] + + PatchDelta(input_0, input_1) + }) +} diff --git a/plumbing/format/packfile/diff_delta.go b/plumbing/format/packfile/diff_delta.go index 2c7a335..8898e58 100644 --- a/plumbing/format/packfile/diff_delta.go +++ b/plumbing/format/packfile/diff_delta.go @@ -17,8 +17,11 @@ const ( s = 16 // https://github.com/git/git/blob/f7466e94375b3be27f229c78873f0acf8301c0a5/diff-delta.c#L428 - // Max size of a copy operation (64KB) + // Max size of a copy operation (64KB). maxCopySize = 64 * 1024 + + // Min size of a copy operation. + minCopySize = 4 ) // GetDelta returns an EncodedObject of type OFSDeltaObject. Base and Target object, diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index edbc0e7..62f1d13 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -3,6 +3,7 @@ package packfile import ( "bytes" "errors" + "fmt" "io" "github.com/go-git/go-git/v5/plumbing" @@ -174,13 +175,25 @@ func (p *Parser) init() error { return nil } +type objectHeaderWriter func(typ plumbing.ObjectType, sz int64) error + +type lazyObjectWriter interface { + // LazyWriter enables an object to be lazily written. + // It returns: + // - w: a writer to receive the object's content. + // - lwh: a func to write the object header. + // - err: any error from the initial writer creation process. + // + // Note that if the object header is not written BEFORE the writer + // is used, this will result in an invalid object. + LazyWriter() (w io.WriteCloser, lwh objectHeaderWriter, err error) +} + func (p *Parser) indexObjects() error { buf := sync.GetBytesBuffer() defer sync.PutBytesBuffer(buf) for i := uint32(0); i < p.count; i++ { - buf.Reset() - oh, err := p.scanner.NextObjectHeader() if err != nil { return err @@ -220,21 +233,60 @@ func (p *Parser) indexObjects() error { ota = newBaseObject(oh.Offset, oh.Length, t) } - buf.Grow(int(oh.Length)) - _, crc, err := p.scanner.NextObject(buf) + hasher := plumbing.NewHasher(oh.Type, oh.Length) + writers := []io.Writer{hasher} + var obj *plumbing.MemoryObject + + // Lazy writing is only available for non-delta objects. + if p.storage != nil && !delta { + // When a storage is set and supports lazy writing, + // use that instead of creating a memory object. + if low, ok := p.storage.(lazyObjectWriter); ok { + ow, lwh, err := low.LazyWriter() + if err != nil { + return err + } + + if err = lwh(oh.Type, oh.Length); err != nil { + return err + } + + defer ow.Close() + writers = append(writers, ow) + } else { + obj = new(plumbing.MemoryObject) + obj.SetSize(oh.Length) + obj.SetType(oh.Type) + + writers = append(writers, obj) + } + } + if delta && !p.scanner.IsSeekable { + buf.Reset() + buf.Grow(int(oh.Length)) + writers = append(writers, buf) + } + + mw := io.MultiWriter(writers...) + + _, crc, err := p.scanner.NextObject(mw) if err != nil { return err } + // Non delta objects needs to be added into the storage. This + // is only required when lazy writing is not supported. + if obj != nil { + if _, err := p.storage.SetEncodedObject(obj); err != nil { + return err + } + } + ota.Crc32 = crc ota.Length = oh.Length - data := buf.Bytes() if !delta { - sha1, err := getSHA1(ota.Type, data) - if err != nil { - return err - } + sha1 := hasher.Sum() // Move children of placeholder parent into actual parent, in case this // was a non-external delta reference. @@ -249,20 +301,8 @@ func (p *Parser) indexObjects() error { p.oiByHash[ota.SHA1] = ota } - if p.storage != nil && !delta { - obj := new(plumbing.MemoryObject) - obj.SetSize(oh.Length) - obj.SetType(oh.Type) - if _, err := obj.Write(data); err != nil { - return err - } - - if _, err := p.storage.SetEncodedObject(obj); err != nil { - return err - } - } - if delta && !p.scanner.IsSeekable { + data := buf.Bytes() p.deltas[oh.Offset] = make([]byte, len(data)) copy(p.deltas[oh.Offset], data) } @@ -280,23 +320,29 @@ func (p *Parser) resolveDeltas() error { for _, obj := range p.oi { buf.Reset() + buf.Grow(int(obj.Length)) err := p.get(obj, buf) if err != nil { return err } - content := buf.Bytes() if err := p.onInflatedObjectHeader(obj.Type, obj.Length, obj.Offset); err != nil { return err } - if err := p.onInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, content); err != nil { + if err := p.onInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, nil); err != nil { return err } if !obj.IsDelta() && len(obj.Children) > 0 { + // Dealing with an io.ReaderAt object, means we can + // create it once and reuse across all children. + r := bytes.NewReader(buf.Bytes()) for _, child := range obj.Children { - if err := p.resolveObject(io.Discard, child, content); err != nil { + // Even though we are discarding the output, we still need to read it to + // so that the scanner can advance to the next object, and the SHA1 can be + // calculated. + if err := p.resolveObject(io.Discard, child, r); err != nil { return err } p.resolveExternalRef(child) @@ -361,13 +407,13 @@ func (p *Parser) get(o *objectInfo, buf *bytes.Buffer) (err error) { if o.DiskType.IsDelta() { b := sync.GetBytesBuffer() defer sync.PutBytesBuffer(b) + buf.Grow(int(o.Length)) err := p.get(o.Parent, b) if err != nil { return err } - base := b.Bytes() - err = p.resolveObject(buf, o, base) + err = p.resolveObject(buf, o, bytes.NewReader(b.Bytes())) if err != nil { return err } @@ -378,6 +424,13 @@ func (p *Parser) get(o *objectInfo, buf *bytes.Buffer) (err error) { } } + // If the scanner is seekable, caching this data into + // memory by offset seems wasteful. + // There is a trade-off to be considered here in terms + // of execution time vs memory consumption. + // + // TODO: improve seekable execution time, so that we can + // skip this cache. if len(o.Children) > 0 { data := make([]byte, buf.Len()) copy(data, buf.Bytes()) @@ -386,10 +439,25 @@ func (p *Parser) get(o *objectInfo, buf *bytes.Buffer) (err error) { return nil } +// resolveObject resolves an object from base, using information +// provided by o. +// +// This call has the side-effect of changing field values +// from the object info o: +// - Type: OFSDeltaObject may become the target type (e.g. Blob). +// - Size: The size may be update with the target size. +// - Hash: Zero hashes will be calculated as part of the object +// resolution. Hence why this process can't be avoided even when w +// is an io.Discard. +// +// base must be an io.ReaderAt, which is a requirement from +// patchDeltaStream. The main reason being that reversing an +// delta object may lead to going backs and forths within base, +// which is not supported by io.Reader. func (p *Parser) resolveObject( w io.Writer, o *objectInfo, - base []byte, + base io.ReaderAt, ) error { if !o.DiskType.IsDelta() { return nil @@ -400,26 +468,46 @@ func (p *Parser) resolveObject( if err != nil { return err } - data := buf.Bytes() - data, err = applyPatchBase(o, data, base) + writers := []io.Writer{w} + var obj *plumbing.MemoryObject + var lwh objectHeaderWriter + + if p.storage != nil { + if low, ok := p.storage.(lazyObjectWriter); ok { + ow, wh, err := low.LazyWriter() + if err != nil { + return err + } + lwh = wh + + defer ow.Close() + writers = append(writers, ow) + } else { + obj = new(plumbing.MemoryObject) + ow, err := obj.Writer() + if err != nil { + return err + } + + writers = append(writers, ow) + } + } + + mw := io.MultiWriter(writers...) + + err = applyPatchBase(o, base, buf, mw, lwh) if err != nil { return err } - if p.storage != nil { - obj := new(plumbing.MemoryObject) - obj.SetSize(o.Size()) + if obj != nil { obj.SetType(o.Type) - if _, err := obj.Write(data); err != nil { - return err - } - + obj.SetSize(o.Size()) // Size here is correct as it was populated by applyPatchBase. if _, err := p.storage.SetEncodedObject(obj); err != nil { return err } } - _, err = w.Write(data) return err } @@ -443,24 +531,31 @@ func (p *Parser) readData(w io.Writer, o *objectInfo) error { return nil } -func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { - patched, err := PatchDelta(base, data) - if err != nil { - return nil, err +// applyPatchBase applies the patch to target. +// +// Note that ota will be updated based on the description in resolveObject. +func applyPatchBase(ota *objectInfo, base io.ReaderAt, delta io.Reader, target io.Writer, wh objectHeaderWriter) error { + if target == nil { + return fmt.Errorf("cannot apply patch against nil target") } + typ := ota.Type if ota.SHA1 == plumbing.ZeroHash { - ota.Type = ota.Parent.Type - sha1, err := getSHA1(ota.Type, patched) - if err != nil { - return nil, err - } + typ = ota.Parent.Type + } + + sz, h, err := patchDeltaWriter(target, base, delta, typ, wh) + if err != nil { + return err + } - ota.SHA1 = sha1 - ota.Length = int64(len(patched)) + if ota.SHA1 == plumbing.ZeroHash { + ota.Type = typ + ota.Length = int64(sz) + ota.SHA1 = h } - return patched, nil + return nil } func getSHA1(t plumbing.ObjectType, data []byte) (plumbing.Hash, error) { diff --git a/plumbing/format/packfile/patch_delta.go b/plumbing/format/packfile/patch_delta.go index f00562d..960769c 100644 --- a/plumbing/format/packfile/patch_delta.go +++ b/plumbing/format/packfile/patch_delta.go @@ -4,6 +4,7 @@ import ( "bufio" "bytes" "errors" + "fmt" "io" "math" @@ -17,7 +18,33 @@ import ( // and https://github.com/tarruda/node-git-core/blob/master/src/js/delta.js // for details about the delta format. -const deltaSizeMin = 4 +var ( + ErrInvalidDelta = errors.New("invalid delta") + ErrDeltaCmd = errors.New("wrong delta command") +) + +const ( + payload = 0x7f // 0111 1111 + continuation = 0x80 // 1000 0000 +) + +type offset struct { + mask byte + shift uint +} + +var offsets = []offset{ + {mask: 0x01, shift: 0}, + {mask: 0x02, shift: 8}, + {mask: 0x04, shift: 16}, + {mask: 0x08, shift: 24}, +} + +var sizes = []offset{ + {mask: 0x10, shift: 0}, + {mask: 0x20, shift: 8}, + {mask: 0x40, shift: 16}, +} // ApplyDelta writes to target the result of applying the modification deltas in delta to base. func ApplyDelta(target, base plumbing.EncodedObject, delta []byte) (err error) { @@ -58,11 +85,6 @@ func ApplyDelta(target, base plumbing.EncodedObject, delta []byte) (err error) { return err } -var ( - ErrInvalidDelta = errors.New("invalid delta") - ErrDeltaCmd = errors.New("wrong delta command") -) - // PatchDelta returns the result of applying the modification deltas in delta to src. // An error will be returned if delta is corrupted (ErrDeltaLen) or an action command // is not copy from source or copy from delta (ErrDeltaCmd). @@ -120,7 +142,8 @@ func ReaderFromDelta(base plumbing.EncodedObject, deltaRC io.Reader) (io.ReadClo return } - if isCopyFromSrc(cmd) { + switch { + case isCopyFromSrc(cmd): offset, err := decodeOffsetByteReader(cmd, deltaBuf) if err != nil { _ = dstWr.CloseWithError(err) @@ -173,7 +196,8 @@ func ReaderFromDelta(base plumbing.EncodedObject, deltaRC io.Reader) (io.ReadClo } remainingTargetSz -= sz basePos += sz - } else if isCopyFromDelta(cmd) { + + case isCopyFromDelta(cmd): sz := uint(cmd) // cmd is the size itself if invalidSize(sz, targetSz) { _ = dstWr.CloseWithError(ErrInvalidDelta) @@ -185,10 +209,12 @@ func ReaderFromDelta(base plumbing.EncodedObject, deltaRC io.Reader) (io.ReadClo } remainingTargetSz -= sz - } else { + + default: _ = dstWr.CloseWithError(ErrDeltaCmd) return } + if remainingTargetSz <= 0 { _ = dstWr.Close() return @@ -200,7 +226,7 @@ func ReaderFromDelta(base plumbing.EncodedObject, deltaRC io.Reader) (io.ReadClo } func patchDelta(dst *bytes.Buffer, src, delta []byte) error { - if len(delta) < deltaSizeMin { + if len(delta) < minCopySize { return ErrInvalidDelta } @@ -221,7 +247,9 @@ func patchDelta(dst *bytes.Buffer, src, delta []byte) error { cmd = delta[0] delta = delta[1:] - if isCopyFromSrc(cmd) { + + switch { + case isCopyFromSrc(cmd): var offset, sz uint var err error offset, delta, err = decodeOffset(cmd, delta) @@ -240,7 +268,8 @@ func patchDelta(dst *bytes.Buffer, src, delta []byte) error { } dst.Write(src[offset : offset+sz]) remainingTargetSz -= sz - } else if isCopyFromDelta(cmd) { + + case isCopyFromDelta(cmd): sz := uint(cmd) // cmd is the size itself if invalidSize(sz, targetSz) { return ErrInvalidDelta @@ -253,7 +282,8 @@ func patchDelta(dst *bytes.Buffer, src, delta []byte) error { dst.Write(delta[0:sz]) remainingTargetSz -= sz delta = delta[sz:] - } else { + + default: return ErrDeltaCmd } @@ -265,6 +295,107 @@ func patchDelta(dst *bytes.Buffer, src, delta []byte) error { return nil } +func patchDeltaWriter(dst io.Writer, base io.ReaderAt, delta io.Reader, + typ plumbing.ObjectType, writeHeader objectHeaderWriter) (uint, plumbing.Hash, error) { + deltaBuf := bufio.NewReaderSize(delta, 1024) + srcSz, err := decodeLEB128ByteReader(deltaBuf) + if err != nil { + if err == io.EOF { + return 0, plumbing.ZeroHash, ErrInvalidDelta + } + return 0, plumbing.ZeroHash, err + } + + if r, ok := base.(*bytes.Reader); ok && srcSz != uint(r.Size()) { + return 0, plumbing.ZeroHash, ErrInvalidDelta + } + + targetSz, err := decodeLEB128ByteReader(deltaBuf) + if err != nil { + if err == io.EOF { + return 0, plumbing.ZeroHash, ErrInvalidDelta + } + return 0, plumbing.ZeroHash, err + } + + // If header still needs to be written, caller will provide + // a LazyObjectWriterHeader. This seems to be the case when + // dealing with thin-packs. + if writeHeader != nil { + err = writeHeader(typ, int64(targetSz)) + if err != nil { + return 0, plumbing.ZeroHash, fmt.Errorf("could not lazy write header: %w", err) + } + } + + remainingTargetSz := targetSz + + hasher := plumbing.NewHasher(typ, int64(targetSz)) + mw := io.MultiWriter(dst, hasher) + + bufp := sync.GetByteSlice() + defer sync.PutByteSlice(bufp) + + sr := io.NewSectionReader(base, int64(0), int64(srcSz)) + // Keep both the io.LimitedReader types, so we can reset N. + baselr := io.LimitReader(sr, 0).(*io.LimitedReader) + deltalr := io.LimitReader(deltaBuf, 0).(*io.LimitedReader) + + for { + buf := *bufp + cmd, err := deltaBuf.ReadByte() + if err == io.EOF { + return 0, plumbing.ZeroHash, ErrInvalidDelta + } + if err != nil { + return 0, plumbing.ZeroHash, err + } + + if isCopyFromSrc(cmd) { + offset, err := decodeOffsetByteReader(cmd, deltaBuf) + if err != nil { + return 0, plumbing.ZeroHash, err + } + sz, err := decodeSizeByteReader(cmd, deltaBuf) + if err != nil { + return 0, plumbing.ZeroHash, err + } + + if invalidSize(sz, targetSz) || + invalidOffsetSize(offset, sz, srcSz) { + return 0, plumbing.ZeroHash, err + } + + if _, err := sr.Seek(int64(offset), io.SeekStart); err != nil { + return 0, plumbing.ZeroHash, err + } + baselr.N = int64(sz) + if _, err := io.CopyBuffer(mw, baselr, buf); err != nil { + return 0, plumbing.ZeroHash, err + } + remainingTargetSz -= sz + } else if isCopyFromDelta(cmd) { + sz := uint(cmd) // cmd is the size itself + if invalidSize(sz, targetSz) { + return 0, plumbing.ZeroHash, ErrInvalidDelta + } + deltalr.N = int64(sz) + if _, err := io.CopyBuffer(mw, deltalr, buf); err != nil { + return 0, plumbing.ZeroHash, err + } + + remainingTargetSz -= sz + } else { + return 0, plumbing.ZeroHash, err + } + if remainingTargetSz <= 0 { + break + } + } + + return targetSz, hasher.Sum(), nil +} + // Decodes a number encoded as an unsigned LEB128 at the start of some // binary data and returns the decoded number and the rest of the // stream. @@ -306,48 +437,24 @@ func decodeLEB128ByteReader(input io.ByteReader) (uint, error) { return num, nil } -const ( - payload = 0x7f // 0111 1111 - continuation = 0x80 // 1000 0000 -) - func isCopyFromSrc(cmd byte) bool { - return (cmd & 0x80) != 0 + return (cmd & continuation) != 0 } func isCopyFromDelta(cmd byte) bool { - return (cmd&0x80) == 0 && cmd != 0 + return (cmd&continuation) == 0 && cmd != 0 } func decodeOffsetByteReader(cmd byte, delta io.ByteReader) (uint, error) { var offset uint - if (cmd & 0x01) != 0 { - next, err := delta.ReadByte() - if err != nil { - return 0, err - } - offset = uint(next) - } - if (cmd & 0x02) != 0 { - next, err := delta.ReadByte() - if err != nil { - return 0, err - } - offset |= uint(next) << 8 - } - if (cmd & 0x04) != 0 { - next, err := delta.ReadByte() - if err != nil { - return 0, err - } - offset |= uint(next) << 16 - } - if (cmd & 0x08) != 0 { - next, err := delta.ReadByte() - if err != nil { - return 0, err + for _, o := range offsets { + if (cmd & o.mask) != 0 { + next, err := delta.ReadByte() + if err != nil { + return 0, err + } + offset |= uint(next) << o.shift } - offset |= uint(next) << 24 } return offset, nil @@ -355,33 +462,14 @@ func decodeOffsetByteReader(cmd byte, delta io.ByteReader) (uint, error) { func decodeOffset(cmd byte, delta []byte) (uint, []byte, error) { var offset uint - if (cmd & 0x01) != 0 { - if len(delta) == 0 { - return 0, nil, ErrInvalidDelta - } - offset = uint(delta[0]) - delta = delta[1:] - } - if (cmd & 0x02) != 0 { - if len(delta) == 0 { - return 0, nil, ErrInvalidDelta - } - offset |= uint(delta[0]) << 8 - delta = delta[1:] - } - if (cmd & 0x04) != 0 { - if len(delta) == 0 { - return 0, nil, ErrInvalidDelta - } - offset |= uint(delta[0]) << 16 - delta = delta[1:] - } - if (cmd & 0x08) != 0 { - if len(delta) == 0 { - return 0, nil, ErrInvalidDelta + for _, o := range offsets { + if (cmd & o.mask) != 0 { + if len(delta) == 0 { + return 0, nil, ErrInvalidDelta + } + offset |= uint(delta[0]) << o.shift + delta = delta[1:] } - offset |= uint(delta[0]) << 24 - delta = delta[1:] } return offset, delta, nil @@ -389,29 +477,18 @@ func decodeOffset(cmd byte, delta []byte) (uint, []byte, error) { func decodeSizeByteReader(cmd byte, delta io.ByteReader) (uint, error) { var sz uint - if (cmd & 0x10) != 0 { - next, err := delta.ReadByte() - if err != nil { - return 0, err - } - sz = uint(next) - } - if (cmd & 0x20) != 0 { - next, err := delta.ReadByte() - if err != nil { - return 0, err - } - sz |= uint(next) << 8 - } - if (cmd & 0x40) != 0 { - next, err := delta.ReadByte() - if err != nil { - return 0, err + for _, s := range sizes { + if (cmd & s.mask) != 0 { + next, err := delta.ReadByte() + if err != nil { + return 0, err + } + sz |= uint(next) << s.shift } - sz |= uint(next) << 16 } + if sz == 0 { - sz = 0x10000 + sz = maxCopySize } return sz, nil @@ -419,29 +496,17 @@ func decodeSizeByteReader(cmd byte, delta io.ByteReader) (uint, error) { func decodeSize(cmd byte, delta []byte) (uint, []byte, error) { var sz uint - if (cmd & 0x10) != 0 { - if len(delta) == 0 { - return 0, nil, ErrInvalidDelta - } - sz = uint(delta[0]) - delta = delta[1:] - } - if (cmd & 0x20) != 0 { - if len(delta) == 0 { - return 0, nil, ErrInvalidDelta - } - sz |= uint(delta[0]) << 8 - delta = delta[1:] - } - if (cmd & 0x40) != 0 { - if len(delta) == 0 { - return 0, nil, ErrInvalidDelta + for _, s := range sizes { + if (cmd & s.mask) != 0 { + if len(delta) == 0 { + return 0, nil, ErrInvalidDelta + } + sz |= uint(delta[0]) << s.shift + delta = delta[1:] } - sz |= uint(delta[0]) << 16 - delta = delta[1:] } if sz == 0 { - sz = 0x10000 + sz = maxCopySize } return sz, delta, nil diff --git a/plumbing/format/pktline/encoder.go b/plumbing/format/pktline/encoder.go index 6d40979..b6144fa 100644 --- a/plumbing/format/pktline/encoder.go +++ b/plumbing/format/pktline/encoder.go @@ -7,6 +7,8 @@ import ( "errors" "fmt" "io" + + "github.com/go-git/go-git/v5/utils/trace" ) // An Encoder writes pkt-lines to an output stream. @@ -43,6 +45,7 @@ func NewEncoder(w io.Writer) *Encoder { // Flush encodes a flush-pkt to the output stream. func (e *Encoder) Flush() error { + defer trace.Packet.Print("packet: > 0000") _, err := e.w.Write(FlushPkt) return err } @@ -70,6 +73,7 @@ func (e *Encoder) encodeLine(p []byte) error { } n := len(p) + 4 + defer trace.Packet.Printf("packet: > %04x %s", n, p) if _, err := e.w.Write(asciiHex16(n)); err != nil { return err } diff --git a/plumbing/format/pktline/error.go b/plumbing/format/pktline/error.go new file mode 100644 index 0000000..2c0e5a7 --- /dev/null +++ b/plumbing/format/pktline/error.go @@ -0,0 +1,51 @@ +package pktline + +import ( + "bytes" + "errors" + "io" + "strings" +) + +var ( + // ErrInvalidErrorLine is returned by Decode when the packet line is not an + // error line. + ErrInvalidErrorLine = errors.New("expected an error-line") + + errPrefix = []byte("ERR ") +) + +// ErrorLine is a packet line that contains an error message. +// Once this packet is sent by client or server, the data transfer process is +// terminated. +// See https://git-scm.com/docs/pack-protocol#_pkt_line_format +type ErrorLine struct { + Text string +} + +// Error implements the error interface. +func (e *ErrorLine) Error() string { + return e.Text +} + +// Encode encodes the ErrorLine into a packet line. +func (e *ErrorLine) Encode(w io.Writer) error { + p := NewEncoder(w) + return p.Encodef("%s%s\n", string(errPrefix), e.Text) +} + +// Decode decodes a packet line into an ErrorLine. +func (e *ErrorLine) Decode(r io.Reader) error { + s := NewScanner(r) + if !s.Scan() { + return s.Err() + } + + line := s.Bytes() + if !bytes.HasPrefix(line, errPrefix) { + return ErrInvalidErrorLine + } + + e.Text = strings.TrimSpace(string(line[4:])) + return nil +} diff --git a/plumbing/format/pktline/error_test.go b/plumbing/format/pktline/error_test.go new file mode 100644 index 0000000..3cffd20 --- /dev/null +++ b/plumbing/format/pktline/error_test.go @@ -0,0 +1,68 @@ +package pktline + +import ( + "bytes" + "errors" + "io" + "testing" +) + +func TestEncodeEmptyErrorLine(t *testing.T) { + e := &ErrorLine{} + err := e.Encode(io.Discard) + if err != nil { + t.Fatal(err) + } +} + +func TestEncodeErrorLine(t *testing.T) { + e := &ErrorLine{ + Text: "something", + } + var buf bytes.Buffer + err := e.Encode(&buf) + if err != nil { + t.Fatal(err) + } + if buf.String() != "0012ERR something\n" { + t.Fatalf("unexpected encoded error line: %q", buf.String()) + } +} + +func TestDecodeEmptyErrorLine(t *testing.T) { + var buf bytes.Buffer + e := &ErrorLine{} + err := e.Decode(&buf) + if err != nil { + t.Fatal(err) + } + if e.Text != "" { + t.Fatalf("unexpected error line: %q", e.Text) + } +} + +func TestDecodeErrorLine(t *testing.T) { + var buf bytes.Buffer + buf.WriteString("000eERR foobar") + var e *ErrorLine + err := e.Decode(&buf) + if !errors.As(err, &e) { + t.Fatalf("expected error line, got: %T: %v", err, err) + } + if e.Text != "foobar" { + t.Fatalf("unexpected error line: %q", e.Text) + } +} + +func TestDecodeErrorLineLn(t *testing.T) { + var buf bytes.Buffer + buf.WriteString("000fERR foobar\n") + var e *ErrorLine + err := e.Decode(&buf) + if !errors.As(err, &e) { + t.Fatalf("expected error line, got: %T: %v", err, err) + } + if e.Text != "foobar" { + t.Fatalf("unexpected error line: %q", e.Text) + } +} diff --git a/plumbing/format/pktline/scanner.go b/plumbing/format/pktline/scanner.go index 99aab46..fbb137d 100644 --- a/plumbing/format/pktline/scanner.go +++ b/plumbing/format/pktline/scanner.go @@ -1,8 +1,12 @@ package pktline import ( + "bytes" "errors" "io" + "strings" + + "github.com/go-git/go-git/v5/utils/trace" ) const ( @@ -65,6 +69,14 @@ func (s *Scanner) Scan() bool { return false } s.payload = s.payload[:l] + trace.Packet.Printf("packet: < %04x %s", l, s.payload) + + if bytes.HasPrefix(s.payload, errPrefix) { + s.err = &ErrorLine{ + Text: strings.TrimSpace(string(s.payload[4:])), + } + return false + } return true } diff --git a/plumbing/hash/hash.go b/plumbing/hash/hash.go index 82d1856..8609848 100644 --- a/plumbing/hash/hash.go +++ b/plumbing/hash/hash.go @@ -24,7 +24,7 @@ func reset() { algos[crypto.SHA256] = crypto.SHA256.New } -// RegisterHash allows for the hash algorithm used to be overriden. +// RegisterHash allows for the hash algorithm used to be overridden. // This ensures the hash selection for go-git must be explicit, when // overriding the default value. func RegisterHash(h crypto.Hash, f func() hash.Hash) error { diff --git a/plumbing/object.go b/plumbing/object.go index 2655dee..3ee9de9 100644 --- a/plumbing/object.go +++ b/plumbing/object.go @@ -82,7 +82,7 @@ func (t ObjectType) Valid() bool { return t >= CommitObject && t <= REFDeltaObject } -// IsDelta returns true for any ObjectTyoe that represents a delta (i.e. +// IsDelta returns true for any ObjectType that represents a delta (i.e. // REFDeltaObject or OFSDeltaObject). func (t ObjectType) IsDelta() bool { return t == REFDeltaObject || t == OFSDeltaObject diff --git a/plumbing/object/commit.go b/plumbing/object/commit.go index 8a0f35c..ceed5d0 100644 --- a/plumbing/object/commit.go +++ b/plumbing/object/commit.go @@ -17,14 +17,25 @@ import ( ) const ( - beginpgp string = "-----BEGIN PGP SIGNATURE-----" - endpgp string = "-----END PGP SIGNATURE-----" - headerpgp string = "gpgsig" + beginpgp string = "-----BEGIN PGP SIGNATURE-----" + endpgp string = "-----END PGP SIGNATURE-----" + headerpgp string = "gpgsig" + headerencoding string = "encoding" + + // https://github.com/git/git/blob/bcb6cae2966cc407ca1afc77413b3ef11103c175/Documentation/gitformat-signature.txt#L153 + // When a merge commit is created from a signed tag, the tag is embedded in + // the commit with the "mergetag" header. + headermergetag string = "mergetag" + + defaultUtf8CommitMesageEncoding MessageEncoding = "UTF-8" ) // Hash represents the hash of an object type Hash plumbing.Hash +// MessageEncoding represents the encoding of a commit +type MessageEncoding string + // Commit points to a single tree, marking it as what the project looked like // at a certain point in time. It contains meta-information about that point // in time, such as a timestamp, the author of the changes since the last @@ -38,6 +49,9 @@ type Commit struct { // Committer is the one performing the commit, might be different from // Author. Committer Signature + // MergeTag is the embedded tag object when a merge commit is created by + // merging a signed tag. + MergeTag string // PGPSignature is the PGP signature of the commit. PGPSignature string // Message is the commit message, contains arbitrary text. @@ -46,6 +60,8 @@ type Commit struct { TreeHash plumbing.Hash // ParentHashes are the hashes of the parent commits of the commit. ParentHashes []plumbing.Hash + // Encoding is the encoding of the commit. + Encoding MessageEncoding s storer.EncodedObjectStorer } @@ -173,6 +189,7 @@ func (c *Commit) Decode(o plumbing.EncodedObject) (err error) { } c.Hash = o.Hash() + c.Encoding = defaultUtf8CommitMesageEncoding reader, err := o.Reader() if err != nil { @@ -184,6 +201,7 @@ func (c *Commit) Decode(o plumbing.EncodedObject) (err error) { defer sync.PutBufioReader(r) var message bool + var mergetag bool var pgpsig bool var msgbuf bytes.Buffer for { @@ -192,6 +210,16 @@ func (c *Commit) Decode(o plumbing.EncodedObject) (err error) { return err } + if mergetag { + if len(line) > 0 && line[0] == ' ' { + line = bytes.TrimLeft(line, " ") + c.MergeTag += string(line) + continue + } else { + mergetag = false + } + } + if pgpsig { if len(line) > 0 && line[0] == ' ' { line = bytes.TrimLeft(line, " ") @@ -225,6 +253,11 @@ func (c *Commit) Decode(o plumbing.EncodedObject) (err error) { c.Author.Decode(data) case "committer": c.Committer.Decode(data) + case headermergetag: + c.MergeTag += string(data) + "\n" + mergetag = true + case headerencoding: + c.Encoding = MessageEncoding(data) case headerpgp: c.PGPSignature += string(data) + "\n" pgpsig = true @@ -286,6 +319,28 @@ func (c *Commit) encode(o plumbing.EncodedObject, includeSig bool) (err error) { return err } + if c.MergeTag != "" { + if _, err = fmt.Fprint(w, "\n"+headermergetag+" "); err != nil { + return err + } + + // Split tag information lines and re-write with a left padding and + // newline. Use join for this so it's clear that a newline should not be + // added after this section. The newline will be added either as part of + // the PGP signature or the commit message. + mergetag := strings.TrimSuffix(c.MergeTag, "\n") + lines := strings.Split(mergetag, "\n") + if _, err = fmt.Fprint(w, strings.Join(lines, "\n ")); err != nil { + return err + } + } + + if string(c.Encoding) != "" && c.Encoding != defaultUtf8CommitMesageEncoding { + if _, err = fmt.Fprintf(w, "\n%s %s", headerencoding, c.Encoding); err != nil { + return err + } + } + if c.PGPSignature != "" && includeSig { if _, err = fmt.Fprint(w, "\n"+headerpgp+" "); err != nil { return err diff --git a/plumbing/object/commit_test.go b/plumbing/object/commit_test.go index 4b0f6b4..3e1fe1b 100644 --- a/plumbing/object/commit_test.go +++ b/plumbing/object/commit_test.go @@ -3,6 +3,7 @@ package object import ( "bytes" "context" + "fmt" "io" "strings" "time" @@ -197,6 +198,27 @@ func (s *SuiteCommit) TestPatchContext_ToNil(c *C) { } func (s *SuiteCommit) TestCommitEncodeDecodeIdempotent(c *C) { + pgpsignature := `-----BEGIN PGP SIGNATURE----- + +iQEcBAABAgAGBQJTZbQlAAoJEF0+sviABDDrZbQH/09PfE51KPVPlanr6q1v4/Ut +LQxfojUWiLQdg2ESJItkcuweYg+kc3HCyFejeDIBw9dpXt00rY26p05qrpnG+85b +hM1/PswpPLuBSr+oCIDj5GMC2r2iEKsfv2fJbNW8iWAXVLoWZRF8B0MfqX/YTMbm +ecorc4iXzQu7tupRihslbNkfvfciMnSDeSvzCpWAHl7h8Wj6hhqePmLm9lAYqnKp +8S5B/1SSQuEAjRZgI4IexpZoeKGVDptPHxLLS38fozsyi0QyDyzEgJxcJQVMXxVi +RUysgqjcpT8+iQM1PblGfHR4XAhuOqN5Fx06PSaFZhqvWFezJ28/CLyX5q+oIVk= +=EFTF +-----END PGP SIGNATURE----- +` + + tag := fmt.Sprintf(`object f000000000000000000000000000000000000000 +type commit +tag change +tagger Foo <foo@example.local> 1695827841 -0400 + +change +%s +`, pgpsignature) + ts, err := time.Parse(time.RFC3339, "2006-01-02T15:04:05-07:00") c.Assert(err, IsNil) commits := []*Commit{ @@ -206,6 +228,7 @@ func (s *SuiteCommit) TestCommitEncodeDecodeIdempotent(c *C) { Message: "Message\n\nFoo\nBar\nWith trailing blank lines\n\n", TreeHash: plumbing.NewHash("f000000000000000000000000000000000000001"), ParentHashes: []plumbing.Hash{plumbing.NewHash("f000000000000000000000000000000000000002")}, + Encoding: defaultUtf8CommitMesageEncoding, }, { Author: Signature{Name: "Foo", Email: "foo@example.local", When: ts}, @@ -218,6 +241,32 @@ func (s *SuiteCommit) TestCommitEncodeDecodeIdempotent(c *C) { plumbing.NewHash("f000000000000000000000000000000000000006"), plumbing.NewHash("f000000000000000000000000000000000000007"), }, + Encoding: MessageEncoding("ISO-8859-1"), + }, + { + Author: Signature{Name: "Foo", Email: "foo@example.local", When: ts}, + Committer: Signature{Name: "Bar", Email: "bar@example.local", When: ts}, + Message: "Testing mergetag\n\nHere, commit is not signed", + TreeHash: plumbing.NewHash("f000000000000000000000000000000000000001"), + ParentHashes: []plumbing.Hash{ + plumbing.NewHash("f000000000000000000000000000000000000002"), + plumbing.NewHash("f000000000000000000000000000000000000003"), + }, + MergeTag: tag, + Encoding: defaultUtf8CommitMesageEncoding, + }, + { + Author: Signature{Name: "Foo", Email: "foo@example.local", When: ts}, + Committer: Signature{Name: "Bar", Email: "bar@example.local", When: ts}, + Message: "Testing mergetag\n\nHere, commit is also signed", + TreeHash: plumbing.NewHash("f000000000000000000000000000000000000001"), + ParentHashes: []plumbing.Hash{ + plumbing.NewHash("f000000000000000000000000000000000000002"), + plumbing.NewHash("f000000000000000000000000000000000000003"), + }, + MergeTag: tag, + PGPSignature: pgpsignature, + Encoding: defaultUtf8CommitMesageEncoding, }, } for _, commit := range commits { @@ -485,7 +534,7 @@ func (s *SuiteCommit) TestMalformedHeader(c *C) { } func (s *SuiteCommit) TestEncodeWithoutSignature(c *C) { - //Similar to TestString since no signature + // Similar to TestString since no signature encoded := &plumbing.MemoryObject{} err := s.Commit.EncodeWithoutSignature(encoded) c.Assert(err, IsNil) diff --git a/plumbing/object/commitgraph/commitnode.go b/plumbing/object/commitgraph/commitnode.go index 7abc58b..47227d4 100644 --- a/plumbing/object/commitgraph/commitnode.go +++ b/plumbing/object/commitgraph/commitnode.go @@ -1,98 +1,102 @@ -package commitgraph
-
-import (
- "io"
- "time"
-
- "github.com/go-git/go-git/v5/plumbing"
- "github.com/go-git/go-git/v5/plumbing/object"
- "github.com/go-git/go-git/v5/plumbing/storer"
-)
-
-// CommitNode is generic interface encapsulating a lightweight commit object retrieved
-// from CommitNodeIndex
-type CommitNode interface {
- // ID returns the Commit object id referenced by the commit graph node.
- ID() plumbing.Hash
- // Tree returns the Tree referenced by the commit graph node.
- Tree() (*object.Tree, error)
- // CommitTime returns the Commiter.When time of the Commit referenced by the commit graph node.
- CommitTime() time.Time
- // NumParents returns the number of parents in a commit.
- NumParents() int
- // ParentNodes return a CommitNodeIter for parents of specified node.
- ParentNodes() CommitNodeIter
- // ParentNode returns the ith parent of a commit.
- ParentNode(i int) (CommitNode, error)
- // ParentHashes returns hashes of the parent commits for a specified node
- ParentHashes() []plumbing.Hash
- // Generation returns the generation of the commit for reachability analysis.
- // Objects with newer generation are not reachable from objects of older generation.
- Generation() uint64
- // Commit returns the full commit object from the node
- Commit() (*object.Commit, error)
-}
-
-// CommitNodeIndex is generic interface encapsulating an index of CommitNode objects
-type CommitNodeIndex interface {
- // Get returns a commit node from a commit hash
- Get(hash plumbing.Hash) (CommitNode, error)
-}
-
-// CommitNodeIter is a generic closable interface for iterating over commit nodes.
-type CommitNodeIter interface {
- Next() (CommitNode, error)
- ForEach(func(CommitNode) error) error
- Close()
-}
-
-// parentCommitNodeIter provides an iterator for parent commits from associated CommitNodeIndex.
-type parentCommitNodeIter struct {
- node CommitNode
- i int
-}
-
-func newParentgraphCommitNodeIter(node CommitNode) CommitNodeIter {
- return &parentCommitNodeIter{node, 0}
-}
-
-// Next moves the iterator to the next commit and returns a pointer to it. If
-// there are no more commits, it returns io.EOF.
-func (iter *parentCommitNodeIter) Next() (CommitNode, error) {
- obj, err := iter.node.ParentNode(iter.i)
- if err == object.ErrParentNotFound {
- return nil, io.EOF
- }
- if err == nil {
- iter.i++
- }
-
- return obj, err
-}
-
-// ForEach call the cb function for each commit contained on this iter until
-// an error appends or the end of the iter is reached. If ErrStop is sent
-// the iteration is stopped but no error is returned. The iterator is closed.
-func (iter *parentCommitNodeIter) ForEach(cb func(CommitNode) error) error {
- for {
- obj, err := iter.Next()
- if err != nil {
- if err == io.EOF {
- return nil
- }
-
- return err
- }
-
- if err := cb(obj); err != nil {
- if err == storer.ErrStop {
- return nil
- }
-
- return err
- }
- }
-}
-
-func (iter *parentCommitNodeIter) Close() {
-}
+package commitgraph + +import ( + "io" + "time" + + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/object" + "github.com/go-git/go-git/v5/plumbing/storer" +) + +// CommitNode is generic interface encapsulating a lightweight commit object retrieved +// from CommitNodeIndex +type CommitNode interface { + // ID returns the Commit object id referenced by the commit graph node. + ID() plumbing.Hash + // Tree returns the Tree referenced by the commit graph node. + Tree() (*object.Tree, error) + // CommitTime returns the Committer.When time of the Commit referenced by the commit graph node. + CommitTime() time.Time + // NumParents returns the number of parents in a commit. + NumParents() int + // ParentNodes return a CommitNodeIter for parents of specified node. + ParentNodes() CommitNodeIter + // ParentNode returns the ith parent of a commit. + ParentNode(i int) (CommitNode, error) + // ParentHashes returns hashes of the parent commits for a specified node + ParentHashes() []plumbing.Hash + // Generation returns the generation of the commit for reachability analysis. + // Objects with newer generation are not reachable from objects of older generation. + Generation() uint64 + // GenerationV2 stores the corrected commit date for the commits + // It combines the contents of the GDA2 and GDO2 sections of the commit-graph + // with the commit time portion of the CDAT section. + GenerationV2() uint64 + // Commit returns the full commit object from the node + Commit() (*object.Commit, error) +} + +// CommitNodeIndex is generic interface encapsulating an index of CommitNode objects +type CommitNodeIndex interface { + // Get returns a commit node from a commit hash + Get(hash plumbing.Hash) (CommitNode, error) +} + +// CommitNodeIter is a generic closable interface for iterating over commit nodes. +type CommitNodeIter interface { + Next() (CommitNode, error) + ForEach(func(CommitNode) error) error + Close() +} + +// parentCommitNodeIter provides an iterator for parent commits from associated CommitNodeIndex. +type parentCommitNodeIter struct { + node CommitNode + i int +} + +func newParentgraphCommitNodeIter(node CommitNode) CommitNodeIter { + return &parentCommitNodeIter{node, 0} +} + +// Next moves the iterator to the next commit and returns a pointer to it. If +// there are no more commits, it returns io.EOF. +func (iter *parentCommitNodeIter) Next() (CommitNode, error) { + obj, err := iter.node.ParentNode(iter.i) + if err == object.ErrParentNotFound { + return nil, io.EOF + } + if err == nil { + iter.i++ + } + + return obj, err +} + +// ForEach call the cb function for each commit contained on this iter until +// an error appends or the end of the iter is reached. If ErrStop is sent +// the iteration is stopped but no error is returned. The iterator is closed. +func (iter *parentCommitNodeIter) ForEach(cb func(CommitNode) error) error { + for { + obj, err := iter.Next() + if err != nil { + if err == io.EOF { + return nil + } + + return err + } + + if err := cb(obj); err != nil { + if err == storer.ErrStop { + return nil + } + + return err + } + } +} + +func (iter *parentCommitNodeIter) Close() { +} diff --git a/plumbing/object/commitgraph/commitnode_graph.go b/plumbing/object/commitgraph/commitnode_graph.go index 8e5d4e3..0f51e3b 100644 --- a/plumbing/object/commitgraph/commitnode_graph.go +++ b/plumbing/object/commitgraph/commitnode_graph.go @@ -1,131 +1,140 @@ -package commitgraph
-
-import (
- "fmt"
- "time"
-
- "github.com/go-git/go-git/v5/plumbing"
- "github.com/go-git/go-git/v5/plumbing/format/commitgraph"
- "github.com/go-git/go-git/v5/plumbing/object"
- "github.com/go-git/go-git/v5/plumbing/storer"
-)
-
-// graphCommitNode is a reduced representation of Commit as presented in the commit
-// graph file (commitgraph.Node). It is merely useful as an optimization for walking
-// the commit graphs.
-//
-// graphCommitNode implements the CommitNode interface.
-type graphCommitNode struct {
- // Hash for the Commit object
- hash plumbing.Hash
- // Index of the node in the commit graph file
- index int
-
- commitData *commitgraph.CommitData
- gci *graphCommitNodeIndex
-}
-
-// graphCommitNodeIndex is an index that can load CommitNode objects from both the commit
-// graph files and the object store.
-//
-// graphCommitNodeIndex implements the CommitNodeIndex interface
-type graphCommitNodeIndex struct {
- commitGraph commitgraph.Index
- s storer.EncodedObjectStorer
-}
-
-// NewGraphCommitNodeIndex returns CommitNodeIndex implementation that uses commit-graph
-// files as backing storage and falls back to object storage when necessary
-func NewGraphCommitNodeIndex(commitGraph commitgraph.Index, s storer.EncodedObjectStorer) CommitNodeIndex {
- return &graphCommitNodeIndex{commitGraph, s}
-}
-
-func (gci *graphCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) {
- // Check the commit graph first
- parentIndex, err := gci.commitGraph.GetIndexByHash(hash)
- if err == nil {
- parent, err := gci.commitGraph.GetCommitDataByIndex(parentIndex)
- if err != nil {
- return nil, err
- }
-
- return &graphCommitNode{
- hash: hash,
- index: parentIndex,
- commitData: parent,
- gci: gci,
- }, nil
- }
-
- // Fallback to loading full commit object
- commit, err := object.GetCommit(gci.s, hash)
- if err != nil {
- return nil, err
- }
-
- return &objectCommitNode{
- nodeIndex: gci,
- commit: commit,
- }, nil
-}
-
-func (c *graphCommitNode) ID() plumbing.Hash {
- return c.hash
-}
-
-func (c *graphCommitNode) Tree() (*object.Tree, error) {
- return object.GetTree(c.gci.s, c.commitData.TreeHash)
-}
-
-func (c *graphCommitNode) CommitTime() time.Time {
- return c.commitData.When
-}
-
-func (c *graphCommitNode) NumParents() int {
- return len(c.commitData.ParentIndexes)
-}
-
-func (c *graphCommitNode) ParentNodes() CommitNodeIter {
- return newParentgraphCommitNodeIter(c)
-}
-
-func (c *graphCommitNode) ParentNode(i int) (CommitNode, error) {
- if i < 0 || i >= len(c.commitData.ParentIndexes) {
- return nil, object.ErrParentNotFound
- }
-
- parent, err := c.gci.commitGraph.GetCommitDataByIndex(c.commitData.ParentIndexes[i])
- if err != nil {
- return nil, err
- }
-
- return &graphCommitNode{
- hash: c.commitData.ParentHashes[i],
- index: c.commitData.ParentIndexes[i],
- commitData: parent,
- gci: c.gci,
- }, nil
-}
-
-func (c *graphCommitNode) ParentHashes() []plumbing.Hash {
- return c.commitData.ParentHashes
-}
-
-func (c *graphCommitNode) Generation() uint64 {
- // If the commit-graph file was generated with older Git version that
- // set the generation to zero for every commit the generation assumption
- // is still valid. It is just less useful.
- return uint64(c.commitData.Generation)
-}
-
-func (c *graphCommitNode) Commit() (*object.Commit, error) {
- return object.GetCommit(c.gci.s, c.hash)
-}
-
-func (c *graphCommitNode) String() string {
- return fmt.Sprintf(
- "%s %s\nDate: %s",
- plumbing.CommitObject, c.ID(),
- c.CommitTime().Format(object.DateFormat),
- )
-}
+package commitgraph + +import ( + "fmt" + "time" + + "github.com/go-git/go-git/v5/plumbing" + commitgraph "github.com/go-git/go-git/v5/plumbing/format/commitgraph/v2" + "github.com/go-git/go-git/v5/plumbing/object" + "github.com/go-git/go-git/v5/plumbing/storer" +) + +// graphCommitNode is a reduced representation of Commit as presented in the commit +// graph file (commitgraph.Node). It is merely useful as an optimization for walking +// the commit graphs. +// +// graphCommitNode implements the CommitNode interface. +type graphCommitNode struct { + // Hash for the Commit object + hash plumbing.Hash + // Index of the node in the commit graph file + index uint32 + + commitData *commitgraph.CommitData + gci *graphCommitNodeIndex +} + +// graphCommitNodeIndex is an index that can load CommitNode objects from both the commit +// graph files and the object store. +// +// graphCommitNodeIndex implements the CommitNodeIndex interface +type graphCommitNodeIndex struct { + commitGraph commitgraph.Index + s storer.EncodedObjectStorer +} + +// NewGraphCommitNodeIndex returns CommitNodeIndex implementation that uses commit-graph +// files as backing storage and falls back to object storage when necessary +func NewGraphCommitNodeIndex(commitGraph commitgraph.Index, s storer.EncodedObjectStorer) CommitNodeIndex { + return &graphCommitNodeIndex{commitGraph, s} +} + +func (gci *graphCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { + if gci.commitGraph != nil { + // Check the commit graph first + parentIndex, err := gci.commitGraph.GetIndexByHash(hash) + if err == nil { + parent, err := gci.commitGraph.GetCommitDataByIndex(parentIndex) + if err != nil { + return nil, err + } + + return &graphCommitNode{ + hash: hash, + index: parentIndex, + commitData: parent, + gci: gci, + }, nil + } + } + + // Fallback to loading full commit object + commit, err := object.GetCommit(gci.s, hash) + if err != nil { + return nil, err + } + + return &objectCommitNode{ + nodeIndex: gci, + commit: commit, + }, nil +} + +func (c *graphCommitNode) ID() plumbing.Hash { + return c.hash +} + +func (c *graphCommitNode) Tree() (*object.Tree, error) { + return object.GetTree(c.gci.s, c.commitData.TreeHash) +} + +func (c *graphCommitNode) CommitTime() time.Time { + return c.commitData.When +} + +func (c *graphCommitNode) NumParents() int { + return len(c.commitData.ParentIndexes) +} + +func (c *graphCommitNode) ParentNodes() CommitNodeIter { + return newParentgraphCommitNodeIter(c) +} + +func (c *graphCommitNode) ParentNode(i int) (CommitNode, error) { + if i < 0 || i >= len(c.commitData.ParentIndexes) { + return nil, object.ErrParentNotFound + } + + parent, err := c.gci.commitGraph.GetCommitDataByIndex(c.commitData.ParentIndexes[i]) + if err != nil { + return nil, err + } + + return &graphCommitNode{ + hash: c.commitData.ParentHashes[i], + index: c.commitData.ParentIndexes[i], + commitData: parent, + gci: c.gci, + }, nil +} + +func (c *graphCommitNode) ParentHashes() []plumbing.Hash { + return c.commitData.ParentHashes +} + +func (c *graphCommitNode) Generation() uint64 { + // If the commit-graph file was generated with older Git version that + // set the generation to zero for every commit the generation assumption + // is still valid. It is just less useful. + return c.commitData.Generation +} + +func (c *graphCommitNode) GenerationV2() uint64 { + // If the commit-graph file was generated with older Git version that + // set the generation to zero for every commit the generation assumption + // is still valid. It is just less useful. + return c.commitData.GenerationV2 +} + +func (c *graphCommitNode) Commit() (*object.Commit, error) { + return object.GetCommit(c.gci.s, c.hash) +} + +func (c *graphCommitNode) String() string { + return fmt.Sprintf( + "%s %s\nDate: %s", + plumbing.CommitObject, c.ID(), + c.CommitTime().Format(object.DateFormat), + ) +} diff --git a/plumbing/object/commitgraph/commitnode_object.go b/plumbing/object/commitgraph/commitnode_object.go index bdf8cb7..7256bed 100644 --- a/plumbing/object/commitgraph/commitnode_object.go +++ b/plumbing/object/commitgraph/commitnode_object.go @@ -1,90 +1,97 @@ -package commitgraph
-
-import (
- "math"
- "time"
-
- "github.com/go-git/go-git/v5/plumbing"
- "github.com/go-git/go-git/v5/plumbing/object"
- "github.com/go-git/go-git/v5/plumbing/storer"
-)
-
-// objectCommitNode is a representation of Commit as presented in the GIT object format.
-//
-// objectCommitNode implements the CommitNode interface.
-type objectCommitNode struct {
- nodeIndex CommitNodeIndex
- commit *object.Commit
-}
-
-// NewObjectCommitNodeIndex returns CommitNodeIndex implementation that uses
-// only object storage to load the nodes
-func NewObjectCommitNodeIndex(s storer.EncodedObjectStorer) CommitNodeIndex {
- return &objectCommitNodeIndex{s}
-}
-
-func (oci *objectCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) {
- commit, err := object.GetCommit(oci.s, hash)
- if err != nil {
- return nil, err
- }
-
- return &objectCommitNode{
- nodeIndex: oci,
- commit: commit,
- }, nil
-}
-
-// objectCommitNodeIndex is an index that can load CommitNode objects only from the
-// object store.
-//
-// objectCommitNodeIndex implements the CommitNodeIndex interface
-type objectCommitNodeIndex struct {
- s storer.EncodedObjectStorer
-}
-
-func (c *objectCommitNode) CommitTime() time.Time {
- return c.commit.Committer.When
-}
-
-func (c *objectCommitNode) ID() plumbing.Hash {
- return c.commit.ID()
-}
-
-func (c *objectCommitNode) Tree() (*object.Tree, error) {
- return c.commit.Tree()
-}
-
-func (c *objectCommitNode) NumParents() int {
- return c.commit.NumParents()
-}
-
-func (c *objectCommitNode) ParentNodes() CommitNodeIter {
- return newParentgraphCommitNodeIter(c)
-}
-
-func (c *objectCommitNode) ParentNode(i int) (CommitNode, error) {
- if i < 0 || i >= len(c.commit.ParentHashes) {
- return nil, object.ErrParentNotFound
- }
-
- // Note: It's necessary to go through CommitNodeIndex here to ensure
- // that if the commit-graph file covers only part of the history we
- // start using it when that part is reached.
- return c.nodeIndex.Get(c.commit.ParentHashes[i])
-}
-
-func (c *objectCommitNode) ParentHashes() []plumbing.Hash {
- return c.commit.ParentHashes
-}
-
-func (c *objectCommitNode) Generation() uint64 {
- // Commit nodes representing objects outside of the commit graph can never
- // be reached by objects from the commit-graph thus we return the highest
- // possible value.
- return math.MaxUint64
-}
-
-func (c *objectCommitNode) Commit() (*object.Commit, error) {
- return c.commit, nil
-}
+package commitgraph + +import ( + "math" + "time" + + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/object" + "github.com/go-git/go-git/v5/plumbing/storer" +) + +// objectCommitNode is a representation of Commit as presented in the GIT object format. +// +// objectCommitNode implements the CommitNode interface. +type objectCommitNode struct { + nodeIndex CommitNodeIndex + commit *object.Commit +} + +// NewObjectCommitNodeIndex returns CommitNodeIndex implementation that uses +// only object storage to load the nodes +func NewObjectCommitNodeIndex(s storer.EncodedObjectStorer) CommitNodeIndex { + return &objectCommitNodeIndex{s} +} + +func (oci *objectCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { + commit, err := object.GetCommit(oci.s, hash) + if err != nil { + return nil, err + } + + return &objectCommitNode{ + nodeIndex: oci, + commit: commit, + }, nil +} + +// objectCommitNodeIndex is an index that can load CommitNode objects only from the +// object store. +// +// objectCommitNodeIndex implements the CommitNodeIndex interface +type objectCommitNodeIndex struct { + s storer.EncodedObjectStorer +} + +func (c *objectCommitNode) CommitTime() time.Time { + return c.commit.Committer.When +} + +func (c *objectCommitNode) ID() plumbing.Hash { + return c.commit.ID() +} + +func (c *objectCommitNode) Tree() (*object.Tree, error) { + return c.commit.Tree() +} + +func (c *objectCommitNode) NumParents() int { + return c.commit.NumParents() +} + +func (c *objectCommitNode) ParentNodes() CommitNodeIter { + return newParentgraphCommitNodeIter(c) +} + +func (c *objectCommitNode) ParentNode(i int) (CommitNode, error) { + if i < 0 || i >= len(c.commit.ParentHashes) { + return nil, object.ErrParentNotFound + } + + // Note: It's necessary to go through CommitNodeIndex here to ensure + // that if the commit-graph file covers only part of the history we + // start using it when that part is reached. + return c.nodeIndex.Get(c.commit.ParentHashes[i]) +} + +func (c *objectCommitNode) ParentHashes() []plumbing.Hash { + return c.commit.ParentHashes +} + +func (c *objectCommitNode) Generation() uint64 { + // Commit nodes representing objects outside of the commit graph can never + // be reached by objects from the commit-graph thus we return the highest + // possible value. + return math.MaxUint64 +} + +func (c *objectCommitNode) GenerationV2() uint64 { + // Commit nodes representing objects outside of the commit graph can never + // be reached by objects from the commit-graph thus we return the highest + // possible value. + return math.MaxUint64 +} + +func (c *objectCommitNode) Commit() (*object.Commit, error) { + return c.commit, nil +} diff --git a/plumbing/object/commitgraph/commitnode_test.go b/plumbing/object/commitgraph/commitnode_test.go index 6c9a643..441ff6f 100644 --- a/plumbing/object/commitgraph/commitnode_test.go +++ b/plumbing/object/commitgraph/commitnode_test.go @@ -1,148 +1,153 @@ -package commitgraph
-
-import (
- "path"
- "testing"
-
- "github.com/go-git/go-git/v5/plumbing"
- "github.com/go-git/go-git/v5/plumbing/cache"
- "github.com/go-git/go-git/v5/plumbing/format/commitgraph"
- "github.com/go-git/go-git/v5/plumbing/format/packfile"
- "github.com/go-git/go-git/v5/storage/filesystem"
-
- fixtures "github.com/go-git/go-git-fixtures/v4"
- . "gopkg.in/check.v1"
-)
-
-func Test(t *testing.T) { TestingT(t) }
-
-type CommitNodeSuite struct {
- fixtures.Suite
-}
-
-var _ = Suite(&CommitNodeSuite{})
-
-func unpackRepositry(f *fixtures.Fixture) *filesystem.Storage {
- storer := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault())
- p := f.Packfile()
- defer p.Close()
- packfile.UpdateObjectStorage(storer, p)
- return storer
-}
-
-func testWalker(c *C, nodeIndex CommitNodeIndex) {
- head, err := nodeIndex.Get(plumbing.NewHash("b9d69064b190e7aedccf84731ca1d917871f8a1c"))
- c.Assert(err, IsNil)
-
- iter := NewCommitNodeIterCTime(
- head,
- nil,
- nil,
- )
-
- var commits []CommitNode
- iter.ForEach(func(c CommitNode) error {
- commits = append(commits, c)
- return nil
- })
-
- c.Assert(commits, HasLen, 9)
-
- expected := []string{
- "b9d69064b190e7aedccf84731ca1d917871f8a1c",
- "6f6c5d2be7852c782be1dd13e36496dd7ad39560",
- "a45273fe2d63300e1962a9e26a6b15c276cd7082",
- "c0edf780dd0da6a65a7a49a86032fcf8a0c2d467",
- "bb13916df33ed23004c3ce9ed3b8487528e655c1",
- "03d2c021ff68954cf3ef0a36825e194a4b98f981",
- "ce275064ad67d51e99f026084e20827901a8361c",
- "e713b52d7e13807e87a002e812041f248db3f643",
- "347c91919944a68e9413581a1bc15519550a3afe",
- }
- for i, commit := range commits {
- c.Assert(commit.ID().String(), Equals, expected[i])
- }
-}
-
-func testParents(c *C, nodeIndex CommitNodeIndex) {
- merge3, err := nodeIndex.Get(plumbing.NewHash("6f6c5d2be7852c782be1dd13e36496dd7ad39560"))
- c.Assert(err, IsNil)
-
- var parents []CommitNode
- merge3.ParentNodes().ForEach(func(c CommitNode) error {
- parents = append(parents, c)
- return nil
- })
-
- c.Assert(parents, HasLen, 3)
-
- expected := []string{
- "ce275064ad67d51e99f026084e20827901a8361c",
- "bb13916df33ed23004c3ce9ed3b8487528e655c1",
- "a45273fe2d63300e1962a9e26a6b15c276cd7082",
- }
- for i, parent := range parents {
- c.Assert(parent.ID().String(), Equals, expected[i])
- }
-}
-
-func testCommitAndTree(c *C, nodeIndex CommitNodeIndex) {
- merge3node, err := nodeIndex.Get(plumbing.NewHash("6f6c5d2be7852c782be1dd13e36496dd7ad39560"))
- c.Assert(err, IsNil)
- merge3commit, err := merge3node.Commit()
- c.Assert(err, IsNil)
- c.Assert(merge3node.ID().String(), Equals, merge3commit.ID().String())
- tree, err := merge3node.Tree()
- c.Assert(err, IsNil)
- c.Assert(tree.ID().String(), Equals, merge3commit.TreeHash.String())
-}
-
-func (s *CommitNodeSuite) TestObjectGraph(c *C) {
- f := fixtures.ByTag("commit-graph").One()
- storer := unpackRepositry(f)
-
- nodeIndex := NewObjectCommitNodeIndex(storer)
- testWalker(c, nodeIndex)
- testParents(c, nodeIndex)
- testCommitAndTree(c, nodeIndex)
-}
-
-func (s *CommitNodeSuite) TestCommitGraph(c *C) {
- f := fixtures.ByTag("commit-graph").One()
- storer := unpackRepositry(f)
- reader, err := storer.Filesystem().Open(path.Join("objects", "info", "commit-graph"))
- c.Assert(err, IsNil)
- defer reader.Close()
- index, err := commitgraph.OpenFileIndex(reader)
- c.Assert(err, IsNil)
-
- nodeIndex := NewGraphCommitNodeIndex(index, storer)
- testWalker(c, nodeIndex)
- testParents(c, nodeIndex)
- testCommitAndTree(c, nodeIndex)
-}
-
-func (s *CommitNodeSuite) TestMixedGraph(c *C) {
- f := fixtures.ByTag("commit-graph").One()
- storer := unpackRepositry(f)
-
- // Take the commit-graph file and copy it to memory index without the last commit
- reader, err := storer.Filesystem().Open(path.Join("objects", "info", "commit-graph"))
- c.Assert(err, IsNil)
- defer reader.Close()
- fileIndex, err := commitgraph.OpenFileIndex(reader)
- c.Assert(err, IsNil)
- memoryIndex := commitgraph.NewMemoryIndex()
- for i, hash := range fileIndex.Hashes() {
- if hash.String() != "b9d69064b190e7aedccf84731ca1d917871f8a1c" {
- node, err := fileIndex.GetCommitDataByIndex(i)
- c.Assert(err, IsNil)
- memoryIndex.Add(hash, node)
- }
- }
-
- nodeIndex := NewGraphCommitNodeIndex(memoryIndex, storer)
- testWalker(c, nodeIndex)
- testParents(c, nodeIndex)
- testCommitAndTree(c, nodeIndex)
-}
+package commitgraph + +import ( + "path" + "testing" + + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/cache" + commitgraph "github.com/go-git/go-git/v5/plumbing/format/commitgraph/v2" + "github.com/go-git/go-git/v5/plumbing/format/packfile" + "github.com/go-git/go-git/v5/storage/filesystem" + + fixtures "github.com/go-git/go-git-fixtures/v4" + . "gopkg.in/check.v1" +) + +func Test(t *testing.T) { TestingT(t) } + +type CommitNodeSuite struct { + fixtures.Suite +} + +var _ = Suite(&CommitNodeSuite{}) + +func unpackRepository(f *fixtures.Fixture) *filesystem.Storage { + storer := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault()) + p := f.Packfile() + defer p.Close() + packfile.UpdateObjectStorage(storer, p) + return storer +} + +func testWalker(c *C, nodeIndex CommitNodeIndex) { + head, err := nodeIndex.Get(plumbing.NewHash("b9d69064b190e7aedccf84731ca1d917871f8a1c")) + c.Assert(err, IsNil) + + iter := NewCommitNodeIterCTime( + head, + nil, + nil, + ) + + var commits []CommitNode + iter.ForEach(func(c CommitNode) error { + commits = append(commits, c) + return nil + }) + + c.Assert(commits, HasLen, 9) + + expected := []string{ + "b9d69064b190e7aedccf84731ca1d917871f8a1c", + "6f6c5d2be7852c782be1dd13e36496dd7ad39560", + "a45273fe2d63300e1962a9e26a6b15c276cd7082", + "c0edf780dd0da6a65a7a49a86032fcf8a0c2d467", + "bb13916df33ed23004c3ce9ed3b8487528e655c1", + "03d2c021ff68954cf3ef0a36825e194a4b98f981", + "ce275064ad67d51e99f026084e20827901a8361c", + "e713b52d7e13807e87a002e812041f248db3f643", + "347c91919944a68e9413581a1bc15519550a3afe", + } + for i, commit := range commits { + c.Assert(commit.ID().String(), Equals, expected[i]) + } +} + +func testParents(c *C, nodeIndex CommitNodeIndex) { + merge3, err := nodeIndex.Get(plumbing.NewHash("6f6c5d2be7852c782be1dd13e36496dd7ad39560")) + c.Assert(err, IsNil) + + var parents []CommitNode + merge3.ParentNodes().ForEach(func(c CommitNode) error { + parents = append(parents, c) + return nil + }) + + c.Assert(parents, HasLen, 3) + + expected := []string{ + "ce275064ad67d51e99f026084e20827901a8361c", + "bb13916df33ed23004c3ce9ed3b8487528e655c1", + "a45273fe2d63300e1962a9e26a6b15c276cd7082", + } + for i, parent := range parents { + c.Assert(parent.ID().String(), Equals, expected[i]) + } +} + +func testCommitAndTree(c *C, nodeIndex CommitNodeIndex) { + merge3node, err := nodeIndex.Get(plumbing.NewHash("6f6c5d2be7852c782be1dd13e36496dd7ad39560")) + c.Assert(err, IsNil) + merge3commit, err := merge3node.Commit() + c.Assert(err, IsNil) + c.Assert(merge3node.ID().String(), Equals, merge3commit.ID().String()) + tree, err := merge3node.Tree() + c.Assert(err, IsNil) + c.Assert(tree.ID().String(), Equals, merge3commit.TreeHash.String()) +} + +func (s *CommitNodeSuite) TestObjectGraph(c *C) { + f := fixtures.ByTag("commit-graph").One() + storer := unpackRepository(f) + + nodeIndex := NewObjectCommitNodeIndex(storer) + testWalker(c, nodeIndex) + testParents(c, nodeIndex) + testCommitAndTree(c, nodeIndex) +} + +func (s *CommitNodeSuite) TestCommitGraph(c *C) { + f := fixtures.ByTag("commit-graph").One() + storer := unpackRepository(f) + reader, err := storer.Filesystem().Open(path.Join("objects", "info", "commit-graph")) + c.Assert(err, IsNil) + defer reader.Close() + index, err := commitgraph.OpenFileIndex(reader) + c.Assert(err, IsNil) + defer index.Close() + + nodeIndex := NewGraphCommitNodeIndex(index, storer) + testWalker(c, nodeIndex) + testParents(c, nodeIndex) + testCommitAndTree(c, nodeIndex) +} + +func (s *CommitNodeSuite) TestMixedGraph(c *C) { + f := fixtures.ByTag("commit-graph").One() + storer := unpackRepository(f) + + // Take the commit-graph file and copy it to memory index without the last commit + reader, err := storer.Filesystem().Open(path.Join("objects", "info", "commit-graph")) + c.Assert(err, IsNil) + defer reader.Close() + fileIndex, err := commitgraph.OpenFileIndex(reader) + c.Assert(err, IsNil) + defer fileIndex.Close() + + memoryIndex := commitgraph.NewMemoryIndex() + defer memoryIndex.Close() + + for i, hash := range fileIndex.Hashes() { + if hash.String() != "b9d69064b190e7aedccf84731ca1d917871f8a1c" { + node, err := fileIndex.GetCommitDataByIndex(uint32(i)) + c.Assert(err, IsNil) + memoryIndex.Add(hash, node) + } + } + + nodeIndex := NewGraphCommitNodeIndex(memoryIndex, storer) + testWalker(c, nodeIndex) + testParents(c, nodeIndex) + testCommitAndTree(c, nodeIndex) +} diff --git a/plumbing/object/commitgraph/commitnode_walker_author_order.go b/plumbing/object/commitgraph/commitnode_walker_author_order.go new file mode 100644 index 0000000..f5b23cc --- /dev/null +++ b/plumbing/object/commitgraph/commitnode_walker_author_order.go @@ -0,0 +1,61 @@ +package commitgraph + +import ( + "github.com/go-git/go-git/v5/plumbing" + + "github.com/emirpasic/gods/trees/binaryheap" +) + +// NewCommitNodeIterAuthorDateOrder returns a CommitNodeIter that walks the commit history, +// starting at the given commit and visiting its parents in Author Time order but with the +// constraint that no parent is emitted before its children are emitted. +// +// This matches `git log --author-order` +// +// This ordering requires that commit objects need to be loaded into memory - thus this +// ordering is likely to be slower than other orderings. +func NewCommitNodeIterAuthorDateOrder(c CommitNode, + seenExternal map[plumbing.Hash]bool, + ignore []plumbing.Hash, +) CommitNodeIter { + seen := make(map[plumbing.Hash]struct{}) + for _, h := range ignore { + seen[h] = struct{}{} + } + for h, ext := range seenExternal { + if ext { + seen[h] = struct{}{} + } + } + inCounts := make(map[plumbing.Hash]int) + + exploreHeap := &commitNodeHeap{binaryheap.NewWith(generationAndDateOrderComparator)} + exploreHeap.Push(c) + + visitHeap := &commitNodeHeap{binaryheap.NewWith(func(left, right interface{}) int { + leftCommit, err := left.(CommitNode).Commit() + if err != nil { + return -1 + } + rightCommit, err := right.(CommitNode).Commit() + if err != nil { + return -1 + } + + switch { + case rightCommit.Author.When.Before(leftCommit.Author.When): + return -1 + case leftCommit.Author.When.Before(rightCommit.Author.When): + return 1 + } + return 0 + })} + visitHeap.Push(c) + + return &commitNodeIteratorTopological{ + exploreStack: exploreHeap, + visitStack: visitHeap, + inCounts: inCounts, + ignore: seen, + } +} diff --git a/plumbing/object/commitgraph/commitnode_walker_ctime.go b/plumbing/object/commitgraph/commitnode_walker_ctime.go index 281f10b..3ab9e6e 100644 --- a/plumbing/object/commitgraph/commitnode_walker_ctime.go +++ b/plumbing/object/commitgraph/commitnode_walker_ctime.go @@ -1,105 +1,106 @@ -package commitgraph
-
-import (
- "io"
-
- "github.com/go-git/go-git/v5/plumbing"
- "github.com/go-git/go-git/v5/plumbing/storer"
-
- "github.com/emirpasic/gods/trees/binaryheap"
-)
-
-type commitNodeIteratorByCTime struct {
- heap *binaryheap.Heap
- seenExternal map[plumbing.Hash]bool
- seen map[plumbing.Hash]bool
-}
-
-// NewCommitNodeIterCTime returns a CommitNodeIter that walks the commit history,
-// starting at the given commit and visiting its parents while preserving Committer Time order.
-// this appears to be the closest order to `git log`
-// The given callback will be called for each visited commit. Each commit will
-// be visited only once. If the callback returns an error, walking will stop
-// and will return the error. Other errors might be returned if the history
-// cannot be traversed (e.g. missing objects). Ignore allows to skip some
-// commits from being iterated.
-func NewCommitNodeIterCTime(
- c CommitNode,
- seenExternal map[plumbing.Hash]bool,
- ignore []plumbing.Hash,
-) CommitNodeIter {
- seen := make(map[plumbing.Hash]bool)
- for _, h := range ignore {
- seen[h] = true
- }
-
- heap := binaryheap.NewWith(func(a, b interface{}) int {
- if a.(CommitNode).CommitTime().Before(b.(CommitNode).CommitTime()) {
- return 1
- }
- return -1
- })
-
- heap.Push(c)
-
- return &commitNodeIteratorByCTime{
- heap: heap,
- seenExternal: seenExternal,
- seen: seen,
- }
-}
-
-func (w *commitNodeIteratorByCTime) Next() (CommitNode, error) {
- var c CommitNode
- for {
- cIn, ok := w.heap.Pop()
- if !ok {
- return nil, io.EOF
- }
- c = cIn.(CommitNode)
- cID := c.ID()
-
- if w.seen[cID] || w.seenExternal[cID] {
- continue
- }
-
- w.seen[cID] = true
-
- for i, h := range c.ParentHashes() {
- if w.seen[h] || w.seenExternal[h] {
- continue
- }
- pc, err := c.ParentNode(i)
- if err != nil {
- return nil, err
- }
- w.heap.Push(pc)
- }
-
- return c, nil
- }
-}
-
-func (w *commitNodeIteratorByCTime) ForEach(cb func(CommitNode) error) error {
- for {
- c, err := w.Next()
- if err == io.EOF {
- break
- }
- if err != nil {
- return err
- }
-
- err = cb(c)
- if err == storer.ErrStop {
- break
- }
- if err != nil {
- return err
- }
- }
-
- return nil
-}
-
-func (w *commitNodeIteratorByCTime) Close() {}
+package commitgraph + +import ( + "io" + + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/storer" + + "github.com/emirpasic/gods/trees/binaryheap" +) + +type commitNodeIteratorByCTime struct { + heap *binaryheap.Heap + seenExternal map[plumbing.Hash]bool + seen map[plumbing.Hash]bool +} + +// NewCommitNodeIterCTime returns a CommitNodeIter that walks the commit history, +// starting at the given commit and visiting its parents while preserving Committer Time order. +// this is close in order to `git log` but does not guarantee topological order and will +// order things incorrectly occasionally. +// The given callback will be called for each visited commit. Each commit will +// be visited only once. If the callback returns an error, walking will stop +// and will return the error. Other errors might be returned if the history +// cannot be traversed (e.g. missing objects). Ignore allows to skip some +// commits from being iterated. +func NewCommitNodeIterCTime( + c CommitNode, + seenExternal map[plumbing.Hash]bool, + ignore []plumbing.Hash, +) CommitNodeIter { + seen := make(map[plumbing.Hash]bool) + for _, h := range ignore { + seen[h] = true + } + + heap := binaryheap.NewWith(func(a, b interface{}) int { + if a.(CommitNode).CommitTime().Before(b.(CommitNode).CommitTime()) { + return 1 + } + return -1 + }) + + heap.Push(c) + + return &commitNodeIteratorByCTime{ + heap: heap, + seenExternal: seenExternal, + seen: seen, + } +} + +func (w *commitNodeIteratorByCTime) Next() (CommitNode, error) { + var c CommitNode + for { + cIn, ok := w.heap.Pop() + if !ok { + return nil, io.EOF + } + c = cIn.(CommitNode) + cID := c.ID() + + if w.seen[cID] || w.seenExternal[cID] { + continue + } + + w.seen[cID] = true + + for i, h := range c.ParentHashes() { + if w.seen[h] || w.seenExternal[h] { + continue + } + pc, err := c.ParentNode(i) + if err != nil { + return nil, err + } + w.heap.Push(pc) + } + + return c, nil + } +} + +func (w *commitNodeIteratorByCTime) ForEach(cb func(CommitNode) error) error { + for { + c, err := w.Next() + if err == io.EOF { + break + } + if err != nil { + return err + } + + err = cb(c) + if err == storer.ErrStop { + break + } + if err != nil { + return err + } + } + + return nil +} + +func (w *commitNodeIteratorByCTime) Close() {} diff --git a/plumbing/object/commitgraph/commitnode_walker_date_order.go b/plumbing/object/commitgraph/commitnode_walker_date_order.go new file mode 100644 index 0000000..659a4fa --- /dev/null +++ b/plumbing/object/commitgraph/commitnode_walker_date_order.go @@ -0,0 +1,41 @@ +package commitgraph + +import ( + "github.com/go-git/go-git/v5/plumbing" + + "github.com/emirpasic/gods/trees/binaryheap" +) + +// NewCommitNodeIterDateOrder returns a CommitNodeIter that walks the commit history, +// starting at the given commit and visiting its parents in Committer Time and Generation order, +// but with the constraint that no parent is emitted before its children are emitted. +// +// This matches `git log --date-order` +func NewCommitNodeIterDateOrder(c CommitNode, + seenExternal map[plumbing.Hash]bool, + ignore []plumbing.Hash, +) CommitNodeIter { + seen := make(map[plumbing.Hash]struct{}) + for _, h := range ignore { + seen[h] = struct{}{} + } + for h, ext := range seenExternal { + if ext { + seen[h] = struct{}{} + } + } + inCounts := make(map[plumbing.Hash]int) + + exploreHeap := &commitNodeHeap{binaryheap.NewWith(generationAndDateOrderComparator)} + exploreHeap.Push(c) + + visitHeap := &commitNodeHeap{binaryheap.NewWith(generationAndDateOrderComparator)} + visitHeap.Push(c) + + return &commitNodeIteratorTopological{ + exploreStack: exploreHeap, + visitStack: visitHeap, + inCounts: inCounts, + ignore: seen, + } +} diff --git a/plumbing/object/commitgraph/commitnode_walker_helper.go b/plumbing/object/commitgraph/commitnode_walker_helper.go new file mode 100644 index 0000000..c54f6ca --- /dev/null +++ b/plumbing/object/commitgraph/commitnode_walker_helper.go @@ -0,0 +1,164 @@ +package commitgraph + +import ( + "math" + + "github.com/go-git/go-git/v5/plumbing" + + "github.com/emirpasic/gods/trees/binaryheap" +) + +// commitNodeStackable represents a common interface between heaps and stacks +type commitNodeStackable interface { + Push(c CommitNode) + Pop() (CommitNode, bool) + Peek() (CommitNode, bool) + Size() int +} + +// commitNodeLifo is a stack implementation using an underlying slice +type commitNodeLifo struct { + l []CommitNode +} + +// Push pushes a new CommitNode to the stack +func (l *commitNodeLifo) Push(c CommitNode) { + l.l = append(l.l, c) +} + +// Pop pops the most recently added CommitNode from the stack +func (l *commitNodeLifo) Pop() (CommitNode, bool) { + if len(l.l) == 0 { + return nil, false + } + c := l.l[len(l.l)-1] + l.l = l.l[:len(l.l)-1] + return c, true +} + +// Peek returns the most recently added CommitNode from the stack without removing it +func (l *commitNodeLifo) Peek() (CommitNode, bool) { + if len(l.l) == 0 { + return nil, false + } + return l.l[len(l.l)-1], true +} + +// Size returns the number of CommitNodes in the stack +func (l *commitNodeLifo) Size() int { + return len(l.l) +} + +// commitNodeHeap is a stack implementation using an underlying binary heap +type commitNodeHeap struct { + *binaryheap.Heap +} + +// Push pushes a new CommitNode to the heap +func (h *commitNodeHeap) Push(c CommitNode) { + h.Heap.Push(c) +} + +// Pop removes top element on heap and returns it, or nil if heap is empty. +// Second return parameter is true, unless the heap was empty and there was nothing to pop. +func (h *commitNodeHeap) Pop() (CommitNode, bool) { + c, ok := h.Heap.Pop() + if !ok { + return nil, false + } + return c.(CommitNode), true +} + +// Peek returns top element on the heap without removing it, or nil if heap is empty. +// Second return parameter is true, unless the heap was empty and there was nothing to peek. +func (h *commitNodeHeap) Peek() (CommitNode, bool) { + c, ok := h.Heap.Peek() + if !ok { + return nil, false + } + return c.(CommitNode), true +} + +// Size returns number of elements within the heap. +func (h *commitNodeHeap) Size() int { + return h.Heap.Size() +} + +// generationAndDateOrderComparator compares two CommitNode objects based on their generation and commit time. +// If the left CommitNode object is in a higher generation or is newer than the right one, it returns a -1. +// If the left CommitNode object is in a lower generation or is older than the right one, it returns a 1. +// If the two CommitNode objects have the same commit time and generation, it returns 0. +func generationAndDateOrderComparator(left, right interface{}) int { + leftCommit := left.(CommitNode) + rightCommit := right.(CommitNode) + + // if GenerationV2 is MaxUint64, then the node is not in the graph + if leftCommit.GenerationV2() == math.MaxUint64 { + if rightCommit.GenerationV2() == math.MaxUint64 { + switch { + case rightCommit.CommitTime().Before(leftCommit.CommitTime()): + return -1 + case leftCommit.CommitTime().Before(rightCommit.CommitTime()): + return 1 + } + return 0 + } + // left is not in the graph, but right is, so it is newer than the right + return -1 + } + + if rightCommit.GenerationV2() == math.MaxInt64 { + // the right is not in the graph, therefore the left is before the right + return 1 + } + + if leftCommit.GenerationV2() == 0 || rightCommit.GenerationV2() == 0 { + // We need to assess generation and date + if leftCommit.Generation() < rightCommit.Generation() { + return 1 + } + if leftCommit.Generation() > rightCommit.Generation() { + return -1 + } + switch { + case rightCommit.CommitTime().Before(leftCommit.CommitTime()): + return -1 + case leftCommit.CommitTime().Before(rightCommit.CommitTime()): + return 1 + } + return 0 + } + + if leftCommit.GenerationV2() < rightCommit.GenerationV2() { + return 1 + } + if leftCommit.GenerationV2() > rightCommit.GenerationV2() { + return -1 + } + + return 0 +} + +// composeIgnores composes the ignore list with the provided seenExternal list +func composeIgnores(ignore []plumbing.Hash, seenExternal map[plumbing.Hash]bool) map[plumbing.Hash]struct{} { + if len(ignore) == 0 { + seen := make(map[plumbing.Hash]struct{}) + for h, ext := range seenExternal { + if ext { + seen[h] = struct{}{} + } + } + return seen + } + + seen := make(map[plumbing.Hash]struct{}) + for _, h := range ignore { + seen[h] = struct{}{} + } + for h, ext := range seenExternal { + if ext { + seen[h] = struct{}{} + } + } + return seen +} diff --git a/plumbing/object/commitgraph/commitnode_walker_test.go b/plumbing/object/commitgraph/commitnode_walker_test.go new file mode 100644 index 0000000..1e09c0b --- /dev/null +++ b/plumbing/object/commitgraph/commitnode_walker_test.go @@ -0,0 +1,187 @@ +package commitgraph + +import ( + "strings" + + "github.com/go-git/go-git/v5/plumbing" + commitgraph "github.com/go-git/go-git/v5/plumbing/format/commitgraph/v2" + + fixtures "github.com/go-git/go-git-fixtures/v4" + . "gopkg.in/check.v1" +) + +func (s *CommitNodeSuite) TestCommitNodeIter(c *C) { + f := fixtures.ByTag("commit-graph-chain-2").One() + + storer := unpackRepository(f) + + index, err := commitgraph.OpenChainOrFileIndex(storer.Filesystem()) + c.Assert(err, IsNil) + + nodeIndex := NewGraphCommitNodeIndex(index, storer) + + head, err := nodeIndex.Get(plumbing.NewHash("ec6f456c0e8c7058a29611429965aa05c190b54b")) + c.Assert(err, IsNil) + + testTopoOrder(c, head) + testDateOrder(c, head) + testAuthorDateOrder(c, head) +} + +func testTopoOrder(c *C, head CommitNode) { + iter := NewCommitNodeIterTopoOrder( + head, + nil, + nil, + ) + + var commits []string + iter.ForEach(func(c CommitNode) error { + commits = append(commits, c.ID().String()) + return nil + }) + c.Assert(commits, DeepEquals, strings.Split(`ec6f456c0e8c7058a29611429965aa05c190b54b +d82f291cde9987322c8a0c81a325e1ba6159684c +3048d280d2d5b258d9e582a226ff4bbed34fd5c9 +27aa8cdd2431068606741a589383c02c149ea625 +fa058d42fa3bc53f39108a56dad67157169b2191 +6c629843a1750a27c9af01ed2985f362f619c47a +d10a0e7c1f340a6cfc14540a5f8c508ce7e2eabf +d0a18ccd8eea3bdabc76d6dc5420af1ea30aae9f +cf2874632223220e0445abf0a7806dc772c0b37a +758ac33217f092bfcded4ad4774954ac054c9609 +214e1dca024fb6da5ed65564d2de734df5dc2127 +70923099e61fa33f0bc5256d2f938fa44c4df10e +bcaa1ac5644b16f1febb72f31e204720b7bb8934 +e1d8866ffa78fa16d2f39b0ba5344a7269ee5371 +2275fa7d0c75d20103f90b0e1616937d5a9fc5e6 +bdd9a92789d4a86b20a8d3df462df373f41acf23 +b359f11ea09e642695edcd114b463da4395b10c1 +6f43e8933ba3c04072d5d104acc6118aac3e52ee +ccafe8bd5f9dbfb8b98b0da03ced29608dcfdeec +939814f341fdd5d35e81a3845a33c4fedb19d2d2 +5f5ad88bf2babe506f927d64d2b7a1e1493dc2ae +a2014124ca3b3f9ff28fbab0a83ce3c71bf4622e +77906b653c3eb8a1cd5bd7254e161c00c6086d83 +465cba710284204f9851854587c2887c247222db +b9471b13256703d3f5eb88b280b4a16ce325ec1b +62925030859646daeeaf5a4d386a0c41e00dda8a +5f56aea0ca8b74215a5b982bca32236e1e28c76b +23148841baa5dbce48f6adcb7ddf83dcd97debb3 +c336d16298a017486c4164c40f8acb28afe64e84 +31eae7b619d166c366bf5df4991f04ba8cebea0a +d2a38b4a5965d529566566640519d03d2bd10f6c +b977a025ca21e3b5ca123d8093bd7917694f6da7 +35b585759cbf29f8ec428ef89da20705d59f99ec +c2bbf9fe8009b22d0f390f3c8c3f13937067590f +fc9f0643b21cfe571046e27e0c4565f3a1ee96c8 +c088fd6a7e1a38e9d5a9815265cb575bb08d08ff +5fddbeb678bd2c36c5e5c891ab8f2b143ced5baf +5d7303c49ac984a9fec60523f2d5297682e16646`, "\n")) +} + +func testDateOrder(c *C, head CommitNode) { + iter := NewCommitNodeIterDateOrder( + head, + nil, + nil, + ) + + var commits []string + iter.ForEach(func(c CommitNode) error { + commits = append(commits, c.ID().String()) + return nil + }) + + c.Assert(commits, DeepEquals, strings.Split(`ec6f456c0e8c7058a29611429965aa05c190b54b +3048d280d2d5b258d9e582a226ff4bbed34fd5c9 +d82f291cde9987322c8a0c81a325e1ba6159684c +27aa8cdd2431068606741a589383c02c149ea625 +fa058d42fa3bc53f39108a56dad67157169b2191 +d0a18ccd8eea3bdabc76d6dc5420af1ea30aae9f +6c629843a1750a27c9af01ed2985f362f619c47a +cf2874632223220e0445abf0a7806dc772c0b37a +d10a0e7c1f340a6cfc14540a5f8c508ce7e2eabf +758ac33217f092bfcded4ad4774954ac054c9609 +214e1dca024fb6da5ed65564d2de734df5dc2127 +70923099e61fa33f0bc5256d2f938fa44c4df10e +bcaa1ac5644b16f1febb72f31e204720b7bb8934 +e1d8866ffa78fa16d2f39b0ba5344a7269ee5371 +2275fa7d0c75d20103f90b0e1616937d5a9fc5e6 +bdd9a92789d4a86b20a8d3df462df373f41acf23 +b359f11ea09e642695edcd114b463da4395b10c1 +6f43e8933ba3c04072d5d104acc6118aac3e52ee +ccafe8bd5f9dbfb8b98b0da03ced29608dcfdeec +939814f341fdd5d35e81a3845a33c4fedb19d2d2 +5f5ad88bf2babe506f927d64d2b7a1e1493dc2ae +a2014124ca3b3f9ff28fbab0a83ce3c71bf4622e +77906b653c3eb8a1cd5bd7254e161c00c6086d83 +465cba710284204f9851854587c2887c247222db +b9471b13256703d3f5eb88b280b4a16ce325ec1b +62925030859646daeeaf5a4d386a0c41e00dda8a +5f56aea0ca8b74215a5b982bca32236e1e28c76b +23148841baa5dbce48f6adcb7ddf83dcd97debb3 +c336d16298a017486c4164c40f8acb28afe64e84 +31eae7b619d166c366bf5df4991f04ba8cebea0a +b977a025ca21e3b5ca123d8093bd7917694f6da7 +d2a38b4a5965d529566566640519d03d2bd10f6c +35b585759cbf29f8ec428ef89da20705d59f99ec +c2bbf9fe8009b22d0f390f3c8c3f13937067590f +fc9f0643b21cfe571046e27e0c4565f3a1ee96c8 +c088fd6a7e1a38e9d5a9815265cb575bb08d08ff +5fddbeb678bd2c36c5e5c891ab8f2b143ced5baf +5d7303c49ac984a9fec60523f2d5297682e16646`, "\n")) +} + +func testAuthorDateOrder(c *C, head CommitNode) { + iter := NewCommitNodeIterAuthorDateOrder( + head, + nil, + nil, + ) + + var commits []string + iter.ForEach(func(c CommitNode) error { + commits = append(commits, c.ID().String()) + return nil + }) + + c.Assert(commits, DeepEquals, strings.Split(`ec6f456c0e8c7058a29611429965aa05c190b54b +3048d280d2d5b258d9e582a226ff4bbed34fd5c9 +d82f291cde9987322c8a0c81a325e1ba6159684c +27aa8cdd2431068606741a589383c02c149ea625 +fa058d42fa3bc53f39108a56dad67157169b2191 +d0a18ccd8eea3bdabc76d6dc5420af1ea30aae9f +6c629843a1750a27c9af01ed2985f362f619c47a +cf2874632223220e0445abf0a7806dc772c0b37a +d10a0e7c1f340a6cfc14540a5f8c508ce7e2eabf +758ac33217f092bfcded4ad4774954ac054c9609 +214e1dca024fb6da5ed65564d2de734df5dc2127 +70923099e61fa33f0bc5256d2f938fa44c4df10e +bcaa1ac5644b16f1febb72f31e204720b7bb8934 +e1d8866ffa78fa16d2f39b0ba5344a7269ee5371 +2275fa7d0c75d20103f90b0e1616937d5a9fc5e6 +bdd9a92789d4a86b20a8d3df462df373f41acf23 +b359f11ea09e642695edcd114b463da4395b10c1 +6f43e8933ba3c04072d5d104acc6118aac3e52ee +ccafe8bd5f9dbfb8b98b0da03ced29608dcfdeec +939814f341fdd5d35e81a3845a33c4fedb19d2d2 +5f5ad88bf2babe506f927d64d2b7a1e1493dc2ae +a2014124ca3b3f9ff28fbab0a83ce3c71bf4622e +77906b653c3eb8a1cd5bd7254e161c00c6086d83 +465cba710284204f9851854587c2887c247222db +b9471b13256703d3f5eb88b280b4a16ce325ec1b +5f56aea0ca8b74215a5b982bca32236e1e28c76b +62925030859646daeeaf5a4d386a0c41e00dda8a +23148841baa5dbce48f6adcb7ddf83dcd97debb3 +c336d16298a017486c4164c40f8acb28afe64e84 +31eae7b619d166c366bf5df4991f04ba8cebea0a +b977a025ca21e3b5ca123d8093bd7917694f6da7 +d2a38b4a5965d529566566640519d03d2bd10f6c +35b585759cbf29f8ec428ef89da20705d59f99ec +c2bbf9fe8009b22d0f390f3c8c3f13937067590f +fc9f0643b21cfe571046e27e0c4565f3a1ee96c8 +c088fd6a7e1a38e9d5a9815265cb575bb08d08ff +5fddbeb678bd2c36c5e5c891ab8f2b143ced5baf +5d7303c49ac984a9fec60523f2d5297682e16646`, "\n")) +} diff --git a/plumbing/object/commitgraph/commitnode_walker_topo_order.go b/plumbing/object/commitgraph/commitnode_walker_topo_order.go new file mode 100644 index 0000000..29f4bb7 --- /dev/null +++ b/plumbing/object/commitgraph/commitnode_walker_topo_order.go @@ -0,0 +1,161 @@ +package commitgraph + +import ( + "io" + + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/storer" + + "github.com/emirpasic/gods/trees/binaryheap" +) + +type commitNodeIteratorTopological struct { + exploreStack commitNodeStackable + visitStack commitNodeStackable + inCounts map[plumbing.Hash]int + + ignore map[plumbing.Hash]struct{} +} + +// NewCommitNodeIterTopoOrder returns a CommitNodeIter that walks the commit history, +// starting at the given commit and visiting its parents in a topological order but +// with the constraint that no parent is emitted before its children are emitted. +// +// This matches `git log --topo-order` +func NewCommitNodeIterTopoOrder(c CommitNode, + seenExternal map[plumbing.Hash]bool, + ignore []plumbing.Hash, +) CommitNodeIter { + seen := composeIgnores(ignore, seenExternal) + inCounts := make(map[plumbing.Hash]int) + + heap := &commitNodeHeap{binaryheap.NewWith(generationAndDateOrderComparator)} + heap.Push(c) + + lifo := &commitNodeLifo{make([]CommitNode, 0, 8)} + lifo.Push(c) + + return &commitNodeIteratorTopological{ + exploreStack: heap, + visitStack: lifo, + inCounts: inCounts, + ignore: seen, + } +} + +func (iter *commitNodeIteratorTopological) Next() (CommitNode, error) { + var next CommitNode + for { + var ok bool + next, ok = iter.visitStack.Pop() + if !ok { + return nil, io.EOF + } + + if iter.inCounts[next.ID()] == 0 { + break + } + } + + minimumLevel, generationV2 := next.GenerationV2(), true + if minimumLevel == 0 { + minimumLevel, generationV2 = next.Generation(), false + } + + parents := make([]CommitNode, 0, len(next.ParentHashes())) + for i := range next.ParentHashes() { + pc, err := next.ParentNode(i) + if err != nil { + return nil, err + } + + parents = append(parents, pc) + + if generationV2 { + if pc.GenerationV2() < minimumLevel { + minimumLevel = pc.GenerationV2() + } + continue + } + + if pc.Generation() < minimumLevel { + minimumLevel = pc.Generation() + } + } + + // EXPLORE + for { + toExplore, ok := iter.exploreStack.Peek() + if !ok { + break + } + + if toExplore.ID() != next.ID() && iter.exploreStack.Size() == 1 { + break + } + if generationV2 { + if toExplore.GenerationV2() < minimumLevel { + break + } + } else { + if toExplore.Generation() < minimumLevel { + break + } + } + + iter.exploreStack.Pop() + for i, h := range toExplore.ParentHashes() { + if _, has := iter.ignore[h]; has { + continue + } + iter.inCounts[h]++ + + if iter.inCounts[h] == 1 { + pc, err := toExplore.ParentNode(i) + if err != nil { + return nil, err + } + iter.exploreStack.Push(pc) + } + } + } + + // VISIT + for i, h := range next.ParentHashes() { + if _, has := iter.ignore[h]; has { + continue + } + iter.inCounts[h]-- + + if iter.inCounts[h] == 0 { + iter.visitStack.Push(parents[i]) + } + } + delete(iter.inCounts, next.ID()) + + return next, nil +} + +func (iter *commitNodeIteratorTopological) ForEach(cb func(CommitNode) error) error { + for { + obj, err := iter.Next() + if err != nil { + if err == io.EOF { + return nil + } + + return err + } + + if err := cb(obj); err != nil { + if err == storer.ErrStop { + return nil + } + + return err + } + } +} + +func (iter *commitNodeIteratorTopological) Close() { +} diff --git a/plumbing/object/patch.go b/plumbing/object/patch.go index 06bc35b..dd8fef4 100644 --- a/plumbing/object/patch.go +++ b/plumbing/object/patch.go @@ -317,8 +317,8 @@ func getFileStatsFromFilePatches(filePatches []fdiff.FilePatch) FileStats { // File is deleted. cs.Name = from.Path() } else if from.Path() != to.Path() { - // File is renamed. Not supported. - // cs.Name = fmt.Sprintf("%s => %s", from.Path(), to.Path()) + // File is renamed. + cs.Name = fmt.Sprintf("%s => %s", from.Path(), to.Path()) } else { cs.Name = from.Path() } diff --git a/plumbing/object/patch_stats_test.go b/plumbing/object/patch_stats_test.go new file mode 100644 index 0000000..f393c30 --- /dev/null +++ b/plumbing/object/patch_stats_test.go @@ -0,0 +1,54 @@ +package object_test + +import ( + "time" + + "github.com/go-git/go-billy/v5/memfs" + "github.com/go-git/go-billy/v5/util" + "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing/object" + "github.com/go-git/go-git/v5/storage/memory" + + fixtures "github.com/go-git/go-git-fixtures/v4" + . "gopkg.in/check.v1" +) + +type PatchStatsSuite struct { + fixtures.Suite +} + +var _ = Suite(&PatchStatsSuite{}) + +func (s *PatchStatsSuite) TestStatsWithRename(c *C) { + cm := &git.CommitOptions{ + Author: &object.Signature{Name: "Foo", Email: "foo@example.local", When: time.Now()}, + } + + fs := memfs.New() + r, err := git.Init(memory.NewStorage(), fs) + c.Assert(err, IsNil) + + w, err := r.Worktree() + c.Assert(err, IsNil) + + util.WriteFile(fs, "foo", []byte("foo\nbar\n"), 0644) + + _, err = w.Add("foo") + c.Assert(err, IsNil) + + _, err = w.Commit("foo\n", cm) + c.Assert(err, IsNil) + + _, err = w.Move("foo", "bar") + c.Assert(err, IsNil) + + hash, err := w.Commit("rename foo to bar", cm) + c.Assert(err, IsNil) + + commit, err := r.CommitObject(hash) + c.Assert(err, IsNil) + + fileStats, err := commit.Stats() + c.Assert(err, IsNil) + c.Assert(fileStats[0].Name, Equals, "foo => bar") +} diff --git a/plumbing/object/signature_test.go b/plumbing/object/signature_test.go index 1bdb1d1..3b20cde 100644 --- a/plumbing/object/signature_test.go +++ b/plumbing/object/signature_test.go @@ -178,3 +178,10 @@ signed tag`), }) } } + +func FuzzParseSignedBytes(f *testing.F) { + + f.Fuzz(func(t *testing.T, input []byte) { + parseSignedBytes(input) + }) +} diff --git a/plumbing/object/tree_test.go b/plumbing/object/tree_test.go index d9dad47..bb5fc7a 100644 --- a/plumbing/object/tree_test.go +++ b/plumbing/object/tree_test.go @@ -4,6 +4,7 @@ import ( "context" "errors" "io" + "testing" fixtures "github.com/go-git/go-git-fixtures/v4" "github.com/go-git/go-git/v5/plumbing" @@ -1623,3 +1624,19 @@ func (s *TreeSuite) TestTreeDecodeReadBug(c *C) { c.Assert(err, IsNil) c.Assert(entriesEquals(obtained.Entries, expected.Entries), Equals, true) } + +func FuzzDecode(f *testing.F) { + + f.Fuzz(func(t *testing.T, input []byte) { + + obj := &SortReadObject{ + t: plumbing.TreeObject, + h: plumbing.ZeroHash, + cont: input, + sz: int64(len(input)), + } + + newTree := &Tree{} + newTree.Decode(obj) + }) +} diff --git a/plumbing/protocol/packp/common.go b/plumbing/protocol/packp/common.go index fef50a4..a858323 100644 --- a/plumbing/protocol/packp/common.go +++ b/plumbing/protocol/packp/common.go @@ -48,6 +48,11 @@ func isFlush(payload []byte) bool { return len(payload) == 0 } +var ( + // ErrNilWriter is returned when a nil writer is passed to the encoder. + ErrNilWriter = fmt.Errorf("nil writer") +) + // ErrUnexpectedData represents an unexpected data decoding a message type ErrUnexpectedData struct { Msg string diff --git a/plumbing/protocol/packp/gitproto.go b/plumbing/protocol/packp/gitproto.go new file mode 100644 index 0000000..0b7ff8f --- /dev/null +++ b/plumbing/protocol/packp/gitproto.go @@ -0,0 +1,120 @@ +package packp + +import ( + "fmt" + "io" + "strings" + + "github.com/go-git/go-git/v5/plumbing/format/pktline" +) + +var ( + // ErrInvalidGitProtoRequest is returned by Decode if the input is not a + // valid git protocol request. + ErrInvalidGitProtoRequest = fmt.Errorf("invalid git protocol request") +) + +// GitProtoRequest is a command request for the git protocol. +// It is used to send the command, endpoint, and extra parameters to the +// remote. +// See https://git-scm.com/docs/pack-protocol#_git_transport +type GitProtoRequest struct { + RequestCommand string + Pathname string + + // Optional + Host string + + // Optional + ExtraParams []string +} + +// validate validates the request. +func (g *GitProtoRequest) validate() error { + if g.RequestCommand == "" { + return fmt.Errorf("%w: empty request command", ErrInvalidGitProtoRequest) + } + + if g.Pathname == "" { + return fmt.Errorf("%w: empty pathname", ErrInvalidGitProtoRequest) + } + + return nil +} + +// Encode encodes the request into the writer. +func (g *GitProtoRequest) Encode(w io.Writer) error { + if w == nil { + return ErrNilWriter + } + + if err := g.validate(); err != nil { + return err + } + + p := pktline.NewEncoder(w) + req := fmt.Sprintf("%s %s\x00", g.RequestCommand, g.Pathname) + if host := g.Host; host != "" { + req += fmt.Sprintf("host=%s\x00", host) + } + + if len(g.ExtraParams) > 0 { + req += "\x00" + for _, param := range g.ExtraParams { + req += param + "\x00" + } + } + + if err := p.Encode([]byte(req)); err != nil { + return err + } + + return nil +} + +// Decode decodes the request from the reader. +func (g *GitProtoRequest) Decode(r io.Reader) error { + s := pktline.NewScanner(r) + if !s.Scan() { + err := s.Err() + if err == nil { + return ErrInvalidGitProtoRequest + } + return err + } + + line := string(s.Bytes()) + if len(line) == 0 { + return io.EOF + } + + if line[len(line)-1] != 0 { + return fmt.Errorf("%w: missing null terminator", ErrInvalidGitProtoRequest) + } + + parts := strings.SplitN(line, " ", 2) + if len(parts) != 2 { + return fmt.Errorf("%w: short request", ErrInvalidGitProtoRequest) + } + + g.RequestCommand = parts[0] + params := strings.Split(parts[1], string(null)) + if len(params) < 1 { + return fmt.Errorf("%w: missing pathname", ErrInvalidGitProtoRequest) + } + + g.Pathname = params[0] + if len(params) > 1 { + g.Host = strings.TrimPrefix(params[1], "host=") + } + + if len(params) > 2 { + for _, param := range params[2:] { + if param != "" { + g.ExtraParams = append(g.ExtraParams, param) + } + } + } + + return nil +} diff --git a/plumbing/protocol/packp/gitproto_test.go b/plumbing/protocol/packp/gitproto_test.go new file mode 100644 index 0000000..9cf1049 --- /dev/null +++ b/plumbing/protocol/packp/gitproto_test.go @@ -0,0 +1,99 @@ +package packp + +import ( + "bytes" + "testing" +) + +func TestEncodeEmptyGitProtoRequest(t *testing.T) { + var buf bytes.Buffer + var p GitProtoRequest + err := p.Encode(&buf) + if err == nil { + t.Fatal("expected error") + } +} + +func TestEncodeGitProtoRequest(t *testing.T) { + var buf bytes.Buffer + p := GitProtoRequest{ + RequestCommand: "command", + Pathname: "pathname", + Host: "host", + ExtraParams: []string{"param1", "param2"}, + } + err := p.Encode(&buf) + if err != nil { + t.Fatal(err) + } + expected := "002ecommand pathname\x00host=host\x00\x00param1\x00param2\x00" + if buf.String() != expected { + t.Fatalf("expected %q, got %q", expected, buf.String()) + } +} + +func TestEncodeInvalidGitProtoRequest(t *testing.T) { + var buf bytes.Buffer + p := GitProtoRequest{ + RequestCommand: "command", + } + err := p.Encode(&buf) + if err == nil { + t.Fatal("expected error") + } +} + +func TestDecodeEmptyGitProtoRequest(t *testing.T) { + var buf bytes.Buffer + var p GitProtoRequest + err := p.Decode(&buf) + if err == nil { + t.Fatal("expected error") + } +} + +func TestDecodeGitProtoRequest(t *testing.T) { + var buf bytes.Buffer + buf.WriteString("002ecommand pathname\x00host=host\x00\x00param1\x00param2\x00") + var p GitProtoRequest + err := p.Decode(&buf) + if err != nil { + t.Fatal(err) + } + expected := GitProtoRequest{ + RequestCommand: "command", + Pathname: "pathname", + Host: "host", + ExtraParams: []string{"param1", "param2"}, + } + if p.RequestCommand != expected.RequestCommand { + t.Fatalf("expected %q, got %q", expected.RequestCommand, p.RequestCommand) + } + if p.Pathname != expected.Pathname { + t.Fatalf("expected %q, got %q", expected.Pathname, p.Pathname) + } + if p.Host != expected.Host { + t.Fatalf("expected %q, got %q", expected.Host, p.Host) + } + if len(p.ExtraParams) != len(expected.ExtraParams) { + t.Fatalf("expected %d, got %d", len(expected.ExtraParams), len(p.ExtraParams)) + } +} + +func TestDecodeInvalidGitProtoRequest(t *testing.T) { + var buf bytes.Buffer + buf.WriteString("0026command \x00host=host\x00\x00param1\x00param2") + var p GitProtoRequest + err := p.Decode(&buf) + if err == nil { + t.Fatal("expected error") + } +} + +func TestValidateEmptyGitProtoRequest(t *testing.T) { + var p GitProtoRequest + err := p.validate() + if err == nil { + t.Fatal("expected error") + } +} diff --git a/plumbing/protocol/packp/srvresp.go b/plumbing/protocol/packp/srvresp.go index 8cd0a72..a9ddb53 100644 --- a/plumbing/protocol/packp/srvresp.go +++ b/plumbing/protocol/packp/srvresp.go @@ -101,12 +101,14 @@ func (r *ServerResponse) decodeLine(line []byte) error { return fmt.Errorf("unexpected flush") } - if bytes.Equal(line[0:3], ack) { - return r.decodeACKLine(line) - } + if len(line) >= 3 { + if bytes.Equal(line[0:3], ack) { + return r.decodeACKLine(line) + } - if bytes.Equal(line[0:3], nak) { - return nil + if bytes.Equal(line[0:3], nak) { + return nil + } } return fmt.Errorf("unexpected content %q", string(line)) diff --git a/plumbing/protocol/packp/srvresp_test.go b/plumbing/protocol/packp/srvresp_test.go index aa0af52..b7270e7 100644 --- a/plumbing/protocol/packp/srvresp_test.go +++ b/plumbing/protocol/packp/srvresp_test.go @@ -3,6 +3,7 @@ package packp import ( "bufio" "bytes" + "fmt" "github.com/go-git/go-git/v5/plumbing" @@ -23,6 +24,32 @@ func (s *ServerResponseSuite) TestDecodeNAK(c *C) { c.Assert(sr.ACKs, HasLen, 0) } +func (s *ServerResponseSuite) TestDecodeNewLine(c *C) { + raw := "\n" + + sr := &ServerResponse{} + err := sr.Decode(bufio.NewReader(bytes.NewBufferString(raw)), false) + c.Assert(err, NotNil) + c.Assert(err.Error(), Equals, "invalid pkt-len found") +} + +func (s *ServerResponseSuite) TestDecodeEmpty(c *C) { + raw := "" + + sr := &ServerResponse{} + err := sr.Decode(bufio.NewReader(bytes.NewBufferString(raw)), false) + c.Assert(err, IsNil) +} + +func (s *ServerResponseSuite) TestDecodePartial(c *C) { + raw := "000600\n" + + sr := &ServerResponse{} + err := sr.Decode(bufio.NewReader(bytes.NewBufferString(raw)), false) + c.Assert(err, NotNil) + c.Assert(err.Error(), Equals, fmt.Sprintf("unexpected content %q", "00")) +} + func (s *ServerResponseSuite) TestDecodeACK(c *C) { raw := "0031ACK 6ecf0ef2c2dffb796033e5a02219af86ec6584e5\n" diff --git a/plumbing/protocol/packp/ulreq_decode.go b/plumbing/protocol/packp/ulreq_decode.go index 895a3bf..3da2998 100644 --- a/plumbing/protocol/packp/ulreq_decode.go +++ b/plumbing/protocol/packp/ulreq_decode.go @@ -43,7 +43,7 @@ func (d *ulReqDecoder) Decode(v *UploadRequest) error { return d.err } -// fills out the parser stiky error +// fills out the parser sticky error func (d *ulReqDecoder) error(format string, a ...interface{}) { msg := fmt.Sprintf( "pkt-line %d: %s", d.nLine, diff --git a/plumbing/protocol/packp/ulreq_decode_test.go b/plumbing/protocol/packp/ulreq_decode_test.go index efcc7b4..7658922 100644 --- a/plumbing/protocol/packp/ulreq_decode_test.go +++ b/plumbing/protocol/packp/ulreq_decode_test.go @@ -398,7 +398,7 @@ func (s *UlReqDecodeSuite) TestDeepenCommits(c *C) { c.Assert(int(commits), Equals, 1234) } -func (s *UlReqDecodeSuite) TestDeepenCommitsInfiniteInplicit(c *C) { +func (s *UlReqDecodeSuite) TestDeepenCommitsInfiniteImplicit(c *C) { payloads := []string{ "want 3333333333333333333333333333333333333333 ofs-delta multi_ack", "deepen 0", diff --git a/plumbing/protocol/packp/uppackresp_test.go b/plumbing/protocol/packp/uppackresp_test.go index 8fbf924..ec56507 100644 --- a/plumbing/protocol/packp/uppackresp_test.go +++ b/plumbing/protocol/packp/uppackresp_test.go @@ -3,6 +3,7 @@ package packp import ( "bytes" "io" + "testing" "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/protocol/packp/capability" @@ -128,3 +129,14 @@ func (s *UploadPackResponseSuite) TestEncodeMultiACK(c *C) { b := bytes.NewBuffer(nil) c.Assert(res.Encode(b), NotNil) } + +func FuzzDecoder(f *testing.F) { + + f.Fuzz(func(t *testing.T, input []byte) { + req := NewUploadPackRequest() + res := NewUploadPackResponse(req) + defer res.Close() + + res.Decode(io.NopCloser(bytes.NewReader(input))) + }) +} diff --git a/plumbing/reference.go b/plumbing/reference.go index 5a67f69..ddba930 100644 --- a/plumbing/reference.go +++ b/plumbing/reference.go @@ -3,6 +3,7 @@ package plumbing import ( "errors" "fmt" + "regexp" "strings" ) @@ -29,6 +30,9 @@ var RefRevParseRules = []string{ var ( ErrReferenceNotFound = errors.New("reference not found") + + // ErrInvalidReferenceName is returned when a reference name is invalid. + ErrInvalidReferenceName = errors.New("invalid reference name") ) // ReferenceType reference type's @@ -124,6 +128,91 @@ func (r ReferenceName) Short() string { return res } +var ( + ctrlSeqs = regexp.MustCompile(`[\000-\037\177]`) +) + +// Validate validates a reference name. +// This follows the git-check-ref-format rules. +// See https://git-scm.com/docs/git-check-ref-format +// +// It is important to note that this function does not check if the reference +// exists in the repository. +// It only checks if the reference name is valid. +// This functions does not support the --refspec-pattern, --normalize, and +// --allow-onelevel options. +// +// Git imposes the following rules on how references are named: +// +// 1. They can include slash / for hierarchical (directory) grouping, but no +// slash-separated component can begin with a dot . or end with the +// sequence .lock. +// 2. They must contain at least one /. This enforces the presence of a +// category like heads/, tags/ etc. but the actual names are not +// restricted. If the --allow-onelevel option is used, this rule is +// waived. +// 3. They cannot have two consecutive dots .. anywhere. +// 4. They cannot have ASCII control characters (i.e. bytes whose values are +// lower than \040, or \177 DEL), space, tilde ~, caret ^, or colon : +// anywhere. +// 5. They cannot have question-mark ?, asterisk *, or open bracket [ +// anywhere. See the --refspec-pattern option below for an exception to this +// rule. +// 6. They cannot begin or end with a slash / or contain multiple consecutive +// slashes (see the --normalize option below for an exception to this rule). +// 7. They cannot end with a dot .. +// 8. They cannot contain a sequence @{. +// 9. They cannot be the single character @. +// 10. They cannot contain a \. +func (r ReferenceName) Validate() error { + s := string(r) + if len(s) == 0 { + return ErrInvalidReferenceName + } + + // HEAD is a special case + if r == HEAD { + return nil + } + + // rule 7 + if strings.HasSuffix(s, ".") { + return ErrInvalidReferenceName + } + + // rule 2 + parts := strings.Split(s, "/") + if len(parts) < 2 { + return ErrInvalidReferenceName + } + + isBranch := r.IsBranch() + isTag := r.IsTag() + for _, part := range parts { + // rule 6 + if len(part) == 0 { + return ErrInvalidReferenceName + } + + if strings.HasPrefix(part, ".") || // rule 1 + strings.Contains(part, "..") || // rule 3 + ctrlSeqs.MatchString(part) || // rule 4 + strings.ContainsAny(part, "~^:?*[ \t\n") || // rule 4 & 5 + strings.Contains(part, "@{") || // rule 8 + part == "@" || // rule 9 + strings.Contains(part, "\\") || // rule 10 + strings.HasSuffix(part, ".lock") { // rule 1 + return ErrInvalidReferenceName + } + + if (isBranch || isTag) && strings.HasPrefix(part, "-") { // branches & tags can't start with - + return ErrInvalidReferenceName + } + } + + return nil +} + const ( HEAD ReferenceName = "HEAD" Master ReferenceName = "refs/heads/master" diff --git a/plumbing/reference_test.go b/plumbing/reference_test.go index 04dfef9..ce57075 100644 --- a/plumbing/reference_test.go +++ b/plumbing/reference_test.go @@ -103,6 +103,65 @@ func (s *ReferenceSuite) TestIsTag(c *C) { c.Assert(r.IsTag(), Equals, true) } +func (s *ReferenceSuite) TestValidReferenceNames(c *C) { + valid := []ReferenceName{ + "refs/heads/master", + "refs/notes/commits", + "refs/remotes/origin/master", + "HEAD", + "refs/tags/v3.1.1", + "refs/pulls/1/head", + "refs/pulls/1/merge", + "refs/pulls/1/abc.123", + "refs/pulls", + "refs/-", // should this be allowed? + } + for _, v := range valid { + c.Assert(v.Validate(), IsNil) + } + + invalid := []ReferenceName{ + "refs", + "refs/", + "refs//", + "refs/heads/\\", + "refs/heads/\\foo", + "refs/heads/\\foo/bar", + "abc", + "", + "refs/heads/ ", + "refs/heads/ /", + "refs/heads/ /foo", + "refs/heads/.", + "refs/heads/..", + "refs/heads/foo..", + "refs/heads/foo.lock", + "refs/heads/foo@{bar}", + "refs/heads/foo[", + "refs/heads/foo~", + "refs/heads/foo^", + "refs/heads/foo:", + "refs/heads/foo?", + "refs/heads/foo*", + "refs/heads/foo[bar", + "refs/heads/foo\t", + "refs/heads/@", + "refs/heads/@{bar}", + "refs/heads/\n", + "refs/heads/-foo", + "refs/heads/foo..bar", + "refs/heads/-", + "refs/tags/-", + "refs/tags/-foo", + } + + for i, v := range invalid { + comment := Commentf("invalid reference name case %d: %s", i, v) + c.Assert(v.Validate(), NotNil, comment) + c.Assert(v.Validate(), ErrorMatches, "invalid reference name", comment) + } +} + func benchMarkReferenceString(r *Reference, b *testing.B) { for n := 0; n < b.N; n++ { _ = r.String() diff --git a/plumbing/serverinfo/serverinfo.go b/plumbing/serverinfo/serverinfo.go new file mode 100644 index 0000000..d7ea7ef --- /dev/null +++ b/plumbing/serverinfo/serverinfo.go @@ -0,0 +1,94 @@ +package serverinfo + +import ( + "fmt" + + "github.com/go-git/go-billy/v5" + "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/internal/reference" + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/object" + "github.com/go-git/go-git/v5/plumbing/storer" + "github.com/go-git/go-git/v5/storage" +) + +// UpdateServerInfo updates the server info files in the repository. +// +// It generates a list of available refs for the repository. +// Used by git http transport (dumb), for more information refer to: +// https://git-scm.com/book/id/v2/Git-Internals-Transfer-Protocols#_the_dumb_protocol +func UpdateServerInfo(s storage.Storer, fs billy.Filesystem) error { + pos, ok := s.(storer.PackedObjectStorer) + if !ok { + return git.ErrPackedObjectsNotSupported + } + + infoRefs, err := fs.Create("info/refs") + if err != nil { + return err + } + + defer infoRefs.Close() + + refsIter, err := s.IterReferences() + if err != nil { + return err + } + + defer refsIter.Close() + + var refs []*plumbing.Reference + if err := refsIter.ForEach(func(ref *plumbing.Reference) error { + refs = append(refs, ref) + return nil + }); err != nil { + return err + } + + reference.Sort(refs) + for _, ref := range refs { + name := ref.Name() + hash := ref.Hash() + switch ref.Type() { + case plumbing.SymbolicReference: + if name == plumbing.HEAD { + continue + } + ref, err := s.Reference(ref.Target()) + if err != nil { + return err + } + + hash = ref.Hash() + fallthrough + case plumbing.HashReference: + fmt.Fprintf(infoRefs, "%s\t%s\n", hash, name) + if name.IsTag() { + tag, err := object.GetTag(s, hash) + if err == nil { + fmt.Fprintf(infoRefs, "%s\t%s^{}\n", tag.Target, name) + } + } + } + } + + infoPacks, err := fs.Create("objects/info/packs") + if err != nil { + return err + } + + defer infoPacks.Close() + + packs, err := pos.ObjectPacks() + if err != nil { + return err + } + + for _, p := range packs { + fmt.Fprintf(infoPacks, "P pack-%s.pack\n", p) + } + + fmt.Fprintln(infoPacks) + + return nil +} diff --git a/plumbing/serverinfo/serverinfo_test.go b/plumbing/serverinfo/serverinfo_test.go new file mode 100644 index 0000000..0a52ea2 --- /dev/null +++ b/plumbing/serverinfo/serverinfo_test.go @@ -0,0 +1,185 @@ +package serverinfo + +import ( + "io" + "strings" + "testing" + + "github.com/go-git/go-billy/v5" + "github.com/go-git/go-billy/v5/memfs" + fixtures "github.com/go-git/go-git-fixtures/v4" + "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/object" + "github.com/go-git/go-git/v5/plumbing/storer" + "github.com/go-git/go-git/v5/storage" + "github.com/go-git/go-git/v5/storage/memory" + . "gopkg.in/check.v1" +) + +type ServerInfoSuite struct{} + +var _ = Suite(&ServerInfoSuite{}) + +func Test(t *testing.T) { TestingT(t) } + +func (s *ServerInfoSuite) TestUpdateServerInfoInit(c *C) { + fs := memfs.New() + st := memory.NewStorage() + r, err := git.Init(st, fs) + c.Assert(err, IsNil) + c.Assert(r, NotNil) + + err = UpdateServerInfo(st, fs) + c.Assert(err, IsNil) +} + +func assertInfoRefs(c *C, st storage.Storer, fs billy.Filesystem) { + refsFile, err := fs.Open("info/refs") + c.Assert(err, IsNil) + + defer refsFile.Close() + bts, err := io.ReadAll(refsFile) + c.Assert(err, IsNil) + + localRefs := make(map[plumbing.ReferenceName]plumbing.Hash) + for _, line := range strings.Split(string(bts), "\n") { + if line == "" { + continue + } + parts := strings.Split(line, "\t") + c.Assert(parts, HasLen, 2) + hash := plumbing.NewHash(parts[0]) + name := plumbing.ReferenceName(parts[1]) + localRefs[name] = hash + } + + refs, err := st.IterReferences() + c.Assert(err, IsNil) + + err = refs.ForEach(func(ref *plumbing.Reference) error { + name := ref.Name() + hash := ref.Hash() + switch ref.Type() { + case plumbing.SymbolicReference: + if name == plumbing.HEAD { + return nil + } + ref, err := st.Reference(ref.Target()) + c.Assert(err, IsNil) + hash = ref.Hash() + fallthrough + case plumbing.HashReference: + h, ok := localRefs[name] + c.Assert(ok, Equals, true) + c.Assert(h, Equals, hash) + if name.IsTag() { + tag, err := object.GetTag(st, hash) + if err == nil { + t, ok := localRefs[name+"^{}"] + c.Assert(ok, Equals, true) + c.Assert(t, Equals, tag.Target) + } + } + } + return nil + }) + + c.Assert(err, IsNil) +} + +func assertObjectPacks(c *C, st storage.Storer, fs billy.Filesystem) { + infoPacks, err := fs.Open("objects/info/packs") + c.Assert(err, IsNil) + + defer infoPacks.Close() + bts, err := io.ReadAll(infoPacks) + c.Assert(err, IsNil) + + pos, ok := st.(storer.PackedObjectStorer) + c.Assert(ok, Equals, true) + localPacks := make(map[string]struct{}) + packs, err := pos.ObjectPacks() + c.Assert(err, IsNil) + + for _, line := range strings.Split(string(bts), "\n") { + if line == "" { + continue + } + parts := strings.Split(line, " ") + c.Assert(parts, HasLen, 2) + pack := strings.TrimPrefix(parts[1], "pack-") + pack = strings.TrimSuffix(pack, ".pack") + localPacks[pack] = struct{}{} + } + + for _, p := range packs { + _, ok := localPacks[p.String()] + c.Assert(ok, Equals, true) + } +} + +func (s *ServerInfoSuite) TestUpdateServerInfoTags(c *C) { + fs := memfs.New() + st := memory.NewStorage() + r, err := git.Clone(st, fs, &git.CloneOptions{ + URL: fixtures.ByURL("https://github.com/git-fixtures/tags.git").One().URL, + }) + c.Assert(err, IsNil) + c.Assert(r, NotNil) + + err = UpdateServerInfo(st, fs) + c.Assert(err, IsNil) + + assertInfoRefs(c, st, fs) + assertObjectPacks(c, st, fs) +} + +func (s *ServerInfoSuite) TestUpdateServerInfoBasic(c *C) { + fs := memfs.New() + st := memory.NewStorage() + r, err := git.Clone(st, fs, &git.CloneOptions{ + URL: fixtures.Basic().One().URL, + }) + c.Assert(err, IsNil) + c.Assert(r, NotNil) + + err = UpdateServerInfo(st, fs) + c.Assert(err, IsNil) + + assertInfoRefs(c, st, fs) + assertObjectPacks(c, st, fs) +} + +func (s *ServerInfoSuite) TestUpdateServerInfoBasicChange(c *C) { + fs := memfs.New() + st := memory.NewStorage() + r, err := git.Clone(st, fs, &git.CloneOptions{ + URL: fixtures.Basic().One().URL, + }) + c.Assert(err, IsNil) + c.Assert(r, NotNil) + + err = UpdateServerInfo(st, fs) + c.Assert(err, IsNil) + + assertInfoRefs(c, st, fs) + assertObjectPacks(c, st, fs) + + head, err := r.Head() + c.Assert(err, IsNil) + + ref := plumbing.NewHashReference("refs/heads/my-branch", head.Hash()) + err = r.Storer.SetReference(ref) + c.Assert(err, IsNil) + + _, err = r.CreateTag("test-tag", head.Hash(), &git.CreateTagOptions{ + Message: "test-tag", + }) + c.Assert(err, IsNil) + + err = UpdateServerInfo(st, fs) + + assertInfoRefs(c, st, fs) + assertObjectPacks(c, st, fs) +} diff --git a/plumbing/storer/object.go b/plumbing/storer/object.go index d8a9c27..126b374 100644 --- a/plumbing/storer/object.go +++ b/plumbing/storer/object.go @@ -42,6 +42,7 @@ type EncodedObjectStorer interface { HasEncodedObject(plumbing.Hash) error // EncodedObjectSize returns the plaintext size of the encoded object. EncodedObjectSize(plumbing.Hash) (int64, error) + AddAlternate(remote string) error } // DeltaObjectStorer is an EncodedObjectStorer that can return delta diff --git a/plumbing/storer/object_test.go b/plumbing/storer/object_test.go index 30424ff..f2e6a5e 100644 --- a/plumbing/storer/object_test.go +++ b/plumbing/storer/object_test.go @@ -168,3 +168,7 @@ func (o *MockObjectStorage) IterEncodedObjects(t plumbing.ObjectType) (EncodedOb func (o *MockObjectStorage) Begin() Transaction { return nil } + +func (o *MockObjectStorage) AddAlternate(remote string) error { + return nil +} diff --git a/plumbing/transport/common.go b/plumbing/transport/common.go index c6a054a..b05437f 100644 --- a/plumbing/transport/common.go +++ b/plumbing/transport/common.go @@ -108,7 +108,7 @@ type Endpoint struct { // Host is the host. Host string // Port is the port to connect, if 0 the default port for the given protocol - // wil be used. + // will be used. Port int // Path is the repository path. Path string diff --git a/plumbing/transport/common_test.go b/plumbing/transport/common_test.go index d9f12ab..3efc555 100644 --- a/plumbing/transport/common_test.go +++ b/plumbing/transport/common_test.go @@ -210,3 +210,10 @@ func (s *SuiteCommon) TestNewEndpointIPv6(c *C) { c.Assert(e.Host, Equals, "[::1]") c.Assert(e.String(), Equals, "http://[::1]:8080/foo.git") } + +func FuzzNewEndpoint(f *testing.F) { + + f.Fuzz(func(t *testing.T, input string) { + NewEndpoint(input) + }) +} diff --git a/plumbing/transport/file/client.go b/plumbing/transport/file/client.go index 6f0a380..38714e2 100644 --- a/plumbing/transport/file/client.go +++ b/plumbing/transport/file/client.go @@ -11,7 +11,6 @@ import ( "github.com/go-git/go-git/v5/plumbing/transport" "github.com/go-git/go-git/v5/plumbing/transport/internal/common" - "github.com/go-git/go-git/v5/utils/ioutil" "golang.org/x/sys/execabs" ) @@ -112,7 +111,7 @@ func (c *command) Start() error { func (c *command) StderrPipe() (io.Reader, error) { // Pipe returned by Command.StderrPipe has a race with Read + Command.Wait. // We use an io.Pipe and close it after the command finishes. - r, w := ioutil.Pipe() + r, w := io.Pipe() c.cmd.Stderr = w c.stderrCloser = r return r, nil diff --git a/plumbing/transport/file/common_test.go b/plumbing/transport/file/common_test.go index 7e033a8..a217e97 100644 --- a/plumbing/transport/file/common_test.go +++ b/plumbing/transport/file/common_test.go @@ -29,8 +29,8 @@ func (s *CommonSuite) SetUpSuite(c *C) { s.ReceivePackBin = filepath.Join(s.tmpDir, "git-receive-pack") s.UploadPackBin = filepath.Join(s.tmpDir, "git-upload-pack") bin := filepath.Join(s.tmpDir, "go-git") - cmd := exec.Command("go", "build", "-o", bin, - "../../../cli/go-git/...") + cmd := exec.Command("go", "build", "-o", bin) + cmd.Dir = "../../../cli/go-git" c.Assert(cmd.Run(), IsNil) c.Assert(os.Symlink(bin, s.ReceivePackBin), IsNil) c.Assert(os.Symlink(bin, s.UploadPackBin), IsNil) diff --git a/plumbing/transport/git/common.go b/plumbing/transport/git/common.go index 92fc0be..2b878b0 100644 --- a/plumbing/transport/git/common.go +++ b/plumbing/transport/git/common.go @@ -2,12 +2,11 @@ package git import ( - "fmt" "io" "net" "strconv" - "github.com/go-git/go-git/v5/plumbing/format/pktline" + "github.com/go-git/go-git/v5/plumbing/protocol/packp" "github.com/go-git/go-git/v5/plumbing/transport" "github.com/go-git/go-git/v5/plumbing/transport/internal/common" "github.com/go-git/go-git/v5/utils/ioutil" @@ -42,10 +41,18 @@ type command struct { // Start executes the command sending the required message to the TCP connection func (c *command) Start() error { - cmd := endpointToCommand(c.command, c.endpoint) + req := packp.GitProtoRequest{ + RequestCommand: c.command, + Pathname: c.endpoint.Path, + } + host := c.endpoint.Host + if c.endpoint.Port != DefaultPort { + host = net.JoinHostPort(c.endpoint.Host, strconv.Itoa(c.endpoint.Port)) + } + + req.Host = host - e := pktline.NewEncoder(c.conn) - return e.Encode([]byte(cmd)) + return req.Encode(c.conn) } func (c *command) connect() error { @@ -90,15 +97,6 @@ func (c *command) StdoutPipe() (io.Reader, error) { return c.conn, nil } -func endpointToCommand(cmd string, ep *transport.Endpoint) string { - host := ep.Host - if ep.Port != DefaultPort { - host = net.JoinHostPort(ep.Host, strconv.Itoa(ep.Port)) - } - - return fmt.Sprintf("%s %s%chost=%s%c", cmd, ep.Path, 0, host, 0) -} - // Close closes the TCP connection and connection. func (c *command) Close() error { if !c.connected { diff --git a/plumbing/transport/git/common_test.go b/plumbing/transport/git/common_test.go index 7389919..3cab933 100644 --- a/plumbing/transport/git/common_test.go +++ b/plumbing/transport/git/common_test.go @@ -1,6 +1,7 @@ package git import ( + "bytes" "fmt" "net" "os" @@ -32,7 +33,12 @@ func (s *BaseSuite) SetUpTest(c *C) { See https://github.com/git-for-windows/git/issues/907`) } - var err error + cmd := exec.Command("git", "daemon", "--help") + output, err := cmd.CombinedOutput() + if err != nil && bytes.Contains(output, []byte("'daemon' is not a git command")) { + c.Fatal("git daemon cannot be found") + } + s.port, err = freePort() c.Assert(err, IsNil) @@ -85,11 +91,15 @@ func (s *BaseSuite) prepareRepository(c *C, f *fixtures.Fixture, name string) *t } func (s *BaseSuite) TearDownTest(c *C) { - _ = s.daemon.Process.Signal(os.Kill) - _ = s.daemon.Wait() + if s.daemon != nil { + _ = s.daemon.Process.Signal(os.Kill) + _ = s.daemon.Wait() + } - err := os.RemoveAll(s.base) - c.Assert(err, IsNil) + if s.base != "" { + err := os.RemoveAll(s.base) + c.Assert(err, IsNil) + } } func freePort() (int, error) { diff --git a/plumbing/transport/http/common.go b/plumbing/transport/http/common.go index a7cdc1e..54126fe 100644 --- a/plumbing/transport/http/common.go +++ b/plumbing/transport/http/common.go @@ -406,14 +406,28 @@ func (a *TokenAuth) String() string { // Err is a dedicated error to return errors based on status code type Err struct { Response *http.Response + Reason string } -// NewErr returns a new Err based on a http response +// NewErr returns a new Err based on a http response and closes response body +// if needed func NewErr(r *http.Response) error { if r.StatusCode >= http.StatusOK && r.StatusCode < http.StatusMultipleChoices { return nil } + var reason string + + // If a response message is present, add it to error + var messageBuffer bytes.Buffer + if r.Body != nil { + messageLength, _ := messageBuffer.ReadFrom(r.Body) + if messageLength > 0 { + reason = messageBuffer.String() + } + _ = r.Body.Close() + } + switch r.StatusCode { case http.StatusUnauthorized: return transport.ErrAuthenticationRequired @@ -423,7 +437,7 @@ func NewErr(r *http.Response) error { return transport.ErrRepositoryNotFound } - return plumbing.NewUnexpectedError(&Err{r}) + return plumbing.NewUnexpectedError(&Err{r, reason}) } // StatusCode returns the status code of the response diff --git a/plumbing/transport/http/common_test.go b/plumbing/transport/http/common_test.go index 1517228..6bd018b 100644 --- a/plumbing/transport/http/common_test.go +++ b/plumbing/transport/http/common_test.go @@ -3,6 +3,7 @@ package http import ( "crypto/tls" "fmt" + "io" "log" "net" "net/http" @@ -14,6 +15,7 @@ import ( "strings" "testing" + "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/transport" fixtures "github.com/go-git/go-git-fixtures/v4" @@ -90,6 +92,23 @@ func (s *ClientSuite) TestNewHTTPError40x(c *C) { "unexpected client error.*") } +func (s *ClientSuite) TestNewUnexpectedError(c *C) { + res := &http.Response{ + StatusCode: 500, + Body: io.NopCloser(strings.NewReader("Unexpected error")), + } + + err := NewErr(res) + c.Assert(err, NotNil) + c.Assert(err, FitsTypeOf, &plumbing.UnexpectedError{}) + + unexpectedError, _ := err.(*plumbing.UnexpectedError) + c.Assert(unexpectedError.Err, FitsTypeOf, &Err{}) + + httpError, _ := unexpectedError.Err.(*Err) + c.Assert(httpError.Reason, Equals, "Unexpected error") +} + func (s *ClientSuite) Test_newSession(c *C) { cl := NewClientWithOptions(nil, &ClientOptions{ CacheMaxEntries: 2, diff --git a/plumbing/transport/http/receive_pack.go b/plumbing/transport/http/receive_pack.go index 4387ecf..3e736cd 100644 --- a/plumbing/transport/http/receive_pack.go +++ b/plumbing/transport/http/receive_pack.go @@ -102,7 +102,6 @@ func (s *rpSession) doRequest( } if err := NewErr(res); err != nil { - _ = res.Body.Close() return nil, err } diff --git a/plumbing/transport/http/upload_pack.go b/plumbing/transport/http/upload_pack.go index 4f85145..3432618 100644 --- a/plumbing/transport/http/upload_pack.go +++ b/plumbing/transport/http/upload_pack.go @@ -100,7 +100,6 @@ func (s *upSession) doRequest( } if err := NewErr(res); err != nil { - _ = res.Body.Close() return nil, err } diff --git a/plumbing/transport/internal/common/common.go b/plumbing/transport/internal/common/common.go index 5fdf425..9e1d023 100644 --- a/plumbing/transport/internal/common/common.go +++ b/plumbing/transport/internal/common/common.go @@ -11,6 +11,7 @@ import ( "errors" "fmt" "io" + "regexp" "strings" "time" @@ -28,6 +29,10 @@ const ( var ( ErrTimeoutExceeded = errors.New("timeout exceeded") + // stdErrSkipPattern is used for skipping lines from a command's stderr output. + // Any line matching this pattern will be skipped from further + // processing and not be returned to calling code. + stdErrSkipPattern = regexp.MustCompile("^remote:( =*){0,1}$") ) // Commander creates Command instances. This is the main entry point for @@ -149,10 +154,17 @@ func (c *client) listenFirstError(r io.Reader) chan string { errLine := make(chan string, 1) go func() { s := bufio.NewScanner(r) - if s.Scan() { - errLine <- s.Text() - } else { - close(errLine) + for { + if s.Scan() { + line := s.Text() + if !stdErrSkipPattern.MatchString(line) { + errLine <- line + break + } + } else { + close(errLine) + break + } } _, _ = io.Copy(io.Discard, r) @@ -191,9 +203,22 @@ func (s *session) AdvertisedReferencesContext(ctx context.Context) (*packp.AdvRe } func (s *session) handleAdvRefDecodeError(err error) error { + var errLine *pktline.ErrorLine + if errors.As(err, &errLine) { + if isRepoNotFoundError(errLine.Text) { + return transport.ErrRepositoryNotFound + } + + return errLine + } + // If repository is not found, we get empty stdout and server writes an // error to stderr. - if err == packp.ErrEmptyInput { + if errors.Is(err, packp.ErrEmptyInput) { + // TODO:(v6): handle this error in a better way. + // Instead of checking the stderr output for a specific error message, + // define an ExitError and embed the stderr output and exit (if one + // exists) in the error struct. Just like exec.ExitError. s.finished = true if err := s.checkNotFoundError(); err != nil { return err @@ -233,6 +258,12 @@ func (s *session) handleAdvRefDecodeError(err error) error { // returned with the packfile content. The reader must be closed after reading. func (s *session) UploadPack(ctx context.Context, req *packp.UploadPackRequest) (*packp.UploadPackResponse, error) { if req.IsEmpty() { + // XXX: IsEmpty means haves are a subset of wants, in that case we have + // everything we asked for. Close the connection and return nil. + if err := s.finish(); err != nil { + return nil, err + } + // TODO:(v6) return nil here return nil, transport.ErrEmptyUploadPackRequest } @@ -381,54 +412,43 @@ func (s *session) checkNotFoundError() error { return transport.ErrRepositoryNotFound } + // TODO:(v6): return server error just as it is without a prefix return fmt.Errorf("unknown error: %s", line) } } -var ( - githubRepoNotFoundErr = "ERROR: Repository not found." - bitbucketRepoNotFoundErr = "conq: repository does not exist." +const ( + githubRepoNotFoundErr = "Repository not found." + bitbucketRepoNotFoundErr = "repository does not exist." localRepoNotFoundErr = "does not appear to be a git repository" - gitProtocolNotFoundErr = "ERR \n Repository not found." - gitProtocolNoSuchErr = "ERR no such repository" - gitProtocolAccessDeniedErr = "ERR access denied" - gogsAccessDeniedErr = "Gogs: Repository does not exist or you do not have access" + gitProtocolNotFoundErr = "Repository not found." + gitProtocolNoSuchErr = "no such repository" + gitProtocolAccessDeniedErr = "access denied" + gogsAccessDeniedErr = "Repository does not exist or you do not have access" + gitlabRepoNotFoundErr = "The project you were looking for could not be found" ) func isRepoNotFoundError(s string) bool { - if strings.HasPrefix(s, githubRepoNotFoundErr) { - return true - } - - if strings.HasPrefix(s, bitbucketRepoNotFoundErr) { - return true - } - - if strings.HasSuffix(s, localRepoNotFoundErr) { - return true - } - - if strings.HasPrefix(s, gitProtocolNotFoundErr) { - return true - } - - if strings.HasPrefix(s, gitProtocolNoSuchErr) { - return true - } - - if strings.HasPrefix(s, gitProtocolAccessDeniedErr) { - return true - } - - if strings.HasPrefix(s, gogsAccessDeniedErr) { - return true + for _, err := range []string{ + githubRepoNotFoundErr, + bitbucketRepoNotFoundErr, + localRepoNotFoundErr, + gitProtocolNotFoundErr, + gitProtocolNoSuchErr, + gitProtocolAccessDeniedErr, + gogsAccessDeniedErr, + gitlabRepoNotFoundErr, + } { + if strings.Contains(s, err) { + return true + } } return false } // uploadPack implements the git-upload-pack protocol. -func uploadPack(w io.WriteCloser, r io.Reader, req *packp.UploadPackRequest) error { +func uploadPack(w io.WriteCloser, _ io.Reader, req *packp.UploadPackRequest) error { // TODO support multi_ack mode // TODO support multi_ack_detailed mode // TODO support acks for common objects diff --git a/plumbing/transport/internal/common/common_test.go b/plumbing/transport/internal/common/common_test.go index affa787..9344bb6 100644 --- a/plumbing/transport/internal/common/common_test.go +++ b/plumbing/transport/internal/common/common_test.go @@ -4,6 +4,7 @@ import ( "fmt" "testing" + "github.com/go-git/go-git/v5/plumbing/transport" . "gopkg.in/check.v1" ) @@ -21,56 +22,8 @@ func (s *CommonSuite) TestIsRepoNotFoundErrorForUnknownSource(c *C) { c.Assert(isRepoNotFound, Equals, false) } -func (s *CommonSuite) TestIsRepoNotFoundErrorForGithub(c *C) { - msg := fmt.Sprintf("%s : some error stuf", githubRepoNotFoundErr) - - isRepoNotFound := isRepoNotFoundError(msg) - - c.Assert(isRepoNotFound, Equals, true) -} - -func (s *CommonSuite) TestIsRepoNotFoundErrorForBitBucket(c *C) { - msg := fmt.Sprintf("%s : some error stuf", bitbucketRepoNotFoundErr) - - isRepoNotFound := isRepoNotFoundError(msg) - - c.Assert(isRepoNotFound, Equals, true) -} - -func (s *CommonSuite) TestIsRepoNotFoundErrorForLocal(c *C) { - msg := fmt.Sprintf("some error stuf : %s", localRepoNotFoundErr) - - isRepoNotFound := isRepoNotFoundError(msg) - - c.Assert(isRepoNotFound, Equals, true) -} - -func (s *CommonSuite) TestIsRepoNotFoundErrorForGitProtocolNotFound(c *C) { - msg := fmt.Sprintf("%s : some error stuf", gitProtocolNotFoundErr) - - isRepoNotFound := isRepoNotFoundError(msg) - - c.Assert(isRepoNotFound, Equals, true) -} - -func (s *CommonSuite) TestIsRepoNotFoundErrorForGitProtocolNoSuch(c *C) { - msg := fmt.Sprintf("%s : some error stuf", gitProtocolNoSuchErr) - - isRepoNotFound := isRepoNotFoundError(msg) - - c.Assert(isRepoNotFound, Equals, true) -} - -func (s *CommonSuite) TestIsRepoNotFoundErrorForGitProtocolAccessDenied(c *C) { - msg := fmt.Sprintf("%s : some error stuf", gitProtocolAccessDeniedErr) - - isRepoNotFound := isRepoNotFoundError(msg) - - c.Assert(isRepoNotFound, Equals, true) -} - -func (s *CommonSuite) TestIsRepoNotFoundErrorForGogsAccessDenied(c *C) { - msg := fmt.Sprintf("%s : some error stuf", gogsAccessDeniedErr) +func (s *CommonSuite) TestIsRepoNotFoundError(c *C) { + msg := "no such repository : some error stuf" isRepoNotFound := isRepoNotFoundError(msg) @@ -90,3 +43,51 @@ func (s *CommonSuite) TestCheckNotFoundError(c *C) { c.Assert(err, IsNil) } + +func TestAdvertisedReferencesWithRemoteError(t *testing.T) { + tests := []struct { + name string + stderr string + wantErr error + }{ + { + name: "unknown error", + stderr: "something", + wantErr: fmt.Errorf("unknown error: something"), + }, + { + name: "GitLab: repository not found", + stderr: `remote: +remote: ======================================================================== +remote: +remote: ERROR: The project you were looking for could not be found or you don't have permission to view it. + +remote: +remote: ======================================================================== +remote:`, + wantErr: transport.ErrRepositoryNotFound, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + client := NewClient(MockCommander{stderr: tt.stderr}) + sess, err := client.NewUploadPackSession(nil, nil) + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + + _, err = sess.AdvertisedReferences() + + if tt.wantErr != nil { + if tt.wantErr != err { + if tt.wantErr.Error() != err.Error() { + t.Fatalf("expected a different error: got '%s', expected '%s'", err, tt.wantErr) + } + } + } else if err != nil { + t.Fatalf("unexpected error: %s", err) + } + }) + } +} diff --git a/plumbing/transport/internal/common/mocks.go b/plumbing/transport/internal/common/mocks.go new file mode 100644 index 0000000..bc18b27 --- /dev/null +++ b/plumbing/transport/internal/common/mocks.go @@ -0,0 +1,46 @@ +package common + +import ( + "bytes" + "io" + + gogitioutil "github.com/go-git/go-git/v5/utils/ioutil" + + "github.com/go-git/go-git/v5/plumbing/transport" +) + +type MockCommand struct { + stdin bytes.Buffer + stdout bytes.Buffer + stderr bytes.Buffer +} + +func (c MockCommand) StderrPipe() (io.Reader, error) { + return &c.stderr, nil +} + +func (c MockCommand) StdinPipe() (io.WriteCloser, error) { + return gogitioutil.WriteNopCloser(&c.stdin), nil +} + +func (c MockCommand) StdoutPipe() (io.Reader, error) { + return &c.stdout, nil +} + +func (c MockCommand) Start() error { + return nil +} + +func (c MockCommand) Close() error { + panic("not implemented") +} + +type MockCommander struct { + stderr string +} + +func (c MockCommander) Command(cmd string, ep *transport.Endpoint, auth transport.AuthMethod) (Command, error) { + return &MockCommand{ + stderr: *bytes.NewBufferString(c.stderr), + }, nil +} diff --git a/plumbing/transport/server/server.go b/plumbing/transport/server/server.go index 11fa0c8..cf5d6f4 100644 --- a/plumbing/transport/server/server.go +++ b/plumbing/transport/server/server.go @@ -166,7 +166,7 @@ func (s *upSession) UploadPack(ctx context.Context, req *packp.UploadPackRequest return nil, err } - pr, pw := ioutil.Pipe() + pr, pw := io.Pipe() e := packfile.NewEncoder(pw, s.storer, false) go func() { // TODO: plumb through a pack window. diff --git a/plumbing/transport/ssh/common.go b/plumbing/transport/ssh/common.go index 1531603..05dea44 100644 --- a/plumbing/transport/ssh/common.go +++ b/plumbing/transport/ssh/common.go @@ -49,7 +49,9 @@ type runner struct { func (r *runner) Command(cmd string, ep *transport.Endpoint, auth transport.AuthMethod) (common.Command, error) { c := &command{command: cmd, endpoint: ep, config: r.config} if auth != nil { - c.setAuth(auth) + if err := c.setAuth(auth); err != nil { + return nil, err + } } if err := c.connect(); err != nil { @@ -168,7 +170,7 @@ func dial(network, addr string, proxyOpts transport.ProxyOptions, config *ssh.Cl defer cancel() var conn net.Conn - var err error + var dialErr error if proxyOpts.URL != "" { proxyUrl, err := proxyOpts.FullURL() @@ -186,12 +188,12 @@ func dial(network, addr string, proxyOpts transport.ProxyOptions, config *ssh.Cl return nil, fmt.Errorf("expected ssh proxy dialer to be of type %s; got %s", reflect.TypeOf(ctxDialer), reflect.TypeOf(dialer)) } - conn, err = ctxDialer.DialContext(ctx, "tcp", addr) + conn, dialErr = ctxDialer.DialContext(ctx, "tcp", addr) } else { - conn, err = proxy.Dial(ctx, network, addr) + conn, dialErr = proxy.Dial(ctx, network, addr) } - if err != nil { - return nil, err + if dialErr != nil { + return nil, dialErr } c, chans, reqs, err := ssh.NewClientConn(conn, addr, config) diff --git a/plumbing/transport/ssh/common_test.go b/plumbing/transport/ssh/common_test.go index 496e82d..a724936 100644 --- a/plumbing/transport/ssh/common_test.go +++ b/plumbing/transport/ssh/common_test.go @@ -172,6 +172,28 @@ func (s *SuiteCommon) TestIssue70(c *C) { c.Assert(err, IsNil) } +/* +Given, an endpoint to a git server with a socks5 proxy URL, +When, the socks5 proxy server is not reachable, +Then, there should not be any panic and an error with appropriate message should be returned. +Related issue : https://github.com/go-git/go-git/pull/900 +*/ +func (s *SuiteCommon) TestInvalidSocks5Proxy(c *C) { + ep, err := transport.NewEndpoint("git@github.com:foo/bar.git") + c.Assert(err, IsNil) + ep.Proxy.URL = "socks5://127.0.0.1:1080" + + auth, err := NewPublicKeys("foo", testdata.PEMBytes["rsa"], "") + c.Assert(err, IsNil) + c.Assert(auth, NotNil) + + ps, err := DefaultClient.NewUploadPackSession(ep, auth) + //Since the proxy server is not running, we expect an error. + c.Assert(ps, IsNil) + c.Assert(err, NotNil) + c.Assert(err, ErrorMatches, "socks connect .* dial tcp 127.0.0.1:1080: .*") +} + type mockSSHConfig struct { Values map[string]map[string]string } @@ -184,3 +206,26 @@ func (c *mockSSHConfig) Get(alias, key string) string { return a[key] } + +type invalidAuthMethod struct { +} + +func (a *invalidAuthMethod) Name() string { + return "invalid" +} + +func (a *invalidAuthMethod) String() string { + return "invalid" +} + +func (s *SuiteCommon) TestCommandWithInvalidAuthMethod(c *C) { + uploadPack := &UploadPackSuite{} + uploadPack.SetUpSuite(c) + r := &runner{} + auth := &invalidAuthMethod{} + + _, err := r.Command("command", uploadPack.newEndpoint(c, "endpoint"), auth) + + c.Assert(err, NotNil) + c.Assert(err, ErrorMatches, "invalid auth method") +} |