From 565d0b13ea802b61352f992bf1058f0f984aa528 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 22 Apr 2019 12:03:01 +0200 Subject: plumbing: format/commitgraph, add APIs for reading and writing commit-graph files Signed-off-by: Filip Navara --- plumbing/format/commitgraph/encoder.go | 151 +++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 plumbing/format/commitgraph/encoder.go (limited to 'plumbing/format/commitgraph/encoder.go') diff --git a/plumbing/format/commitgraph/encoder.go b/plumbing/format/commitgraph/encoder.go new file mode 100644 index 0000000..aac4d0e --- /dev/null +++ b/plumbing/format/commitgraph/encoder.go @@ -0,0 +1,151 @@ +package commitgraph + +import ( + "crypto/sha1" + "hash" + "io" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/utils/binary" +) + +// Encoder writes MemoryIndex structs to an output stream. +type Encoder struct { + io.Writer + hash hash.Hash +} + +// NewEncoder returns a new stream encoder that writes to w. +func NewEncoder(w io.Writer) *Encoder { + h := sha1.New() + mw := io.MultiWriter(w, h) + return &Encoder{mw, h} +} + +func (e *Encoder) Encode(idx Index) error { + var err error + + // Get all the hashes in the memory index + hashes := idx.Hashes() + + // Sort the hashes and build our index + plumbing.HashesSort(hashes) + hashToIndex := make(map[plumbing.Hash]uint32) + hashFirstToCount := make(map[byte]uint32) + for i, hash := range hashes { + hashToIndex[hash] = uint32(i) + hashFirstToCount[hash[0]]++ + } + + // Find out if we will need large edge table + largeEdgesCount := 0 + for i := 0; i < len(hashes); i++ { + v, _ := idx.GetNodeByIndex(i) + if len(v.ParentHashes) > 2 { + largeEdgesCount += len(v.ParentHashes) - 2 + break + } + } + + chunks := [][]byte{oidFanoutSignature, oidLookupSignature, commitDataSignature} + chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * 20, uint64(len(hashes)) * 36} + if largeEdgesCount > 0 { + chunks = append(chunks, largeEdgeListSignature) + chunkSizes = append(chunkSizes, uint64(largeEdgesCount)*4) + } + + // Write header + if _, err = e.Write(commitFileSignature); err == nil { + _, err = e.Write([]byte{1, 1, byte(len(chunks)), 0}) + } + if err != nil { + return err + } + + // Write chunk headers + offset := uint64(8 + len(chunks)*12 + 12) + for i, signature := range chunks { + if _, err = e.Write(signature); err == nil { + err = binary.WriteUint64(e, offset) + } + if err != nil { + return err + } + offset += chunkSizes[i] + } + if _, err = e.Write([]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}); err != nil { + return err + } + + // Write fanout + var cumulative uint32 + for i := 0; i <= 0xff; i++ { + if err = binary.WriteUint32(e, hashFirstToCount[byte(i)]+cumulative); err != nil { + return err + } + cumulative += hashFirstToCount[byte(i)] + } + + // Write OID lookup + for _, hash := range hashes { + if _, err = e.Write(hash[:]); err != nil { + return err + } + } + + // Write commit data + var largeEdges []uint32 + for _, hash := range hashes { + origIndex, _ := idx.GetIndexByHash(hash) + commitData, _ := idx.GetNodeByIndex(origIndex) + if _, err := e.Write(commitData.TreeHash[:]); err != nil { + return err + } + + var parent1, parent2 uint32 + if len(commitData.ParentHashes) == 0 { + parent1 = parentNone + parent2 = parentNone + } else if len(commitData.ParentHashes) == 1 { + parent1 = hashToIndex[commitData.ParentHashes[0]] + parent2 = parentNone + } else if len(commitData.ParentHashes) == 2 { + parent1 = hashToIndex[commitData.ParentHashes[0]] + parent2 = hashToIndex[commitData.ParentHashes[1]] + } else if len(commitData.ParentHashes) > 2 { + parent1 = hashToIndex[commitData.ParentHashes[0]] + parent2 = uint32(len(largeEdges)) | parentOctopusUsed + for _, parentHash := range commitData.ParentHashes[1:] { + largeEdges = append(largeEdges, hashToIndex[parentHash]) + } + largeEdges[len(largeEdges)-1] |= parentLast + } + + if err = binary.WriteUint32(e, parent1); err == nil { + err = binary.WriteUint32(e, parent2) + } + if err != nil { + return err + } + + unixTime := uint64(commitData.When.Unix()) + unixTime |= uint64(commitData.Generation) << 34 + if err = binary.WriteUint64(e, unixTime); err != nil { + return err + } + } + + // Write large edges if necessary + for _, parent := range largeEdges { + if err = binary.WriteUint32(e, parent); err != nil { + return err + } + } + + // Write checksum + if _, err := e.Write(e.hash.Sum(nil)[:20]); err != nil { + return err + } + + return nil +} -- cgit From 5b30f0c466090923f4f821f05b94573f1336b633 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 23 Apr 2019 14:51:52 +0200 Subject: Split Encoder into smaller functions Signed-off-by: Filip Navara --- plumbing/format/commitgraph/encoder.go | 124 +++++++++++++++++++++------------ 1 file changed, 81 insertions(+), 43 deletions(-) (limited to 'plumbing/format/commitgraph/encoder.go') diff --git a/plumbing/format/commitgraph/encoder.go b/plumbing/format/commitgraph/encoder.go index aac4d0e..a4c5ad3 100644 --- a/plumbing/format/commitgraph/encoder.go +++ b/plumbing/format/commitgraph/encoder.go @@ -25,81 +25,118 @@ func NewEncoder(w io.Writer) *Encoder { func (e *Encoder) Encode(idx Index) error { var err error - // Get all the hashes in the memory index + // Get all the hashes in the input index hashes := idx.Hashes() + // Sort the inout and prepare helper structures we'll need for encoding + hashToIndex, fanout, largeEdgesCount := e.prepare(idx, hashes) + + chunkSignatures := [][]byte{oidFanoutSignature, oidLookupSignature, commitDataSignature} + chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * 20, uint64(len(hashes)) * 36} + if largeEdgesCount > 0 { + chunkSignatures = append(chunkSignatures, largeEdgeListSignature) + chunkSizes = append(chunkSizes, uint64(largeEdgesCount)*4) + } + + if err = e.encodeFileHeader(len(chunkSignatures)); err != nil { + return err + } + if err = e.encodeChunkHeaders(chunkSignatures, chunkSizes); err != nil { + return err + } + if err = e.encodeFanout(fanout); err != nil { + return err + } + if err = e.encodeOidLookup(hashes); err != nil { + return err + } + if largeEdges, err := e.encodeCommitData(hashes, hashToIndex, idx); err == nil { + if err = e.encodeLargeEdges(largeEdges); err != nil { + return err + } + } + if err != nil { + return err + } + return e.encodeChecksum() +} + +func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (hashToIndex map[plumbing.Hash]uint32, fanout []uint32, largeEdgesCount uint32) { // Sort the hashes and build our index plumbing.HashesSort(hashes) - hashToIndex := make(map[plumbing.Hash]uint32) - hashFirstToCount := make(map[byte]uint32) + hashToIndex = make(map[plumbing.Hash]uint32) + fanout = make([]uint32, 256) for i, hash := range hashes { hashToIndex[hash] = uint32(i) - hashFirstToCount[hash[0]]++ + fanout[hash[0]]++ + } + + // Convert the fanout to cumulative values + for i := 1; i <= 0xff; i++ { + fanout[i] += fanout[i-1] } // Find out if we will need large edge table - largeEdgesCount := 0 for i := 0; i < len(hashes); i++ { v, _ := idx.GetNodeByIndex(i) if len(v.ParentHashes) > 2 { - largeEdgesCount += len(v.ParentHashes) - 2 + largeEdgesCount += uint32(len(v.ParentHashes) - 2) break } } - chunks := [][]byte{oidFanoutSignature, oidLookupSignature, commitDataSignature} - chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * 20, uint64(len(hashes)) * 36} - if largeEdgesCount > 0 { - chunks = append(chunks, largeEdgeListSignature) - chunkSizes = append(chunkSizes, uint64(largeEdgesCount)*4) - } + return +} - // Write header +func (e *Encoder) encodeFileHeader(chunkCount int) (err error) { if _, err = e.Write(commitFileSignature); err == nil { - _, err = e.Write([]byte{1, 1, byte(len(chunks)), 0}) - } - if err != nil { - return err + _, err = e.Write([]byte{1, 1, byte(chunkCount), 0}) } + return +} - // Write chunk headers - offset := uint64(8 + len(chunks)*12 + 12) - for i, signature := range chunks { +func (e *Encoder) encodeChunkHeaders(chunkSignatures [][]byte, chunkSizes []uint64) (err error) { + // 8 bytes of file header, 12 bytes for each chunk header and 12 byte for terminator + offset := uint64(8 + len(chunkSignatures)*12 + 12) + for i, signature := range chunkSignatures { if _, err = e.Write(signature); err == nil { err = binary.WriteUint64(e, offset) } if err != nil { - return err + return } offset += chunkSizes[i] } - if _, err = e.Write([]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}); err != nil { - return err + if _, err = e.Write([]byte{0, 0, 0, 0}); err == nil { + err = binary.WriteUint64(e, offset) } + return +} - // Write fanout - var cumulative uint32 +func (e *Encoder) encodeFanout(fanout []uint32) (err error) { for i := 0; i <= 0xff; i++ { - if err = binary.WriteUint32(e, hashFirstToCount[byte(i)]+cumulative); err != nil { - return err + if err = binary.WriteUint32(e, fanout[i]); err != nil { + return } - cumulative += hashFirstToCount[byte(i)] } + return +} - // Write OID lookup +func (e *Encoder) encodeOidLookup(hashes []plumbing.Hash) (err error) { for _, hash := range hashes { if _, err = e.Write(hash[:]); err != nil { return err } } + return +} - // Write commit data - var largeEdges []uint32 +func (e *Encoder) encodeCommitData(hashes []plumbing.Hash, hashToIndex map[plumbing.Hash]uint32, idx Index) (largeEdges []uint32, err error) { for _, hash := range hashes { origIndex, _ := idx.GetIndexByHash(hash) commitData, _ := idx.GetNodeByIndex(origIndex) - if _, err := e.Write(commitData.TreeHash[:]); err != nil { - return err + if _, err = e.Write(commitData.TreeHash[:]); err != nil { + return } var parent1, parent2 uint32 @@ -125,27 +162,28 @@ func (e *Encoder) Encode(idx Index) error { err = binary.WriteUint32(e, parent2) } if err != nil { - return err + return } unixTime := uint64(commitData.When.Unix()) unixTime |= uint64(commitData.Generation) << 34 if err = binary.WriteUint64(e, unixTime); err != nil { - return err + return } } + return +} - // Write large edges if necessary +func (e *Encoder) encodeLargeEdges(largeEdges []uint32) (err error) { for _, parent := range largeEdges { if err = binary.WriteUint32(e, parent); err != nil { - return err + return } } + return +} - // Write checksum - if _, err := e.Write(e.hash.Sum(nil)[:20]); err != nil { - return err - } - - return nil +func (e *Encoder) encodeChecksum() error { + _, err := e.Write(e.hash.Sum(nil)[:20]) + return err } -- cgit From 5630ac19fc41ba608700981531f941137a17c612 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 23 Apr 2019 15:12:26 +0200 Subject: Split OpenFileIndex into smaller functions Signed-off-by: Filip Navara --- plumbing/format/commitgraph/encoder.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'plumbing/format/commitgraph/encoder.go') diff --git a/plumbing/format/commitgraph/encoder.go b/plumbing/format/commitgraph/encoder.go index a4c5ad3..b4a6978 100644 --- a/plumbing/format/commitgraph/encoder.go +++ b/plumbing/format/commitgraph/encoder.go @@ -107,7 +107,7 @@ func (e *Encoder) encodeChunkHeaders(chunkSignatures [][]byte, chunkSizes []uint } offset += chunkSizes[i] } - if _, err = e.Write([]byte{0, 0, 0, 0}); err == nil { + if _, err = e.Write(lastSignature); err == nil { err = binary.WriteUint64(e, offset) } return -- cgit From ab5b89c4d4ba616ff125c3d489c1e5881c2dd14b Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 23 Apr 2019 15:20:32 +0200 Subject: Remove debug print, fix large edge count calculation to make the reencoded file binary identical Signed-off-by: Filip Navara --- plumbing/format/commitgraph/encoder.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'plumbing/format/commitgraph/encoder.go') diff --git a/plumbing/format/commitgraph/encoder.go b/plumbing/format/commitgraph/encoder.go index b4a6978..501b09e 100644 --- a/plumbing/format/commitgraph/encoder.go +++ b/plumbing/format/commitgraph/encoder.go @@ -80,7 +80,7 @@ func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (hashToIndex map[pl for i := 0; i < len(hashes); i++ { v, _ := idx.GetNodeByIndex(i) if len(v.ParentHashes) > 2 { - largeEdgesCount += uint32(len(v.ParentHashes) - 2) + largeEdgesCount += uint32(len(v.ParentHashes) - 1) break } } -- cgit