package v2

import (
	"bytes"
	"crypto"
	encbin "encoding/binary"
	"errors"
	"io"
	"time"

	"github.com/go-git/go-git/v5/plumbing"
	"github.com/go-git/go-git/v5/plumbing/hash"
	"github.com/go-git/go-git/v5/utils/binary"
)

var (
	// ErrUnsupportedVersion is returned by OpenFileIndex when the commit graph
	// file version is not supported.
	ErrUnsupportedVersion = errors.New("unsupported version")
	// ErrUnsupportedHash is returned by OpenFileIndex when the commit graph
	// hash function is not supported. Currently only SHA-1 and SHA-256 are
	// supported.
	ErrUnsupportedHash = errors.New("unsupported hash algorithm")
	// ErrMalformedCommitGraphFile is returned by OpenFileIndex when the commit
	// graph file is corrupted.
	ErrMalformedCommitGraphFile = errors.New("malformed commit graph file")

	commitFileSignature = []byte{'C', 'G', 'P', 'H'}

	parentNone        = uint32(0x70000000)
	parentOctopusUsed = uint32(0x80000000)
	parentOctopusMask = uint32(0x7fffffff)
	parentLast        = uint32(0x80000000)
)

const (
	szUint32 = 4
	szUint64 = 8

	szSignature  = 4
	szHeader     = 4
	szCommitData = 2*szUint32 + szUint64

	lenFanout = 256
)

type fileIndex struct {
	reader                ReaderAtCloser
	fanout                [lenFanout]uint32
	offsets               [lenChunks]int64
	parent                Index
	hasGenerationV2       bool
	minimumNumberOfHashes uint32
}

// ReaderAtCloser is an interface that combines io.ReaderAt and io.Closer.
type ReaderAtCloser interface {
	io.ReaderAt
	io.Closer
}

// OpenFileIndex opens a serialized commit graph file in the format described at
// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt
func OpenFileIndex(reader ReaderAtCloser) (Index, error) {
	return OpenFileIndexWithParent(reader, nil)
}

// OpenFileIndexWithParent opens a serialized commit graph file in the format described at
// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt
func OpenFileIndexWithParent(reader ReaderAtCloser, parent Index) (Index, error) {
	if reader == nil {
		return nil, io.ErrUnexpectedEOF
	}

	fi := &fileIndex{reader: reader, parent: parent}

	if err := fi.verifyFileHeader(); err != nil {
		return nil, err
	}
	if err := fi.readChunkHeaders(); err != nil {
		return nil, err
	}
	if err := fi.readFanout(); err != nil {
		return nil, err
	}

	fi.hasGenerationV2 = fi.offsets[GenerationDataChunk] > 0
	if fi.parent != nil {
		fi.hasGenerationV2 = fi.hasGenerationV2 && fi.parent.HasGenerationV2()
	}

	if fi.parent != nil {
		fi.minimumNumberOfHashes = fi.parent.MaximumNumberOfHashes()
	}

	return fi, nil
}
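// The sketch below is illustrative only and is not part of the package API: it
// shows one way a caller might open an on-disk commit-graph file with
// OpenFileIndex, assuming the hypothetical path ".git/objects/info/commit-graph"
// and using an *os.File (which satisfies ReaderAtCloser) as the reader.
//
//	f, err := os.Open(".git/objects/info/commit-graph")
//	if err != nil {
//		// handle the error
//	}
//	idx, err := OpenFileIndex(f)
//	if err != nil {
//		f.Close()
//		// handle the error
//	}
//	defer idx.Close() // also closes f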
// Close closes the underlying reader and the parent index if it exists.
func (fi *fileIndex) Close() (err error) {
	if fi.parent != nil {
		defer func() {
			parentErr := fi.parent.Close()
			// only report the error from the parent if there is no error from the reader
			if err == nil {
				err = parentErr
			}
		}()
	}
	err = fi.reader.Close()
	return
}

func (fi *fileIndex) verifyFileHeader() error {
	// Verify file signature
	signature := make([]byte, szSignature)
	if _, err := fi.reader.ReadAt(signature, 0); err != nil {
		return err
	}
	if !bytes.Equal(signature, commitFileSignature) {
		return ErrMalformedCommitGraphFile
	}

	// Read and verify the file header
	header := make([]byte, szHeader)
	if _, err := fi.reader.ReadAt(header, szHeader); err != nil {
		return err
	}
	if header[0] != 1 {
		return ErrUnsupportedVersion
	}
	if !(hash.CryptoType == crypto.SHA1 && header[1] == 1) &&
		!(hash.CryptoType == crypto.SHA256 && header[1] == 2) {
		// Unknown hash type / unsupported hash type
		return ErrUnsupportedHash
	}

	return nil
}

func (fi *fileIndex) readChunkHeaders() error {
	// The chunk table is a list of 4-byte chunk signatures and uint64 offsets into the file
	chunkID := make([]byte, szChunkSig)
	for i := 0; ; i++ {
		chunkHeader := io.NewSectionReader(fi.reader, szSignature+szHeader+(int64(i)*(szChunkSig+szUint64)), szChunkSig+szUint64)
		if _, err := io.ReadAtLeast(chunkHeader, chunkID, szChunkSig); err != nil {
			return err
		}
		chunkOffset, err := binary.ReadUint64(chunkHeader)
		if err != nil {
			return err
		}

		chunkType, ok := ChunkTypeFromBytes(chunkID)
		if !ok {
			continue
		}
		if chunkType == ZeroChunk || int(chunkType) >= len(fi.offsets) {
			break
		}
		fi.offsets[chunkType] = int64(chunkOffset)
	}

	if fi.offsets[OIDFanoutChunk] <= 0 || fi.offsets[OIDLookupChunk] <= 0 || fi.offsets[CommitDataChunk] <= 0 {
		return ErrMalformedCommitGraphFile
	}

	return nil
}

func (fi *fileIndex) readFanout() error {
	// The Fanout table is a 256 entry table of the number (as uint32) of OIDs with first byte at most i.
	// Thus F[255] stores the total number of commits (N)
	fanoutReader := io.NewSectionReader(fi.reader, fi.offsets[OIDFanoutChunk], lenFanout*szUint32)
	for i := 0; i < 256; i++ {
		fanoutValue, err := binary.ReadUint32(fanoutReader)
		if err != nil {
			return err
		}
		if fanoutValue > 0x7fffffff {
			return ErrMalformedCommitGraphFile
		}
		fi.fanout[i] = fanoutValue
	}
	return nil
}

// GetIndexByHash looks up the provided hash in the commit-graph fanout and returns the index of the commit data for the given hash.
func (fi *fileIndex) GetIndexByHash(h plumbing.Hash) (uint32, error) {
	var oid plumbing.Hash

	// Find the hash in the oid lookup table
	var low uint32
	if h[0] == 0 {
		low = 0
	} else {
		low = fi.fanout[h[0]-1]
	}
	high := fi.fanout[h[0]]
	for low < high {
		mid := (low + high) >> 1
		offset := fi.offsets[OIDLookupChunk] + int64(mid)*hash.Size

		if _, err := fi.reader.ReadAt(oid[:], offset); err != nil {
			return 0, err
		}
		cmp := bytes.Compare(h[:], oid[:])
		if cmp < 0 {
			high = mid
		} else if cmp == 0 {
			return mid + fi.minimumNumberOfHashes, nil
		} else {
			low = mid + 1
		}
	}

	if fi.parent != nil {
		idx, err := fi.parent.GetIndexByHash(h)
		if err != nil {
			return 0, err
		}
		return idx, nil
	}

	return 0, plumbing.ErrObjectNotFound
}
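// Worked example of the fanout bounds used by GetIndexByHash above, with made-up
// numbers: if fanout[0x3f] == 100 and fanout[0x40] == 120, then the OIDs whose
// first byte is 0x40 occupy positions [100, 120) of the OID lookup chunk, so a
// lookup for a hash starting with byte 0x40 binary-searches with low=100 and
// high=120. fanout[0xff] is the total number of commits stored in this file.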
// GetCommitDataByIndex returns the commit data for the given index in the commit-graph.
func (fi *fileIndex) GetCommitDataByIndex(idx uint32) (*CommitData, error) {
	if idx < fi.minimumNumberOfHashes {
		if fi.parent != nil {
			data, err := fi.parent.GetCommitDataByIndex(idx)
			if err != nil {
				return nil, err
			}
			return data, nil
		}

		return nil, plumbing.ErrObjectNotFound
	}
	idx -= fi.minimumNumberOfHashes
	if idx >= fi.fanout[0xff] {
		return nil, plumbing.ErrObjectNotFound
	}

	offset := fi.offsets[CommitDataChunk] + int64(idx)*(hash.Size+szCommitData)
	commitDataReader := io.NewSectionReader(fi.reader, offset, hash.Size+szCommitData)

	treeHash, err := binary.ReadHash(commitDataReader)
	if err != nil {
		return nil, err
	}
	parent1, err := binary.ReadUint32(commitDataReader)
	if err != nil {
		return nil, err
	}
	parent2, err := binary.ReadUint32(commitDataReader)
	if err != nil {
		return nil, err
	}
	genAndTime, err := binary.ReadUint64(commitDataReader)
	if err != nil {
		return nil, err
	}

	var parentIndexes []uint32
	if parent2&parentOctopusUsed == parentOctopusUsed {
		// Octopus merge - Look-up the extra parents from the extra edge list
		// The extra edge list is a list of uint32s, each of which is an index into the Commit Data table,
		// terminated by an index with the most significant bit on.
		parentIndexes = []uint32{parent1 & parentOctopusMask}
		offset := fi.offsets[ExtraEdgeListChunk] + szUint32*int64(parent2&parentOctopusMask)
		buf := make([]byte, szUint32)
		for {
			_, err := fi.reader.ReadAt(buf, offset)
			if err != nil {
				return nil, err
			}

			parent := encbin.BigEndian.Uint32(buf)
			offset += szUint32
			parentIndexes = append(parentIndexes, parent&parentOctopusMask)
			if parent&parentLast == parentLast {
				break
			}
		}
	} else if parent2 != parentNone {
		parentIndexes = []uint32{parent1 & parentOctopusMask, parent2 & parentOctopusMask}
	} else if parent1 != parentNone {
		parentIndexes = []uint32{parent1 & parentOctopusMask}
	}

	parentHashes, err := fi.getHashesFromIndexes(parentIndexes)
	if err != nil {
		return nil, err
	}

	generationV2 := uint64(0)

	if fi.hasGenerationV2 {
		// set the GenerationV2 result to the commit time
		generationV2 = uint64(genAndTime & 0x3FFFFFFFF)

		// Next read the generation (offset) data from the generation data chunk
		offset := fi.offsets[GenerationDataChunk] + int64(idx)*szUint32
		buf := make([]byte, szUint32)
		if _, err := fi.reader.ReadAt(buf, offset); err != nil {
			return nil, err
		}
		genV2Data := encbin.BigEndian.Uint32(buf)

		// check if the data is an overflow that needs to be looked up in the overflow chunk
		if genV2Data&0x80000000 > 0 {
			// Overflow
			offset := fi.offsets[GenerationDataOverflowChunk] + int64(genV2Data&0x7fffffff)*szUint64
			buf := make([]byte, 8)
			if _, err := fi.reader.ReadAt(buf, offset); err != nil {
				return nil, err
			}

			generationV2 += encbin.BigEndian.Uint64(buf)
		} else {
			generationV2 += uint64(genV2Data)
		}
	}

	return &CommitData{
		TreeHash:      treeHash,
		ParentIndexes: parentIndexes,
		ParentHashes:  parentHashes,
		Generation:    genAndTime >> 34,
		GenerationV2:  generationV2,
		When:          time.Unix(int64(genAndTime&0x3FFFFFFFF), 0),
	}, nil
}
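// Field layout notes for the commit data decoded in GetCommitDataByIndex above
// (per the commit-graph format document linked from OpenFileIndex):
//   - genAndTime packs the generation number in its upper 30 bits
//     (genAndTime >> 34) and the commit time in its lower 34 bits
//     (genAndTime & 0x3FFFFFFFF).
//   - parent2 with the high bit set (parentOctopusUsed) is not a parent index;
//     its lower 31 bits are instead an offset into the Extra Edge List chunk,
//     whose terminating entry has the high bit (parentLast) set.
//   - a parent slot equal to parentNone (0x70000000) means "no parent".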
// GetHashByIndex looks up the hash for the given index in the commit-graph.
func (fi *fileIndex) GetHashByIndex(idx uint32) (found plumbing.Hash, err error) {
	if idx < fi.minimumNumberOfHashes {
		if fi.parent != nil {
			return fi.parent.GetHashByIndex(idx)
		}
		return found, ErrMalformedCommitGraphFile
	}
	idx -= fi.minimumNumberOfHashes
	if idx >= fi.fanout[0xff] {
		return found, ErrMalformedCommitGraphFile
	}

	offset := fi.offsets[OIDLookupChunk] + int64(idx)*hash.Size
	if _, err := fi.reader.ReadAt(found[:], offset); err != nil {
		return found, err
	}

	return found, nil
}

func (fi *fileIndex) getHashesFromIndexes(indexes []uint32) ([]plumbing.Hash, error) {
	hashes := make([]plumbing.Hash, len(indexes))

	for i, idx := range indexes {
		if idx < fi.minimumNumberOfHashes {
			if fi.parent != nil {
				hash, err := fi.parent.GetHashByIndex(idx)
				if err != nil {
					return nil, err
				}
				hashes[i] = hash
				continue
			}

			return nil, ErrMalformedCommitGraphFile
		}

		idx -= fi.minimumNumberOfHashes
		if idx >= fi.fanout[0xff] {
			return nil, ErrMalformedCommitGraphFile
		}

		offset := fi.offsets[OIDLookupChunk] + int64(idx)*hash.Size
		if _, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil {
			return nil, err
		}
	}

	return hashes, nil
}

// Hashes returns all the hashes that are available in the index.
func (fi *fileIndex) Hashes() []plumbing.Hash {
	hashes := make([]plumbing.Hash, fi.fanout[0xff]+fi.minimumNumberOfHashes)
	for i := uint32(0); i < fi.minimumNumberOfHashes; i++ {
		hash, err := fi.parent.GetHashByIndex(i)
		if err != nil {
			return nil
		}
		hashes[i] = hash
	}

	for i := uint32(0); i < fi.fanout[0xff]; i++ {
		offset := fi.offsets[OIDLookupChunk] + int64(i)*hash.Size
		if n, err := fi.reader.ReadAt(hashes[i+fi.minimumNumberOfHashes][:], offset); err != nil || n < hash.Size {
			return nil
		}
	}
	return hashes
}

func (fi *fileIndex) HasGenerationV2() bool {
	return fi.hasGenerationV2
}

func (fi *fileIndex) MaximumNumberOfHashes() uint32 {
	return fi.minimumNumberOfHashes + fi.fanout[0xff]
}
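// Illustrative sketch (not part of this file) of chaining two split
// commit-graph layers with OpenFileIndexWithParent; the file names are
// hypothetical, and a real chain is normally discovered via the
// commit-graph-chain file:
//
//	base, err := os.Open("graph-base.graph")
//	// handle err
//	baseIdx, err := OpenFileIndex(base)
//	// handle err
//	tip, err := os.Open("graph-tip.graph")
//	// handle err
//	idx, err := OpenFileIndexWithParent(tip, baseIdx)
//	// handle err; lookups on idx fall back to baseIdx for older commits,
//	// and idx.Close() closes baseIdx as well.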