aboutsummaryrefslogblamecommitdiffstats
path: root/plumbing/format/packfile/scanner.go
blob: 5d9e8fb65aab47f4b28e8cf933ef77ad6131992a (plain) (tree)
1
2
3
4
5
6
7
8
9


                
               

                       
             

                    
            
                             
              
 


                                                  









                                                                                                     

                                                              

 

                                                                                
                          
                                           

                             
                                     


                             
                     
                          
                       


                                                                               

                                      



                                                                                

 


                                                                          
                                  
 
                              
                        
                                                     

                                
         

 










                                      


                                                                                
                                                                 



                                                
                                     





                                              

         
                                     



                                     
                                      
                           



                       
                                           



                                                                     
                                    
                           
              

 
                                                                      
                                                   
                                 
                                                        





                                    
                                                                 
                                                     
                                          

 
                                                                 
                                                 
                                     

 
                                                                           
                                                                    
                                                     


                                    
                                                                        
                                               
                                     

 




















                                                                          

                                                                              
                                             
                               

         
















                                                                              
                   

                     
                            
                           

                     
                                                   
                       
                               

         
                                                           

                               

         
                       
                                     
                                                           



                                       
                                                 
                                     
                             
                                                       







                                       











                                                      
                                                 




                                   
                                                    
                       
                          

         







                                                                             



                                                                    
                                                                                 
                                 



                                
                                 



                        
                                                                 

                     
                                                
                                                     
         
 




                           

                                                                     

 

                                                                     
                                                         




                                                

                                                        









                                                      

                                                                             
                                                                                    
                             
                                      

                   
                             

                     
              

 














                                                                   
                                                           
                                                          
                                                                

                                                  
 

                                                                 

         
                                         
                                           
                                          
                              
              

 





                                            


                                                                              
                                                                         



                                            








                                                   
                                                
                                                     

                                        
                                             

         
                                   

 
                                      
                                 


                                                            


                  
                                
                                 


                  













                                                                               

 




                                                                
         
                   
 
                 

 



                                                 
 


                                                            
         
 
 

                                                           
 




                                                      

 






                                                        

 

                                       

 



                                                                              
 






                                                            
                 
 

                                                           

         
                            
 
package packfile

import (
	"bufio"
	"bytes"
	"compress/zlib"
	"fmt"
	"hash"
	"hash/crc32"
	"io"
	stdioutil "io/ioutil"
	"sync"

	"github.com/go-git/go-git/v5/plumbing"
	"github.com/go-git/go-git/v5/utils/binary"
	"github.com/go-git/go-git/v5/utils/ioutil"
)

var (
	// ErrEmptyPackfile is returned by Header when no data is found in the packfile.
	ErrEmptyPackfile = NewError("empty packfile")
	// ErrBadSignature is returned by Header when the signature in the packfile is incorrect.
	ErrBadSignature = NewError("malformed pack file signature")
	// ErrUnsupportedVersion is returned by Header when the packfile version is
	// different than VersionSupported.
	ErrUnsupportedVersion = NewError("unsupported packfile version")
	// ErrSeekNotSupported is returned when a seek is requested but the
	// underlying reader does not support seeking.
	ErrSeekNotSupported = NewError("not seek support")
)

// ObjectHeader contains the information related to the object, this information
// is collected from the bytes that precede the content of the object.
//
// Type and Length are decoded from the entry's type/length bytes. Offset is the
// position of the entry in the packfile. Reference is only filled in for
// REFDeltaObject entries (the hash read right after the type/length bytes) and
// OffsetReference only for OFSDeltaObject entries (Offset minus the encoded
// distance read right after the type/length bytes).
type ObjectHeader struct {
	Type            plumbing.ObjectType
	Offset          int64
	Length          int64
	Reference       plumbing.Hash
	OffsetReference int64
}

// Scanner reads a packfile sequentially: first the header, then one object
// entry after another. It keeps a CRC32 of the bytes read for the current
// entry.
type Scanner struct {
	r   *scannerReader
	crc hash.Hash32

	// pendingObject is used to detect if an object has been read, or still
	// is waiting to be read
	pendingObject    *ObjectHeader
	version, objects uint32

	// IsSeekable says if this scanner can do Seek or not; for a Scanner to
	// be seekable, the reader it was given must implement io.Seeker
	IsSeekable bool
}

// NewScanner builds a Scanner that reads packfile data from r. The Scanner is
// flagged as seekable when r also implements io.ReadSeeker.
func NewScanner(r io.Reader) *Scanner {
	checksum := crc32.NewIEEE()
	_, seekable := r.(io.ReadSeeker)

	scanner := &Scanner{
		crc:        checksum,
		IsSeekable: seekable,
	}
	// The reader wrapper feeds everything it reads into the same CRC32 hash.
	scanner.r = newScannerReader(r, checksum)

	return scanner
}

// Reset points the Scanner at a new reader r and clears every piece of state
// kept from the previous one: the running CRC, the pending object, and the
// cached header fields. IsSeekable is recomputed from r.
func (s *Scanner) Reset(r io.Reader) {
	s.r.Reset(r)
	s.crc.Reset()

	s.pendingObject = nil
	s.version, s.objects = 0, 0
	_, s.IsSeekable = r.(io.ReadSeeker)
}

// Header reads the whole packfile header (signature, version and object count).
// It returns the version and the object count and performs checks on the
// validity of the signature and the version fields.
//
// Once a header has been read successfully the values are cached and returned
// by subsequent calls without touching the reader. Nothing is cached when the
// header cannot be read or validated, so a failed call is never reported as a
// success by a later one.
//
// Errors: ErrEmptyPackfile if the reader hits io.EOF while reading the
// signature, ErrBadSignature if the signature does not match,
// ErrUnsupportedVersion if the version is not supported, or any read error.
func (s *Scanner) Header() (version, objects uint32, err error) {
	if s.version != 0 {
		return s.version, s.objects, nil
	}

	sig, err := s.readSignature()
	if err != nil {
		if err == io.EOF {
			err = ErrEmptyPackfile
		}

		return 0, 0, err
	}

	if !s.isValidSignature(sig) {
		return 0, 0, ErrBadSignature
	}

	version, err = s.readVersion()
	if err != nil {
		return version, 0, err
	}

	if !s.isSupportedVersion(version) {
		return version, 0, ErrUnsupportedVersion.AddDetails("%d", version)
	}

	objects, err = s.readCount()
	if err != nil {
		return version, objects, err
	}

	// Only remember the header once every field was read and validated.
	s.version, s.objects = version, objects
	return version, objects, nil
}

// readSignature reads and returns the 4-byte signature field of the packfile.
// If the 4 bytes cannot be read in full it returns an empty slice together
// with the read error.
func (s *Scanner) readSignature() ([]byte, error) {
	buf := make([]byte, 4)

	_, err := io.ReadFull(s.r, buf)
	if err != nil {
		return []byte{}, err
	}

	return buf, nil
}

// isValidSignature reports whether sig is byte-for-byte equal to the
// package-level packfile signature.
func (s *Scanner) isValidSignature(sig []byte) bool {
	return bytes.Equal(sig, signature)
}

// readVersion reads and returns the version field of a packfile, a uint32
// read from the underlying reader via binary.ReadUint32.
func (s *Scanner) readVersion() (uint32, error) {
	return binary.ReadUint32(s.r)
}

// isSupportedVersion reports whether version v is supported by the parser.
// The only supported version is VersionSupported, declared elsewhere in this
// package.
func (s *Scanner) isSupportedVersion(v uint32) bool {
	return v == VersionSupported
}

// readCount reads and returns the object-count field of a packfile, a uint32
// read from the underlying reader via binary.ReadUint32.
func (s *Scanner) readCount() (uint32, error) {
	return binary.ReadUint32(s.r)
}

// SeekObjectHeader moves the reader to the given absolute offset and returns
// the ObjectHeader of the object entry found there. The returned header has
// its Offset field set to offset.
func (s *Scanner) SeekObjectHeader(offset int64) (*ObjectHeader, error) {
	// A caller that seeks is assumed not to care about the packfile header,
	// so pretend it has already been read.
	if s.version == 0 {
		s.version = VersionSupported
	}

	var header *ObjectHeader
	_, err := s.r.Seek(offset, io.SeekStart)
	if err == nil {
		header, err = s.nextObjectHeader()
	}
	if err != nil {
		return nil, err
	}

	header.Offset = offset
	return header, nil
}

// NextObjectHeader returns the ObjectHeader of the next object entry in the
// reader. Before decoding it, the packfile header is read if it has not been
// yet, and any object whose content is still pending is discarded.
func (s *Scanner) NextObjectHeader() (*ObjectHeader, error) {
	err := s.doPending()
	if err != nil {
		return nil, err
	}

	// Remember where the entry starts so it can be recorded in the header.
	start, err := s.r.Seek(0, io.SeekCurrent)
	if err != nil {
		return nil, err
	}

	header, err := s.nextObjectHeader()
	if err != nil {
		return nil, err
	}
	header.Offset = start

	return header, nil
}

// nextObjectHeader decodes the ObjectHeader found at the current position of
// the reader and registers it as the pending object. Offset is filled with the
// current reader position; the exported callers overwrite it with the offset
// they computed themselves.
func (s *Scanner) nextObjectHeader() (*ObjectHeader, error) {
	// Push any buffered bytes into the hash and start a fresh CRC for this
	// entry.
	s.r.Flush()
	s.crc.Reset()

	header := new(ObjectHeader)
	s.pendingObject = header

	pos, err := s.r.Seek(0, io.SeekCurrent)
	header.Offset = pos
	if err != nil {
		return nil, err
	}

	if header.Type, header.Length, err = s.readObjectTypeAndLength(); err != nil {
		return nil, err
	}

	if header.Type == plumbing.OFSDeltaObject {
		// The base object is addressed by its distance back from this entry.
		distance, err := binary.ReadVariableWidthInt(s.r)
		if err != nil {
			return nil, err
		}

		header.OffsetReference = header.Offset - distance
	} else if header.Type == plumbing.REFDeltaObject {
		// The base object is addressed by its hash.
		header.Reference, err = binary.ReadHash(s.r)
		if err != nil {
			return nil, err
		}
	}

	return header, nil
}

// doPending performs the work that must happen before a new object header can
// be read: it reads the packfile header when no version is known yet, and then
// discards the content of the pending object, if any.
func (s *Scanner) doPending() error {
	if s.version != 0 {
		return s.discardObjectIfNeeded()
	}

	var err error
	if s.version, s.objects, err = s.Header(); err != nil {
		return err
	}

	return s.discardObjectIfNeeded()
}

// discardObjectIfNeeded skips the content of the pending object, if there is
// one, by inflating it into a discarding writer. It fails if the number of
// bytes discarded differs from the length announced in the object header.
func (s *Scanner) discardObjectIfNeeded() error {
	pending := s.pendingObject
	if pending == nil {
		return nil
	}

	discarded, _, err := s.NextObject(stdioutil.Discard)
	switch {
	case err != nil:
		return err
	case discarded != pending.Length:
		return fmt.Errorf(
			"error discarding object, discarded %d, expected %d",
			discarded, pending.Length,
		)
	}

	return nil
}

// readObjectTypeAndLength reads and returns the object type and the length
// field from an object entry in a packfile. The first byte carries the type
// and the start of the length; readLength consumes any continuation bytes.
func (s *Scanner) readObjectTypeAndLength() (plumbing.ObjectType, int64, error) {
	typ, first, err := s.readType()
	if err != nil {
		return typ, 0, err
	}

	length, err := s.readLength(first)
	return typ, length, err
}

// readType reads the first byte of an object entry and returns the object type
// encoded in it together with the raw byte, which the caller still needs to
// decode the length.
func (s *Scanner) readType() (plumbing.ObjectType, byte, error) {
	first, err := s.r.ReadByte()
	if err != nil {
		return plumbing.ObjectType(0), 0, err
	}

	return parseType(first), first, nil
}

// parseType extracts the object type from the first byte b of an object entry:
// the bits selected by maskType, shifted right by firstLengthBits.
func parseType(b byte) plumbing.ObjectType {
	return plumbing.ObjectType((b & maskType) >> firstLengthBits)
}

// readLength decodes the length of an object entry. The length is encoded in
// the last 4 bits of the first byte and in the last 7 bits of each subsequent
// byte; a byte with its continuation bit set announces that another one
// follows, so the last byte has a 0 MSB. first is the already-read first byte
// of the entry.
func (s *Scanner) readLength(first byte) (int64, error) {
	size := int64(first & maskFirstLength)
	shift := firstLengthBits

	for cur := first; cur&maskContinue > 0; {
		next, err := s.r.ReadByte()
		if err != nil {
			return 0, err
		}
		cur = next

		size += int64(cur&maskLength) << shift
		shift += lengthBits
	}

	return size, nil
}

// NextObject writes the inflated content of the next object into the writer w.
// It returns the number of bytes written, the CRC32 accumulated while reading
// the object entry, and an error, if any. The CRC is reset afterwards and the
// object is no longer considered pending.
func (s *Scanner) NextObject(w io.Writer) (written int64, crc32 uint32, err error) {
	// Start the next entry with a clean CRC, once the sum has been taken.
	defer s.crc.Reset()

	s.pendingObject = nil
	written, err = s.copyObject(w)

	// Make sure every byte read so far has reached the hash before summing.
	s.r.Flush()
	return written, s.crc.Sum32(), err
}

// ReadObject returns a reader for the inflated content of the current object
// and an error. The object is no longer considered pending after this call.
//
// The zlib reader is taken from zlibReaderPool. It is handed back to the pool
// by the closer function attached to the returned io.ReadCloser, or
// immediately if it cannot be reset onto the scanner's reader.
func (s *Scanner) ReadObject() (io.ReadCloser, error) {
	s.pendingObject = nil
	zr := zlibReaderPool.Get().(io.ReadCloser)

	if err := zr.(zlib.Resetter).Reset(s.r, nil); err != nil {
		// Nobody will ever close zr on this path, so return it to the pool
		// here, as copyObject does on its own failure path.
		zlibReaderPool.Put(zr)
		return nil, fmt.Errorf("zlib reset error: %s", err)
	}

	return ioutil.NewReadCloserWithCloser(zr, func() error {
		zlibReaderPool.Put(zr)
		return nil
	}), nil
}

// copyObject inflates the zlib stream found at the current position of the
// reader and writes the result into w. It returns the number of bytes written.
// Both the zlib reader and the copy buffer come from pools and are returned to
// them before the function exits.
func (s *Scanner) copyObject(w io.Writer) (n int64, err error) {
	zr := zlibReaderPool.Get().(io.ReadCloser)
	defer zlibReaderPool.Put(zr)

	resetter := zr.(zlib.Resetter)
	if err = resetter.Reset(s.r, nil); err != nil {
		return 0, fmt.Errorf("zlib reset error: %s", err)
	}
	// Runs before the reader goes back to the pool; a close error is reported
	// through err.
	defer ioutil.CheckClose(zr, &err)

	scratch := byteSlicePool.Get().([]byte)
	n, err = io.CopyBuffer(w, zr, scratch)
	byteSlicePool.Put(scratch)

	return n, err
}

// byteSlicePool hands out reusable 32 KiB byte slices, used as scratch buffers
// for io.CopyBuffer.
var byteSlicePool = sync.Pool{
	New: func() interface{} {
		const size = 32 * 1024
		return make([]byte, size)
	},
}

// SeekFromStart moves the reader to offset, counted from the start, and
// returns the position the reader was at before the move. If the current
// position cannot be determined it returns -1 and the error.
func (s *Scanner) SeekFromStart(offset int64) (previous int64, err error) {
	// A caller that seeks is assumed not to care about the packfile header,
	// so pretend it has already been read.
	if s.version == 0 {
		s.version = VersionSupported
	}

	if previous, err = s.r.Seek(0, io.SeekCurrent); err != nil {
		return -1, err
	}

	if _, err = s.r.Seek(offset, io.SeekStart); err != nil {
		return previous, err
	}

	return previous, nil
}

// Checksum returns the checksum of the packfile, read as a hash from the
// current position of the reader after discarding the pending object, if any.
func (s *Scanner) Checksum() (plumbing.Hash, error) {
	if err := s.discardObjectIfNeeded(); err != nil {
		return plumbing.ZeroHash, err
	}

	return binary.ReadHash(s.r)
}

// Close drains the reader: everything left in it is read and thrown away until
// io.EOF is reached or a read fails, in which case that error is returned.
func (s *Scanner) Close() error {
	scratch := byteSlicePool.Get().([]byte)
	defer byteSlicePool.Put(scratch)

	_, err := io.CopyBuffer(stdioutil.Discard, s.r, scratch)
	return err
}

// Flush is a deprecated no-op: it performs no work and always returns nil.
func (s *Scanner) Flush() error {
	return nil
}

// scannerReader has the following characteristics:
//   - Provides an io.ReadSeeker impl for bufio.Reader, when the underlying
//     reader supports it.
//   - Keeps track of the current read position, for when the underlying reader
//     isn't an io.ReadSeeker, but we still want to know the current offset.
//   - Writes to the hash writer what it reads, with the aid of a smaller buffer.
//     The buffer helps avoid a performance penalty for performing small writes
//     to the crc32 hash writer.
type scannerReader struct {
	reader io.Reader
	crc    io.Writer
	rbuf   *bufio.Reader
	wbuf   *bufio.Writer
	offset int64
}

// newScannerReader returns a scannerReader that reads from r and mirrors every
// byte it reads into h through a 64-byte write buffer.
func newScannerReader(r io.Reader, h io.Writer) *scannerReader {
	sr := new(scannerReader)
	sr.crc = h
	sr.rbuf = bufio.NewReader(nil)
	sr.wbuf = bufio.NewWriterSize(nil, 64)

	// Reset attaches both buffers to their real endpoints.
	sr.Reset(r)

	return sr
}

// Reset makes the scannerReader read from reader, dropping anything buffered
// for the previous one. The tracked offset starts at the current position of
// reader when it can seek, and at 0 otherwise.
func (r *scannerReader) Reset(reader io.Reader) {
	r.reader = reader
	r.rbuf.Reset(reader)
	r.wbuf.Reset(r.crc)

	seeker, ok := reader.(io.ReadSeeker)
	if !ok {
		r.offset = 0
		return
	}

	// The seek error is deliberately ignored; whatever position the seeker
	// reports is used as the starting offset.
	r.offset, _ = seeker.Seek(0, io.SeekCurrent)
}

// Read reads into p from the buffered reader, advances the tracked offset by
// the number of bytes read and forwards those bytes to the hash write buffer.
// An error from the hash write buffer takes precedence over the read error.
func (r *scannerReader) Read(p []byte) (n int, err error) {
	n, err = r.rbuf.Read(p)
	r.offset += int64(n)

	if _, werr := r.wbuf.Write(p[:n]); werr != nil {
		return n, werr
	}

	return n, err
}

// ReadByte reads a single byte from the buffered reader. On success the
// tracked offset is advanced by one and the byte is forwarded to the hash
// write buffer, whose error, if any, is returned.
func (r *scannerReader) ReadByte() (b byte, err error) {
	if b, err = r.rbuf.ReadByte(); err != nil {
		return b, err
	}

	r.offset++
	return b, r.wbuf.WriteByte(b)
}

// Flush pushes the bytes still held in the hash write buffer into the hash
// writer and returns the error reported by that flush.
func (r *scannerReader) Flush() error {
	return r.wbuf.Flush()
}

// Seek seeks to a location and returns the resulting position. If the
// underlying reader is not an io.ReadSeeker, then only a position query
// (whence=io.SeekCurrent with offset 0) is supported; any other operation
// fails with ErrSeekNotSupported.
//
// A position query never touches the underlying reader: it reports the
// tracked offset. A relative seek (io.SeekCurrent with a non-zero offset) is
// resolved against the tracked offset rather than against the position of the
// underlying reader, which may be further ahead because of read buffering.
//
// When the underlying seek fails, the tracked offset and the read buffer are
// left untouched and the unchanged offset is returned with the error.
func (r *scannerReader) Seek(offset int64, whence int) (int64, error) {
	queryOnly := whence == io.SeekCurrent && offset == 0

	seeker, ok := r.reader.(io.ReadSeeker)
	if !ok {
		if !queryOnly {
			return -1, ErrSeekNotSupported
		}

		return r.offset, nil
	}

	if queryOnly {
		return r.offset, nil
	}

	if whence == io.SeekCurrent {
		// The underlying reader has been read ahead by the buffer, so turn
		// the relative seek into an absolute one from the logical position.
		offset += r.offset
		whence = io.SeekStart
	}

	pos, err := seeker.Seek(offset, whence)
	if err != nil {
		return r.offset, err
	}

	// Buffered data belongs to the old position; drop it.
	r.offset = pos
	r.rbuf.Reset(r.reader)

	return pos, nil
}