aboutsummaryrefslogblamecommitdiffstats
path: root/formats/packfile/reader.go
blob: 959e41199f273bf7b35b1685f1d4f499f7fe6bac (plain) (tree)
1
2
3
4
5
6
7
8
9



                
                         

             

                   
                                       

                                            

 

               











                                                                                               

                                        
 



                                   
 
 

                                                                       




                                                                                     
                              
 



                                                                                                                                          
 
                               

                                   

 


                                                             
                                                        
 
                                              
                                                      
         

 
                                                            
                                                            

                                                  
                                  
                                                     
                 
 
                              

         
                                     
                       
                              

         
                                       
                                                

         
                                   
                       
                              

         

                                                    

         
                                                 

 
                                         
                                    
                                                           


                          
                                                             
                                           




                  
                                              
                        
                                                                          





                             
                                                  



                                                                                      
                                         

                                            



                                                

                                                                            

                 
                            







                                  
                                                      

                        
 
                       
                                                   


                               
                                                 
                                  
                                                                


                                 
                                                           


                                       
                                                   
                                                               


                          


                         
                     
                           
                                 
                                         
                                 
                                                
                                                                                 
                                       
                
                                                            


                       

 

                                                      
                                                           

                          
 

                                              


                          
                                      
                

                                                     
 



                                                    

         



                                        


                  
                                                                   

                                               

                          

         

                                              


                          
                                          
                
                                                                        

         


                                                   
                           
                                                    

         


                                        



                  
                                                    
                                      

 

                                             
                       
                                          
                                             
                 
 
                                           
         
 

                        

                               

 
                         




                                           

 





                                                            

 





                                                                          
package packfile

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"io/ioutil"

	"gopkg.in/src-d/go-git.v2/core"

	"github.com/klauspost/compress/zlib"
)

// Format identifies which delta encoding a packfile is expected to use; see
// UnknownFormat, OFSDeltaFormat and REFDeltaFormat.
type Format int

// Errors produced while reading a packfile. Several of them are returned as
// derived copies carrying extra detail (see ReaderError.n), so the returned
// value is not always identical to the variable declared here.
var (
	// EmptyRepositoryErr is returned by Read when the stream ends (io.EOF)
	// while the packfile signature is being read.
	EmptyRepositoryErr        = newError("empty repository")
	// UnsupportedVersionErr is returned by Read when the packfile version is
	// greater than VersionSupported.
	UnsupportedVersionErr     = newError("unsupported packfile version")
	// MaxObjectsLimitReachedErr is returned by Read when the declared object
	// count is greater than Reader.MaxObjectsLimit.
	MaxObjectsLimitReachedErr = newError("max. objects limit reached")
	// MalformedPackfileErr is returned when the first four bytes of the
	// stream are not 'PACK'.
	MalformedPackfileErr      = newError("malformed pack file, does not start with 'PACK'")
	// InvalidObjectErr is the base error for an entry whose type is not one
	// of the object types handled by the reader.
	InvalidObjectErr          = newError("invalid git object")
	// PatchingErr is the base error used when applying a delta yields no
	// result.
	PatchingErr               = newError("patching error")
	// PackEntryNotFoundErr is the base error used when an ofs-delta points to
	// an offset at which no object was recorded.
	PackEntryNotFoundErr      = newError("can't find a pack entry")
	// ObjectNotFoundErr is the base error used when the base object of a
	// ref-delta is not present in the object storage.
	ObjectNotFoundErr         = newError("can't find a object")
	// ZLibErr is the base error used when the zlib reader cannot be created
	// for a reason other than an invalid zlib header.
	ZLibErr                   = newError("zlib reading error")
)

const (
	// DefaultMaxObjectsLimit is the value NewReader assigns to
	// Reader.MaxObjectsLimit.
	DefaultMaxObjectsLimit = 1 << 20

	// VersionSupported is the highest packfile version accepted by Read;
	// any greater version is rejected with UnsupportedVersionErr.
	VersionSupported        = 2
	// UnknownFormat is the zero value of Format. The reader records object
	// offsets for it, exactly as it does for OFSDeltaFormat.
	UnknownFormat    Format = 0
	// OFSDeltaFormat makes the reader record the start offset of every
	// object so that ofs-deltas can locate their base.
	OFSDeltaFormat   Format = 1
	// REFDeltaFormat makes the reader skip the offset bookkeeping.
	REFDeltaFormat   Format = 2
)

// Reader reads a packfile from a binary stream, splitting it into objects.
type Reader struct {
	// MaxObjectsLimit is the maximum number of objects a packfile may
	// declare. If the packfile declares more, Read fails with
	// MaxObjectsLimitReachedErr. NewReader initializes it to
	// DefaultMaxObjectsLimit, which is usually more than enough for any
	// repository; raising it for extremely big repositories can exhaust
	// memory.
	MaxObjectsLimit uint32

	// Format specifies whether ref-deltas or ofs-deltas are in use. Choosing
	// the correct format optimizes memory usage: object offsets are only
	// recorded for UnknownFormat and OFSDeltaFormat.
	// https://github.com/git/git/blob/8d530c4d64ffcc853889f7b385f554d53db375ed/Documentation/technical/protocol-capabilities.txt#L154
	Format Format

	// r is the packfile byte source; its position field is used to compute
	// object offsets.
	// s is the storage objects are created in and written to; it is set by
	// Read.
	// offsets maps the position at which an object started to that object's
	// hash, and is consulted when resolving ofs-deltas.
	r       *trackingReader
	s       core.ObjectStorage
	offsets map[int64]core.Hash
}

// NewReader builds a Reader that consumes packfile data from r. The object
// limit starts at DefaultMaxObjectsLimit and the offset table starts empty.
func NewReader(r io.Reader) *Reader {
	reader := new(Reader)
	reader.MaxObjectsLimit = DefaultMaxObjectsLimit
	reader.r = NewTrackingReader(r)
	reader.offsets = map[int64]core.Hash{}

	return reader
}

// Read parses the packfile and stores every object it contains in s.
//
// The stream must start with the 'PACK' signature, followed by a big-endian
// 32-bit version and a big-endian 32-bit object count. Read returns -1 and an
// error when the header is invalid: EmptyRepositoryErr if the stream is empty,
// UnsupportedVersionErr if the version is greater than VersionSupported, and
// MaxObjectsLimitReachedErr if the object count exceeds r.MaxObjectsLimit.
//
// Once the header has been accepted, Read returns the position of the
// underlying tracking reader after the objects were processed, together with
// any error reported while reading them.
func (r *Reader) Read(s core.ObjectStorage) (int64, error) {
	r.s = s
	if err := r.validateHeader(); err != nil {
		if err == io.EOF {
			return -1, EmptyRepositoryErr
		}

		return -1, err
	}

	version, err := r.readInt32()
	if err != nil {
		return -1, err
	}

	if version > VersionSupported {
		return -1, UnsupportedVersionErr
	}

	count, err := r.readInt32()
	if err != nil {
		return -1, err
	}

	if count > r.MaxObjectsLimit {
		return -1, MaxObjectsLimitReachedErr
	}

	// Read the objects first and sample the position afterwards. Written as
	// `return r.r.position, r.readObjects(count)`, the order between the
	// field read and the call is unspecified by the language, so the
	// returned position could be the one from before the objects were read.
	err = r.readObjects(count)

	return r.r.position, err
}

// validateHeader consumes the first four bytes of the stream and checks that
// they spell the packfile signature. Read errors are returned untouched; a
// wrong signature yields MalformedPackfileErr.
func (r *Reader) validateHeader() error {
	signature := []byte{'P', 'A', 'C', 'K'}
	got := make([]byte, 4)

	_, err := io.ReadFull(r.r, got)
	switch {
	case err != nil:
		return err
	case !bytes.Equal(got, signature):
		return MalformedPackfileErr
	}

	return nil
}

// readInt32 decodes the next four bytes of the stream as a big-endian
// unsigned 32-bit integer. On failure it returns 0 and the read error.
func (r *Reader) readInt32() (uint32, error) {
	var decoded uint32

	err := binary.Read(r.r, binary.BigEndian, &decoded)
	if err == nil {
		return decoded, nil
	}

	return 0, err
}

// readObjects reads up to count objects from the stream and stores each one
// in r.s. When r.Format is UnknownFormat or OFSDeltaFormat it also records
// the position at which every object started, so that later ofs-deltas can
// find their base.
//
// Reaching io.EOF ends the loop without an error; any other error is returned
// immediately.
func (r *Reader) readObjects(count uint32) error {
	// This code has 50-80 µs of overhead per object not counting zlib inflation.
	// Together with zlib inflation, it's 400-410 µs for small objects.
	// That's 1 sec for ~2450 objects, ~4.20 MB, or ~250 ms per MB,
	// of which 12-20 % is _not_ zlib inflation (ie. is our code).
	for i := uint32(0); i < count; i++ {
		start := r.r.position
		obj, err := r.newRAWObject()
		if err != nil && err != io.EOF {
			return err
		}

		// newRAWObject returns a nil object when the stream ends before the
		// object header could be read. There is nothing to record or store
		// in that case, and calling a method on it would panic.
		if obj == nil {
			break
		}

		if r.Format == UnknownFormat || r.Format == OFSDeltaFormat {
			r.offsets[start] = obj.Hash()
		}

		r.s.Set(obj)
		if err == io.EOF {
			break
		}
	}

	return nil
}

// newRAWObject reads the next entry from the stream into a new object
// obtained from the storage.
//
// The entry header is a variable-length sequence of bytes. In the first byte,
// bits 4-6 hold the object type and the low four bits hold the least
// significant bits of the size. While the high bit of the last byte read is
// set, another byte follows and contributes seven more size bits.
//
// If the header cannot be read, the returned object is nil. If reading the
// payload fails, the object is returned together with the error.
func (r *Reader) newRAWObject() (core.Object, error) {
	raw := r.s.New()
	var steps int64

	// io.ReadFull guarantees the byte was really read; a bare Read is
	// allowed to return zero bytes with a nil error.
	var buf [1]byte
	if _, err := io.ReadFull(r.r, buf[:]); err != nil {
		return nil, err
	}

	typ := core.ObjectType((buf[0] >> 4) & 7)
	size := int64(buf[0] & 15)
	steps++ // byte we just read to get the type and the first size bits

	var shift uint = 4
	for buf[0]&0x80 == 0x80 {
		if _, err := io.ReadFull(r.r, buf[:]); err != nil {
			return nil, err
		}

		size += int64(buf[0]&0x7f) << shift
		steps++ // byte we just read to extend the size
		shift += 7
	}

	raw.SetType(typ)
	raw.SetSize(size)

	var err error
	switch raw.Type() {
	case core.REFDeltaObject:
		err = r.readREFDelta(raw)
	case core.OFSDeltaObject:
		// steps is the number of header bytes consumed above.
		err = r.readOFSDelta(raw, steps)
	case core.CommitObject, core.TreeObject, core.BlobObject, core.TagObject:
		err = r.readObject(raw)
	default:
		// Call Type so the message shows the type, not the method value.
		err = InvalidObjectErr.n("tag %q", raw.Type())
	}

	return raw, err
}

// readREFDelta reads a ref-delta entry: the hash of the base object followed
// by the zlib-compressed delta. The base is looked up in the storage, the
// delta is applied to its content, and the result is written to raw, which
// takes the type of the base and the size of the patched content.
//
// It returns ObjectNotFoundErr when the base is not in the storage,
// PatchingErr when the delta cannot be applied, and any error hit while
// reading the stream, reading the base content or writing the result.
func (r *Reader) readREFDelta(raw core.Object) error {
	var ref core.Hash
	if _, err := io.ReadFull(r.r, ref[:]); err != nil {
		return err
	}

	buf := bytes.NewBuffer(nil)
	if err := r.inflate(buf); err != nil {
		return err
	}

	referenced, ok := r.s.Get(ref)
	if !ok {
		return ObjectNotFoundErr.n("%s", ref)
	}

	// A failed read would leave a truncated base, so report it instead of
	// patching against partial content.
	d, err := ioutil.ReadAll(referenced.Reader())
	if err != nil {
		return err
	}

	patched := patchDelta(d, buf.Bytes())
	if patched == nil {
		return PatchingErr.n("hash %q", ref)
	}

	raw.SetType(referenced.Type())
	raw.SetSize(int64(len(patched)))
	if _, err := raw.Writer().Write(patched); err != nil {
		return err
	}

	return nil
}

// readOFSDelta reads an ofs-delta entry: an encoded offset to the base object
// followed by the zlib-compressed delta. The base is found through the
// offsets table, the delta is applied to its content, and the result is
// written to raw, which takes the type of the base and the size of the
// patched content.
//
// steps is the number of entry-header bytes already consumed by the caller;
// it is forwarded to decodeOffset.
//
// It returns PackEntryNotFoundErr when no object was recorded at the computed
// offset, ObjectNotFoundErr when the recorded hash is not in the storage,
// PatchingErr when the delta cannot be applied, and any error hit while
// reading the stream, reading the base content or writing the result.
func (r *Reader) readOFSDelta(raw core.Object, steps int64) error {
	start := r.r.position
	offset, err := decodeOffset(r.r, steps)
	if err != nil {
		return err
	}

	buf := bytes.NewBuffer(nil)
	if err := r.inflate(buf); err != nil {
		return err
	}

	ref, ok := r.offsets[start+offset]
	if !ok {
		return PackEntryNotFoundErr.n("offset %d", start+offset)
	}

	// The hash was recorded, but the storage may still not hold the object;
	// dereferencing a missing base would panic.
	referenced, ok := r.s.Get(ref)
	if !ok {
		return ObjectNotFoundErr.n("%s", ref)
	}

	d, err := ioutil.ReadAll(referenced.Reader())
	if err != nil {
		return err
	}

	patched := patchDelta(d, buf.Bytes())
	if patched == nil {
		return PatchingErr.n("hash %q", ref)
	}

	raw.SetType(referenced.Type())
	raw.SetSize(int64(len(patched)))
	if _, err := raw.Writer().Write(patched); err != nil {
		return err
	}

	return nil
}

// readObject inflates the entry's compressed payload straight into the
// writer exposed by raw.
func (r *Reader) readObject(raw core.Object) error {
	dst := raw.Writer()

	return r.inflate(dst)
}

// inflate decompresses one zlib stream from the packfile reader into w.
// An invalid zlib header is reported as zlib.ErrHeader; any other failure to
// create the zlib reader is reported as a ZLibErr carrying the cause. Errors
// from copying the decompressed data are returned as they are.
func (r *Reader) inflate(w io.Writer) error {
	zr, err := zlib.NewReader(r.r)
	switch {
	case err == zlib.ErrHeader:
		return zlib.ErrHeader
	case err != nil:
		return ZLibErr.n("%s", err)
	}

	defer zr.Close()

	_, err = io.Copy(w, zr)

	return err
}

// ReaderError is the error type used by the packfile reader. It pairs a fixed
// reason with optional additional detail.
type ReaderError struct {
	// reason is the base message. additional, when not empty, is appended to
	// it after ": " by Error.
	reason, additional string
}

// newError creates a ReaderError that carries only the given reason and no
// additional detail.
func newError(reason string) *ReaderError {
	e := new(ReaderError)
	e.reason = reason

	return e
}

// Error implements the error interface. The message is the reason alone, or
// "reason: additional" when additional detail is present.
func (e *ReaderError) Error() string {
	if e.additional != "" {
		return fmt.Sprintf("%s: %s", e.reason, e.additional)
	}

	return e.reason
}

// n returns a new ReaderError that keeps the receiver's reason and uses the
// formatted arguments as additional detail. The receiver is left unchanged.
func (e *ReaderError) n(format string, args ...interface{}) *ReaderError {
	detail := fmt.Sprintf(format, args...)
	derived := &ReaderError{reason: e.reason, additional: detail}

	return derived
}