aboutsummaryrefslogblamecommitdiffstats
path: root/plumbing/format/packfile/parser.go
blob: 62f1d13cb8e4a67213849a67d8ba3209aabac05b (plain) (tree)
1
2
3
4
5
6
7
8




                
             

            


                                                     
                                                  
                                                

 
     


                                                                                






                                                                                                       

 






                                                                                     
                                                                                             






                                                                             





                                                
                                
 
                              

                                                                       



                     

















                                                                            
                                
                                               

         
                       





                                                     







                                                                  
         

































                                                                      



                                                 
                                        


                                             






                                                


                                             
                                                 


                                             

                                                      










                                       

                                             









                                                                 













                                                                          
                                       

                                      

                                              







                                                       
                                             

                                    
                                                                      
                                
                                                                 


                                                                             
                                                                      

                                             
                                                              









                                                                                                                 
                         


                                                                             



                                                                    




































                                                                              



                                  







                                                                                  
                               


                                      
                                            
 








                                                                                               
                                       
                                                  

                 
                                                   
                                           





                                                                     

         
                  


                                        


                                      
                                  
                           
                                         
                                      



                                  


                                                                                                  
 
                                                                                                       
                                  


                                                            


                                                                           
                                                            



                                                                                                          

                                                  
                                                           
                         




                                                                            





                  









                                                                 
                                                                    
                                                                       




                                              

         
                                                                                   
                                                                  
                                                  

                                                                            
                               
                                  
                 
                                 
 

                                   
                               
                                  

                 

                                                

                                                                  

         

                                                                   
                                                

         
                                 

                                            
                                       
                                         
                               
                                  

                 
                                                                         
                               
                                  
                 
                
                                         
                               
                                  
                 

         






                                                                
                                

                                               
                                           
         
                  

 














                                                                      
                               
                    
                      
                         
         
                                  
                          
         

                                      
                                 
                       
                          

         



























                                                                
                       
                          

         
                       
                                   
                                                                                                    
                                                                          
                                  

                 
                  

 
                                                             


                                                          
                                                
                 

                                       
         
 
                                                                       
                          
         
 

                                                             
         
                  

 





                                                                                                                        

         
                       
                                          






                                                                    
 



                                          
         
 
                  

 







                                                                         
 
                        




                                                                                       
















                                                                             
                           

                                 



                                 











                                     
package packfile

import (
	"bytes"
	"errors"
	"fmt"
	"io"

	"github.com/go-git/go-git/v5/plumbing"
	"github.com/go-git/go-git/v5/plumbing/cache"
	"github.com/go-git/go-git/v5/plumbing/storer"
	"github.com/go-git/go-git/v5/utils/ioutil"
	"github.com/go-git/go-git/v5/utils/sync"
)

var (
	// ErrReferenceDeltaNotFound is returned when the reference delta is not
	// found.
	ErrReferenceDeltaNotFound = errors.New("reference delta not found")

	// ErrNotSeekableSource is returned when the source for the parser is not
	// seekable and a storage was not provided, so it can't be parsed.
	ErrNotSeekableSource = errors.New("parser source is not seekable and storage was not provided")

	// ErrDeltaNotCached is returned when the delta could not be found in cache.
	ErrDeltaNotCached = errors.New("delta could not be found in cache")
)

// Observer interface is implemented by index encoders.
type Observer interface {
	// OnHeader is called when a new packfile is opened.
	OnHeader(count uint32) error
	// OnInflatedObjectHeader is called for each object header read.
	OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error
	// OnInflatedObjectContent is called for each decoded object.
	OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32, content []byte) error
	// OnFooter is called when decoding is done.
	OnFooter(h plumbing.Hash) error
}

// Parser decodes a packfile and calls any observer associated to it. Is used
// to generate indexes.
type Parser struct {
	storage    storer.EncodedObjectStorer
	scanner    *Scanner
	count      uint32
	oi         []*objectInfo
	oiByHash   map[plumbing.Hash]*objectInfo
	oiByOffset map[int64]*objectInfo
	checksum   plumbing.Hash

	cache *cache.BufferLRU
	// delta content by offset, only used if source is not seekable
	deltas map[int64][]byte

	ob []Observer
}

// NewParser creates a new Parser. The Scanner source must be seekable.
// If it's not, NewParserWithStorage should be used instead.
func NewParser(scanner *Scanner, ob ...Observer) (*Parser, error) {
	return NewParserWithStorage(scanner, nil, ob...)
}

// NewParserWithStorage creates a new Parser. The scanner source must either
// be seekable or a storage must be provided.
func NewParserWithStorage(
	scanner *Scanner,
	storage storer.EncodedObjectStorer,
	ob ...Observer,
) (*Parser, error) {
	if !scanner.IsSeekable && storage == nil {
		return nil, ErrNotSeekableSource
	}

	var deltas map[int64][]byte
	if !scanner.IsSeekable {
		deltas = make(map[int64][]byte)
	}

	return &Parser{
		storage: storage,
		scanner: scanner,
		ob:      ob,
		count:   0,
		cache:   cache.NewBufferLRUDefault(),
		deltas:  deltas,
	}, nil
}

func (p *Parser) forEachObserver(f func(o Observer) error) error {
	for _, o := range p.ob {
		if err := f(o); err != nil {
			return err
		}
	}
	return nil
}

func (p *Parser) onHeader(count uint32) error {
	return p.forEachObserver(func(o Observer) error {
		return o.OnHeader(count)
	})
}

func (p *Parser) onInflatedObjectHeader(
	t plumbing.ObjectType,
	objSize int64,
	pos int64,
) error {
	return p.forEachObserver(func(o Observer) error {
		return o.OnInflatedObjectHeader(t, objSize, pos)
	})
}

func (p *Parser) onInflatedObjectContent(
	h plumbing.Hash,
	pos int64,
	crc uint32,
	content []byte,
) error {
	return p.forEachObserver(func(o Observer) error {
		return o.OnInflatedObjectContent(h, pos, crc, content)
	})
}

func (p *Parser) onFooter(h plumbing.Hash) error {
	return p.forEachObserver(func(o Observer) error {
		return o.OnFooter(h)
	})
}

// Parse start decoding phase of the packfile.
func (p *Parser) Parse() (plumbing.Hash, error) {
	if err := p.init(); err != nil {
		return plumbing.ZeroHash, err
	}

	if err := p.indexObjects(); err != nil {
		return plumbing.ZeroHash, err
	}

	var err error
	p.checksum, err = p.scanner.Checksum()
	if err != nil && err != io.EOF {
		return plumbing.ZeroHash, err
	}

	if err := p.resolveDeltas(); err != nil {
		return plumbing.ZeroHash, err
	}

	if err := p.onFooter(p.checksum); err != nil {
		return plumbing.ZeroHash, err
	}

	return p.checksum, nil
}

func (p *Parser) init() error {
	_, c, err := p.scanner.Header()
	if err != nil {
		return err
	}

	if err := p.onHeader(c); err != nil {
		return err
	}

	p.count = c
	p.oiByHash = make(map[plumbing.Hash]*objectInfo, p.count)
	p.oiByOffset = make(map[int64]*objectInfo, p.count)
	p.oi = make([]*objectInfo, p.count)

	return nil
}

type objectHeaderWriter func(typ plumbing.ObjectType, sz int64) error

type lazyObjectWriter interface {
	// LazyWriter enables an object to be lazily written.
	// It returns:
	// - w: a writer to receive the object's content.
	// - lwh: a func to write the object header.
	// - err: any error from the initial writer creation process.
	//
	// Note that if the object header is not written BEFORE the writer
	// is used, this will result in an invalid object.
	LazyWriter() (w io.WriteCloser, lwh objectHeaderWriter, err error)
}

func (p *Parser) indexObjects() error {
	buf := sync.GetBytesBuffer()
	defer sync.PutBytesBuffer(buf)

	for i := uint32(0); i < p.count; i++ {
		oh, err := p.scanner.NextObjectHeader()
		if err != nil {
			return err
		}

		delta := false
		var ota *objectInfo
		switch t := oh.Type; t {
		case plumbing.OFSDeltaObject:
			delta = true

			parent, ok := p.oiByOffset[oh.OffsetReference]
			if !ok {
				return plumbing.ErrObjectNotFound
			}

			ota = newDeltaObject(oh.Offset, oh.Length, t, parent)
			parent.Children = append(parent.Children, ota)
		case plumbing.REFDeltaObject:
			delta = true
			parent, ok := p.oiByHash[oh.Reference]
			if !ok {
				// can't find referenced object in this pack file
				// this must be a "thin" pack.
				parent = &objectInfo{ //Placeholder parent
					SHA1:        oh.Reference,
					ExternalRef: true, // mark as an external reference that must be resolved
					Type:        plumbing.AnyObject,
					DiskType:    plumbing.AnyObject,
				}
				p.oiByHash[oh.Reference] = parent
			}
			ota = newDeltaObject(oh.Offset, oh.Length, t, parent)
			parent.Children = append(parent.Children, ota)

		default:
			ota = newBaseObject(oh.Offset, oh.Length, t)
		}

		hasher := plumbing.NewHasher(oh.Type, oh.Length)
		writers := []io.Writer{hasher}
		var obj *plumbing.MemoryObject

		// Lazy writing is only available for non-delta objects.
		if p.storage != nil && !delta {
			// When a storage is set and supports lazy writing,
			// use that instead of creating a memory object.
			if low, ok := p.storage.(lazyObjectWriter); ok {
				ow, lwh, err := low.LazyWriter()
				if err != nil {
					return err
				}

				if err = lwh(oh.Type, oh.Length); err != nil {
					return err
				}

				defer ow.Close()
				writers = append(writers, ow)
			} else {
				obj = new(plumbing.MemoryObject)
				obj.SetSize(oh.Length)
				obj.SetType(oh.Type)

				writers = append(writers, obj)
			}
		}
		if delta && !p.scanner.IsSeekable {
			buf.Reset()
			buf.Grow(int(oh.Length))
			writers = append(writers, buf)
		}

		mw := io.MultiWriter(writers...)

		_, crc, err := p.scanner.NextObject(mw)
		if err != nil {
			return err
		}

		// Non delta objects needs to be added into the storage. This
		// is only required when lazy writing is not supported.
		if obj != nil {
			if _, err := p.storage.SetEncodedObject(obj); err != nil {
				return err
			}
		}

		ota.Crc32 = crc
		ota.Length = oh.Length

		if !delta {
			sha1 := hasher.Sum()

			// Move children of placeholder parent into actual parent, in case this
			// was a non-external delta reference.
			if placeholder, ok := p.oiByHash[sha1]; ok {
				ota.Children = placeholder.Children
				for _, c := range ota.Children {
					c.Parent = ota
				}
			}

			ota.SHA1 = sha1
			p.oiByHash[ota.SHA1] = ota
		}

		if delta && !p.scanner.IsSeekable {
			data := buf.Bytes()
			p.deltas[oh.Offset] = make([]byte, len(data))
			copy(p.deltas[oh.Offset], data)
		}

		p.oiByOffset[oh.Offset] = ota
		p.oi[i] = ota
	}

	return nil
}

func (p *Parser) resolveDeltas() error {
	buf := sync.GetBytesBuffer()
	defer sync.PutBytesBuffer(buf)

	for _, obj := range p.oi {
		buf.Reset()
		buf.Grow(int(obj.Length))
		err := p.get(obj, buf)
		if err != nil {
			return err
		}

		if err := p.onInflatedObjectHeader(obj.Type, obj.Length, obj.Offset); err != nil {
			return err
		}

		if err := p.onInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, nil); err != nil {
			return err
		}

		if !obj.IsDelta() && len(obj.Children) > 0 {
			// Dealing with an io.ReaderAt object, means we can
			// create it once and reuse across all children.
			r := bytes.NewReader(buf.Bytes())
			for _, child := range obj.Children {
				// Even though we are discarding the output, we still need to read it to
				// so that the scanner can advance to the next object, and the SHA1 can be
				// calculated.
				if err := p.resolveObject(io.Discard, child, r); err != nil {
					return err
				}
				p.resolveExternalRef(child)
			}

			// Remove the delta from the cache.
			if obj.DiskType.IsDelta() && !p.scanner.IsSeekable {
				delete(p.deltas, obj.Offset)
			}
		}
	}

	return nil
}

func (p *Parser) resolveExternalRef(o *objectInfo) {
	if ref, ok := p.oiByHash[o.SHA1]; ok && ref.ExternalRef {
		p.oiByHash[o.SHA1] = o
		o.Children = ref.Children
		for _, c := range o.Children {
			c.Parent = o
		}
	}
}

func (p *Parser) get(o *objectInfo, buf *bytes.Buffer) (err error) {
	if !o.ExternalRef { // skip cache check for placeholder parents
		b, ok := p.cache.Get(o.Offset)
		if ok {
			_, err := buf.Write(b)
			return err
		}
	}

	// If it's not on the cache and is not a delta we can try to find it in the
	// storage, if there's one. External refs must enter here.
	if p.storage != nil && !o.Type.IsDelta() {
		var e plumbing.EncodedObject
		e, err = p.storage.EncodedObject(plumbing.AnyObject, o.SHA1)
		if err != nil {
			return err
		}
		o.Type = e.Type()

		var r io.ReadCloser
		r, err = e.Reader()
		if err != nil {
			return err
		}

		defer ioutil.CheckClose(r, &err)

		_, err = buf.ReadFrom(io.LimitReader(r, e.Size()))
		return err
	}

	if o.ExternalRef {
		// we were not able to resolve a ref in a thin pack
		return ErrReferenceDeltaNotFound
	}

	if o.DiskType.IsDelta() {
		b := sync.GetBytesBuffer()
		defer sync.PutBytesBuffer(b)
		buf.Grow(int(o.Length))
		err := p.get(o.Parent, b)
		if err != nil {
			return err
		}

		err = p.resolveObject(buf, o, bytes.NewReader(b.Bytes()))
		if err != nil {
			return err
		}
	} else {
		err := p.readData(buf, o)
		if err != nil {
			return err
		}
	}

	// If the scanner is seekable, caching this data into
	// memory by offset seems wasteful.
	// There is a trade-off to be considered here in terms
	// of execution time vs memory consumption.
	//
	// TODO: improve seekable execution time, so that we can
	// skip this cache.
	if len(o.Children) > 0 {
		data := make([]byte, buf.Len())
		copy(data, buf.Bytes())
		p.cache.Put(o.Offset, data)
	}
	return nil
}

// resolveObject resolves an object from base, using information
// provided by o.
//
// This call has the side-effect of changing field values
// from the object info o:
//   - Type: OFSDeltaObject may become the target type (e.g. Blob).
//   - Size: The size may be update with the target size.
//   - Hash: Zero hashes will be calculated as part of the object
//     resolution. Hence why this process can't be avoided even when w
//     is an io.Discard.
//
// base must be an io.ReaderAt, which is a requirement from
// patchDeltaStream. The main reason being that reversing an
// delta object may lead to going backs and forths within base,
// which is not supported by io.Reader.
func (p *Parser) resolveObject(
	w io.Writer,
	o *objectInfo,
	base io.ReaderAt,
) error {
	if !o.DiskType.IsDelta() {
		return nil
	}
	buf := sync.GetBytesBuffer()
	defer sync.PutBytesBuffer(buf)
	err := p.readData(buf, o)
	if err != nil {
		return err
	}

	writers := []io.Writer{w}
	var obj *plumbing.MemoryObject
	var lwh objectHeaderWriter

	if p.storage != nil {
		if low, ok := p.storage.(lazyObjectWriter); ok {
			ow, wh, err := low.LazyWriter()
			if err != nil {
				return err
			}
			lwh = wh

			defer ow.Close()
			writers = append(writers, ow)
		} else {
			obj = new(plumbing.MemoryObject)
			ow, err := obj.Writer()
			if err != nil {
				return err
			}

			writers = append(writers, ow)
		}
	}

	mw := io.MultiWriter(writers...)

	err = applyPatchBase(o, base, buf, mw, lwh)
	if err != nil {
		return err
	}

	if obj != nil {
		obj.SetType(o.Type)
		obj.SetSize(o.Size()) // Size here is correct as it was populated by applyPatchBase.
		if _, err := p.storage.SetEncodedObject(obj); err != nil {
			return err
		}
	}
	return err
}

func (p *Parser) readData(w io.Writer, o *objectInfo) error {
	if !p.scanner.IsSeekable && o.DiskType.IsDelta() {
		data, ok := p.deltas[o.Offset]
		if !ok {
			return ErrDeltaNotCached
		}
		_, err := w.Write(data)
		return err
	}

	if _, err := p.scanner.SeekObjectHeader(o.Offset); err != nil {
		return err
	}

	if _, _, err := p.scanner.NextObject(w); err != nil {
		return err
	}
	return nil
}

// applyPatchBase applies the patch to target.
//
// Note that ota will be updated based on the description in resolveObject.
func applyPatchBase(ota *objectInfo, base io.ReaderAt, delta io.Reader, target io.Writer, wh objectHeaderWriter) error {
	if target == nil {
		return fmt.Errorf("cannot apply patch against nil target")
	}

	typ := ota.Type
	if ota.SHA1 == plumbing.ZeroHash {
		typ = ota.Parent.Type
	}

	sz, h, err := patchDeltaWriter(target, base, delta, typ, wh)
	if err != nil {
		return err
	}

	if ota.SHA1 == plumbing.ZeroHash {
		ota.Type = typ
		ota.Length = int64(sz)
		ota.SHA1 = h
	}

	return nil
}

func getSHA1(t plumbing.ObjectType, data []byte) (plumbing.Hash, error) {
	hasher := plumbing.NewHasher(t, int64(len(data)))
	if _, err := hasher.Write(data); err != nil {
		return plumbing.ZeroHash, err
	}

	return hasher.Sum(), nil
}

type objectInfo struct {
	Offset      int64
	Length      int64
	Type        plumbing.ObjectType
	DiskType    plumbing.ObjectType
	ExternalRef bool // indicates this is an external reference in a thin pack file

	Crc32 uint32

	Parent   *objectInfo
	Children []*objectInfo
	SHA1     plumbing.Hash
}

func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo {
	return newDeltaObject(offset, length, t, nil)
}

func newDeltaObject(
	offset, length int64,
	t plumbing.ObjectType,
	parent *objectInfo,
) *objectInfo {
	obj := &objectInfo{
		Offset:   offset,
		Length:   length,
		Type:     t,
		DiskType: t,
		Crc32:    0,
		Parent:   parent,
	}

	return obj
}

func (o *objectInfo) IsDelta() bool {
	return o.Type.IsDelta()
}

func (o *objectInfo) Size() int64 {
	return o.Length
}