// about | summary | refs | log | blame | commit | diff | stats
// path: root/plumbing/format/packfile/parser.go
// blob: 460fc3f5acfec8ab2218f6d6078b5455f424468b (plain) (tree)






































































































































































































































































































































































                                                                                         
package packfile

import (
	"bytes"
	"errors"
	"io"

	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/cache"
)

// Observer is the set of callbacks a Parser invokes while it decodes a
// packfile. It is implemented by index encoders. Any callback may abort the
// parse by returning a non-nil error, which the Parser returns to its caller.
type Observer interface {
	// OnHeader is called once the packfile header has been read; count is the
	// object count reported by that header.
	OnHeader(count uint32) error
	// OnInflatedObjectHeader is called for each object with its type, its
	// length and its offset (pos) inside the packfile.
	OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error
	// OnInflatedObjectContent is called for each object with its hash, its
	// offset (pos) inside the packfile and its CRC32.
	OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error
	// OnFooter is called when decoding is done; h is the packfile checksum.
	OnFooter(h plumbing.Hash) error
}

// Parser decodes a packfile and notifies every Observer associated with it.
// It is used to generate indexes.
type Parser struct {
	// scanner is the packfile reader all object data is pulled from.
	scanner *Scanner
	// count is the number of objects announced by the packfile header.
	count uint32
	// oi holds one entry per object, in the order they appear in the packfile.
	oi []*objectInfo
	// oiByHash and oiByOffset index the entries of oi so that delta objects
	// can locate their base by hash or by packfile offset.
	oiByHash   map[plumbing.Hash]*objectInfo
	oiByOffset map[int64]*objectInfo
	// hashOffset is not populated by the code in this file.
	hashOffset map[plumbing.Hash]int64
	// checksum is the packfile checksum read at the end of the first pass.
	checksum plumbing.Hash

	// cache keeps the content of objects that have children, so that delta
	// resolution does not need to re-read them from the scanner.
	cache *cache.ObjectLRU

	// ob is the list of observers notified during parsing.
	ob []Observer
}

// NewParser returns a Parser that reads the packfile from scanner and reports
// its progress to each of the given observers. The parser starts with a zero
// object count and a default-sized object LRU cache.
func NewParser(scanner *Scanner, ob ...Observer) *Parser {
	parser := new(Parser)
	parser.scanner = scanner
	parser.ob = ob
	parser.cache = cache.NewObjectLRUDefault()

	return parser
}

// Parse runs the whole decoding process: it reads the header, scans every
// object, resolves the deltas and finally hands the packfile checksum to the
// observers through OnFooter. It returns that checksum, or plumbing.ZeroHash
// together with the first error raised by any stage or observer.
func (p *Parser) Parse() (plumbing.Hash, error) {
	// The stages must run in exactly this order; each one relies on the state
	// left behind by the previous one.
	stages := []func() error{p.init, p.firstPass, p.resolveDeltas}
	for _, stage := range stages {
		if err := stage(); err != nil {
			return plumbing.ZeroHash, err
		}
	}

	for _, observer := range p.ob {
		if err := observer.OnFooter(p.checksum); err != nil {
			return plumbing.ZeroHash, err
		}
	}

	return p.checksum, nil
}

// init reads the packfile header, announces the object count to every
// observer through OnHeader and allocates the per-object bookkeeping
// structures sized for that count.
func (p *Parser) init() error {
	_, total, err := p.scanner.Header()
	if err != nil {
		return err
	}

	for i := range p.ob {
		if err := p.ob[i].OnHeader(total); err != nil {
			return err
		}
	}

	p.count = total
	p.oi = make([]*objectInfo, total)
	p.oiByOffset = make(map[int64]*objectInfo, total)
	p.oiByHash = make(map[plumbing.Hash]*objectInfo, total)

	return nil
}

// firstPass walks the packfile once, in order, and records an objectInfo for
// each of the p.count objects announced by the header.
//
// Non-delta objects are hashed immediately and indexed by both offset and
// hash. Delta objects are linked to their base (found by offset for OFS
// deltas, by hash for REF deltas) and indexed by offset only, because their
// hash is not known until the delta has been applied.
//
// After the last object the packfile checksum is read and stored in
// p.checksum. An io.EOF from that read is not treated as an error.
func (p *Parser) firstPass() error {
	buf := new(bytes.Buffer)

	for i := uint32(0); i < p.count; i++ {
		buf.Reset()

		oh, err := p.scanner.NextObjectHeader()
		if err != nil {
			return err
		}

		delta := false
		var ota *objectInfo
		switch t := oh.Type; t {
		case plumbing.OFSDeltaObject, plumbing.REFDeltaObject:
			delta = true

			var parent *objectInfo
			var ok bool

			if t == plumbing.OFSDeltaObject {
				parent, ok = p.oiByOffset[oh.OffsetReference]
			} else {
				parent, ok = p.oiByHash[oh.Reference]
			}

			if !ok {
				// TODO improve error
				return errors.New("Reference delta not found")
			}

			ota = newDeltaObject(oh.Offset, oh.Length, t, parent)

			parent.Children = append(parent.Children, ota)
		default:
			ota = newBaseObject(oh.Offset, oh.Length, t)
		}

		size, crc, err := p.scanner.NextObject(buf)
		if err != nil {
			return err
		}

		ota.Crc32 = crc
		ota.PackSize = size
		ota.Length = oh.Length

		if !delta {
			ota.Write(buf.Bytes())
			ota.SHA1 = ota.Sum()

			// Index the object under its own hash. The header's Reference
			// field names the base of a REF delta (see the lookup above), not
			// the object being read, so using it as the key would leave base
			// objects unreachable by hash and would let a REF delta replace
			// its own base in the index.
			p.oiByHash[ota.SHA1] = ota
		}

		p.oiByOffset[oh.Offset] = ota

		p.oi[i] = ota
	}

	checksum, err := p.scanner.Checksum()
	p.checksum = checksum

	if err == io.EOF {
		return nil
	}

	return err
}

// resolveDeltas visits every object in packfile order. For each one it first
// reports the object to all observers (header callback, then content
// callback) and then, if the object is not a delta and has children, loads
// its content and resolves each direct child against it.
//
// IsDelta looks at the Type field, which applyPatchBase overwrites with the
// parent's type, so an already-resolved delta is treated as a base for its
// own children when the loop reaches it.
func (p *Parser) resolveDeltas() error {
	for _, info := range p.oi {
		for _, observer := range p.ob {
			if err := observer.OnInflatedObjectHeader(info.Type, info.Length, info.Offset); err != nil {
				return err
			}

			if err := observer.OnInflatedObjectContent(info.SHA1, info.Offset, info.Crc32); err != nil {
				return err
			}
		}

		// Nothing to resolve for unresolved deltas or for childless objects.
		if info.IsDelta() || len(info.Children) == 0 {
			continue
		}

		content, err := p.get(info)
		if err != nil {
			return err
		}

		for _, child := range info.Children {
			if _, err := p.resolveObject(child, content); err != nil {
				return err
			}
		}
	}

	return nil
}

// get returns the full content of o.
//
// The content is served from the parser's cache when an entry exists for
// o.SHA1. Otherwise it is read through the scanner: directly for objects
// stored as-is, or by recursively obtaining the parent's content and applying
// the delta for objects stored as deltas. Content of objects that have
// children is put in the cache, since it will be needed again as a delta
// base.
func (p *Parser) get(o *objectInfo) ([]byte, error) {
	if e, ok := p.cache.Get(o.SHA1); ok {
		r, err := e.Reader()
		if err != nil {
			return nil, err
		}
		// NOTE(review): the reader is not closed here, matching the previous
		// behavior; confirm whether the value returned by Reader needs it.

		// A single Read call may return fewer bytes than requested without
		// reporting an error; ReadFull keeps reading until buf is filled.
		buf := make([]byte, e.Size())
		if _, err := io.ReadFull(r, buf); err != nil {
			return nil, err
		}

		return buf, nil
	}

	// Read from disk
	var data []byte
	if o.DiskType.IsDelta() {
		base, err := p.get(o.Parent)
		if err != nil {
			return nil, err
		}

		data, err = p.resolveObject(o, base)
		if err != nil {
			return nil, err
		}
	} else {
		var err error
		data, err = p.readData(o)
		if err != nil {
			return nil, err
		}
	}

	if len(o.Children) > 0 {
		m := &plumbing.MemoryObject{}
		m.Write(data)
		m.SetType(o.Type)
		// Use the length of the content actually stored. o.Size() is the
		// length taken from the packfile object header, which for an object
		// stored as a delta is not the length of the patched result.
		m.SetSize(int64(len(data)))
		p.cache.Put(m)
	}

	return data, nil
}

// resolveObject rebuilds the content of the delta object o by reading its
// delta data through the scanner and applying it on top of base. As a side
// effect of applying the patch, o's Type and SHA1 are updated. For objects
// that are not stored as deltas it does nothing and returns (nil, nil).
func (p *Parser) resolveObject(
	o *objectInfo,
	base []byte) ([]byte, error) {

	if storedAsDelta := o.DiskType.IsDelta(); !storedAsDelta {
		return nil, nil
	}

	raw, err := p.readData(o)
	if err != nil {
		return nil, err
	}

	// applyPatchBase already returns a nil slice alongside any error.
	return applyPatchBase(o, raw, base)
}

// readData returns the data stored in the packfile for o, as produced by the
// scanner's NextObject. It repositions the scanner at o.Offset, consumes the
// object header found there and then reads the object body.
//
// An error from the seek, the header read or the body read is returned
// as-is with a nil slice.
func (p *Parser) readData(o *objectInfo) ([]byte, error) {
	// TODO: skip header. Header size can be calculated with the offset of the
	// next offset in the first pass.
	//
	// A failed seek must stop here: reading on would decode whatever object
	// happens to sit at the scanner's current position instead of o.
	if _, err := p.scanner.SeekFromStart(o.Offset); err != nil {
		return nil, err
	}

	if _, err := p.scanner.NextObjectHeader(); err != nil {
		return nil, err
	}

	buf := new(bytes.Buffer)
	if _, _, err := p.scanner.NextObject(buf); err != nil {
		return nil, err
	}

	return buf.Bytes(), nil
}

// applyPatchBase applies the delta in data on top of base and returns the
// patched content. On success it also updates ota in place: its Type becomes
// the type of its parent and its SHA1 becomes the hash of the patched content
// computed for that type. ota is left untouched if patching fails.
func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) {
	result, err := PatchDelta(base, data)
	if err != nil {
		return nil, err
	}

	resolvedType := ota.Parent.Type
	ota.Type = resolvedType
	ota.SHA1 = plumbing.ComputeHash(resolvedType, result)

	return result, nil
}

// objectInfo is the bookkeeping record the Parser keeps for each object found
// in the packfile.
type objectInfo struct {
	// Hasher is fed the content of non-delta objects during the first pass to
	// compute SHA1.
	plumbing.Hasher

	// Offset is the position of the object inside the packfile.
	Offset int64
	// Length is the object length taken from the packfile object header.
	Length int64
	// PackSize is the size reported by the scanner when the object body was
	// read.
	PackSize int64
	// Type starts equal to DiskType; for deltas it is replaced with the
	// parent's type once the delta has been applied.
	Type plumbing.ObjectType
	// DiskType is the type the object has in the packfile and never changes.
	DiskType plumbing.ObjectType

	// Crc32 is the checksum reported by the scanner for the object.
	Crc32 uint32

	// Parent is the base of a delta object; it is nil for non-delta objects.
	Parent *objectInfo
	// Children are the delta objects that use this object as their base.
	Children []*objectInfo
	// SHA1 is the object hash: set in the first pass for non-delta objects
	// and when the delta is applied for delta objects.
	SHA1 plumbing.Hash
}

// newBaseObject builds the objectInfo for an object that has no parent. It is
// a convenience wrapper around newDeltaObject with a nil parent.
func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo {
	var noParent *objectInfo
	return newDeltaObject(offset, length, t, noParent)
}

// newDeltaObject builds the objectInfo for an object located at offset, with
// the given header length and type, whose base is parent. Type and DiskType
// both start as t, the hasher is created for t and length, the children list
// starts empty (but non-nil), and the remaining fields keep their zero value.
func newDeltaObject(
	offset, length int64,
	t plumbing.ObjectType,
	parent *objectInfo,
) *objectInfo {
	return &objectInfo{
		Hasher:   plumbing.NewHasher(t, length),
		Offset:   offset,
		Length:   length,
		Type:     t,
		DiskType: t,
		Parent:   parent,
		Children: []*objectInfo{},
	}
}

// IsDelta reports whether the object's current Type is a delta type. Because
// Type is rewritten when a delta is applied, this turns false for a delta
// object once it has been resolved; DiskType keeps the stored type.
func (o *objectInfo) IsDelta() bool {
	current := o.Type
	return current.IsDelta()
}

// Size returns the Length recorded for the object.
func (o *objectInfo) Size() int64 {
	length := o.Length
	return length
}