package packfile
import (
"bytes"
"errors"
"io"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/cache"
)
// Observer is notified by a Parser as it walks a packfile. It is implemented
// by index encoders. Any non-nil error returned by a callback makes the Parser
// stop and return that error.
type Observer interface {
// OnHeader is called once per parse, right after the packfile header has
// been read, with the object count reported by that header.
OnHeader(count uint32) error
// OnInflatedObjectHeader is called for each object in the packfile with the
// object's type, its length and its offset inside the packfile.
OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error
// OnInflatedObjectContent is called for each object, immediately after
// OnInflatedObjectHeader, with the object's hash, offset and CRC32.
OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error
// OnFooter is called once every object has been reported, with the
// checksum the scanner returned after the last object.
OnFooter(h plumbing.Hash) error
}
// Parser decodes a packfile and notifies every observer attached to it. It is
// used to generate indexes.
type Parser struct {
// scanner is the packfile reader all data is pulled from.
scanner *Scanner
// count is the number of objects announced by the packfile header.
count uint32
// oi holds one entry per object, in the order they were read.
oi []*objectInfo
// oiByHash and oiByOffset index the entries of oi so that delta objects
// can locate their base.
oiByHash map[plumbing.Hash]*objectInfo
oiByOffset map[int64]*objectInfo
// NOTE(review): hashOffset is not used by any code in this part of the
// file — confirm whether it is still needed.
hashOffset map[plumbing.Hash]int64
// checksum is the value returned by the scanner's Checksum call.
checksum plumbing.Hash
// cache keeps the content of objects that have children, so they can be
// reused as delta bases without being read again.
cache *cache.ObjectLRU
// ob is the list of observers notified during Parse.
ob []Observer
}
// NewParser returns a Parser that reads from scanner and reports to the given
// observers. The object count starts at zero and is filled in when the
// packfile header is read; the object cache uses the default LRU settings.
func NewParser(scanner *Scanner, ob ...Observer) *Parser {
	p := new(Parser)
	p.scanner = scanner
	p.ob = ob
	p.cache = cache.NewObjectLRUDefault()

	return p
}
// Parse runs the decoding phases of the packfile in order: header
// initialisation, first pass over every object, and delta resolution. Once
// they all succeed every observer receives OnFooter with the packfile
// checksum, which is also the returned hash. On any error the zero hash is
// returned together with that error.
func (p *Parser) Parse() (plumbing.Hash, error) {
	phases := []func() error{
		p.init,
		p.firstPass,
		p.resolveDeltas,
	}
	for _, phase := range phases {
		if err := phase(); err != nil {
			return plumbing.ZeroHash, err
		}
	}

	for _, observer := range p.ob {
		if err := observer.OnFooter(p.checksum); err != nil {
			return plumbing.ZeroHash, err
		}
	}

	return p.checksum, nil
}
// init reads the packfile header, announces the object count to every
// observer and allocates the per-object bookkeeping structures sized for that
// count. An error from the scanner or from any observer is returned as is.
func (p *Parser) init() error {
	_, total, err := p.scanner.Header()
	if err != nil {
		return err
	}

	for _, observer := range p.ob {
		if err := observer.OnHeader(total); err != nil {
			return err
		}
	}

	p.count = total
	p.oi = make([]*objectInfo, total)
	p.oiByOffset = make(map[int64]*objectInfo, total)
	p.oiByHash = make(map[plumbing.Hash]*objectInfo, total)

	return nil
}
// firstPass reads every object of the packfile in order and records an
// objectInfo for each one in p.oi, p.oiByOffset and, for non-delta objects,
// p.oiByHash.
//
// Non-delta objects are hashed right away. Delta objects are only linked to
// their parent (by offset for OFS deltas, by hash for REF deltas); their hash
// stays unset until the delta is applied later on. Because of that, a delta is
// not registered in p.oiByHash here, so a REF delta can only name a non-delta
// object that appears earlier in the packfile.
//
// After the last object the packfile checksum is read and stored in
// p.checksum; io.EOF from that read is not treated as an error.
func (p *Parser) firstPass() error {
	buf := new(bytes.Buffer)

	for i := uint32(0); i < p.count; i++ {
		buf.Reset()

		oh, err := p.scanner.NextObjectHeader()
		if err != nil {
			return err
		}

		delta := false
		var ota *objectInfo
		switch t := oh.Type; t {
		case plumbing.OFSDeltaObject, plumbing.REFDeltaObject:
			delta = true

			var parent *objectInfo
			var ok bool
			if t == plumbing.OFSDeltaObject {
				parent, ok = p.oiByOffset[oh.OffsetReference]
			} else {
				parent, ok = p.oiByHash[oh.Reference]
			}
			if !ok {
				// TODO improve error
				return errors.New("Reference delta not found")
			}

			ota = newDeltaObject(oh.Offset, oh.Length, t, parent)
			parent.Children = append(parent.Children, ota)
		default:
			ota = newBaseObject(oh.Offset, oh.Length, t)
		}

		size, crc, err := p.scanner.NextObject(buf)
		if err != nil {
			return err
		}

		ota.Crc32 = crc
		ota.PackSize = size
		ota.Length = oh.Length

		if !delta {
			ota.Write(buf.Bytes())
			ota.SHA1 = ota.Sum()

			// Index the object under its own hash. oh.Reference must not be
			// used as the key: for a REF delta it is the hash of the base,
			// and for other entries it is not the object's hash at all, so
			// later REF delta lookups would never find the right base.
			p.oiByHash[ota.SHA1] = ota
		}

		p.oiByOffset[oh.Offset] = ota
		p.oi[i] = ota
	}

	checksum, err := p.scanner.Checksum()
	p.checksum = checksum
	if err == io.EOF {
		return nil
	}

	return err
}
// resolveDeltas walks the objects in the order they were read. For each one
// it first notifies every observer (header callback, then content callback)
// and then, when the object is not a delta and has children, loads its
// content and applies it as the base of each direct child. The first error
// from an observer, from loading the base or from resolving a child stops the
// walk and is returned.
func (p *Parser) resolveDeltas() error {
	for _, info := range p.oi {
		for _, observer := range p.ob {
			if err := observer.OnInflatedObjectHeader(info.Type, info.Length, info.Offset); err != nil {
				return err
			}

			if err := observer.OnInflatedObjectContent(info.SHA1, info.Offset, info.Crc32); err != nil {
				return err
			}
		}

		// Only non-delta objects that are the base of something need work.
		if info.IsDelta() || len(info.Children) == 0 {
			continue
		}

		base, err := p.get(info)
		if err != nil {
			return err
		}

		for _, child := range info.Children {
			if _, err := p.resolveObject(child, base); err != nil {
				return err
			}
		}
	}

	return nil
}
// get returns the full content of o.
//
// The object cache is consulted first, keyed by o.SHA1. On a miss the content
// is rebuilt from the packfile: a delta object is resolved against the content
// of its parent (obtained recursively), anything else is read directly. When o
// has children the content is stored in the cache so it can be reused as their
// base.
func (p *Parser) get(o *objectInfo) ([]byte, error) {
	if e, ok := p.cache.Get(o.SHA1); ok {
		r, err := e.Reader()
		if err != nil {
			return nil, err
		}
		defer r.Close()

		// A single Read call may return fewer bytes than requested;
		// io.ReadFull keeps reading until buf is full or fails.
		buf := make([]byte, e.Size())
		if _, err := io.ReadFull(r, buf); err != nil {
			return nil, err
		}

		return buf, nil
	}

	// Read from disk
	var data []byte
	if o.DiskType.IsDelta() {
		base, err := p.get(o.Parent)
		if err != nil {
			return nil, err
		}

		data, err = p.resolveObject(o, base)
		if err != nil {
			return nil, err
		}
	} else {
		var err error
		data, err = p.readData(o)
		if err != nil {
			return nil, err
		}
	}

	if len(o.Children) > 0 {
		m := &plumbing.MemoryObject{}
		m.Write(data)
		m.SetType(o.Type)
		// Record the size of the content actually cached. o.Size() is the
		// length taken from the object header, which for a delta object is
		// not the length of the patched result, and the cache-hit path
		// above sizes its buffer from this value.
		m.SetSize(int64(len(data)))
		p.cache.Put(m)
	}

	return data, nil
}
// resolveObject applies the delta stored for o on top of base and returns the
// resulting content. As a side effect of the patch step, o's Type and SHA1
// are updated. Objects that are not stored as deltas yield (nil, nil).
func (p *Parser) resolveObject(
	o *objectInfo,
	base []byte) ([]byte, error) {
	if stored := o.DiskType; !stored.IsDelta() {
		return nil, nil
	}

	delta, err := p.readData(o)
	if err != nil {
		return nil, err
	}

	patched, err := applyPatchBase(o, delta, base)
	if err != nil {
		return nil, err
	}

	return patched, nil
}
// readData returns the raw object data stored at o.Offset. It seeks the
// scanner to that offset, reads the object header again to get past it and
// then reads the object data into a fresh buffer. Any error from the seek,
// the header read or the object read is returned.
func (p *Parser) readData(o *objectInfo) ([]byte, error) {
	// TODO: skip header. Header size can be calculated with the offset of the
	// next offset in the first pass.
	// A failed seek must not be ignored: the reads below would otherwise
	// happen at whatever position the scanner was left in.
	if _, err := p.scanner.SeekFromStart(o.Offset); err != nil {
		return nil, err
	}

	if _, err := p.scanner.NextObjectHeader(); err != nil {
		return nil, err
	}

	buf := new(bytes.Buffer)
	if _, _, err := p.scanner.NextObject(buf); err != nil {
		return nil, err
	}

	return buf.Bytes(), nil
}
// applyPatchBase patches base with the delta in data and returns the result.
// On success ota takes the type of its parent and its SHA1 is set to the hash
// of the patched content computed with that type. If patching fails ota is
// left untouched and the error is returned.
func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) {
	result, err := PatchDelta(base, data)
	if err != nil {
		return nil, err
	}

	resolved := ota.Parent.Type
	ota.Type = resolved
	ota.SHA1 = plumbing.ComputeHash(resolved, result)

	return result, nil
}
// objectInfo is the bookkeeping record the Parser keeps for each object found
// in the packfile.
type objectInfo struct {
// Hasher is created from the object's type and length; it is used to
// compute the hash of non-delta objects.
plumbing.Hasher
// Offset is the object's offset, as reported by its object header.
Offset int64
// Length is the length reported by the object header.
Length int64
// PackSize is the size value returned by the scanner when the object
// data was read.
PackSize int64
// Type starts as the type found in the object header; for a delta it is
// replaced by the parent's type once the delta has been applied.
Type plumbing.ObjectType
// DiskType is the type found in the object header and is not changed
// when a delta is resolved.
DiskType plumbing.ObjectType
// Crc32 is the CRC value returned by the scanner for the object data.
Crc32 uint32
// Parent is the base of a delta object; nil for non-delta objects.
Parent *objectInfo
// Children lists the delta objects that use this object as their base.
Children []*objectInfo
// SHA1 is the object's hash. For a delta it is only set once the delta
// has been applied.
SHA1 plumbing.Hash
}
// newBaseObject builds the objectInfo of an object that has no parent, i.e.
// one that is not stored as a delta.
func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo {
	var noParent *objectInfo

	return newDeltaObject(offset, length, t, noParent)
}
// newDeltaObject builds the objectInfo of an object stored at offset with the
// given header length and type, linked to parent as its base. Type and
// DiskType both start as t, the children list starts empty (but non-nil), and
// PackSize, Crc32 and SHA1 are left at their zero values for the caller to
// fill in.
func newDeltaObject(
	offset, length int64,
	t plumbing.ObjectType,
	parent *objectInfo,
) *objectInfo {
	info := new(objectInfo)
	info.Hasher = plumbing.NewHasher(t, length)
	info.Offset = offset
	info.Length = length
	info.Type = t
	info.DiskType = t
	info.Parent = parent
	info.Children = []*objectInfo{}

	return info
}
// IsDelta reports whether the object's current Type is a delta type. Type is
// overwritten when a delta is resolved, so this stops being true for a delta
// object once it has been patched; DiskType keeps the stored type.
func (o *objectInfo) IsDelta() bool {
	current := o.Type

	return current.IsDelta()
}
// Size returns the object's Length field, i.e. the length recorded from its
// object header.
func (o *objectInfo) Size() int64 {
	length := o.Length

	return length
}