aboutsummaryrefslogtreecommitdiffstats
path: root/plumbing/format/packfile/parser.go
diff options
context:
space:
mode:
author Javi Fontan <jfontan@gmail.com> 2018-07-19 17:05:45 +0200
committer Javi Fontan <jfontan@gmail.com> 2018-07-26 14:17:26 +0200
commit da5677f5ba3970d585d5955b15a6a1c3c262c07b (patch)
tree 2c2679eb4e6881d33c1b9f47662bd23dc189d21d /plumbing/format/packfile/parser.go
parent a8ff3e599b3ee998a8b8626cd9fe9fa68490d354 (diff)
download go-git-da5677f5ba3970d585d5955b15a6a1c3c262c07b.tar.gz
plumbing/packfile: add new packfile parser
Signed-off-by: Javi Fontan <jfontan@gmail.com>
Diffstat (limited to 'plumbing/format/packfile/parser.go')
-rw-r--r-- plumbing/format/packfile/parser.go 359
1 files changed, 359 insertions, 0 deletions
diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go
new file mode 100644
index 0000000..460fc3f
--- /dev/null
+++ b/plumbing/format/packfile/parser.go
@@ -0,0 +1,359 @@
+package packfile
+
+import (
+ "bytes"
+ "errors"
+ "io"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/plumbing/cache"
+)
+
+// Observer receives notifications from a Parser while a packfile is decoded.
+// It is implemented by index encoders.
+type Observer interface {
+	// OnHeader is called once the packfile header has been read. count is
+	// the object count reported by that header.
+	OnHeader(count uint32) error
+	// OnInflatedObjectHeader is called once per object. The Parser passes the
+	// object's type, its recorded length and its offset inside the packfile.
+	OnInflatedObjectHeader(t plumbing.ObjectType, objSize, pos int64) error
+	// OnInflatedObjectContent is called once per object, right after
+	// OnInflatedObjectHeader. The Parser passes the object's hash, its offset
+	// inside the packfile and its CRC32.
+	OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error
+	// OnFooter is called when decoding is done. h is the checksum read from
+	// the packfile.
+	OnFooter(h plumbing.Hash) error
+}
+
+// Parser decodes a packfile and notifies every Observer attached to it. It is
+// used to generate indexes.
+type Parser struct {
+	// scanner is the source the packfile is read from.
+	scanner *Scanner
+	// ob holds the observers notified during Parse.
+	ob []Observer
+	// cache keeps the content of objects that have children, so the bytes can
+	// be reused as a delta base.
+	cache *cache.ObjectLRU
+
+	// count is the number of objects announced by the packfile header.
+	count uint32
+	// checksum is the value returned by the scanner after the last object.
+	checksum plumbing.Hash
+
+	// oi lists the objects in the order they were read; oiByHash and
+	// oiByOffset index those same entries.
+	oi         []*objectInfo
+	oiByHash   map[plumbing.Hash]*objectInfo
+	oiByOffset map[int64]*objectInfo
+	// hashOffset is not referenced by the methods defined alongside this type.
+	hashOffset map[plumbing.Hash]int64
+}
+
+// NewParser returns a Parser that reads from scanner and reports its progress
+// to the given observers. The parser starts with a default-sized object cache.
+func NewParser(scanner *Scanner, ob ...Observer) *Parser {
+	p := new(Parser)
+	p.scanner = scanner
+	p.ob = ob
+	p.cache = cache.NewObjectLRUDefault()
+
+	return p
+}
+
+// Parse decodes the packfile: it reads the header, scans every object,
+// resolves the deltas and finally calls OnFooter on each observer. It returns
+// the packfile checksum, or plumbing.ZeroHash together with the first error
+// encountered.
+func (p *Parser) Parse() (plumbing.Hash, error) {
+	// The stages must run in this order; each one relies on the state left
+	// behind by the previous one.
+	stages := []func() error{
+		p.init,
+		p.firstPass,
+		p.resolveDeltas,
+	}
+
+	for _, stage := range stages {
+		if err := stage(); err != nil {
+			return plumbing.ZeroHash, err
+		}
+	}
+
+	for _, observer := range p.ob {
+		if err := observer.OnFooter(p.checksum); err != nil {
+			return plumbing.ZeroHash, err
+		}
+	}
+
+	return p.checksum, nil
+}
+
+// init reads the packfile header, forwards the object count to every observer
+// and allocates the per-object bookkeeping sized to that count.
+func (p *Parser) init() error {
+	_, total, err := p.scanner.Header()
+	if err != nil {
+		return err
+	}
+
+	for _, observer := range p.ob {
+		if err := observer.OnHeader(total); err != nil {
+			return err
+		}
+	}
+
+	p.count = total
+	p.oi = make([]*objectInfo, total)
+	p.oiByOffset = make(map[int64]*objectInfo, total)
+	p.oiByHash = make(map[plumbing.Hash]*objectInfo, total)
+
+	return nil
+}
+
+// firstPass reads the p.count objects of the packfile in order and records an
+// objectInfo for each one in p.oi and p.oiByOffset.
+//
+// Non-delta objects are hashed immediately and also registered in p.oiByHash
+// under their own SHA1. Delta objects are linked to their parent (looked up
+// by offset for OFS deltas, by hash for REF deltas) and are left without a
+// hash until resolveDeltas runs. A delta whose parent has not been seen yet
+// makes the pass fail.
+//
+// After the last object the packfile checksum is read into p.checksum; an
+// io.EOF from that read is not treated as an error.
+func (p *Parser) firstPass() error {
+	buf := new(bytes.Buffer)
+
+	for i := uint32(0); i < p.count; i++ {
+		// The buffer is reused across objects to avoid reallocating it.
+		buf.Reset()
+
+		oh, err := p.scanner.NextObjectHeader()
+		if err != nil {
+			return err
+		}
+
+		delta := false
+		var ota *objectInfo
+		switch t := oh.Type; t {
+		case plumbing.OFSDeltaObject, plumbing.REFDeltaObject:
+			delta = true
+
+			var parent *objectInfo
+			var ok bool
+
+			if t == plumbing.OFSDeltaObject {
+				parent, ok = p.oiByOffset[oh.OffsetReference]
+			} else {
+				parent, ok = p.oiByHash[oh.Reference]
+			}
+
+			if !ok {
+				// TODO improve error
+				return errors.New("Reference delta not found")
+			}
+
+			ota = newDeltaObject(oh.Offset, oh.Length, t, parent)
+
+			parent.Children = append(parent.Children, ota)
+		default:
+			ota = newBaseObject(oh.Offset, oh.Length, t)
+		}
+
+		size, crc, err := p.scanner.NextObject(buf)
+		if err != nil {
+			return err
+		}
+
+		ota.Crc32 = crc
+		ota.PackSize = size
+		ota.Length = oh.Length
+
+		if !delta {
+			if _, err := ota.Write(buf.Bytes()); err != nil {
+				return err
+			}
+			ota.SHA1 = ota.Sum()
+
+			// Index the object under its own hash so that later REF deltas
+			// can find it. oh.Reference is not this object's hash, and a
+			// delta's hash is unknown until it has been resolved, so deltas
+			// are not added here.
+			p.oiByHash[ota.SHA1] = ota
+		}
+
+		p.oiByOffset[oh.Offset] = ota
+
+		p.oi[i] = ota
+	}
+
+	checksum, err := p.scanner.Checksum()
+	p.checksum = checksum
+
+	if err == io.EOF {
+		return nil
+	}
+
+	return err
+}
+
+func (p *Parser) resolveDeltas() error {
+ for _, obj := range p.oi {
+ for _, o := range p.ob {
+ err := o.OnInflatedObjectHeader(obj.Type, obj.Length, obj.Offset)
+ if err != nil {
+ return err
+ }
+
+ err = o.OnInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32)
+ if err != nil {
+ return err
+ }
+ }
+
+ if !obj.IsDelta() && len(obj.Children) > 0 {
+ var err error
+ base, err := p.get(obj)
+ if err != nil {
+ return err
+ }
+
+ for _, child := range obj.Children {
+ _, err = p.resolveObject(child, base)
+ if err != nil {
+ return err
+ }
+ }
+ }
+ }
+
+ return nil
+}
+
+func (p *Parser) get(o *objectInfo) ([]byte, error) {
+ e, ok := p.cache.Get(o.SHA1)
+ if ok {
+ r, err := e.Reader()
+ if err != nil {
+ return nil, err
+ }
+
+ buf := make([]byte, e.Size())
+ _, err = r.Read(buf)
+ if err != nil {
+ return nil, err
+ }
+
+ return buf, nil
+ }
+
+ // Read from disk
+ if o.DiskType.IsDelta() {
+ base, err := p.get(o.Parent)
+ if err != nil {
+ return nil, err
+ }
+
+ data, err := p.resolveObject(o, base)
+ if err != nil {
+ return nil, err
+ }
+
+ if len(o.Children) > 0 {
+ m := &plumbing.MemoryObject{}
+ m.Write(data)
+ m.SetType(o.Type)
+ m.SetSize(o.Size())
+ p.cache.Put(m)
+ }
+
+ return data, nil
+ }
+
+ data, err := p.readData(o)
+ if err != nil {
+ return nil, err
+ }
+
+ if len(o.Children) > 0 {
+ m := &plumbing.MemoryObject{}
+ m.Write(data)
+ m.SetType(o.Type)
+ m.SetSize(o.Size())
+ p.cache.Put(m)
+ }
+
+ return data, nil
+}
+
+// resolveObject reconstructs the delta object o on top of base. It reads the
+// delta from the packfile and applies it, which also fills in o's final type
+// and hash. Objects that are not stored as deltas yield (nil, nil).
+func (p *Parser) resolveObject(o *objectInfo, base []byte) ([]byte, error) {
+	if !o.DiskType.IsDelta() {
+		return nil, nil
+	}
+
+	delta, err := p.readData(o)
+	if err != nil {
+		return nil, err
+	}
+
+	return applyPatchBase(o, delta, base)
+}
+
+// readData moves the scanner to o.Offset and returns the object body stored
+// there, as delivered by the scanner's NextObject. For a delta object that is
+// the delta itself, not the reconstructed content.
+func (p *Parser) readData(o *objectInfo) ([]byte, error) {
+	// TODO: skip header. Header size can be calculated with the offset of the
+	// next offset in the first pass.
+	//
+	// A failed seek must not go unnoticed: the reads below would otherwise
+	// decode whatever happens to be at the current position.
+	if _, err := p.scanner.SeekFromStart(o.Offset); err != nil {
+		return nil, err
+	}
+
+	if _, err := p.scanner.NextObjectHeader(); err != nil {
+		return nil, err
+	}
+
+	buf := new(bytes.Buffer)
+	if _, _, err := p.scanner.NextObject(buf); err != nil {
+		return nil, err
+	}
+
+	return buf.Bytes(), nil
+}
+
+// applyPatchBase applies the delta in data to base and returns the result. On
+// success ota takes over its parent's type and its SHA1 is set to the hash of
+// the patched content; on failure ota is left untouched.
+func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) {
+	patched, err := PatchDelta(base, data)
+	if err != nil {
+		return nil, err
+	}
+
+	resolvedType := ota.Parent.Type
+	ota.Type = resolvedType
+	ota.SHA1 = plumbing.ComputeHash(resolvedType, patched)
+
+	return patched, nil
+}
+
+// objectInfo is the bookkeeping the Parser keeps for one packfile entry.
+type objectInfo struct {
+	// Hasher is used to compute SHA1 for objects that are not deltas.
+	plumbing.Hasher
+
+	// SHA1 is the object hash. For deltas it stays zero until the delta has
+	// been applied.
+	SHA1 plumbing.Hash
+	// Type starts equal to DiskType and is replaced by the parent's type once
+	// a delta has been applied; DiskType keeps the type found in the packfile.
+	Type     plumbing.ObjectType
+	DiskType plumbing.ObjectType
+
+	// Offset is the entry's position in the packfile.
+	Offset int64
+	// Length is the length taken from the entry's header.
+	Length int64
+	// PackSize and Crc32 are the size and CRC32 reported by the scanner when
+	// the entry body was read.
+	PackSize int64
+	Crc32    uint32
+
+	// Parent is the delta base (nil for non-delta entries); Children are the
+	// deltas that use this entry as their base.
+	Parent   *objectInfo
+	Children []*objectInfo
+}
+
+// newBaseObject returns the objectInfo for an entry that has no parent, i.e.
+// one that is stored whole rather than as a delta.
+func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo {
+	return newDeltaObject(offset, length, t, nil)
+}
+
+// newDeltaObject returns the objectInfo for an entry at offset with the given
+// header length and type, attached to parent. Type and DiskType both start as
+// t, the hasher is primed with t and length, and Children starts as an empty,
+// non-nil slice. PackSize, Crc32 and SHA1 are left at their zero values.
+func newDeltaObject(
+	offset, length int64,
+	t plumbing.ObjectType,
+	parent *objectInfo,
+) *objectInfo {
+	return &objectInfo{
+		Hasher:   plumbing.NewHasher(t, length),
+		Offset:   offset,
+		Length:   length,
+		Type:     t,
+		DiskType: t,
+		Parent:   parent,
+		Children: []*objectInfo{},
+	}
+}
+
+// IsDelta reports whether the object's current Type is a delta type. Type is
+// overwritten with the parent's type once the delta has been applied, so this
+// is true only for deltas that are still unresolved; use DiskType to ask how
+// the entry is stored in the packfile.
+func (o *objectInfo) IsDelta() bool {
+	return o.Type.IsDelta()
+}
+
+// Size returns Length, the length recorded for this entry when its header was
+// read.
+func (o *objectInfo) Size() int64 {
+	return o.Length
+}