From da5677f5ba3970d585d5955b15a6a1c3c262c07b Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Thu, 19 Jul 2018 17:05:45 +0200 Subject: plumbing/packfile: add new packfile parser Signed-off-by: Javi Fontan --- plumbing/format/packfile/parser.go | 359 +++++++++++++++++++++++++++++++++++++ 1 file changed, 359 insertions(+) create mode 100644 plumbing/format/packfile/parser.go (limited to 'plumbing/format/packfile/parser.go') diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go new file mode 100644 index 0000000..460fc3f --- /dev/null +++ b/plumbing/format/packfile/parser.go @@ -0,0 +1,359 @@ +package packfile + +import ( + "bytes" + "errors" + "io" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" +) + +// Observer interface is implemented by index encoders. +type Observer interface { + // OnHeader is called when a new packfile is opened. + OnHeader(count uint32) error + // OnInflatedObjectHeader is called for each object header read. + OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error + // OnInflatedObjectContent is called for each decoded object. + OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error + // OnFooter is called when decoding is done. + OnFooter(h plumbing.Hash) error +} + +// Parser decodes a packfile and calls any observer associated to it. Is used +// to generate indexes. +type Parser struct { + scanner *Scanner + count uint32 + oi []*objectInfo + oiByHash map[plumbing.Hash]*objectInfo + oiByOffset map[int64]*objectInfo + hashOffset map[plumbing.Hash]int64 + checksum plumbing.Hash + + cache *cache.ObjectLRU + + ob []Observer +} + +// NewParser creates a new Parser struct. +func NewParser(scanner *Scanner, ob ...Observer) *Parser { + return &Parser{ + scanner: scanner, + ob: ob, + count: 0, + cache: cache.NewObjectLRUDefault(), + } +} + +// Parse start decoding phase of the packfile. +func (p *Parser) Parse() (plumbing.Hash, error) { + err := p.init() + if err != nil { + return plumbing.ZeroHash, err + } + + err = p.firstPass() + if err != nil { + return plumbing.ZeroHash, err + } + + err = p.resolveDeltas() + if err != nil { + return plumbing.ZeroHash, err + } + + for _, o := range p.ob { + err := o.OnFooter(p.checksum) + if err != nil { + return plumbing.ZeroHash, err + } + } + + return p.checksum, nil +} + +func (p *Parser) init() error { + _, c, err := p.scanner.Header() + if err != nil { + return err + } + + for _, o := range p.ob { + err := o.OnHeader(c) + if err != nil { + return err + } + } + + p.count = c + p.oiByHash = make(map[plumbing.Hash]*objectInfo, p.count) + p.oiByOffset = make(map[int64]*objectInfo, p.count) + p.oi = make([]*objectInfo, p.count) + + return nil +} + +func (p *Parser) firstPass() error { + buf := new(bytes.Buffer) + + for i := uint32(0); i < p.count; i++ { + buf.Truncate(0) + + oh, err := p.scanner.NextObjectHeader() + if err != nil { + return err + } + + delta := false + var ota *objectInfo + switch t := oh.Type; t { + case plumbing.OFSDeltaObject, plumbing.REFDeltaObject: + delta = true + + var parent *objectInfo + var ok bool + + if t == plumbing.OFSDeltaObject { + parent, ok = p.oiByOffset[oh.OffsetReference] + } else { + parent, ok = p.oiByHash[oh.Reference] + } + + if !ok { + // TODO improve error + return errors.New("Reference delta not found") + } + + ota = newDeltaObject(oh.Offset, oh.Length, t, parent) + + parent.Children = append(parent.Children, ota) + default: + ota = newBaseObject(oh.Offset, oh.Length, t) + } + + size, crc, err := p.scanner.NextObject(buf) + if err != nil { + return err + } + + ota.Crc32 = crc + ota.PackSize = size + ota.Length = oh.Length + + if !delta { + ota.Write(buf.Bytes()) + ota.SHA1 = ota.Sum() + } + + p.oiByOffset[oh.Offset] = ota + p.oiByHash[oh.Reference] = ota + + p.oi[i] = ota + } + + checksum, err := p.scanner.Checksum() + p.checksum = checksum + + if err == io.EOF { + return nil + } + + return err +} + +func (p *Parser) resolveDeltas() error { + for _, obj := range p.oi { + for _, o := range p.ob { + err := o.OnInflatedObjectHeader(obj.Type, obj.Length, obj.Offset) + if err != nil { + return err + } + + err = o.OnInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32) + if err != nil { + return err + } + } + + if !obj.IsDelta() && len(obj.Children) > 0 { + var err error + base, err := p.get(obj) + if err != nil { + return err + } + + for _, child := range obj.Children { + _, err = p.resolveObject(child, base) + if err != nil { + return err + } + } + } + } + + return nil +} + +func (p *Parser) get(o *objectInfo) ([]byte, error) { + e, ok := p.cache.Get(o.SHA1) + if ok { + r, err := e.Reader() + if err != nil { + return nil, err + } + + buf := make([]byte, e.Size()) + _, err = r.Read(buf) + if err != nil { + return nil, err + } + + return buf, nil + } + + // Read from disk + if o.DiskType.IsDelta() { + base, err := p.get(o.Parent) + if err != nil { + return nil, err + } + + data, err := p.resolveObject(o, base) + if err != nil { + return nil, err + } + + if len(o.Children) > 0 { + m := &plumbing.MemoryObject{} + m.Write(data) + m.SetType(o.Type) + m.SetSize(o.Size()) + p.cache.Put(m) + } + + return data, nil + } + + data, err := p.readData(o) + if err != nil { + return nil, err + } + + if len(o.Children) > 0 { + m := &plumbing.MemoryObject{} + m.Write(data) + m.SetType(o.Type) + m.SetSize(o.Size()) + p.cache.Put(m) + } + + return data, nil +} + +func (p *Parser) resolveObject( + o *objectInfo, + base []byte) ([]byte, error) { + + if !o.DiskType.IsDelta() { + return nil, nil + } + + data, err := p.readData(o) + if err != nil { + return nil, err + } + + data, err = applyPatchBase(o, data, base) + if err != nil { + return nil, err + } + + return data, nil +} + +func (p *Parser) readData(o *objectInfo) ([]byte, error) { + buf := new(bytes.Buffer) + + // TODO: skip header. Header size can be calculated with the offset of the + // next offset in the first pass. + p.scanner.SeekFromStart(o.Offset) + _, err := p.scanner.NextObjectHeader() + if err != nil { + return nil, err + } + + buf.Truncate(0) + + _, _, err = p.scanner.NextObject(buf) + if err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { + patched, err := PatchDelta(base, data) + if err != nil { + return nil, err + } + + ota.Type = ota.Parent.Type + hash := plumbing.ComputeHash(ota.Type, patched) + + ota.SHA1 = hash + + return patched, nil +} + +type objectInfo struct { + plumbing.Hasher + + Offset int64 + Length int64 + PackSize int64 + Type plumbing.ObjectType + DiskType plumbing.ObjectType + + Crc32 uint32 + + Parent *objectInfo + Children []*objectInfo + SHA1 plumbing.Hash +} + +func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo { + return newDeltaObject(offset, length, t, nil) +} + +func newDeltaObject( + offset, length int64, + t plumbing.ObjectType, + parent *objectInfo, +) *objectInfo { + children := make([]*objectInfo, 0) + + obj := &objectInfo{ + Hasher: plumbing.NewHasher(t, length), + Offset: offset, + Length: length, + PackSize: 0, + Type: t, + DiskType: t, + Crc32: 0, + Parent: parent, + Children: children, + } + + return obj +} + +func (o *objectInfo) IsDelta() bool { + return o.Type.IsDelta() +} + +func (o *objectInfo) Size() int64 { + return o.Length +} -- cgit From 79f249465b24104b73c9dc220d9098cecdab4d77 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Thu, 26 Jul 2018 13:42:51 +0200 Subject: plumbing, storage: integrate new index Now dotgit.PackWriter uses the new packfile.Parser and index. Signed-off-by: Javi Fontan --- plumbing/format/packfile/parser.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'plumbing/format/packfile/parser.go') diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index 460fc3f..696f5ba 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -311,11 +311,12 @@ func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { type objectInfo struct { plumbing.Hasher - Offset int64 - Length int64 - PackSize int64 - Type plumbing.ObjectType - DiskType plumbing.ObjectType + Offset int64 + Length int64 + HeaderLength int64 + PackSize int64 + Type plumbing.ObjectType + DiskType plumbing.ObjectType Crc32 uint32 -- cgit From 6a24b4c1f0cb9e5daf30fa7979f2643a967af1ad Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Tue, 7 Aug 2018 18:41:19 +0200 Subject: *: use parser to populate non writable storages and bug fixes Signed-off-by: Miguel Molina --- plumbing/format/packfile/parser.go | 130 +++++++++++++++++++++++++------------ 1 file changed, 87 insertions(+), 43 deletions(-) (limited to 'plumbing/format/packfile/parser.go') diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index 696f5ba..f0a7674 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -9,6 +9,16 @@ import ( "gopkg.in/src-d/go-git.v4/plumbing/cache" ) +var ( + // ErrObjectContentAlreadyRead is returned when the content of the object + // was already read, since the content can only be read once. + ErrObjectContentAlreadyRead = errors.New("object content was already read") + + // ErrReferenceDeltaNotFound is returned when the reference delta is not + // found. + ErrReferenceDeltaNotFound = errors.New("reference delta not found") +) + // Observer interface is implemented by index encoders. type Observer interface { // OnHeader is called when a new packfile is opened. @@ -16,7 +26,7 @@ type Observer interface { // OnInflatedObjectHeader is called for each object header read. OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error // OnInflatedObjectContent is called for each decoded object. - OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error + OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32, content []byte) error // OnFooter is called when decoding is done. OnFooter(h plumbing.Hash) error } @@ -32,41 +42,44 @@ type Parser struct { hashOffset map[plumbing.Hash]int64 checksum plumbing.Hash - cache *cache.ObjectLRU + cache *cache.ObjectLRU + contentCache map[int64][]byte ob []Observer } // NewParser creates a new Parser struct. func NewParser(scanner *Scanner, ob ...Observer) *Parser { + var contentCache map[int64][]byte + if !scanner.IsSeekable { + contentCache = make(map[int64][]byte) + } + return &Parser{ - scanner: scanner, - ob: ob, - count: 0, - cache: cache.NewObjectLRUDefault(), + scanner: scanner, + ob: ob, + count: 0, + cache: cache.NewObjectLRUDefault(), + contentCache: contentCache, } } // Parse start decoding phase of the packfile. func (p *Parser) Parse() (plumbing.Hash, error) { - err := p.init() - if err != nil { + if err := p.init(); err != nil { return plumbing.ZeroHash, err } - err = p.firstPass() - if err != nil { + if err := p.firstPass(); err != nil { return plumbing.ZeroHash, err } - err = p.resolveDeltas() - if err != nil { + if err := p.resolveDeltas(); err != nil { return plumbing.ZeroHash, err } for _, o := range p.ob { - err := o.OnFooter(p.checksum) - if err != nil { + if err := o.OnFooter(p.checksum); err != nil { return plumbing.ZeroHash, err } } @@ -81,8 +94,7 @@ func (p *Parser) init() error { } for _, o := range p.ob { - err := o.OnHeader(c) - if err != nil { + if err := o.OnHeader(c); err != nil { return err } } @@ -99,7 +111,7 @@ func (p *Parser) firstPass() error { buf := new(bytes.Buffer) for i := uint32(0); i < p.count; i++ { - buf.Truncate(0) + buf.Reset() oh, err := p.scanner.NextObjectHeader() if err != nil { @@ -122,8 +134,7 @@ func (p *Parser) firstPass() error { } if !ok { - // TODO improve error - return errors.New("Reference delta not found") + return ErrReferenceDeltaNotFound } ota = newDeltaObject(oh.Offset, oh.Length, t, parent) @@ -143,35 +154,41 @@ func (p *Parser) firstPass() error { ota.Length = oh.Length if !delta { - ota.Write(buf.Bytes()) + if _, err := ota.Write(buf.Bytes()); err != nil { + return err + } ota.SHA1 = ota.Sum() + p.oiByHash[ota.SHA1] = ota } p.oiByOffset[oh.Offset] = ota - p.oiByHash[oh.Reference] = ota p.oi[i] = ota } - checksum, err := p.scanner.Checksum() - p.checksum = checksum - - if err == io.EOF { - return nil + var err error + p.checksum, err = p.scanner.Checksum() + if err != nil && err != io.EOF { + return err } - return err + return nil } func (p *Parser) resolveDeltas() error { for _, obj := range p.oi { + content, err := obj.Content() + if err != nil { + return err + } + for _, o := range p.ob { err := o.OnInflatedObjectHeader(obj.Type, obj.Length, obj.Offset) if err != nil { return err } - err = o.OnInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32) + err = o.OnInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, content) if err != nil { return err } @@ -185,8 +202,7 @@ func (p *Parser) resolveDeltas() error { } for _, child := range obj.Children { - _, err = p.resolveObject(child, base) - if err != nil { + if _, err := p.resolveObject(child, base); err != nil { return err } } @@ -205,8 +221,7 @@ func (p *Parser) get(o *objectInfo) ([]byte, error) { } buf := make([]byte, e.Size()) - _, err = r.Read(buf) - if err != nil { + if _, err = r.Read(buf); err != nil { return nil, err } @@ -254,8 +269,8 @@ func (p *Parser) get(o *objectInfo) ([]byte, error) { func (p *Parser) resolveObject( o *objectInfo, - base []byte) ([]byte, error) { - + base []byte, +) ([]byte, error) { if !o.DiskType.IsDelta() { return nil, nil } @@ -278,16 +293,17 @@ func (p *Parser) readData(o *objectInfo) ([]byte, error) { // TODO: skip header. Header size can be calculated with the offset of the // next offset in the first pass. - p.scanner.SeekFromStart(o.Offset) - _, err := p.scanner.NextObjectHeader() - if err != nil { + if _, err := p.scanner.SeekFromStart(o.Offset); err != nil { return nil, err } - buf.Truncate(0) + if _, err := p.scanner.NextObjectHeader(); err != nil { + return nil, err + } - _, _, err = p.scanner.NextObject(buf) - if err != nil { + buf.Reset() + + if _, _, err := p.scanner.NextObject(buf); err != nil { return nil, err } @@ -301,9 +317,11 @@ func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { } ota.Type = ota.Parent.Type - hash := plumbing.ComputeHash(ota.Type, patched) - - ota.SHA1 = hash + ota.Hasher = plumbing.NewHasher(ota.Type, int64(len(patched))) + if _, err := ota.Write(patched); err != nil { + return nil, err + } + ota.SHA1 = ota.Sum() return patched, nil } @@ -323,6 +341,8 @@ type objectInfo struct { Parent *objectInfo Children []*objectInfo SHA1 plumbing.Hash + + content *bytes.Buffer } func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo { @@ -351,6 +371,30 @@ func newDeltaObject( return obj } +func (o *objectInfo) Write(bs []byte) (int, error) { + n, err := o.Hasher.Write(bs) + if err != nil { + return 0, err + } + + o.content = bytes.NewBuffer(nil) + + _, _ = o.content.Write(bs) + return n, nil +} + +// Content returns the content of the object. This operation can only be done +// once. +func (o *objectInfo) Content() ([]byte, error) { + if o.content == nil { + return nil, ErrObjectContentAlreadyRead + } + + r := o.content + o.content = nil + return r.Bytes(), nil +} + func (o *objectInfo) IsDelta() bool { return o.Type.IsDelta() } -- cgit From 5889a3b669f0f515ff445aa040afc1e7eeb2bbd1 Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Wed, 8 Aug 2018 16:56:20 +0200 Subject: plumbing: packfile, allow non-seekable sources on Parser Signed-off-by: Miguel Molina --- plumbing/format/packfile/parser.go | 311 ++++++++++++++++++++++++------------- 1 file changed, 202 insertions(+), 109 deletions(-) (limited to 'plumbing/format/packfile/parser.go') diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index f0a7674..beb3e27 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -7,16 +7,20 @@ import ( "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" + "gopkg.in/src-d/go-git.v4/plumbing/storer" ) var ( - // ErrObjectContentAlreadyRead is returned when the content of the object - // was already read, since the content can only be read once. - ErrObjectContentAlreadyRead = errors.New("object content was already read") - // ErrReferenceDeltaNotFound is returned when the reference delta is not // found. ErrReferenceDeltaNotFound = errors.New("reference delta not found") + + // ErrNotSeekableSource is returned when the source for the parser is not + // seekable and a storage was not provided, so it can't be parsed. + ErrNotSeekableSource = errors.New("parser source is not seekable and storage was not provided") + + // ErrDeltaNotCached is returned when the delta could not be found in cache. + ErrDeltaNotCached = errors.New("delta could not be found in cache") ) // Observer interface is implemented by index encoders. @@ -34,34 +38,96 @@ type Observer interface { // Parser decodes a packfile and calls any observer associated to it. Is used // to generate indexes. type Parser struct { - scanner *Scanner - count uint32 - oi []*objectInfo - oiByHash map[plumbing.Hash]*objectInfo - oiByOffset map[int64]*objectInfo - hashOffset map[plumbing.Hash]int64 - checksum plumbing.Hash - - cache *cache.ObjectLRU - contentCache map[int64][]byte + storage storer.EncodedObjectStorer + scanner *Scanner + count uint32 + oi []*objectInfo + oiByHash map[plumbing.Hash]*objectInfo + oiByOffset map[int64]*objectInfo + hashOffset map[plumbing.Hash]int64 + pendingRefDeltas map[plumbing.Hash][]*objectInfo + checksum plumbing.Hash + + cache *cache.ObjectLRU + // delta content by offset, only used if source is not seekable + deltas map[int64][]byte ob []Observer } -// NewParser creates a new Parser struct. -func NewParser(scanner *Scanner, ob ...Observer) *Parser { - var contentCache map[int64][]byte +// NewParser creates a new Parser. The Scanner source must be seekable. +// If it's not, NewParserWithStorage should be used instead. +func NewParser(scanner *Scanner, ob ...Observer) (*Parser, error) { + return NewParserWithStorage(scanner, nil, ob...) +} + +// NewParserWithStorage creates a new Parser. The scanner source must either +// be seekable or a storage must be provided. +func NewParserWithStorage( + scanner *Scanner, + storage storer.EncodedObjectStorer, + ob ...Observer, +) (*Parser, error) { + if !scanner.IsSeekable && storage == nil { + return nil, ErrNotSeekableSource + } + + var deltas map[int64][]byte if !scanner.IsSeekable { - contentCache = make(map[int64][]byte) + deltas = make(map[int64][]byte) } return &Parser{ - scanner: scanner, - ob: ob, - count: 0, - cache: cache.NewObjectLRUDefault(), - contentCache: contentCache, + storage: storage, + scanner: scanner, + ob: ob, + count: 0, + cache: cache.NewObjectLRUDefault(), + pendingRefDeltas: make(map[plumbing.Hash][]*objectInfo), + deltas: deltas, + }, nil +} + +func (p *Parser) forEachObserver(f func(o Observer) error) error { + for _, o := range p.ob { + if err := f(o); err != nil { + return err + } } + return nil +} + +func (p *Parser) onHeader(count uint32) error { + return p.forEachObserver(func(o Observer) error { + return o.OnHeader(count) + }) +} + +func (p *Parser) onInflatedObjectHeader( + t plumbing.ObjectType, + objSize int64, + pos int64, +) error { + return p.forEachObserver(func(o Observer) error { + return o.OnInflatedObjectHeader(t, objSize, pos) + }) +} + +func (p *Parser) onInflatedObjectContent( + h plumbing.Hash, + pos int64, + crc uint32, + content []byte, +) error { + return p.forEachObserver(func(o Observer) error { + return o.OnInflatedObjectContent(h, pos, crc, content) + }) +} + +func (p *Parser) onFooter(h plumbing.Hash) error { + return p.forEachObserver(func(o Observer) error { + return o.OnFooter(h) + }) } // Parse start decoding phase of the packfile. @@ -70,7 +136,13 @@ func (p *Parser) Parse() (plumbing.Hash, error) { return plumbing.ZeroHash, err } - if err := p.firstPass(); err != nil { + if err := p.indexObjects(); err != nil { + return plumbing.ZeroHash, err + } + + var err error + p.checksum, err = p.scanner.Checksum() + if err != nil && err != io.EOF { return plumbing.ZeroHash, err } @@ -78,10 +150,12 @@ func (p *Parser) Parse() (plumbing.Hash, error) { return plumbing.ZeroHash, err } - for _, o := range p.ob { - if err := o.OnFooter(p.checksum); err != nil { - return plumbing.ZeroHash, err - } + if len(p.pendingRefDeltas) > 0 { + return plumbing.ZeroHash, ErrReferenceDeltaNotFound + } + + if err := p.onFooter(p.checksum); err != nil { + return plumbing.ZeroHash, err } return p.checksum, nil @@ -93,10 +167,8 @@ func (p *Parser) init() error { return err } - for _, o := range p.ob { - if err := o.OnHeader(c); err != nil { - return err - } + if err := p.onHeader(c); err != nil { + return err } p.count = c @@ -107,7 +179,7 @@ func (p *Parser) init() error { return nil } -func (p *Parser) firstPass() error { +func (p *Parser) indexObjects() error { buf := new(bytes.Buffer) for i := uint32(0); i < p.count; i++ { @@ -121,25 +193,30 @@ func (p *Parser) firstPass() error { delta := false var ota *objectInfo switch t := oh.Type; t { - case plumbing.OFSDeltaObject, plumbing.REFDeltaObject: + case plumbing.OFSDeltaObject: delta = true - var parent *objectInfo - var ok bool - - if t == plumbing.OFSDeltaObject { - parent, ok = p.oiByOffset[oh.OffsetReference] - } else { - parent, ok = p.oiByHash[oh.Reference] - } - + parent, ok := p.oiByOffset[oh.OffsetReference] if !ok { - return ErrReferenceDeltaNotFound + return plumbing.ErrObjectNotFound } ota = newDeltaObject(oh.Offset, oh.Length, t, parent) - parent.Children = append(parent.Children, ota) + case plumbing.REFDeltaObject: + delta = true + + parent, ok := p.oiByHash[oh.Reference] + if ok { + ota = newDeltaObject(oh.Offset, oh.Length, t, parent) + parent.Children = append(parent.Children, ota) + } else { + ota = newBaseObject(oh.Offset, oh.Length, t) + p.pendingRefDeltas[oh.Reference] = append( + p.pendingRefDeltas[oh.Reference], + ota, + ) + } default: ota = newBaseObject(oh.Offset, oh.Length, t) } @@ -153,23 +230,35 @@ func (p *Parser) firstPass() error { ota.PackSize = size ota.Length = oh.Length + data := buf.Bytes() if !delta { - if _, err := ota.Write(buf.Bytes()); err != nil { + if _, err := ota.Write(data); err != nil { return err } ota.SHA1 = ota.Sum() p.oiByHash[ota.SHA1] = ota } - p.oiByOffset[oh.Offset] = ota + if p.storage != nil && !delta { + obj := new(plumbing.MemoryObject) + obj.SetSize(oh.Length) + obj.SetType(oh.Type) + if _, err := obj.Write(data); err != nil { + return err + } - p.oi[i] = ota - } + if _, err := p.storage.SetEncodedObject(obj); err != nil { + return err + } + } - var err error - p.checksum, err = p.scanner.Checksum() - if err != nil && err != io.EOF { - return err + if delta && !p.scanner.IsSeekable { + p.deltas[oh.Offset] = make([]byte, len(data)) + copy(p.deltas[oh.Offset], data) + } + + p.oiByOffset[oh.Offset] = ota + p.oi[i] = ota } return nil @@ -177,21 +266,17 @@ func (p *Parser) firstPass() error { func (p *Parser) resolveDeltas() error { for _, obj := range p.oi { - content, err := obj.Content() + content, err := p.get(obj) if err != nil { return err } - for _, o := range p.ob { - err := o.OnInflatedObjectHeader(obj.Type, obj.Length, obj.Offset) - if err != nil { - return err - } + if err := p.onInflatedObjectHeader(obj.Type, obj.Length, obj.Offset); err != nil { + return err + } - err = o.OnInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, content) - if err != nil { - return err - } + if err := p.onInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, content); err != nil { + return err } if !obj.IsDelta() && len(obj.Children) > 0 { @@ -206,6 +291,11 @@ func (p *Parser) resolveDeltas() error { return err } } + + // Remove the delta from the cache. + if obj.DiskType.IsDelta() && !p.scanner.IsSeekable { + delete(p.deltas, obj.Offset) + } } } @@ -214,7 +304,17 @@ func (p *Parser) resolveDeltas() error { func (p *Parser) get(o *objectInfo) ([]byte, error) { e, ok := p.cache.Get(o.SHA1) - if ok { + // If it's not on the cache and is not a delta we can try to find it in the + // storage, if there's one. + if !ok && p.storage != nil && !o.Type.IsDelta() { + var err error + e, err = p.storage.EncodedObject(plumbing.AnyObject, o.SHA1) + if err != nil { + return nil, err + } + } + + if e != nil { r, err := e.Reader() if err != nil { return nil, err @@ -228,32 +328,23 @@ func (p *Parser) get(o *objectInfo) ([]byte, error) { return buf, nil } - // Read from disk + var data []byte if o.DiskType.IsDelta() { base, err := p.get(o.Parent) if err != nil { return nil, err } - data, err := p.resolveObject(o, base) + data, err = p.resolveObject(o, base) if err != nil { return nil, err } - - if len(o.Children) > 0 { - m := &plumbing.MemoryObject{} - m.Write(data) - m.SetType(o.Type) - m.SetSize(o.Size()) - p.cache.Put(m) + } else { + var err error + data, err = p.readData(o) + if err != nil { + return nil, err } - - return data, nil - } - - data, err := p.readData(o) - if err != nil { - return nil, err } if len(o.Children) > 0 { @@ -285,11 +376,39 @@ func (p *Parser) resolveObject( return nil, err } + if pending, ok := p.pendingRefDeltas[o.SHA1]; ok { + for _, po := range pending { + po.Parent = o + o.Children = append(o.Children, po) + } + delete(p.pendingRefDeltas, o.SHA1) + } + + if p.storage != nil { + obj := new(plumbing.MemoryObject) + obj.SetSize(o.Size()) + obj.SetType(o.Type) + if _, err := obj.Write(data); err != nil { + return nil, err + } + + if _, err := p.storage.SetEncodedObject(obj); err != nil { + return nil, err + } + } + return data, nil } func (p *Parser) readData(o *objectInfo) ([]byte, error) { - buf := new(bytes.Buffer) + if !p.scanner.IsSeekable && o.DiskType.IsDelta() { + data, ok := p.deltas[o.Offset] + if !ok { + return nil, ErrDeltaNotCached + } + + return data, nil + } // TODO: skip header. Header size can be calculated with the offset of the // next offset in the first pass. @@ -301,8 +420,7 @@ func (p *Parser) readData(o *objectInfo) ([]byte, error) { return nil, err } - buf.Reset() - + buf := new(bytes.Buffer) if _, _, err := p.scanner.NextObject(buf); err != nil { return nil, err } @@ -322,6 +440,7 @@ func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { return nil, err } ota.SHA1 = ota.Sum() + ota.Length = int64(len(patched)) return patched, nil } @@ -341,8 +460,6 @@ type objectInfo struct { Parent *objectInfo Children []*objectInfo SHA1 plumbing.Hash - - content *bytes.Buffer } func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo { @@ -371,30 +488,6 @@ func newDeltaObject( return obj } -func (o *objectInfo) Write(bs []byte) (int, error) { - n, err := o.Hasher.Write(bs) - if err != nil { - return 0, err - } - - o.content = bytes.NewBuffer(nil) - - _, _ = o.content.Write(bs) - return n, nil -} - -// Content returns the content of the object. This operation can only be done -// once. -func (o *objectInfo) Content() ([]byte, error) { - if o.content == nil { - return nil, ErrObjectContentAlreadyRead - } - - r := o.content - o.content = nil - return r.Bytes(), nil -} - func (o *objectInfo) IsDelta() bool { return o.Type.IsDelta() } -- cgit From 71a3c9161d4d8d2baf16440a86a02e8f5678aef2 Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Thu, 9 Aug 2018 10:55:51 +0200 Subject: plumbing: packfile, read object content only once Signed-off-by: Miguel Molina --- plumbing/format/packfile/parser.go | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'plumbing/format/packfile/parser.go') diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index beb3e27..581c334 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -280,14 +280,8 @@ func (p *Parser) resolveDeltas() error { } if !obj.IsDelta() && len(obj.Children) > 0 { - var err error - base, err := p.get(obj) - if err != nil { - return err - } - for _, child := range obj.Children { - if _, err := p.resolveObject(child, base); err != nil { + if _, err := p.resolveObject(child, content); err != nil { return err } } @@ -297,12 +291,18 @@ func (p *Parser) resolveDeltas() error { delete(p.deltas, obj.Offset) } } + + obj.Content = nil } return nil } func (p *Parser) get(o *objectInfo) ([]byte, error) { + if len(o.Content) > 0 { + return o.Content, nil + } + e, ok := p.cache.Get(o.SHA1) // If it's not on the cache and is not a delta we can try to find it in the // storage, if there's one. @@ -460,6 +460,8 @@ type objectInfo struct { Parent *objectInfo Children []*objectInfo SHA1 plumbing.Hash + + Content []byte } func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo { @@ -488,6 +490,12 @@ func newDeltaObject( return obj } +func (o *objectInfo) Write(b []byte) (int, error) { + o.Content = make([]byte, len(b)) + copy(o.Content, b) + return o.Hasher.Write(b) +} + func (o *objectInfo) IsDelta() bool { return o.Type.IsDelta() } -- cgit