From a8c4426d204f42e683e902dcb277494004d5e59d Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Tue, 14 Aug 2018 11:59:11 +0200 Subject: plumbing: add buffer cache and use it in packfile parser It uses less memory and is faster as slices don't have to be converted from/to MemoryObject and they are indexed by offset. Signed-off-by: Javi Fontan --- plumbing/format/packfile/parser.go | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'plumbing/format/packfile/parser.go') diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index 581c334..88f33dc 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -48,7 +48,7 @@ type Parser struct { pendingRefDeltas map[plumbing.Hash][]*objectInfo checksum plumbing.Hash - cache *cache.ObjectLRU + cache *cache.BufferLRU // delta content by offset, only used if source is not seekable deltas map[int64][]byte @@ -82,7 +82,7 @@ func NewParserWithStorage( scanner: scanner, ob: ob, count: 0, - cache: cache.NewObjectLRUDefault(), + cache: cache.NewBufferLRUDefault(), pendingRefDeltas: make(map[plumbing.Hash][]*objectInfo), deltas: deltas, }, nil @@ -303,29 +303,29 @@ func (p *Parser) get(o *objectInfo) ([]byte, error) { return o.Content, nil } - e, ok := p.cache.Get(o.SHA1) + b, ok := p.cache.Get(o.Offset) // If it's not on the cache and is not a delta we can try to find it in the // storage, if there's one. 
if !ok && p.storage != nil && !o.Type.IsDelta() { var err error - e, err = p.storage.EncodedObject(plumbing.AnyObject, o.SHA1) + e, err := p.storage.EncodedObject(plumbing.AnyObject, o.SHA1) if err != nil { return nil, err } - } - if e != nil { r, err := e.Reader() if err != nil { return nil, err } - buf := make([]byte, e.Size()) - if _, err = r.Read(buf); err != nil { + b = make([]byte, e.Size()) + if _, err = r.Read(b); err != nil { return nil, err } + } - return buf, nil + if b != nil { + return b, nil } var data []byte @@ -348,11 +348,7 @@ func (p *Parser) get(o *objectInfo) ([]byte, error) { } if len(o.Children) > 0 { - m := &plumbing.MemoryObject{} - m.Write(data) - m.SetType(o.Type) - m.SetSize(o.Size()) - p.cache.Put(m) + p.cache.Put(o.Offset, data) } return data, nil -- cgit From 555a6ca02e88279cef421df88a108c2955fcde77 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Tue, 14 Aug 2018 12:21:12 +0200 Subject: plumbing/packfile: tidy up objectInfo struct * a new hasher is created when needed * delete unused fields * base content is no longer kept in memory Signed-off-by: Javi Fontan --- plumbing/format/packfile/parser.go | 58 +++++++++++++++----------------------- 1 file changed, 22 insertions(+), 36 deletions(-) (limited to 'plumbing/format/packfile/parser.go') diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index 88f33dc..3a9c4d7 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -221,21 +221,22 @@ func (p *Parser) indexObjects() error { ota = newBaseObject(oh.Offset, oh.Length, t) } - size, crc, err := p.scanner.NextObject(buf) + _, crc, err := p.scanner.NextObject(buf) if err != nil { return err } ota.Crc32 = crc - ota.PackSize = size ota.Length = oh.Length data := buf.Bytes() if !delta { - if _, err := ota.Write(data); err != nil { + sha1, err := getSHA1(ota.Type, data) + if err != nil { return err } - ota.SHA1 = ota.Sum() + + ota.SHA1 = sha1 p.oiByHash[ota.SHA1] = ota } @@ 
-291,18 +292,12 @@ func (p *Parser) resolveDeltas() error { delete(p.deltas, obj.Offset) } } - - obj.Content = nil } return nil } func (p *Parser) get(o *objectInfo) ([]byte, error) { - if len(o.Content) > 0 { - return o.Content, nil - } - b, ok := p.cache.Get(o.Offset) // If it's not on the cache and is not a delta we can try to find it in the // storage, if there's one. @@ -406,8 +401,6 @@ func (p *Parser) readData(o *objectInfo) ([]byte, error) { return data, nil } - // TODO: skip header. Header size can be calculated with the offset of the - // next offset in the first pass. if _, err := p.scanner.SeekFromStart(o.Offset); err != nil { return nil, err } @@ -431,33 +424,37 @@ func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { } ota.Type = ota.Parent.Type - ota.Hasher = plumbing.NewHasher(ota.Type, int64(len(patched))) - if _, err := ota.Write(patched); err != nil { + sha1, err := getSHA1(ota.Type, patched) + if err != nil { return nil, err } - ota.SHA1 = ota.Sum() + + ota.SHA1 = sha1 ota.Length = int64(len(patched)) return patched, nil } -type objectInfo struct { - plumbing.Hasher +func getSHA1(t plumbing.ObjectType, data []byte) (plumbing.Hash, error) { + hasher := plumbing.NewHasher(t, int64(len(data))) + if _, err := hasher.Write(data); err != nil { + return plumbing.ZeroHash, err + } + + return hasher.Sum(), nil +} - Offset int64 - Length int64 - HeaderLength int64 - PackSize int64 - Type plumbing.ObjectType - DiskType plumbing.ObjectType +type objectInfo struct { + Offset int64 + Length int64 + Type plumbing.ObjectType + DiskType plumbing.ObjectType Crc32 uint32 Parent *objectInfo Children []*objectInfo SHA1 plumbing.Hash - - Content []byte } func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo { @@ -469,29 +466,18 @@ func newDeltaObject( t plumbing.ObjectType, parent *objectInfo, ) *objectInfo { - children := make([]*objectInfo, 0) - obj := &objectInfo{ - Hasher: plumbing.NewHasher(t, length), Offset: offset, 
Length: length, - PackSize: 0, Type: t, DiskType: t, Crc32: 0, Parent: parent, - Children: children, } return obj } -func (o *objectInfo) Write(b []byte) (int, error) { - o.Content = make([]byte, len(b)) - copy(o.Content, b) - return o.Hasher.Write(b) -} - func (o *objectInfo) IsDelta() bool { return o.Type.IsDelta() } -- cgit From eb2aa9b2c3bf7af93fd261228be1b96e61c52bcf Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Tue, 14 Aug 2018 16:56:29 +0200 Subject: plumbing/packfile: do not compute sha1 for already undeltified objects Signed-off-by: Javi Fontan --- plumbing/format/packfile/parser.go | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'plumbing/format/packfile/parser.go') diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index 3a9c4d7..28582b5 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -423,14 +423,16 @@ func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { return nil, err } - ota.Type = ota.Parent.Type - sha1, err := getSHA1(ota.Type, patched) - if err != nil { - return nil, err - } + if ota.SHA1 == plumbing.ZeroHash { + ota.Type = ota.Parent.Type + sha1, err := getSHA1(ota.Type, patched) + if err != nil { + return nil, err + } - ota.SHA1 = sha1 - ota.Length = int64(len(patched)) + ota.SHA1 = sha1 + ota.Length = int64(len(patched)) + } return patched, nil } -- cgit