From 9490da0f86a12269abb2099e2ead1f20eec166d2 Mon Sep 17 00:00:00 2001 From: Paulo Gomes Date: Fri, 4 Nov 2022 12:44:40 +0000 Subject: Optimize zlib reader and consolidate sync.pools Expands on the optimisations from https://github.com/fluxcd/go-git/pull/5 and ensures that zlib reader does not need to recreate a deflate dictionary at every use. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The use of sync pools was consolidated into a new sync utils package. name old time/op new time/op delta Parser-16 7.51ms ± 3% 7.71ms ± 6% ~ (p=0.222 n=5+5) name old alloc/op new alloc/op delta Parser-16 4.65MB ± 3% 1.90MB ± 3% -59.06% (p=0.008 n=5+5) name old allocs/op new allocs/op delta Parser-16 3.48k ± 0% 3.32k ± 0% -4.57% (p=0.016 n=5+4) Signed-off-by: Paulo Gomes --- plumbing/format/packfile/common.go | 18 ----------- plumbing/format/packfile/diff_delta.go | 21 ++++++------- plumbing/format/packfile/packfile.go | 34 ++++++++++----------- plumbing/format/packfile/parser.go | 19 +++++++----- plumbing/format/packfile/patch_delta.go | 18 +++++------ plumbing/format/packfile/scanner.go | 54 ++++++++++++++------------------- 6 files changed, 66 insertions(+), 98 deletions(-) (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/common.go b/plumbing/format/packfile/common.go index df423ad..36c5ef5 100644 --- a/plumbing/format/packfile/common.go +++ b/plumbing/format/packfile/common.go @@ -1,10 +1,7 @@ package packfile import ( - "bytes" - "compress/zlib" "io" - "sync" "github.com/go-git/go-git/v5/plumbing/storer" "github.com/go-git/go-git/v5/utils/ioutil" @@ -61,18 +58,3 @@ func WritePackfileToObjectStorage( return err } - -var bufPool = sync.Pool{ - New: func() interface{} { - return bytes.NewBuffer(nil) - }, -} - -var zlibInitBytes = []byte{0x78, 0x9c, 0x01, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x01} - -var zlibReaderPool = sync.Pool{ - New: func() interface{} { - r, _ := zlib.NewReader(bytes.NewReader(zlibInitBytes)) - return r - }, -} diff --git a/plumbing/format/packfile/diff_delta.go b/plumbing/format/packfile/diff_delta.go index 1951b34..2c7a335 100644 --- a/plumbing/format/packfile/diff_delta.go +++ b/plumbing/format/packfile/diff_delta.go @@ -5,6 +5,7 @@ import ( "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/utils/ioutil" + "github.com/go-git/go-git/v5/utils/sync" ) // See https://github.com/jelmer/dulwich/blob/master/dulwich/pack.py and @@ -43,18 +44,16 @@ func getDelta(index *deltaIndex, base, target plumbing.EncodedObject) (o plumbin defer ioutil.CheckClose(tr, &err) - bb := bufPool.Get().(*bytes.Buffer) - defer bufPool.Put(bb) - bb.Reset() + bb := sync.GetBytesBuffer() + defer sync.PutBytesBuffer(bb) _, err = bb.ReadFrom(br) if err != nil { return nil, err } - tb := bufPool.Get().(*bytes.Buffer) - defer bufPool.Put(tb) - tb.Reset() + tb := sync.GetBytesBuffer() + defer sync.PutBytesBuffer(tb) _, err = tb.ReadFrom(tr) if err != nil { @@ -80,9 +79,8 @@ func DiffDelta(src, tgt []byte) []byte { } func diffDelta(index *deltaIndex, src []byte, tgt []byte) []byte { - buf := bufPool.Get().(*bytes.Buffer) - defer bufPool.Put(buf) - buf.Reset() + buf := sync.GetBytesBuffer() + defer sync.PutBytesBuffer(buf) buf.Write(deltaEncodeSize(len(src))) buf.Write(deltaEncodeSize(len(tgt))) @@ -90,9 +88,8 @@ func diffDelta(index *deltaIndex, src []byte, tgt []byte) []byte { index.init(src) } - ibuf := bufPool.Get().(*bytes.Buffer) - defer bufPool.Put(ibuf) - ibuf.Reset() + ibuf := sync.GetBytesBuffer() + defer sync.PutBytesBuffer(ibuf) for i := 0; i < len(tgt); i++ { offset, l := index.findMatch(src, tgt, i) diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 8dd6041..6852702 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -2,7 +2,6 @@ package packfile import ( "bytes" - "compress/zlib" "fmt" "io" "os" @@ -13,6 +12,7 @@ import ( "github.com/go-git/go-git/v5/plumbing/format/idxfile" "github.com/go-git/go-git/v5/plumbing/storer" "github.com/go-git/go-git/v5/utils/ioutil" + "github.com/go-git/go-git/v5/utils/sync" ) var ( @@ -138,9 +138,8 @@ func (p *Packfile) getObjectSize(h *ObjectHeader) (int64, error) { case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: return h.Length, nil case plumbing.REFDeltaObject, plumbing.OFSDeltaObject: - buf := bufPool.Get().(*bytes.Buffer) - defer bufPool.Put(buf) - buf.Reset() + buf := sync.GetBytesBuffer() + defer sync.PutBytesBuffer(buf) if _, _, err := p.s.NextObject(buf); err != nil { return 0, err @@ -227,9 +226,9 @@ func (p *Packfile) getNextObject(h *ObjectHeader, hash plumbing.Hash) (plumbing. // For delta objects we read the delta data and apply the small object // optimization only if the expanded version of the object still meets // the small object threshold condition. - buf := bufPool.Get().(*bytes.Buffer) - defer bufPool.Put(buf) - buf.Reset() + buf := sync.GetBytesBuffer() + defer sync.PutBytesBuffer(buf) + if _, _, err := p.s.NextObject(buf); err != nil { return nil, err } @@ -290,14 +289,13 @@ func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { func asyncReader(p *Packfile) (io.ReadCloser, error) { reader := ioutil.NewReaderUsingReaderAt(p.file, p.s.r.offset) - zr := zlibReaderPool.Get().(io.ReadCloser) - - if err := zr.(zlib.Resetter).Reset(reader, nil); err != nil { + zr, err := sync.GetZlibReader(reader) + if err != nil { return nil, fmt.Errorf("zlib reset error: %s", err) } - return ioutil.NewReadCloserWithCloser(zr, func() error { - zlibReaderPool.Put(zr) + return ioutil.NewReadCloserWithCloser(zr.Reader, func() error { + sync.PutZlibReader(zr) return nil }), nil @@ -373,9 +371,9 @@ func (p *Packfile) fillRegularObjectContent(obj plumbing.EncodedObject) (err err } func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plumbing.Hash) error { - buf := bufPool.Get().(*bytes.Buffer) - defer bufPool.Put(buf) - buf.Reset() + buf := sync.GetBytesBuffer() + defer sync.PutBytesBuffer(buf) + _, _, err := p.s.NextObject(buf) if err != nil { return err @@ -417,9 +415,9 @@ func (p *Packfile) fillREFDeltaObjectContentWithBuffer(obj plumbing.EncodedObjec } func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) error { - buf := bufPool.Get().(*bytes.Buffer) - defer bufPool.Put(buf) - buf.Reset() + buf := sync.GetBytesBuffer() + defer sync.PutBytesBuffer(buf) + _, _, err := p.s.NextObject(buf) if err != nil { return err diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index 9ec838e..522c146 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -10,6 +10,7 @@ import ( "github.com/go-git/go-git/v5/plumbing/cache" "github.com/go-git/go-git/v5/plumbing/storer" "github.com/go-git/go-git/v5/utils/ioutil" + "github.com/go-git/go-git/v5/utils/sync" ) var ( @@ -175,7 +176,8 @@ func (p *Parser) init() error { } func (p *Parser) indexObjects() error { - buf := new(bytes.Buffer) + buf := sync.GetBytesBuffer() + defer sync.PutBytesBuffer(buf) for i := uint32(0); i < p.count; i++ { buf.Reset() @@ -219,6 +221,7 @@ func (p *Parser) indexObjects() error { ota = newBaseObject(oh.Offset, oh.Length, t) } + buf.Grow(int(oh.Length)) _, crc, err := p.scanner.NextObject(buf) if err != nil { return err @@ -264,7 +267,9 @@ func (p *Parser) indexObjects() error { } func (p *Parser) resolveDeltas() error { - buf := &bytes.Buffer{} + buf := sync.GetBytesBuffer() + defer sync.PutBytesBuffer(buf) + for _, obj := range p.oi { buf.Reset() err := p.get(obj, buf) @@ -346,9 +351,8 @@ func (p *Parser) get(o *objectInfo, buf *bytes.Buffer) (err error) { } if o.DiskType.IsDelta() { - b := bufPool.Get().(*bytes.Buffer) - defer bufPool.Put(b) - b.Reset() + b := sync.GetBytesBuffer() + defer sync.PutBytesBuffer(b) err := p.get(o.Parent, b) if err != nil { return err @@ -382,9 +386,8 @@ func (p *Parser) resolveObject( if !o.DiskType.IsDelta() { return nil } - buf := bufPool.Get().(*bytes.Buffer) - defer bufPool.Put(buf) - buf.Reset() + buf := sync.GetBytesBuffer() + defer sync.PutBytesBuffer(buf) err := p.readData(buf, o) if err != nil { return err diff --git a/plumbing/format/packfile/patch_delta.go b/plumbing/format/packfile/patch_delta.go index 053466d..f00562d 100644 --- a/plumbing/format/packfile/patch_delta.go +++ b/plumbing/format/packfile/patch_delta.go @@ -9,6 +9,7 @@ import ( "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/utils/ioutil" + "github.com/go-git/go-git/v5/utils/sync" ) // See https://github.com/git/git/blob/49fa3dc76179e04b0833542fa52d0f287a4955ac/delta.h @@ -34,18 +35,16 @@ func ApplyDelta(target, base plumbing.EncodedObject, delta []byte) (err error) { defer ioutil.CheckClose(w, &err) - buf := bufPool.Get().(*bytes.Buffer) - defer bufPool.Put(buf) - buf.Reset() + buf := sync.GetBytesBuffer() + defer sync.PutBytesBuffer(buf) _, err = buf.ReadFrom(r) if err != nil { return err } src := buf.Bytes() - dst := bufPool.Get().(*bytes.Buffer) - defer bufPool.Put(dst) - dst.Reset() + dst := sync.GetBytesBuffer() + defer sync.PutBytesBuffer(dst) err = patchDelta(dst, src, delta) if err != nil { return err @@ -53,10 +52,9 @@ func ApplyDelta(target, base plumbing.EncodedObject, delta []byte) (err error) { target.SetSize(int64(dst.Len())) - bufp := byteSlicePool.Get().(*[]byte) - b := *bufp - _, err = io.CopyBuffer(w, dst, b) - byteSlicePool.Put(bufp) + b := sync.GetByteSlice() + _, err = io.CopyBuffer(w, dst, *b) + sync.PutByteSlice(b) return err } diff --git a/plumbing/format/packfile/scanner.go b/plumbing/format/packfile/scanner.go index b655594..9ebb84a 100644 --- a/plumbing/format/packfile/scanner.go +++ b/plumbing/format/packfile/scanner.go @@ -3,17 +3,16 @@ package packfile import ( "bufio" "bytes" - "compress/zlib" "fmt" "hash" "hash/crc32" "io" stdioutil "io/ioutil" - "sync" "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/utils/binary" "github.com/go-git/go-git/v5/utils/ioutil" + "github.com/go-git/go-git/v5/utils/sync" ) var ( @@ -323,14 +322,14 @@ func (s *Scanner) NextObject(w io.Writer) (written int64, crc32 uint32, err erro // ReadObject returns a reader for the object content and an error func (s *Scanner) ReadObject() (io.ReadCloser, error) { s.pendingObject = nil - zr := zlibReaderPool.Get().(io.ReadCloser) + zr, err := sync.GetZlibReader(s.r) - if err := zr.(zlib.Resetter).Reset(s.r, nil); err != nil { + if err != nil { return nil, fmt.Errorf("zlib reset error: %s", err) } - return ioutil.NewReadCloserWithCloser(zr, func() error { - zlibReaderPool.Put(zr) + return ioutil.NewReadCloserWithCloser(zr.Reader, func() error { + sync.PutZlibReader(zr) return nil }), nil } @@ -338,28 +337,20 @@ func (s *Scanner) ReadObject() (io.ReadCloser, error) { // ReadRegularObject reads and write a non-deltified object // from it zlib stream in an object entry in the packfile. func (s *Scanner) copyObject(w io.Writer) (n int64, err error) { - zr := zlibReaderPool.Get().(io.ReadCloser) - defer zlibReaderPool.Put(zr) + zr, err := sync.GetZlibReader(s.r) + defer sync.PutZlibReader(zr) - if err = zr.(zlib.Resetter).Reset(s.r, nil); err != nil { + if err != nil { return 0, fmt.Errorf("zlib reset error: %s", err) } - defer ioutil.CheckClose(zr, &err) - bufp := byteSlicePool.Get().(*[]byte) - buf := *bufp - n, err = io.CopyBuffer(w, zr, buf) - byteSlicePool.Put(bufp) + defer ioutil.CheckClose(zr.Reader, &err) + buf := sync.GetByteSlice() + n, err = io.CopyBuffer(w, zr.Reader, *buf) + sync.PutByteSlice(buf) return } -var byteSlicePool = sync.Pool{ - New: func() interface{} { - b := make([]byte, 32*1024) - return &b - }, -} - // SeekFromStart sets a new offset from start, returns the old position before // the change. func (s *Scanner) SeekFromStart(offset int64) (previous int64, err error) { @@ -389,10 +380,9 @@ func (s *Scanner) Checksum() (plumbing.Hash, error) { // Close reads the reader until io.EOF func (s *Scanner) Close() error { - bufp := byteSlicePool.Get().(*[]byte) - buf := *bufp - _, err := io.CopyBuffer(stdioutil.Discard, s.r, buf) - byteSlicePool.Put(bufp) + buf := sync.GetByteSlice() + _, err := io.CopyBuffer(stdioutil.Discard, s.r, *buf) + sync.PutByteSlice(buf) return err } @@ -403,13 +393,13 @@ func (s *Scanner) Flush() error { } // scannerReader has the following characteristics: -// - Provides an io.SeekReader impl for bufio.Reader, when the underlying -// reader supports it. -// - Keeps track of the current read position, for when the underlying reader -// isn't an io.SeekReader, but we still want to know the current offset. -// - Writes to the hash writer what it reads, with the aid of a smaller buffer. -// The buffer helps avoid a performance penalty for performing small writes -// to the crc32 hash writer. +// - Provides an io.SeekReader impl for bufio.Reader, when the underlying +// reader supports it. +// - Keeps track of the current read position, for when the underlying reader +// isn't an io.SeekReader, but we still want to know the current offset. +// - Writes to the hash writer what it reads, with the aid of a smaller buffer. +// The buffer helps avoid a performance penalty for performing small writes +// to the crc32 hash writer. type scannerReader struct { reader io.Reader crc io.Writer -- cgit