From f33ceb084eaa95c9c7e702eea1e9c9afb69ec806 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Tue, 24 Jan 2023 13:35:48 +0100 Subject: cache: faster indexing by capping Bleve batch count --- cache/subcache.go | 51 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 6 deletions(-) (limited to 'cache') diff --git a/cache/subcache.go b/cache/subcache.go index 09e53c23..b0ba6e52 100644 --- a/cache/subcache.go +++ b/cache/subcache.go @@ -187,6 +187,27 @@ func (sc *SubCache[EntityT, ExcerptT, CacheT]) write() error { } func (sc *SubCache[EntityT, ExcerptT, CacheT]) Build() <-chan BuildEvent { + // value chosen experimentally as giving the fastest indexing, while + // not driving the cache size on disk too high. + // + // | batchCount | bugIndex (MB) | idIndex (kB) | time (s) | + // |:----------:|:-------------:|:------------:|:--------:| + // | 10 | 24 | 84 | 1,59 | + // | 30 | 26 | 84 | 1,388 | + // | 50 | 26 | 84 | 1,44 | + // | 60 | 26 | 80 | 1,377 | + // | 68 | 27 | 80 | 1,385 | + // | 75 | 26 | 84 | 1,32 | + // | 80 | 26 | 80 | 1,37 | + // | 85 | 27 | 80 | 1,317 | + // | 100 | 26 | 80 | 1,455 | + // | 150 | 26 | 80 | 2,066 | + // | 200 | 28 | 80 | 2,885 | + // | 250 | 30 | 72 | 3,555 | + // | 300 | 31 | 72 | 4,787 | + // | 500 | 23 | 72 | 5,4 | + const maxBatchCount = 75 + out := make(chan BuildEvent) go func() { @@ -221,6 +242,7 @@ func (sc *SubCache[EntityT, ExcerptT, CacheT]) Build() <-chan BuildEvent { } indexer, indexEnd := index.IndexBatch() + var batchCount int for e := range allEntities { if e.Err != nil { @@ -245,6 +267,21 @@ func (sc *SubCache[EntityT, ExcerptT, CacheT]) Build() <-chan BuildEvent { return } + batchCount++ + if batchCount >= maxBatchCount { + err = indexEnd() + if err != nil { + out <- BuildEvent{ + Typename: sc.typename, + Err: err, + } + return + } + + indexer, indexEnd = index.IndexBatch() + batchCount = 0 + } + out <- BuildEvent{ Typename: sc.typename, Event: BuildEventProgress, @@ -253,13 +290,15 @@ 
func (sc *SubCache[EntityT, ExcerptT, CacheT]) Build() <-chan BuildEvent { } } - err = indexEnd() - if err != nil { - out <- BuildEvent{ - Typename: sc.typename, - Err: err, + if batchCount > 0 { + err = indexEnd() + if err != nil { + out <- BuildEvent{ + Typename: sc.typename, + Err: err, + } + return } - return } err = sc.write() -- cgit