From b953011e07002189f614a5c42f7bb28079558fdb Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Wed, 6 Sep 2017 14:26:30 +0200 Subject: packfile: slightly haster hash function for chunk-offset index key Signed-off-by: Miguel Molina --- plumbing/format/packfile/delta_test.go | 4 ++-- plumbing/format/packfile/diff_delta.go | 39 ++++++++++++++++++++++++++-------- 2 files changed, 32 insertions(+), 11 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/packfile/delta_test.go b/plumbing/format/packfile/delta_test.go index f6e93d2..42b777a 100644 --- a/plumbing/format/packfile/delta_test.go +++ b/plumbing/format/packfile/delta_test.go @@ -84,7 +84,7 @@ func (s *DeltaSuite) TestAddDelta(c *C) { for _, t := range s.testCases { baseBuf := genBytes(t.base) targetBuf := genBytes(t.target) - delta := DiffDelta(make(deltaIndex), baseBuf, targetBuf) + delta := DiffDelta(baseBuf, targetBuf) result, err := PatchDelta(baseBuf, delta) c.Log("Executing test case:", t.description) @@ -98,7 +98,7 @@ func (s *DeltaSuite) TestIncompleteDelta(c *C) { c.Log("Incomplete delta on:", t.description) baseBuf := genBytes(t.base) targetBuf := genBytes(t.target) - delta := DiffDelta(make(deltaIndex), baseBuf, targetBuf) + delta := DiffDelta(baseBuf, targetBuf) delta = delta[:len(delta)-2] result, err := PatchDelta(baseBuf, delta) c.Assert(err, NotNil) diff --git a/plumbing/format/packfile/diff_delta.go b/plumbing/format/packfile/diff_delta.go index d0db46b..295b411 100644 --- a/plumbing/format/packfile/diff_delta.go +++ b/plumbing/format/packfile/diff_delta.go @@ -2,7 +2,6 @@ package packfile import ( "bytes" - "hash/adler32" "io/ioutil" "gopkg.in/src-d/go-git.v4/plumbing" @@ -49,7 +48,7 @@ func getDelta(index deltaIndex, base, target plumbing.EncodedObject) (plumbing.E return nil, err } - db := DiffDelta(index, bb, tb) + db := diffDelta(index, bb, tb) delta := &plumbing.MemoryObject{} _, err = delta.Write(db) if err != nil { @@ -63,7 +62,11 @@ func getDelta(index deltaIndex, base, target plumbing.EncodedObject) (plumbing.E } // DiffDelta returns the delta that transforms src into tgt. -func DiffDelta(sindex deltaIndex, src []byte, tgt []byte) []byte { +func DiffDelta(src, tgt []byte) []byte { + return diffDelta(make(deltaIndex), src, tgt) +} + +func diffDelta(sindex deltaIndex, src []byte, tgt []byte) []byte { buf := bufPool.Get().(*bytes.Buffer) buf.Reset() buf.Write(deltaEncodeSize(len(src))) @@ -132,24 +135,42 @@ func encodeInsertOperation(ibuf, buf *bytes.Buffer) { ibuf.Reset() } -func initMatch(index map[uint32]int, src []byte) map[uint32]int { +func initMatch(index deltaIndex, src []byte) { i := 0 for { if i+s > len(src) { break } - ch := adler32.Checksum(src[i : i+s]) + ch := hashBuf(src[i : i+s]) index[ch] = i i += s } +} + +// https://lemire.me/blog/2015/10/22/faster-hashing-without-effort/ +func hashBuf(buf []byte) int64 { + var h int64 + var i int + len := len(buf) + for ; i+3 < len; i += 4 { + h = 31*31*31*31*h + + 31*31*31*int64(buf[i]) + + 31*31*int64(buf[i+1]) + + 31*int64(buf[i+2]) + + int64(buf[i+3]) + } + + for ; i < len; i++ { + h = 31*h + int64(buf[i]) + } - return index + return h } -func findMatch(src, tgt []byte, sindex map[uint32]int, tgtOffset int) (srcOffset, l int) { +func findMatch(src, tgt []byte, sindex deltaIndex, tgtOffset int) (srcOffset, l int) { if len(tgt) >= tgtOffset+s { - ch := adler32.Checksum(tgt[tgtOffset : tgtOffset+s]) + ch := hashBuf(tgt[tgtOffset : tgtOffset+s]) var ok bool srcOffset, ok = sindex[ch] if !ok { @@ -219,4 +240,4 @@ func encodeCopyOperation(offset, length int) []byte { return append([]byte{byte(code)}, opcodes...) } -type deltaIndex map[uint32]int +type deltaIndex map[int64]int -- cgit