From db7074301b6af895b1a47ecd12a5028ac809abfc Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Mon, 30 Nov 2020 01:55:30 +0100 Subject: entity: generalize the combined Ids, use 64 length --- bug/comment.go | 46 ----------------------------- bug/comment_test.go | 27 ----------------- bug/op_add_comment.go | 2 +- bug/op_create.go | 2 +- bug/op_create_test.go | 3 +- cache/repo_cache_bug.go | 2 +- entity/id.go | 2 +- entity/id_interleaved.go | 68 +++++++++++++++++++++++++++++++++++++++++++ entity/id_interleaved_test.go | 36 +++++++++++++++++++++++ 9 files changed, 110 insertions(+), 78 deletions(-) delete mode 100644 bug/comment_test.go create mode 100644 entity/id_interleaved.go create mode 100644 entity/id_interleaved_test.go diff --git a/bug/comment.go b/bug/comment.go index 1a9ca05a..4c9d118e 100644 --- a/bug/comment.go +++ b/bug/comment.go @@ -1,8 +1,6 @@ package bug import ( - "strings" - "github.com/dustin/go-humanize" "github.com/MichaelMure/git-bug/entity" @@ -33,50 +31,6 @@ func (c Comment) Id() entity.Id { return c.id } -const compiledCommentIdFormat = "BCBCBCBBBCBBBBCBBBBCBBBBCBBBBCBBBBCBBBBC" - -// DeriveCommentId compute a merged Id for a comment holding information from -// both the Bug's Id and the Comment's Id. This allow to later find efficiently -// a Comment because we can access the bug directly instead of searching for a -// Bug that has a Comment matching the Id. -// -// To allow the use of an arbitrary length prefix of this merged Id, Ids from Bug -// and Comment are interleaved with this irregular pattern to give the best chance -// to find the Comment even with a 7 character prefix. -// -// A complete merged Id hold 30 characters for the Bug and 10 for the Comment, -// which give a key space of 36^30 for the Bug (~5 * 10^46) and 36^10 for the -// Comment (~3 * 10^15). This asymmetry assume a reasonable number of Comment -// within a Bug, while still allowing for a vast key space for Bug (that is, a -// globally merged bug database) with a low risk of collision. -func DeriveCommentId(bugId entity.Id, commentId entity.Id) entity.Id { - var id strings.Builder - for _, char := range compiledCommentIdFormat { - if char == 'B' { - id.WriteByte(bugId[0]) - bugId = bugId[1:] - } else { - id.WriteByte(commentId[0]) - commentId = commentId[1:] - } - } - return entity.Id(id.String()) -} - -func SplitCommentId(prefix string) (bugPrefix string, commentPrefix string) { - var bugIdPrefix strings.Builder - var commentIdPrefix strings.Builder - - for i, char := range prefix { - if compiledCommentIdFormat[i] == 'B' { - bugIdPrefix.WriteRune(char) - } else { - commentIdPrefix.WriteRune(char) - } - } - return bugIdPrefix.String(), commentIdPrefix.String() -} - // FormatTimeRel format the UnixTime of the comment for human consumption func (c Comment) FormatTimeRel() string { return humanize.Time(c.UnixTime.Time()) diff --git a/bug/comment_test.go b/bug/comment_test.go deleted file mode 100644 index 423d10d8..00000000 --- a/bug/comment_test.go +++ /dev/null @@ -1,27 +0,0 @@ -package bug - -import ( - "testing" - - "github.com/stretchr/testify/require" - - "github.com/MichaelMure/git-bug/entity" -) - -func TestCommentId(t *testing.T) { - bugId := entity.Id("abcdefghijklmnopqrstuvwxyz1234__________") - opId := entity.Id("ABCDEFGHIJ______________________________") - expectedId := entity.Id("aAbBcCdefDghijEklmnFopqrGstuvHwxyzI1234J") - - mergedId := DeriveCommentId(bugId, opId) - require.Equal(t, expectedId, mergedId) - - // full length - splitBugId, splitCommentId := SplitCommentId(mergedId.String()) - require.Equal(t, string(bugId[:30]), splitBugId) - require.Equal(t, string(opId[:10]), splitCommentId) - - splitBugId, splitCommentId = SplitCommentId(string(expectedId[:6])) - require.Equal(t, string(bugId[:3]), splitBugId) - require.Equal(t, string(opId[:3]), splitCommentId) -} diff --git a/bug/op_add_comment.go b/bug/op_add_comment.go index df426ee0..e52c46fd 100644 --- a/bug/op_add_comment.go +++ b/bug/op_add_comment.go @@ -36,7 +36,7 @@ func (op *AddCommentOperation) Apply(snapshot *Snapshot) { snapshot.addActor(op.Author) snapshot.addParticipant(op.Author) - commentId := DeriveCommentId(snapshot.Id(), op.Id()) + commentId := entity.CombineIds(snapshot.Id(), op.Id()) comment := Comment{ id: commentId, Message: op.Message, diff --git a/bug/op_create.go b/bug/op_create.go index 044ddd72..1e944d13 100644 --- a/bug/op_create.go +++ b/bug/op_create.go @@ -66,7 +66,7 @@ func (op *CreateOperation) Apply(snapshot *Snapshot) { snapshot.Title = op.Title - commentId := DeriveCommentId(snapshot.Id(), op.Id()) + commentId := entity.CombineIds(snapshot.Id(), op.Id()) comment := Comment{ id: commentId, Message: op.Message, diff --git a/bug/op_create_test.go b/bug/op_create_test.go index 73a65778..456357c4 100644 --- a/bug/op_create_test.go +++ b/bug/op_create_test.go @@ -7,6 +7,7 @@ import ( "github.com/stretchr/testify/require" + "github.com/MichaelMure/git-bug/entity" "github.com/MichaelMure/git-bug/identity" "github.com/MichaelMure/git-bug/repository" "github.com/MichaelMure/git-bug/util/timestamp" @@ -29,7 +30,7 @@ func TestCreate(t *testing.T) { id := create.Id() require.NoError(t, id.Validate()) - commentId := DeriveCommentId(create.Id(), create.Id()) + commentId := entity.CombineIds(create.Id(), create.Id()) comment := Comment{ id: commentId, diff --git a/cache/repo_cache_bug.go b/cache/repo_cache_bug.go index cfcbb72d..90b9a892 100644 --- a/cache/repo_cache_bug.go +++ b/cache/repo_cache_bug.go @@ -265,7 +265,7 @@ func (c *RepoCache) resolveBugMatcher(f func(*BugExcerpt) bool) (entity.Id, erro // bug/comment Id prefix. Returns the Bug containing the Comment and the Comment's // Id. func (c *RepoCache) ResolveComment(prefix string) (*BugCache, entity.Id, error) { - bugPrefix, _ := bug.SplitCommentId(prefix) + bugPrefix, _ := entity.SeparateIds(prefix) bugCandidate := make([]entity.Id, 0, 5) // build a list of possible matching bugs diff --git a/entity/id.go b/entity/id.go index 9e724012..b602452e 100644 --- a/entity/id.go +++ b/entity/id.go @@ -65,7 +65,7 @@ func (i Id) MarshalGQL(w io.Writer) { // IsValid tell if the Id is valid func (i Id) Validate() error { - // Special case to + // Special case to detect outdated repo if len(i) == 40 { return fmt.Errorf("outdated repository format, please use https://github.com/MichaelMure/git-bug-migration to upgrade") } diff --git a/entity/id_interleaved.go b/entity/id_interleaved.go new file mode 100644 index 00000000..5423afee --- /dev/null +++ b/entity/id_interleaved.go @@ -0,0 +1,68 @@ +package entity + +import ( + "strings" +) + +// CombineIds compute a merged Id holding information from both the primary Id +// and the secondary Id. +// +// This allow to later find efficiently a secondary element because we can access +// the primary one directly instead of searching for a primary that has a +// secondary matching the Id. +// +// An example usage is Comment in a Bug. The interleaved Id will hold part of the +// Bug Id and part of the Comment Id. +// +// To allow the use of an arbitrary length prefix of this Id, Ids from primary +// and secondary are interleaved with this irregular pattern to give the +// best chance to find the secondary even with a 7 character prefix. +// +// Format is: PSPSPSPPPSPPPPSPPPPSPPPPSPPPPSPPPPSPPPPSPPPPSPPPPSPPPPSPPPPSPPPP +// +// A complete interleaved Id hold 50 characters for the primary and 14 for the +// secondary, which give a key space of 36^50 for the primary (~6 * 10^77) and +// 36^14 for the secondary (~6 * 10^21). This asymmetry assume a reasonable number +// of secondary within a primary Entity, while still allowing for a vast key space +// for the primary (that is, a globally merged database) with a low risk of collision. +// +// Here is the breakdown of several common prefix length: +// +// 5: 3P, 2S +// 7: 4P, 3S +// 10: 6P, 4S +// 16: 11P, 5S +func CombineIds(primary Id, secondary Id) Id { + var id strings.Builder + + for i := 0; i < idLength; i++ { + switch { + default: + id.WriteByte(primary[0]) + primary = primary[1:] + case i == 1, i == 3, i == 5, i == 9, i >= 10 && i%5 == 4: + id.WriteByte(secondary[0]) + secondary = secondary[1:] + } + } + + return Id(id.String()) +} + +// SeparateIds extract primary and secondary prefix from an arbitrary length prefix +// of an Id created with CombineIds. +func SeparateIds(prefix string) (primaryPrefix string, secondaryPrefix string) { + var primary strings.Builder + var secondary strings.Builder + + for i, r := range prefix { + switch { + default: + primary.WriteRune(r) + case i == 1, i == 3, i == 5, i == 9, i >= 10 && i%5 == 4: + secondary.WriteRune(r) + } + } + + return primary.String(), secondary.String() +} diff --git a/entity/id_interleaved_test.go b/entity/id_interleaved_test.go new file mode 100644 index 00000000..ef9218c9 --- /dev/null +++ b/entity/id_interleaved_test.go @@ -0,0 +1,36 @@ +package entity + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestInterleaved(t *testing.T) { + primary := Id("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWX______________") + secondary := Id("YZ0123456789+/________________________________________________") + expectedId := Id("aYbZc0def1ghij2klmn3opqr4stuv5wxyz6ABCD7EFGH8IJKL9MNOP+QRST/UVWX") + + interleaved := CombineIds(primary, secondary) + require.Equal(t, expectedId, interleaved) + + // full length + splitPrimary, splitSecondary := SeparateIds(interleaved.String()) + require.Equal(t, string(primary[:50]), splitPrimary) + require.Equal(t, string(secondary[:14]), splitSecondary) + + // partial + splitPrimary, splitSecondary = SeparateIds(string(expectedId[:7])) + require.Equal(t, string(primary[:4]), splitPrimary) + require.Equal(t, string(secondary[:3]), splitSecondary) + + // partial + splitPrimary, splitSecondary = SeparateIds(string(expectedId[:10])) + require.Equal(t, string(primary[:6]), splitPrimary) + require.Equal(t, string(secondary[:4]), splitSecondary) + + // partial + splitPrimary, splitSecondary = SeparateIds(string(expectedId[:16])) + require.Equal(t, string(primary[:11]), splitPrimary) + require.Equal(t, string(secondary[:5]), splitSecondary) +} -- cgit