aboutsummaryrefslogtreecommitdiffstats
path: root/entity
diff options
context:
space:
mode:
authorMichael Muré <batolettre@gmail.com>2021-03-14 18:39:04 +0100
committerMichael Muré <batolettre@gmail.com>2021-03-14 18:40:50 +0100
commitf1d4a19af81fcc05ae9d90e018ff141f6521335a (patch)
tree9d82815e93a2d3a0856962249798f8adac81a1a6 /entity
parentbd09541752ef4db008500d238762ebe7f2f7be39 (diff)
downloadgit-bug-f1d4a19af81fcc05ae9d90e018ff141f6521335a.tar.gz
bug: nonce on all operation to prevent id collision
Diffstat (limited to 'entity')
-rw-r--r--entity/dag/operation.go16
-rw-r--r--entity/id.go2
2 files changed, 14 insertions, 4 deletions
diff --git a/entity/dag/operation.go b/entity/dag/operation.go
index b0a78de6..94974a82 100644
--- a/entity/dag/operation.go
+++ b/entity/dag/operation.go
@@ -10,13 +10,23 @@ import (
// data structure and storage.
type Operation interface {
// Id return the Operation identifier
+ //
// Some care need to be taken to define a correct Id derivation and enough entropy in the data used to avoid
// collisions. Notably:
- // - the Id of the first Operation will be used as the Id of the Entity. Collision need to be avoided across entities of the same type
- // (example: no collision within the "bug" namespace).
+ // - the Id of the first Operation will be used as the Id of the Entity. Collisions need to be avoided across entities
+ // of the same type (example: no collision within the "bug" namespace).
// - collisions can also happen within the set of Operations of an Entity. Simple Operation might not have enough
// entropy to yield unique Ids (example: two "close" operation within the same second, same author).
- // A common way to derive an Id will be to use the DeriveId function on the serialized operation data.
+ // If this is a concern, it is recommended to include a piece of random data in the operation's data, to guarantee
+ // a minimal amount of entropy and avoid collisions.
+ //
+ // Author's note: I tried to find a clever way around that inelegance (stuffing random useless data into the stored
+ // structure is not exactly elegant) but I failed to find a proper way. Essentially, anything that would reuse some
+ // other data (parent operation's Id, lamport clock) or the graph structure (depth) imposes that the Id would only
+ // make sense in the context of the graph and yields some deep coupling between Entity and Operation. This in turn
+ // makes the whole thing even less elegant.
+ //
+ // A common way to derive an Id will be to use the entity.DeriveId() function on the serialized operation data.
Id() entity.Id
// Validate check if the Operation data is valid
Validate() error
diff --git a/entity/id.go b/entity/id.go
index b602452e..c8dbdb94 100644
--- a/entity/id.go
+++ b/entity/id.go
@@ -18,7 +18,7 @@ const UnsetId = Id("unset")
// Id is an identifier for an entity or part of an entity
type Id string
-// DeriveId generate an Id from some data, taken from a root part of the entity.
+// DeriveId generates an Id from the serialization of the object or part of the object.
func DeriveId(data []byte) Id {
// My understanding is that sha256 is enough to prevent collision (git use that, so ...?)
// If you read this code, I'd be happy to be schooled.