From f1d4a19af81fcc05ae9d90e018ff141f6521335a Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sun, 14 Mar 2021 18:39:04 +0100 Subject: bug: nonce on all operation to prevent id collision --- bug/op_create.go | 15 --------------- bug/operation.go | 26 ++++++++++++++++++++++++++ entity/dag/operation.go | 16 +++++++++++++--- entity/id.go | 2 +- identity/version.go | 2 +- 5 files changed, 41 insertions(+), 20 deletions(-) diff --git a/bug/op_create.go b/bug/op_create.go index e3e38ade..37e1ddc5 100644 --- a/bug/op_create.go +++ b/bug/op_create.go @@ -1,7 +1,6 @@ package bug import ( - "crypto/rand" "encoding/json" "fmt" "strings" @@ -18,10 +17,6 @@ var _ Operation = &CreateOperation{} // CreateOperation define the initial creation of a bug type CreateOperation struct { OpBase - // mandatory random bytes to ensure a better randomness of the data of the first - // operation of a bug, used to later generate the ID - // len(Nonce) should be > 20 and < 64 bytes - Nonce []byte `json:"nonce"` Title string `json:"title"` Message string `json:"message"` Files []repository.Hash `json:"files"` @@ -147,19 +142,9 @@ func (op *CreateOperation) UnmarshalJSON(data []byte) error { // Sign post method for gqlgen func (op *CreateOperation) IsAuthored() {} -func makeNonce(len int) []byte { - result := make([]byte, len) - _, err := rand.Read(result) - if err != nil { - panic(err) - } - return result -} - func NewCreateOp(author identity.Interface, unixTime int64, title, message string, files []repository.Hash) *CreateOperation { return &CreateOperation{ OpBase: newOpBase(CreateOp, author, unixTime), - Nonce: makeNonce(20), Title: title, Message: message, Files: files, diff --git a/bug/operation.go b/bug/operation.go index 0423c229..d01f1cc9 100644 --- a/bug/operation.go +++ b/bug/operation.go @@ -1,6 +1,7 @@ package bug import ( + "crypto/rand" "encoding/json" "fmt" "time" @@ -138,6 +139,12 @@ type OpBase struct { // TODO: part of the data model upgrade, this should eventually be a timestamp + lamport UnixTime int64 `json:"timestamp"` Metadata map[string]string `json:"metadata,omitempty"` + + // mandatory random bytes to ensure a better randomness of the data used to later generate the ID + // len(Nonce) should be > 20 and < 64 bytes + // It has no functional purpose and should be ignored. + Nonce []byte `json:"nonce"` + // Not serialized. Store the op's id in memory. id entity.Id // Not serialized. Store the extra metadata in memory, @@ -151,10 +158,20 @@ func newOpBase(opType OperationType, author identity.Interface, unixTime int64) OperationType: opType, Author_: author, UnixTime: unixTime, + Nonce: makeNonce(20), id: entity.UnsetId, } } +func makeNonce(len int) []byte { + result := make([]byte, len) + _, err := rand.Read(result) + if err != nil { + panic(err) + } + return result +} + func (base *OpBase) UnmarshalJSON(data []byte) error { // Compute the Id when loading the op from disk. base.id = entity.DeriveId(data) @@ -164,6 +181,7 @@ func (base *OpBase) UnmarshalJSON(data []byte) error { Author json.RawMessage `json:"author"` UnixTime int64 `json:"timestamp"` Metadata map[string]string `json:"metadata,omitempty"` + Nonce []byte `json:"nonce"` }{} if err := json.Unmarshal(data, &aux); err != nil { @@ -180,6 +198,7 @@ func (base *OpBase) UnmarshalJSON(data []byte) error { base.Author_ = author base.UnixTime = aux.UnixTime base.Metadata = aux.Metadata + base.Nonce = aux.Nonce return nil } @@ -222,6 +241,13 @@ func (base *OpBase) Validate(op Operation, opType OperationType) error { } } + if len(base.Nonce) > 64 { + return fmt.Errorf("nonce is too big") + } + if len(base.Nonce) < 20 { + return fmt.Errorf("nonce is too small") + } + return nil } diff --git a/entity/dag/operation.go b/entity/dag/operation.go index b0a78de6..94974a82 100644 --- a/entity/dag/operation.go +++ b/entity/dag/operation.go @@ -10,13 +10,23 @@ import ( // data structure and storage. type Operation interface { // Id return the Operation identifier + // // Some care need to be taken to define a correct Id derivation and enough entropy in the data used to avoid // collisions. Notably: - // - the Id of the first Operation will be used as the Id of the Entity. Collision need to be avoided across entities of the same type - // (example: no collision within the "bug" namespace). + // - the Id of the first Operation will be used as the Id of the Entity. Collision need to be avoided across entities + // of the same type (example: no collision within the "bug" namespace). // - collisions can also happen within the set of Operations of an Entity. Simple Operation might not have enough // entropy to yield unique Ids (example: two "close" operation within the same second, same author). - // A common way to derive an Id will be to use the DeriveId function on the serialized operation data. + // If this is a concern, it is recommended to include a piece of random data in the operation's data, to guarantee + // a minimal amount of entropy and avoid collision. + // + // Author's note: I tried to find a clever way around that inelegance (stuffing random useless data into the stored + // structure is not exactly elegant) but I failed to find a proper way. Essentially, anything that would reuse some + // other data (parent operation's Id, lamport clock) or the graph structure (depth) impose that the Id would only + // make sense in the context of the graph and yield some deep coupling between Entity and Operation. This in turn + // make the whole thing even less elegant. + // + // A common way to derive an Id will be to use the entity.DeriveId() function on the serialized operation data. Id() entity.Id // Validate check if the Operation data is valid Validate() error diff --git a/entity/id.go b/entity/id.go index b602452e..c8dbdb94 100644 --- a/entity/id.go +++ b/entity/id.go @@ -18,7 +18,7 @@ const UnsetId = Id("unset") // Id is an identifier for an entity or part of an entity type Id string -// DeriveId generate an Id from some data, taken from a root part of the entity. +// DeriveId generate an Id from the serialization of the object or part of the object. func DeriveId(data []byte) Id { // My understanding is that sha256 is enough to prevent collision (git use that, so ...?) // If you read this code, I'd be happy to be schooled. diff --git a/identity/version.go b/identity/version.go index cbd56a98..1c35831e 100644 --- a/identity/version.go +++ b/identity/version.go @@ -37,7 +37,7 @@ type version struct { keys []*Key // mandatory random bytes to ensure a better randomness of the data of the first - // version of a bug, used to later generate the ID + // version of an identity, used to later generate the ID // len(Nonce) should be > 20 and < 64 bytes // It has no functional purpose and should be ignored. // TODO: optional after first version? -- cgit