From 8d63c983c982f93cc48d3996d6bd097ddeeb327f Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sun, 3 Jan 2021 23:59:25 +0100 Subject: WIP --- entity/dag/entity.go | 389 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 389 insertions(+) create mode 100644 entity/dag/entity.go (limited to 'entity/dag/entity.go') diff --git a/entity/dag/entity.go b/entity/dag/entity.go new file mode 100644 index 00000000..78347fa0 --- /dev/null +++ b/entity/dag/entity.go @@ -0,0 +1,389 @@ +// Package dag contains the base common code to define an entity stored +// in a chain of git objects, supporting actions like Push, Pull and Merge. +package dag + +import ( + "encoding/json" + "fmt" + "sort" + + "github.com/pkg/errors" + + "github.com/MichaelMure/git-bug/entity" + "github.com/MichaelMure/git-bug/identity" + "github.com/MichaelMure/git-bug/repository" + "github.com/MichaelMure/git-bug/util/lamport" +) + +const refsPattern = "refs/%s/%s" +const creationClockPattern = "%s-create" +const editClockPattern = "%s-edit" + +// Definition hold the details defining one specialization of an Entity. +type Definition struct { + // the name of the entity (bug, pull-request, ...) + typename string + // the namespace in git (bugs, prs, ...) + namespace string + // a function decoding a JSON message into an Operation + operationUnmarshaler func(author identity.Interface, raw json.RawMessage) (Operation, error) + // a function loading an identity.Identity from its Id + identityResolver identity.Resolver + // the expected format version number, that can be used for data migration/upgrade + formatVersion uint +} + +// Entity is a data structure stored in a chain of git objects, supporting actions like Push, Pull and Merge. +type Entity struct { + Definition + + // operations that are already stored in the repository + ops []Operation + // operations not yet stored in the repository + staging []Operation + + // TODO: add here createTime and editTime + + // // TODO: doesn't seems to actually be useful over the topological sort ? Timestamp can be generated from graph depth + // // TODO: maybe EditTime is better because it could spread ops in consecutive groups on the logical timeline --> avoid interleaving + // packClock lamport.Clock + lastCommit repository.Hash +} + +// New create an empty Entity +func New(definition Definition) *Entity { + return &Entity{ + Definition: definition, + // packClock: lamport.NewMemClock(), + } +} + +// Read will read and decode a stored Entity from a repository +func Read(def Definition, repo repository.ClockedRepo, id entity.Id) (*Entity, error) { + if err := id.Validate(); err != nil { + return nil, errors.Wrap(err, "invalid id") + } + + ref := fmt.Sprintf("refs/%s/%s", def.namespace, id.String()) + + return read(def, repo, ref) +} + +// read fetch from git and decode an Entity at an arbitrary git reference. +func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, error) { + rootHash, err := repo.ResolveRef(ref) + if err != nil { + return nil, err + } + + // Perform a depth-first search to get a topological order of the DAG where we discover the + // parents commit and go back in time up to the chronological root + + stack := make([]repository.Hash, 0, 32) + visited := make(map[repository.Hash]struct{}) + DFSOrder := make([]repository.Commit, 0, 32) + + stack = append(stack, rootHash) + + for len(stack) > 0 { + // pop + hash := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + if _, ok := visited[hash]; ok { + continue + } + + // mark as visited + visited[hash] = struct{}{} + + commit, err := repo.ReadCommit(hash) + if err != nil { + return nil, err + } + + DFSOrder = append(DFSOrder, commit) + + for _, parent := range commit.Parents { + stack = append(stack, parent) + } + } + + // Now, we can reverse this topological order and read the commits in an order where + // we are sure to have read all the chronological ancestors when we read a commit. + + // Next step is to: + // 1) read the operationPacks + // 2) make sure that the clocks causality respect the DAG topology. + + oppMap := make(map[repository.Hash]*operationPack) + var opsCount int + // var packClock = lamport.NewMemClock() + + for i := len(DFSOrder) - 1; i >= 0; i-- { + commit := DFSOrder[i] + isFirstCommit := i == len(DFSOrder)-1 + isMerge := len(commit.Parents) > 1 + + // Verify DAG structure: single chronological root, so only the root + // can have no parents. Said otherwise, the DAG need to have exactly + // one leaf. + if !isFirstCommit && len(commit.Parents) == 0 { + return nil, fmt.Errorf("multiple leafs in the entity DAG") + } + + opp, err := readOperationPack(def, repo, commit) + if err != nil { + return nil, err + } + + err = opp.Validate() + if err != nil { + return nil, err + } + + // Check that the create lamport clock is set (not checked in Validate() as it's optional) + if isFirstCommit && opp.CreateTime <= 0 { + return nil, fmt.Errorf("creation lamport time not set") + } + + // make sure that the lamport clocks causality match the DAG topology + for _, parentHash := range commit.Parents { + parentPack, ok := oppMap[parentHash] + if !ok { + panic("DFS failed") + } + + if parentPack.EditTime >= opp.EditTime { + return nil, fmt.Errorf("lamport clock ordering doesn't match the DAG") + } + + // to avoid an attack where clocks are pushed toward the uint64 rollover, make sure + // that the clocks don't jump too far in the future + // we ignore merge commits here to allow merging after a loooong time without breaking anything, + // as long as there is one valid chain of small hops, it's fine. + if !isMerge && opp.EditTime-parentPack.EditTime > 1_000_000 { + return nil, fmt.Errorf("lamport clock jumping too far in the future, likely an attack") + } + + // TODO: PackTime is not checked + } + + oppMap[commit.Hash] = opp + opsCount += len(opp.Operations) + } + + // The clocks are fine, we witness them + for _, opp := range oppMap { + err = repo.Witness(fmt.Sprintf(creationClockPattern, def.namespace), opp.CreateTime) + if err != nil { + return nil, err + } + err = repo.Witness(fmt.Sprintf(editClockPattern, def.namespace), opp.EditTime) + if err != nil { + return nil, err + } + // err = packClock.Witness(opp.PackTime) + // if err != nil { + // return nil, err + // } + } + + // Now that we know that the topological order and clocks are fine, we order the operationPacks + // based on the logical clocks, entirely ignoring the DAG topology + + oppSlice := make([]*operationPack, 0, len(oppMap)) + for _, pack := range oppMap { + oppSlice = append(oppSlice, pack) + } + sort.Slice(oppSlice, func(i, j int) bool { + // Primary ordering with the dedicated "pack" Lamport time that encode causality + // within the entity + // if oppSlice[i].PackTime != oppSlice[j].PackTime { + // return oppSlice[i].PackTime < oppSlice[i].PackTime + // } + // We have equal PackTime, which means we had a concurrent edition. We can't tell which exactly + // came first. As a secondary arbitrary ordering, we can use the EditTime. It's unlikely to be + // enough but it can give us an edge to approach what really happened. + if oppSlice[i].EditTime != oppSlice[j].EditTime { + return oppSlice[i].EditTime < oppSlice[j].EditTime + } + // Well, what now? We still need a total ordering and the most stable possible. + // As a last resort, we can order based on a hash of the serialized Operations in the + // operationPack. It doesn't carry much meaning but it's unbiased and hard to abuse. + // This is a lexicographic ordering on the stringified ID. + return oppSlice[i].Id() < oppSlice[j].Id() + }) + + // Now that we ordered the operationPacks, we have the order of the Operations + + ops := make([]Operation, 0, opsCount) + for _, pack := range oppSlice { + for _, operation := range pack.Operations { + ops = append(ops, operation) + } + } + + return &Entity{ + Definition: def, + ops: ops, + // packClock: packClock, + lastCommit: rootHash, + }, nil +} + +// Id return the Entity identifier +func (e *Entity) Id() entity.Id { + // id is the id of the first operation + return e.FirstOp().Id() +} + +// Validate check if the Entity data is valid +func (e *Entity) Validate() error { + // non-empty + if len(e.ops) == 0 && len(e.staging) == 0 { + return fmt.Errorf("entity has no operations") + } + + // check if each operations are valid + for _, op := range e.ops { + if err := op.Validate(); err != nil { + return err + } + } + + // check if staging is valid if needed + for _, op := range e.staging { + if err := op.Validate(); err != nil { + return err + } + } + + // Check that there is no colliding operation's ID + ids := make(map[entity.Id]struct{}) + for _, op := range e.Operations() { + if _, ok := ids[op.Id()]; ok { + return fmt.Errorf("id collision: %s", op.Id()) + } + ids[op.Id()] = struct{}{} + } + + return nil +} + +// Operations return the ordered operations +func (e *Entity) Operations() []Operation { + return append(e.ops, e.staging...) +} + +// FirstOp lookup for the very first operation of the Entity +func (e *Entity) FirstOp() Operation { + for _, op := range e.ops { + return op + } + for _, op := range e.staging { + return op + } + return nil +} + +// LastOp lookup for the very last operation of the Entity +func (e *Entity) LastOp() Operation { + if len(e.staging) > 0 { + return e.staging[len(e.staging)-1] + } + if len(e.ops) > 0 { + return e.ops[len(e.ops)-1] + } + return nil +} + +// Append add a new Operation to the Entity +func (e *Entity) Append(op Operation) { + e.staging = append(e.staging, op) +} + +// NeedCommit indicate if the in-memory state changed and need to be commit in the repository +func (e *Entity) NeedCommit() bool { + return len(e.staging) > 0 +} + +// CommitAdNeeded execute a Commit only if necessary. This function is useful to avoid getting an error if the Entity +// is already in sync with the repository. +func (e *Entity) CommitAdNeeded(repo repository.ClockedRepo) error { + if e.NeedCommit() { + return e.Commit(repo) + } + return nil +} + +// Commit write the appended operations in the repository +// TODO: support commit signature +func (e *Entity) Commit(repo repository.ClockedRepo) error { + if !e.NeedCommit() { + return fmt.Errorf("can't commit an entity with no pending operation") + } + + if err := e.Validate(); err != nil { + return errors.Wrapf(err, "can't commit a %s with invalid data", e.Definition.typename) + } + + var author identity.Interface + for _, op := range e.staging { + if author != nil && op.Author() != author { + return fmt.Errorf("operations with different author") + } + author = op.Author() + } + + // increment the various clocks for this new operationPack + // packTime, err := e.packClock.Increment() + // if err != nil { + // return err + // } + editTime, err := repo.Increment(fmt.Sprintf(editClockPattern, e.namespace)) + if err != nil { + return err + } + var creationTime lamport.Time + if e.lastCommit == "" { + creationTime, err = repo.Increment(fmt.Sprintf(creationClockPattern, e.namespace)) + if err != nil { + return err + } + } + + opp := &operationPack{ + Author: author, + Operations: e.staging, + CreateTime: creationTime, + EditTime: editTime, + // PackTime: packTime, + } + + treeHash, err := opp.Write(e.Definition, repo) + if err != nil { + return err + } + + // Write a Git commit referencing the tree, with the previous commit as parent + var commitHash repository.Hash + if e.lastCommit != "" { + commitHash, err = repo.StoreCommit(treeHash, e.lastCommit) + } else { + commitHash, err = repo.StoreCommit(treeHash) + } + if err != nil { + return err + } + + e.lastCommit = commitHash + e.ops = append(e.ops, e.staging...) + e.staging = nil + + // Create or update the Git reference for this entity + // When pushing later, the remote will ensure that this ref update + // is fast-forward, that is no data has been overwritten. + ref := fmt.Sprintf(refsPattern, e.namespace, e.Id().String()) + return repo.UpdateRef(ref, commitHash) +} -- cgit From dc5059bc3372941e2908739831188768335ac50b Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sun, 24 Jan 2021 19:45:21 +0100 Subject: entity: more progress on merging and signing --- entity/dag/entity.go | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'entity/dag/entity.go') diff --git a/entity/dag/entity.go b/entity/dag/entity.go index 78347fa0..63d7fc3b 100644 --- a/entity/dag/entity.go +++ b/entity/dag/entity.go @@ -318,7 +318,6 @@ func (e *Entity) CommitAdNeeded(repo repository.ClockedRepo) error { } // Commit write the appended operations in the repository -// TODO: support commit signature func (e *Entity) Commit(repo repository.ClockedRepo) error { if !e.NeedCommit() { return fmt.Errorf("can't commit an entity with no pending operation") @@ -361,18 +360,13 @@ func (e *Entity) Commit(repo repository.ClockedRepo) error { // PackTime: packTime, } - treeHash, err := opp.Write(e.Definition, repo) - if err != nil { - return err - } - - // Write a Git commit referencing the tree, with the previous commit as parent var commitHash repository.Hash - if e.lastCommit != "" { - commitHash, err = repo.StoreCommit(treeHash, e.lastCommit) + if e.lastCommit == "" { + commitHash, err = opp.Write(e.Definition, repo) } else { - commitHash, err = repo.StoreCommit(treeHash) + commitHash, err = opp.Write(e.Definition, repo, e.lastCommit) } + if err != nil { return err } -- cgit From fe4237df3c62bd6dfd1f385893295f93072d0e51 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Mon, 25 Jan 2021 12:39:34 +0100 Subject: entity: readAll and more testing --- entity/dag/entity.go | 48 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) (limited to 'entity/dag/entity.go') diff --git a/entity/dag/entity.go b/entity/dag/entity.go index 63d7fc3b..d3f5b482 100644 --- a/entity/dag/entity.go +++ b/entity/dag/entity.go @@ -58,7 +58,7 @@ func New(definition Definition) *Entity { } } -// Read will read and decode a stored Entity from a repository +// Read will read and decode a stored local Entity from a repository func Read(def Definition, repo repository.ClockedRepo, id entity.Id) (*Entity, error) { if err := id.Validate(); err != nil { return nil, errors.Wrap(err, "invalid id") @@ -69,6 +69,17 @@ func Read(def Definition, repo repository.ClockedRepo, id entity.Id) (*Entity, e return read(def, repo, ref) } +// readRemote will read and decode a stored remote Entity from a repository +func readRemote(def Definition, repo repository.ClockedRepo, remote string, id entity.Id) (*Entity, error) { + if err := id.Validate(); err != nil { + return nil, errors.Wrap(err, "invalid id") + } + + ref := fmt.Sprintf("refs/remotes/%s/%s/%s", def.namespace, remote, id.String()) + + return read(def, repo, ref) +} + // read fetch from git and decode an Entity at an arbitrary git reference. func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, error) { rootHash, err := repo.ResolveRef(ref) @@ -232,6 +243,41 @@ func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, err }, nil } +type StreamedEntity struct { + Entity *Entity + Err error +} + +// ReadAll read and parse all local Entity +func ReadAll(def Definition, repo repository.ClockedRepo) <-chan StreamedEntity { + out := make(chan StreamedEntity) + + go func() { + defer close(out) + + refPrefix := fmt.Sprintf("refs/%s/", def.namespace) + + refs, err := repo.ListRefs(refPrefix) + if err != nil { + out <- StreamedEntity{Err: err} + return + } + + for _, ref := range refs { + e, err := read(def, repo, ref) + + if err != nil { + out <- StreamedEntity{Err: err} + return + } + + out <- StreamedEntity{Entity: e} + } + }() + + return out +} + // Id return the Entity identifier func (e *Entity) Id() entity.Id { // id is the id of the first operation -- cgit From 32c55a4985cf897774e508b13c3e63b1935d1470 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sun, 7 Feb 2021 13:52:04 +0100 Subject: entity: use BFS instead of DFS to get the proper topological order --- entity/dag/entity.go | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) (limited to 'entity/dag/entity.go') diff --git a/entity/dag/entity.go b/entity/dag/entity.go index d3f5b482..273e6ad1 100644 --- a/entity/dag/entity.go +++ b/entity/dag/entity.go @@ -87,36 +87,34 @@ func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, err return nil, err } - // Perform a depth-first search to get a topological order of the DAG where we discover the + // Perform a breadth-first search to get a topological order of the DAG where we discover the // parents commit and go back in time up to the chronological root - stack := make([]repository.Hash, 0, 32) + queue := make([]repository.Hash, 0, 32) visited := make(map[repository.Hash]struct{}) - DFSOrder := make([]repository.Commit, 0, 32) + BFSOrder := make([]repository.Commit, 0, 32) - stack = append(stack, rootHash) + queue = append(queue, rootHash) + visited[rootHash] = struct{}{} - for len(stack) > 0 { + for len(queue) > 0 { // pop - hash := stack[len(stack)-1] - stack = stack[:len(stack)-1] - - if _, ok := visited[hash]; ok { - continue - } - - // mark as visited - visited[hash] = struct{}{} + hash := queue[0] + queue = queue[1:] commit, err := repo.ReadCommit(hash) if err != nil { return nil, err } - DFSOrder = append(DFSOrder, commit) + BFSOrder = append(BFSOrder, commit) for _, parent := range commit.Parents { - stack = append(stack, parent) + if _, ok := visited[parent]; !ok { + queue = append(queue, parent) + // mark as visited + visited[parent] = struct{}{} + } } } @@ -131,9 +129,9 @@ func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, err var opsCount int // var packClock = lamport.NewMemClock() - for i := len(DFSOrder) - 1; i >= 0; i-- { - commit := DFSOrder[i] - isFirstCommit := i == len(DFSOrder)-1 + for i := len(BFSOrder) - 1; i >= 0; i-- { + commit := BFSOrder[i] + isFirstCommit := i == len(BFSOrder)-1 isMerge := len(commit.Parents) > 1 // Verify DAG structure: single chronological root, so only the root -- cgit From f74166914c344329f08823770982f12966c79a77 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Tue, 9 Feb 2021 15:47:07 +0100 Subject: entity: remove the pack lamport time that doesn't bring anything actually --- entity/dag/entity.go | 32 ++++---------------------------- 1 file changed, 4 insertions(+), 28 deletions(-) (limited to 'entity/dag/entity.go') diff --git a/entity/dag/entity.go b/entity/dag/entity.go index 273e6ad1..3f4dfcb4 100644 --- a/entity/dag/entity.go +++ b/entity/dag/entity.go @@ -44,9 +44,6 @@ type Entity struct { // TODO: add here createTime and editTime - // // TODO: doesn't seems to actually be useful over the topological sort ? Timestamp can be generated from graph depth - // // TODO: maybe EditTime is better because it could spread ops in consecutive groups on the logical timeline --> avoid interleaving - // packClock lamport.Clock lastCommit repository.Hash } @@ -54,7 +51,6 @@ type Entity struct { func New(definition Definition) *Entity { return &Entity{ Definition: definition, - // packClock: lamport.NewMemClock(), } } @@ -127,7 +123,6 @@ func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, err oppMap := make(map[repository.Hash]*operationPack) var opsCount int - // var packClock = lamport.NewMemClock() for i := len(BFSOrder) - 1; i >= 0; i-- { commit := BFSOrder[i] @@ -174,8 +169,6 @@ func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, err if !isMerge && opp.EditTime-parentPack.EditTime > 1_000_000 { return nil, fmt.Errorf("lamport clock jumping too far in the future, likely an attack") } - - // TODO: PackTime is not checked } oppMap[commit.Hash] = opp @@ -192,10 +185,6 @@ func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, err if err != nil { return nil, err } - // err = packClock.Witness(opp.PackTime) - // if err != nil { - // return nil, err - // } } // Now that we know that the topological order and clocks are fine, we order the operationPacks @@ -206,19 +195,13 @@ func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, err oppSlice = append(oppSlice, pack) } sort.Slice(oppSlice, func(i, j int) bool { - // Primary ordering with the dedicated "pack" Lamport time that encode causality - // within the entity - // if oppSlice[i].PackTime != oppSlice[j].PackTime { - // return oppSlice[i].PackTime < oppSlice[i].PackTime - // } - // We have equal PackTime, which means we had a concurrent edition. We can't tell which exactly - // came first. As a secondary arbitrary ordering, we can use the EditTime. It's unlikely to be - // enough but it can give us an edge to approach what really happened. + // Primary ordering with the EditTime. if oppSlice[i].EditTime != oppSlice[j].EditTime { return oppSlice[i].EditTime < oppSlice[j].EditTime } - // Well, what now? We still need a total ordering and the most stable possible. - // As a last resort, we can order based on a hash of the serialized Operations in the + // We have equal EditTime, which means we have concurrent edition over different machines and we + // can't tell which one came first. So, what now? We still need a total ordering and the most stable possible. + // As a secondary ordering, we can order based on a hash of the serialized Operations in the // operationPack. It doesn't carry much meaning but it's unbiased and hard to abuse. // This is a lexicographic ordering on the stringified ID. return oppSlice[i].Id() < oppSlice[j].Id() @@ -236,7 +219,6 @@ func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, err return &Entity{ Definition: def, ops: ops, - // packClock: packClock, lastCommit: rootHash, }, nil } @@ -379,11 +361,6 @@ func (e *Entity) Commit(repo repository.ClockedRepo) error { author = op.Author() } - // increment the various clocks for this new operationPack - // packTime, err := e.packClock.Increment() - // if err != nil { - // return err - // } editTime, err := repo.Increment(fmt.Sprintf(editClockPattern, e.namespace)) if err != nil { return err @@ -401,7 +378,6 @@ func (e *Entity) Commit(repo repository.ClockedRepo) error { Operations: e.staging, CreateTime: creationTime, EditTime: editTime, - // PackTime: packTime, } var commitHash repository.Hash -- cgit From 59e9981161acea461a3ef9d386f20e23e78d8433 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Thu, 11 Feb 2021 09:51:32 +0100 Subject: entity: expose create and edit lamport clocks --- entity/dag/entity.go | 51 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 13 deletions(-) (limited to 'entity/dag/entity.go') diff --git a/entity/dag/entity.go b/entity/dag/entity.go index 3f4dfcb4..d92b386e 100644 --- a/entity/dag/entity.go +++ b/entity/dag/entity.go @@ -35,6 +35,12 @@ type Definition struct { // Entity is a data structure stored in a chain of git objects, supporting actions like Push, Pull and Merge. type Entity struct { + // A Lamport clock is a logical clock that allow to order event + // inside a distributed system. + // It must be the first field in this struct due to https://github.com/golang/go/issues/36606 + createTime lamport.Time + editTime lamport.Time + Definition // operations that are already stored in the repository @@ -42,8 +48,6 @@ type Entity struct { // operations not yet stored in the repository staging []Operation - // TODO: add here createTime and editTime - lastCommit repository.Hash } @@ -210,16 +214,26 @@ func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, err // Now that we ordered the operationPacks, we have the order of the Operations ops := make([]Operation, 0, opsCount) + var createTime lamport.Time + var editTime lamport.Time for _, pack := range oppSlice { for _, operation := range pack.Operations { ops = append(ops, operation) } + if pack.CreateTime > createTime { + createTime = pack.CreateTime + } + if pack.EditTime > editTime { + editTime = pack.EditTime + } } return &Entity{ Definition: def, ops: ops, lastCommit: rootHash, + createTime: createTime, + editTime: editTime, }, nil } @@ -349,7 +363,8 @@ func (e *Entity) Commit(repo repository.ClockedRepo) error { return fmt.Errorf("can't commit an entity with no pending operation") } - if err := e.Validate(); err != nil { + err := e.Validate() + if err != nil { return errors.Wrapf(err, "can't commit a %s with invalid data", e.Definition.typename) } @@ -361,23 +376,23 @@ func (e *Entity) Commit(repo repository.ClockedRepo) error { author = op.Author() } - editTime, err := repo.Increment(fmt.Sprintf(editClockPattern, e.namespace)) + e.editTime, err = repo.Increment(fmt.Sprintf(editClockPattern, e.namespace)) if err != nil { return err } - var creationTime lamport.Time - if e.lastCommit == "" { - creationTime, err = repo.Increment(fmt.Sprintf(creationClockPattern, e.namespace)) - if err != nil { - return err - } - } opp := &operationPack{ Author: author, Operations: e.staging, - CreateTime: creationTime, - EditTime: editTime, + EditTime: e.editTime, + } + + if e.lastCommit == "" { + e.createTime, err = repo.Increment(fmt.Sprintf(creationClockPattern, e.namespace)) + if err != nil { + return err + } + opp.CreateTime = e.createTime } var commitHash repository.Hash @@ -401,3 +416,13 @@ func (e *Entity) Commit(repo repository.ClockedRepo) error { ref := fmt.Sprintf(refsPattern, e.namespace, e.Id().String()) return repo.UpdateRef(ref, commitHash) } + +// CreateLamportTime return the Lamport time of creation +func (e *Entity) CreateLamportTime() lamport.Time { + return e.createTime +} + +// EditLamportTime return the Lamport time of the last edition +func (e *Entity) EditLamportTime() lamport.Time { + return e.editTime +} -- cgit From 94f06cd54defa73f5e8b79345597279e454c78e6 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sun, 14 Feb 2021 10:02:01 +0100 Subject: entity: pass the identity resolver instead of defining it once Having the resolver in Definition doesn't actually work well as the resolver is very situational. --- entity/dag/entity.go | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'entity/dag/entity.go') diff --git a/entity/dag/entity.go b/entity/dag/entity.go index d92b386e..09576d28 100644 --- a/entity/dag/entity.go +++ b/entity/dag/entity.go @@ -27,8 +27,6 @@ type Definition struct { namespace string // a function decoding a JSON message into an Operation operationUnmarshaler func(author identity.Interface, raw json.RawMessage) (Operation, error) - // a function loading an identity.Identity from its Id - identityResolver identity.Resolver // the expected format version number, that can be used for data migration/upgrade formatVersion uint } @@ -59,29 +57,29 @@ func New(definition Definition) *Entity { } // Read will read and decode a stored local Entity from a repository -func Read(def Definition, repo repository.ClockedRepo, id entity.Id) (*Entity, error) { +func Read(def Definition, repo repository.ClockedRepo, resolver identity.Resolver, id entity.Id) (*Entity, error) { if err := id.Validate(); err != nil { return nil, errors.Wrap(err, "invalid id") } ref := fmt.Sprintf("refs/%s/%s", def.namespace, id.String()) - return read(def, repo, ref) + return read(def, repo, resolver, ref) } // readRemote will read and decode a stored remote Entity from a repository -func readRemote(def Definition, repo repository.ClockedRepo, remote string, id entity.Id) (*Entity, error) { +func readRemote(def Definition, repo repository.ClockedRepo, resolver identity.Resolver, remote string, id entity.Id) (*Entity, error) { if err := id.Validate(); err != nil { return nil, errors.Wrap(err, "invalid id") } ref := fmt.Sprintf("refs/remotes/%s/%s/%s", def.namespace, remote, id.String()) - return read(def, repo, ref) + return read(def, repo, resolver, ref) } // read fetch from git and decode an Entity at an arbitrary git reference. -func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, error) { +func read(def Definition, repo repository.ClockedRepo, resolver identity.Resolver, ref string) (*Entity, error) { rootHash, err := repo.ResolveRef(ref) if err != nil { return nil, err @@ -140,7 +138,7 @@ func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, err return nil, fmt.Errorf("multiple leafs in the entity DAG") } - opp, err := readOperationPack(def, repo, commit) + opp, err := readOperationPack(def, repo, resolver, commit) if err != nil { return nil, err } @@ -243,7 +241,7 @@ type StreamedEntity struct { } // ReadAll read and parse all local Entity -func ReadAll(def Definition, repo repository.ClockedRepo) <-chan StreamedEntity { +func ReadAll(def Definition, repo repository.ClockedRepo, resolver identity.Resolver) <-chan StreamedEntity { out := make(chan StreamedEntity) go func() { @@ -258,7 +256,7 @@ func ReadAll(def Definition, repo repository.ClockedRepo) <-chan StreamedEntity } for _, ref := range refs { - e, err := read(def, repo, ref) + e, err := read(def, repo, resolver, ref) if err != nil { out <- StreamedEntity{Err: err} -- cgit From 99b9dd84cb4b0cfd3eb1fd50b07c8b826eb52d19 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sun, 14 Feb 2021 10:06:16 +0100 Subject: entity: support different author in staging operations --- entity/dag/entity.go | 99 ++++++++++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 45 deletions(-) (limited to 'entity/dag/entity.go') diff --git a/entity/dag/entity.go b/entity/dag/entity.go index 09576d28..196280a8 100644 --- a/entity/dag/entity.go +++ b/entity/dag/entity.go @@ -22,13 +22,13 @@ const editClockPattern = "%s-edit" // Definition hold the details defining one specialization of an Entity. type Definition struct { // the name of the entity (bug, pull-request, ...) - typename string - // the namespace in git (bugs, prs, ...) - namespace string + Typename string + // the Namespace in git (bugs, prs, ...) + Namespace string // a function decoding a JSON message into an Operation - operationUnmarshaler func(author identity.Interface, raw json.RawMessage) (Operation, error) + OperationUnmarshaler func(author identity.Interface, raw json.RawMessage) (Operation, error) // the expected format version number, that can be used for data migration/upgrade - formatVersion uint + FormatVersion uint } // Entity is a data structure stored in a chain of git objects, supporting actions like Push, Pull and Merge. @@ -62,7 +62,7 @@ func Read(def Definition, repo repository.ClockedRepo, resolver identity.Resolve return nil, errors.Wrap(err, "invalid id") } - ref := fmt.Sprintf("refs/%s/%s", def.namespace, id.String()) + ref := fmt.Sprintf("refs/%s/%s", def.Namespace, id.String()) return read(def, repo, resolver, ref) } @@ -73,7 +73,7 @@ func readRemote(def Definition, repo repository.ClockedRepo, resolver identity.R return nil, errors.Wrap(err, "invalid id") } - ref := fmt.Sprintf("refs/remotes/%s/%s/%s", def.namespace, remote, id.String()) + ref := fmt.Sprintf("refs/remotes/%s/%s/%s", def.Namespace, remote, id.String()) return read(def, repo, resolver, ref) } @@ -179,11 +179,11 @@ func read(def Definition, repo repository.ClockedRepo, resolver identity.Resolve // The clocks are fine, we witness them for _, opp := range oppMap { - err = repo.Witness(fmt.Sprintf(creationClockPattern, def.namespace), opp.CreateTime) + err = repo.Witness(fmt.Sprintf(creationClockPattern, def.Namespace), opp.CreateTime) if err != nil { return nil, err } - err = repo.Witness(fmt.Sprintf(editClockPattern, def.namespace), opp.EditTime) + err = repo.Witness(fmt.Sprintf(editClockPattern, def.Namespace), opp.EditTime) if err != nil { return nil, err } @@ -247,7 +247,7 @@ func ReadAll(def Definition, repo repository.ClockedRepo, resolver identity.Reso go func() { defer close(out) - refPrefix := fmt.Sprintf("refs/%s/", def.namespace) + refPrefix := fmt.Sprintf("refs/%s/", def.Namespace) refs, err := repo.ListRefs(refPrefix) if err != nil { @@ -346,9 +346,9 @@ func (e *Entity) NeedCommit() bool { return len(e.staging) > 0 } -// CommitAdNeeded execute a Commit only if necessary. This function is useful to avoid getting an error if the Entity +// CommitAsNeeded execute a Commit only if necessary. This function is useful to avoid getting an error if the Entity // is already in sync with the repository. -func (e *Entity) CommitAdNeeded(repo repository.ClockedRepo) error { +func (e *Entity) CommitAsNeeded(repo repository.ClockedRepo) error { if e.NeedCommit() { return e.Commit(repo) } @@ -363,56 +363,65 @@ func (e *Entity) Commit(repo repository.ClockedRepo) error { err := e.Validate() if err != nil { - return errors.Wrapf(err, "can't commit a %s with invalid data", e.Definition.typename) + return errors.Wrapf(err, "can't commit a %s with invalid data", e.Definition.Typename) } - var author identity.Interface - for _, op := range e.staging { - if author != nil && op.Author() != author { - return fmt.Errorf("operations with different author") + for len(e.staging) > 0 { + var author identity.Interface + var toCommit []Operation + + // Split into chunks with the same author + for len(e.staging) > 0 { + op := e.staging[0] + if author != nil && op.Author().Id() != author.Id() { + break + } + author = e.staging[0].Author() + toCommit = append(toCommit, op) + e.staging = e.staging[1:] } - author = op.Author() - } - e.editTime, err = repo.Increment(fmt.Sprintf(editClockPattern, e.namespace)) - if err != nil { - return err - } + e.editTime, err = repo.Increment(fmt.Sprintf(editClockPattern, e.Namespace)) + if err != nil { + return err + } - opp := &operationPack{ - Author: author, - Operations: e.staging, - EditTime: e.editTime, - } + opp := &operationPack{ + Author: author, + Operations: toCommit, + EditTime: e.editTime, + } - if e.lastCommit == "" { - e.createTime, err = repo.Increment(fmt.Sprintf(creationClockPattern, e.namespace)) + if e.lastCommit == "" { + e.createTime, err = repo.Increment(fmt.Sprintf(creationClockPattern, e.Namespace)) + if err != nil { + return err + } + opp.CreateTime = e.createTime + } + + var parentCommit []repository.Hash + if e.lastCommit != "" { + parentCommit = []repository.Hash{e.lastCommit} + } + + commitHash, err := opp.Write(e.Definition, repo, parentCommit...) if err != nil { return err } - opp.CreateTime = e.createTime - } - var commitHash repository.Hash - if e.lastCommit == "" { - commitHash, err = opp.Write(e.Definition, repo) - } else { - commitHash, err = opp.Write(e.Definition, repo, e.lastCommit) - } - - if err != nil { - return err + e.lastCommit = commitHash + e.ops = append(e.ops, toCommit...) } - e.lastCommit = commitHash - e.ops = append(e.ops, e.staging...) + // not strictly necessary but make equality testing easier in tests e.staging = nil // Create or update the Git reference for this entity // When pushing later, the remote will ensure that this ref update // is fast-forward, that is no data has been overwritten. - ref := fmt.Sprintf(refsPattern, e.namespace, e.Id().String()) - return repo.UpdateRef(ref, commitHash) + ref := fmt.Sprintf(refsPattern, e.Namespace, e.Id().String()) + return repo.UpdateRef(ref, e.lastCommit) } // CreateLamportTime return the Lamport time of creation -- cgit From 4b9862e239deb939c87be2b02970a7bfe2996e13 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sun, 14 Feb 2021 12:13:33 +0100 Subject: entity: make sure merge commit don't have operations --- entity/dag/entity.go | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'entity/dag/entity.go') diff --git a/entity/dag/entity.go b/entity/dag/entity.go index 196280a8..c4368514 100644 --- a/entity/dag/entity.go +++ b/entity/dag/entity.go @@ -148,6 +148,10 @@ func read(def Definition, repo repository.ClockedRepo, resolver identity.Resolve return nil, err } + if isMerge && len(opp.Operations) > 0 { + return nil, fmt.Errorf("merge commit cannot have operations") + } + // Check that the create lamport clock is set (not checked in Validate() as it's optional) if isFirstCommit && opp.CreateTime <= 0 { return nil, fmt.Errorf("creation lamport time not set") -- cgit