From 7163b2283b4542a4d4abfe9a71963f122322bde7 Mon Sep 17 00:00:00 2001
From: Michael Muré
Date: Sun, 8 Nov 2020 19:15:06 +0100
Subject: bug: Id from first operation data, not git + remove root link

---
 entity/refs.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'entity')

diff --git a/entity/refs.go b/entity/refs.go
index 82b1741b..f505dbf0 100644
--- a/entity/refs.go
+++ b/entity/refs.go
@@ -6,13 +6,13 @@ func RefsToIds(refs []string) []Id {
 	ids := make([]Id, len(refs))
 
 	for i, ref := range refs {
-		ids[i] = refToId(ref)
+		ids[i] = RefToId(ref)
 	}
 
 	return ids
 }
 
-func refToId(ref string) Id {
+func RefToId(ref string) Id {
 	split := strings.Split(ref, "/")
 	return Id(split[len(split)-1])
 }
-- cgit

From ab57d74a312f325b9d889752aa92c00c395de20f Mon Sep 17 00:00:00 2001
From: Michael Muré
Date: Sun, 8 Nov 2020 19:18:44 +0100
Subject: deal with the previous changes

---
 entity/id.go        | 6 +++---
 entity/interface.go | 6 ++++++
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'entity')

diff --git a/entity/id.go b/entity/id.go
index 1b78aacd..08916987 100644
--- a/entity/id.go
+++ b/entity/id.go
@@ -8,8 +8,8 @@ import (
 	"github.com/pkg/errors"
 )
 
-const IdLengthSHA1 = 40
-const IdLengthSHA256 = 64
+// sha-256
+const idLength = 64
 const humanIdLength = 7
 
 const UnsetId = Id("unset")
@@ -55,7 +55,7 @@ func (i Id) MarshalGQL(w io.Writer) {
 
 // IsValid tell if the Id is valid
 func (i Id) Validate() error {
-	if len(i) != IdLengthSHA1 && len(i) != IdLengthSHA256 {
+	if len(i) != idLength {
 		return fmt.Errorf("invalid length")
 	}
 	for _, r := range i {
diff --git a/entity/interface.go b/entity/interface.go
index dd5d69b1..fb4735e4 100644
--- a/entity/interface.go
+++ b/entity/interface.go
@@ -2,5 +2,11 @@ package entity
 
 type Interface interface {
 	// Id return the Entity identifier
+	//
+	// This Id needs to be immutable without having to store the entity somewhere (i.e., an entity only in memory
+	// should have a valid Id, and it should not change if further edits are done on this entity).
+	// How to achieve that is up to the entity itself. A common way is to take a hash of immutable data at
+	// the root of the entity.
+	// It is acceptable to use such a hash and keep mutating that data as long as Id() is not called.
 	Id() Id
 }
-- cgit

From 2bf2b2d765c5003307544885b9321b32cc09d8bb Mon Sep 17 00:00:00 2001
From: Michael Muré
Date: Mon, 9 Nov 2020 00:34:48 +0100
Subject: entity: unique function to generate IDs

---
 entity/id.go | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'entity')

diff --git a/entity/id.go b/entity/id.go
index 08916987..9e724012 100644
--- a/entity/id.go
+++ b/entity/id.go
@@ -1,6 +1,7 @@
 package entity
 
 import (
+	"crypto/sha256"
 	"fmt"
 	"io"
 	"strings"
@@ -17,6 +18,15 @@ const UnsetId = Id("unset")
 
 // Id is an identifier for an entity or part of an entity
 type Id string
 
+// DeriveId generate an Id from some data, taken from a root part of the entity.
+func DeriveId(data []byte) Id {
+	// My understanding is that sha256 is enough to prevent collisions (git uses it, after all).
+	// If you read this code and know better, I'd be happy to be schooled.
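+	// (For scale: with a 256-bit hash, a collision is only expected after
+	// on the order of 2^128 hashes, far beyond any realistic dataset.)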
+
+	sum := sha256.Sum256(data)
+	return Id(fmt.Sprintf("%x", sum))
+}
+
 // String return the identifier as a string
 func (i Id) String() string {
 	return string(i)
 }
-- cgit

From db7074301b6af895b1a47ecd12a5028ac809abfc Mon Sep 17 00:00:00 2001
From: Michael Muré
Date: Mon, 30 Nov 2020 01:55:30 +0100
Subject: entity: generalize the combined Ids, use 64 length

---
 entity/id.go                  |  2 +-
 entity/id_interleaved.go      | 68 +++++++++++++++++++++++++++++++++++++++++++
 entity/id_interleaved_test.go | 36 +++++++++++++++++++++++
 3 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100644 entity/id_interleaved.go
 create mode 100644 entity/id_interleaved_test.go

(limited to 'entity')

diff --git a/entity/id.go b/entity/id.go
index 9e724012..b602452e 100644
--- a/entity/id.go
+++ b/entity/id.go
@@ -65,7 +65,7 @@ func (i Id) MarshalGQL(w io.Writer) {
 
 // IsValid tell if the Id is valid
 func (i Id) Validate() error {
-	// Special case to
+	// Special case to detect outdated repo
 	if len(i) == 40 {
 		return fmt.Errorf("outdated repository format, please use https://github.com/MichaelMure/git-bug-migration to upgrade")
 	}
diff --git a/entity/id_interleaved.go b/entity/id_interleaved.go
new file mode 100644
index 00000000..5423afee
--- /dev/null
+++ b/entity/id_interleaved.go
@@ -0,0 +1,68 @@
+package entity
+
+import (
+	"strings"
+)
+
+// CombineIds compute a merged Id holding information from both the primary Id
+// and the secondary Id.
+//
+// This allows to later find a secondary element efficiently: we can access the
+// primary one directly instead of searching for a primary that has a secondary
+// matching the Id.
+//
+// An example usage is Comment in a Bug. The interleaved Id will hold part of the
+// Bug Id and part of the Comment Id.
+//
+// To allow the use of an arbitrary-length prefix of this Id, Ids from primary
+// and secondary are interleaved with this irregular pattern to give the
+// best chance to find the secondary even with a 7 character prefix.
+//
+// Format is: PSPSPSPPPSPPPPSPPPPSPPPPSPPPPSPPPPSPPPPSPPPPSPPPPSPPPPSPPPPSPPPP
+//
+// A complete interleaved Id holds 50 characters for the primary and 14 for the
+// secondary, which gives a key space of 36^50 for the primary (~6 * 10^77) and
+// 36^14 for the secondary (~6 * 10^21). This asymmetry assumes a reasonable number
+// of secondary elements within a primary Entity, while still allowing for a vast
+// key space for the primary (that is, a globally merged database) with a low risk
+// of collision.
+//
+// Here is the breakdown of several common prefix lengths:
+//
+// 5:  3P, 2S
+// 7:  4P, 3S
+// 10: 6P, 4S
+// 16: 11P, 5S
+func CombineIds(primary Id, secondary Id) Id {
+	var id strings.Builder
+
+	for i := 0; i < idLength; i++ {
+		switch {
+		default:
+			id.WriteByte(primary[0])
+			primary = primary[1:]
+		case i == 1, i == 3, i == 5, i == 9, i >= 10 && i%5 == 4:
+			id.WriteByte(secondary[0])
+			secondary = secondary[1:]
+		}
+	}
+
+	return Id(id.String())
+}
+
+// SeparateIds extract primary and secondary prefix from an arbitrary length prefix
+// of an Id created with CombineIds.
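+//
+// For example, with the 7 character prefix "aYbZc0d" (taken from the vectors
+// in id_interleaved_test.go):
+//
+//	SeparateIds("aYbZc0d") // -> primary "abcd", secondary "YZ0"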
+func SeparateIds(prefix string) (primaryPrefix string, secondaryPrefix string) {
+	var primary strings.Builder
+	var secondary strings.Builder
+
+	for i, r := range prefix {
+		switch {
+		default:
+			primary.WriteRune(r)
+		case i == 1, i == 3, i == 5, i == 9, i >= 10 && i%5 == 4:
+			secondary.WriteRune(r)
+		}
+	}
+
+	return primary.String(), secondary.String()
+}
diff --git a/entity/id_interleaved_test.go b/entity/id_interleaved_test.go
new file mode 100644
index 00000000..ef9218c9
--- /dev/null
+++ b/entity/id_interleaved_test.go
@@ -0,0 +1,36 @@
+package entity
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestInterleaved(t *testing.T) {
+	primary := Id("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWX______________")
+	secondary := Id("YZ0123456789+/__________________________________________________")
+	expectedId := Id("aYbZc0def1ghij2klmn3opqr4stuv5wxyz6ABCD7EFGH8IJKL9MNOP+QRST/UVWX")
+
+	interleaved := CombineIds(primary, secondary)
+	require.Equal(t, expectedId, interleaved)
+
+	// full length
+	splitPrimary, splitSecondary := SeparateIds(interleaved.String())
+	require.Equal(t, string(primary[:50]), splitPrimary)
+	require.Equal(t, string(secondary[:14]), splitSecondary)
+
+	// partial
+	splitPrimary, splitSecondary = SeparateIds(string(expectedId[:7]))
+	require.Equal(t, string(primary[:4]), splitPrimary)
+	require.Equal(t, string(secondary[:3]), splitSecondary)
+
+	// partial
+	splitPrimary, splitSecondary = SeparateIds(string(expectedId[:10]))
+	require.Equal(t, string(primary[:6]), splitPrimary)
+	require.Equal(t, string(secondary[:4]), splitSecondary)
+
+	// partial
+	splitPrimary, splitSecondary = SeparateIds(string(expectedId[:16]))
+	require.Equal(t, string(primary[:11]), splitPrimary)
+	require.Equal(t, string(secondary[:5]), splitSecondary)
+}
-- cgit

From 5f6a39145d9ac109d430190d0d352544d27b6561 Mon Sep 17 00:00:00 2001
From: Michael Muré
Date: Mon, 21 Dec 2020 11:04:17 +0100
Subject: entity: add error to signal invalid format

---
 entity/err.go | 39 +++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

(limited to 'entity')

diff --git a/entity/err.go b/entity/err.go
index 90304d03..9222e4da 100644
--- a/entity/err.go
+++ b/entity/err.go
@@ -31,28 +31,31 @@ func IsErrMultipleMatch(err error) bool {
 	return ok
 }
 
-// ErrOldFormatVersion indicate that the read data has a too old format.
-type ErrOldFormatVersion struct {
-	formatVersion uint
+type ErrInvalidFormat struct {
+	version  uint
+	expected uint
 }
 
-func NewErrOldFormatVersion(formatVersion uint) *ErrOldFormatVersion {
-	return &ErrOldFormatVersion{formatVersion: formatVersion}
-}
-
-func (e ErrOldFormatVersion) Error() string {
-	return fmt.Sprintf("outdated repository format %v, please use https://github.com/MichaelMure/git-bug-migration to upgrade", e.formatVersion)
-}
-
-// ErrNewFormatVersion indicate that the read data is too new for this software.
-type ErrNewFormatVersion struct { - formatVersion uint +func NewErrInvalidFormat(version uint, expected uint) *ErrInvalidFormat { + return &ErrInvalidFormat{ + version: version, + expected: expected, + } } -func NewErrNewFormatVersion(formatVersion uint) *ErrNewFormatVersion { - return &ErrNewFormatVersion{formatVersion: formatVersion} +func NewErrUnknowFormat(expected uint) *ErrInvalidFormat { + return &ErrInvalidFormat{ + version: 0, + expected: expected, + } } -func (e ErrNewFormatVersion) Error() string { - return fmt.Sprintf("your version of git-bug is too old for this repository (version %v), please upgrade to the latest version", e.formatVersion) +func (e ErrInvalidFormat) Error() string { + if e.version == 0 { + return fmt.Sprintf("unreadable data, expected format version %v", e.expected) + } + if e.version < e.expected { + return fmt.Sprintf("outdated repository format %v, please use https://github.com/MichaelMure/git-bug-migration to upgrade to format version %v", e.version, e.expected) + } + return fmt.Sprintf("your version of git-bug is too old for this repository (format version %v, expected %v), please upgrade to the latest version", e.version, e.expected) } -- cgit From 9cca74cc334df94e37f3f3c76437da9a61e53bf2 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Mon, 21 Dec 2020 11:10:43 +0100 Subject: entity: add embryo of a generic, DAG-enabled entity --- entity/entity.go | 181 +++++++++++++++++++++++++++++++++++++++++++++++ entity/entity_actions.go | 27 +++++++ entity/operation_pack.go | 125 ++++++++++++++++++++++++++++++++ 3 files changed, 333 insertions(+) create mode 100644 entity/entity.go create mode 100644 entity/entity_actions.go create mode 100644 entity/operation_pack.go (limited to 'entity') diff --git a/entity/entity.go b/entity/entity.go new file mode 100644 index 00000000..c12dc2c4 --- /dev/null +++ b/entity/entity.go @@ -0,0 +1,181 @@ +package entity + +import ( + "encoding/json" + "fmt" + "sort" + + "github.com/pkg/errors" + + "github.com/MichaelMure/git-bug/repository" +) + +type Operation interface { + // Id() Id + // MarshalJSON() ([]byte, error) + Validate() error + + base() *OpBase +} + +type OperationIterator struct { +} + +type Definition struct { + namespace string + operationUnmarshaler func(raw json.RawMessage) (Operation, error) + formatVersion uint +} + +type Entity struct { + Definition + + ops []Operation +} + +func New(definition Definition) *Entity { + return &Entity{ + Definition: definition, + } +} + +func Read(def Definition, repo repository.ClockedRepo, id Id) (*Entity, error) { + if err := id.Validate(); err != nil { + return nil, errors.Wrap(err, "invalid id") + } + + ref := fmt.Sprintf("refs/%s/%s", def.namespace, id.String()) + + rootHash, err := repo.ResolveRef(ref) + if err != nil { + return nil, err + } + + // Perform a depth-first search to get a topological order of the DAG where we discover the + // parents commit and go back in time up to the chronological root + + stack := make([]repository.Hash, 0, 32) + visited := make(map[repository.Hash]struct{}) + DFSOrder := make([]repository.Commit, 0, 32) + + stack = append(stack, rootHash) + + for len(stack) > 0 { + // pop + hash := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + if _, ok := visited[hash]; ok { + continue + } + + // mark as visited + visited[hash] = struct{}{} + + commit, err := repo.ReadCommit(hash) + if err != nil { + return nil, err + } + + DFSOrder = append(DFSOrder, commit) + + for _, parent := range commit.Parents { + stack = append(stack, parent) + } + } 
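+
+	// (This is an iterative DFS with an explicit stack, so even very long
+	// histories can't blow up the call stack.)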
+ + // Now, we can reverse this topological order and read the commits in an order where + // we are sure to have read all the chronological ancestors when we read a commit. + + // Next step is to: + // 1) read the operationPacks + // 2) make sure that the clocks causality respect the DAG topology. + + oppMap := make(map[repository.Hash]*operationPack) + var opsCount int + + rootCommit := DFSOrder[len(DFSOrder)-1] + rootOpp, err := readOperationPack(def, repo, rootCommit.TreeHash) + if err != nil { + return nil, err + } + oppMap[rootCommit.Hash] = rootOpp + + for i := len(DFSOrder) - 2; i >= 0; i-- { + commit := DFSOrder[i] + + // Verify DAG structure: single chronological root + if len(commit.Parents) == 0 { + return nil, fmt.Errorf("multiple root in the entity DAG") + } + + opp, err := readOperationPack(def, repo, commit.TreeHash) + if err != nil { + return nil, err + } + + // make sure that the lamport clocks causality match the DAG topology + for _, parentHash := range commit.Parents { + parentPack, ok := oppMap[parentHash] + if !ok { + panic("DFS failed") + } + + if parentPack.EditTime >= opp.EditTime { + return nil, fmt.Errorf("lamport clock ordering doesn't match the DAG") + } + + // to avoid an attack where clocks are pushed toward the uint64 rollover, make sure + // that the clocks don't jump too far in the future + if opp.EditTime-parentPack.EditTime > 10_000 { + return nil, fmt.Errorf("lamport clock jumping too far in the future, likely an attack") + } + } + + oppMap[commit.Hash] = opp + opsCount += len(opp.Operations) + } + + // Now that we know that the topological order and clocks are fine, we order the operationPacks + // based on the logical clocks, entirely ignoring the DAG topology + + oppSlice := make([]*operationPack, 0, len(oppMap)) + for _, pack := range oppMap { + oppSlice = append(oppSlice, pack) + } + sort.Slice(oppSlice, func(i, j int) bool { + // TODO: no secondary ordering? 
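+		// Note: sort.Slice is not stable, so operationPacks that compare equal
+		// keep an unspecified relative order.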
+ return oppSlice[i].EditTime < oppSlice[i].EditTime + }) + + // Now that we ordered the operationPacks, we have the order of the Operations + + ops := make([]Operation, 0, opsCount) + for _, pack := range oppSlice { + for _, operation := range pack.Operations { + ops = append(ops, operation) + } + } + + return &Entity{ + Definition: def, + ops: ops, + }, nil +} + +func Remove() error { + panic("") +} + +func (e *Entity) Id() { + +} + +// return the ordered operations +func (e *Entity) Operations() []Operation { + return e.ops +} + +func (e *Entity) Commit() error { + panic("") +} diff --git a/entity/entity_actions.go b/entity/entity_actions.go new file mode 100644 index 00000000..02e76487 --- /dev/null +++ b/entity/entity_actions.go @@ -0,0 +1,27 @@ +package entity + +import ( + "fmt" + + "github.com/MichaelMure/git-bug/repository" +) + +func ListLocalIds(typename string, repo repository.RepoData) ([]Id, error) { + refs, err := repo.ListRefs(fmt.Sprintf("refs/%s/", typename)) + if err != nil { + return nil, err + } + return RefsToIds(refs), nil +} + +func Fetch() { + +} + +func Pull() { + +} + +func Push() { + +} diff --git a/entity/operation_pack.go b/entity/operation_pack.go new file mode 100644 index 00000000..2377167f --- /dev/null +++ b/entity/operation_pack.go @@ -0,0 +1,125 @@ +package entity + +import ( + "encoding/json" + "strconv" + "strings" + + "github.com/pkg/errors" + + "github.com/MichaelMure/git-bug/repository" + "github.com/MichaelMure/git-bug/util/lamport" +) + +const opsEntryName = "ops" +const versionEntryPrefix = "version-" +const createClockEntryPrefix = "create-clock-" +const editClockEntryPrefix = "edit-clock-" + +type operationPack struct { + Operations []Operation + CreateTime lamport.Time + EditTime lamport.Time +} + +// func (opp *operationPack) MarshalJSON() ([]byte, error) { +// return json.Marshal(struct { +// Operations []Operation `json:"ops"` +// }{ +// Operations: opp.Operations, +// }) +// } + +func readOperationPack(def Definition, repo repository.RepoData, treeHash repository.Hash) (*operationPack, error) { + entries, err := repo.ReadTree(treeHash) + if err != nil { + return nil, err + } + + // check the format version first, fail early instead of trying to read something + // var version uint + // for _, entry := range entries { + // if strings.HasPrefix(entry.Name, versionEntryPrefix) { + // v, err := strconv.ParseUint(strings.TrimPrefix(entry.Name, versionEntryPrefix), 10, 64) + // if err != nil { + // return nil, errors.Wrap(err, "can't read format version") + // } + // if v > 1<<12 { + // return nil, fmt.Errorf("format version too big") + // } + // version = uint(v) + // break + // } + // } + // if version == 0 { + // return nil, NewErrUnknowFormat(def.formatVersion) + // } + // if version != def.formatVersion { + // return nil, NewErrInvalidFormat(version, def.formatVersion) + // } + + var ops []Operation + var createTime lamport.Time + var editTime lamport.Time + + for _, entry := range entries { + if entry.Name == opsEntryName { + data, err := repo.ReadData(entry.Hash) + if err != nil { + return nil, errors.Wrap(err, "failed to read git blob data") + } + + ops, err = unmarshallOperations(def, data) + if err != nil { + return nil, err + } + break + } + + if strings.HasPrefix(entry.Name, createClockEntryPrefix) { + v, err := strconv.ParseUint(strings.TrimPrefix(entry.Name, createClockEntryPrefix), 10, 64) + if err != nil { + return nil, errors.Wrap(err, "can't read creation lamport time") + } + createTime = lamport.Time(v) + } + + if 
strings.HasPrefix(entry.Name, editClockEntryPrefix) { + v, err := strconv.ParseUint(strings.TrimPrefix(entry.Name, editClockEntryPrefix), 10, 64) + if err != nil { + return nil, errors.Wrap(err, "can't read edit lamport time") + } + editTime = lamport.Time(v) + } + } + + return &operationPack{ + Operations: ops, + CreateTime: createTime, + EditTime: editTime, + }, nil +} + +func unmarshallOperations(def Definition, data []byte) ([]Operation, error) { + aux := struct { + Operations []json.RawMessage `json:"ops"` + }{} + + if err := json.Unmarshal(data, &aux); err != nil { + return nil, err + } + + ops := make([]Operation, 0, len(aux.Operations)) + + for _, raw := range aux.Operations { + // delegate to specialized unmarshal function + op, err := def.operationUnmarshaler(raw) + if err != nil { + return nil, err + } + + ops = append(ops, op) + } + + return ops, nil +} -- cgit From 51ece149089f9075d3d6ba1bb09fda726efde8ad Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Mon, 21 Dec 2020 18:42:04 +0100 Subject: entity: clocks and write --- entity/entity.go | 196 +++++++++++++++++++++++++++++++++++++++++------ entity/entity_actions.go | 4 + entity/entity_test.go | 107 ++++++++++++++++++++++++++ entity/operation_pack.go | 132 ++++++++++++++++++++++++------- 4 files changed, 387 insertions(+), 52 deletions(-) create mode 100644 entity/entity_test.go (limited to 'entity') diff --git a/entity/entity.go b/entity/entity.go index c12dc2c4..9ba536d6 100644 --- a/entity/entity.go +++ b/entity/entity.go @@ -8,34 +8,47 @@ import ( "github.com/pkg/errors" "github.com/MichaelMure/git-bug/repository" + "github.com/MichaelMure/git-bug/util/lamport" ) +const refsPattern = "refs/%s/%s" +const creationClockPattern = "%s-create" +const editClockPattern = "%s-edit" + type Operation interface { - // Id() Id + Id() Id // MarshalJSON() ([]byte, error) Validate() error - - base() *OpBase } type OperationIterator struct { } type Definition struct { - namespace string + // the name of the entity (bug, pull-request, ...) + typename string + // the namespace in git (bugs, prs, ...) 
+ namespace string + // a function decoding a JSON message into an Operation operationUnmarshaler func(raw json.RawMessage) (Operation, error) - formatVersion uint + // the expected format version number + formatVersion uint } type Entity struct { Definition - ops []Operation + ops []Operation + staging []Operation + + packClock lamport.Clock + lastCommit repository.Hash } func New(definition Definition) *Entity { return &Entity{ Definition: definition, + packClock: lamport.NewMemClock(), } } @@ -93,19 +106,15 @@ func Read(def Definition, repo repository.ClockedRepo, id Id) (*Entity, error) { oppMap := make(map[repository.Hash]*operationPack) var opsCount int + var packClock = lamport.NewMemClock() - rootCommit := DFSOrder[len(DFSOrder)-1] - rootOpp, err := readOperationPack(def, repo, rootCommit.TreeHash) - if err != nil { - return nil, err - } - oppMap[rootCommit.Hash] = rootOpp - - for i := len(DFSOrder) - 2; i >= 0; i-- { + for i := len(DFSOrder) - 1; i >= 0; i-- { commit := DFSOrder[i] + firstCommit := i == len(DFSOrder)-1 - // Verify DAG structure: single chronological root - if len(commit.Parents) == 0 { + // Verify DAG structure: single chronological root, so only the root + // can have no parents + if !firstCommit && len(commit.Parents) == 0 { return nil, fmt.Errorf("multiple root in the entity DAG") } @@ -114,6 +123,17 @@ func Read(def Definition, repo repository.ClockedRepo, id Id) (*Entity, error) { return nil, err } + // Check that the lamport clocks are set + if firstCommit && opp.CreateTime <= 0 { + return nil, fmt.Errorf("creation lamport time not set") + } + if opp.EditTime <= 0 { + return nil, fmt.Errorf("edition lamport time not set") + } + if opp.PackTime <= 0 { + return nil, fmt.Errorf("pack lamport time not set") + } + // make sure that the lamport clocks causality match the DAG topology for _, parentHash := range commit.Parents { parentPack, ok := oppMap[parentHash] @@ -136,6 +156,22 @@ func Read(def Definition, repo repository.ClockedRepo, id Id) (*Entity, error) { opsCount += len(opp.Operations) } + // The clocks are fine, we witness them + for _, opp := range oppMap { + err = repo.Witness(fmt.Sprintf(creationClockPattern, def.namespace), opp.CreateTime) + if err != nil { + return nil, err + } + err = repo.Witness(fmt.Sprintf(editClockPattern, def.namespace), opp.EditTime) + if err != nil { + return nil, err + } + err = packClock.Witness(opp.PackTime) + if err != nil { + return nil, err + } + } + // Now that we know that the topological order and clocks are fine, we order the operationPacks // based on the logical clocks, entirely ignoring the DAG topology @@ -145,7 +181,8 @@ func Read(def Definition, repo repository.ClockedRepo, id Id) (*Entity, error) { } sort.Slice(oppSlice, func(i, j int) bool { // TODO: no secondary ordering? 
- return oppSlice[i].EditTime < oppSlice[i].EditTime + // might be useful for stable ordering + return oppSlice[i].PackTime < oppSlice[i].PackTime }) // Now that we ordered the operationPacks, we have the order of the Operations @@ -160,22 +197,135 @@ func Read(def Definition, repo repository.ClockedRepo, id Id) (*Entity, error) { return &Entity{ Definition: def, ops: ops, + lastCommit: rootHash, }, nil } -func Remove() error { - panic("") +// Id return the Entity identifier +func (e *Entity) Id() Id { + // id is the id of the first operation + return e.FirstOp().Id() } -func (e *Entity) Id() { +func (e *Entity) Validate() error { + // non-empty + if len(e.ops) == 0 && len(e.staging) == 0 { + return fmt.Errorf("entity has no operations") + } + + // check if each operations are valid + for _, op := range e.ops { + if err := op.Validate(); err != nil { + return err + } + } + + // check if staging is valid if needed + for _, op := range e.staging { + if err := op.Validate(); err != nil { + return err + } + } + + // Check that there is no colliding operation's ID + ids := make(map[Id]struct{}) + for _, op := range e.Operations() { + if _, ok := ids[op.Id()]; ok { + return fmt.Errorf("id collision: %s", op.Id()) + } + ids[op.Id()] = struct{}{} + } + return nil } // return the ordered operations func (e *Entity) Operations() []Operation { - return e.ops + return append(e.ops, e.staging...) } -func (e *Entity) Commit() error { - panic("") +// Lookup for the very first operation of the Entity. +func (e *Entity) FirstOp() Operation { + for _, op := range e.ops { + return op + } + for _, op := range e.staging { + return op + } + return nil +} + +func (e *Entity) Append(op Operation) { + e.staging = append(e.staging, op) +} + +func (e *Entity) NeedCommit() bool { + return len(e.staging) > 0 +} + +func (e *Entity) CommitAdNeeded(repo repository.ClockedRepo) error { + if e.NeedCommit() { + return e.Commit(repo) + } + return nil +} + +func (e *Entity) Commit(repo repository.ClockedRepo) error { + if !e.NeedCommit() { + return fmt.Errorf("can't commit an entity with no pending operation") + } + + if err := e.Validate(); err != nil { + return errors.Wrapf(err, "can't commit a %s with invalid data", e.Definition.typename) + } + + // increment the various clocks for this new operationPack + packTime, err := e.packClock.Increment() + if err != nil { + return err + } + editTime, err := repo.Increment(fmt.Sprintf(editClockPattern, e.namespace)) + if err != nil { + return err + } + var creationTime lamport.Time + if e.lastCommit == "" { + creationTime, err = repo.Increment(fmt.Sprintf(creationClockPattern, e.namespace)) + if err != nil { + return err + } + } + + opp := &operationPack{ + Operations: e.staging, + CreateTime: creationTime, + EditTime: editTime, + PackTime: packTime, + } + + treeHash, err := opp.write(e.Definition, repo) + if err != nil { + return err + } + + // Write a Git commit referencing the tree, with the previous commit as parent + var commitHash repository.Hash + if e.lastCommit != "" { + commitHash, err = repo.StoreCommitWithParent(treeHash, e.lastCommit) + } else { + commitHash, err = repo.StoreCommit(treeHash) + } + if err != nil { + return err + } + + e.lastCommit = commitHash + e.ops = append(e.ops, e.staging...) + e.staging = nil + + // Create or update the Git reference for this entity + // When pushing later, the remote will ensure that this ref update + // is fast-forward, that is no data has been overwritten. 
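+	// (If both sides changed concurrently, the remote would reject such a
+	// non-fast-forward push; the diverging refs then have to be reconciled
+	// by a merge instead of being overwritten.)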
+ ref := fmt.Sprintf(refsPattern, e.namespace, e.Id().String()) + return repo.UpdateRef(ref, commitHash) } diff --git a/entity/entity_actions.go b/entity/entity_actions.go index 02e76487..34e76a62 100644 --- a/entity/entity_actions.go +++ b/entity/entity_actions.go @@ -25,3 +25,7 @@ func Pull() { func Push() { } + +func Remove() error { + panic("") +} diff --git a/entity/entity_test.go b/entity/entity_test.go new file mode 100644 index 00000000..92a53179 --- /dev/null +++ b/entity/entity_test.go @@ -0,0 +1,107 @@ +package entity + +import ( + "encoding/json" + "fmt" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/MichaelMure/git-bug/repository" +) + +// func TestFoo(t *testing.T) { +// repo, err := repository.OpenGoGitRepo("~/dev/git-bug", nil) +// require.NoError(t, err) +// +// b, err := ReadBug(repo, Id("8b22e548c93a6ed23c31fd4e337c6286c3d1e5c9cae5537bc8e5842e11bd1099")) +// require.NoError(t, err) +// +// fmt.Println(b) +// } + +type op1 struct { + OperationType int `json:"type"` + Field1 string `json:"field_1"` +} + +func newOp1(field1 string) *op1 { + return &op1{OperationType: 1, Field1: field1} +} + +func (o op1) Id() Id { + data, _ := json.Marshal(o) + return DeriveId(data) +} + +func (o op1) Validate() error { return nil } + +type op2 struct { + OperationType int `json:"type"` + Field2 string `json:"field_2"` +} + +func newOp2(field2 string) *op2 { + return &op2{OperationType: 2, Field2: field2} +} + +func (o op2) Id() Id { + data, _ := json.Marshal(o) + return DeriveId(data) +} + +func (o op2) Validate() error { return nil } + +var def = Definition{ + typename: "foo", + namespace: "foos", + operationUnmarshaler: unmarshaller, + formatVersion: 1, +} + +func unmarshaller(raw json.RawMessage) (Operation, error) { + var t struct { + OperationType int `json:"type"` + } + + if err := json.Unmarshal(raw, &t); err != nil { + return nil, err + } + + switch t.OperationType { + case 1: + op := &op1{} + err := json.Unmarshal(raw, &op) + return op, err + case 2: + op := &op2{} + err := json.Unmarshal(raw, &op) + return op, err + default: + return nil, fmt.Errorf("unknown operation type %v", t.OperationType) + } +} + +func TestWriteRead(t *testing.T) { + repo := repository.NewMockRepo() + + entity := New(def) + require.False(t, entity.NeedCommit()) + + entity.Append(newOp1("foo")) + entity.Append(newOp2("bar")) + + require.True(t, entity.NeedCommit()) + require.NoError(t, entity.CommitAdNeeded(repo)) + require.False(t, entity.NeedCommit()) + + entity.Append(newOp2("foobar")) + require.True(t, entity.NeedCommit()) + require.NoError(t, entity.CommitAdNeeded(repo)) + require.False(t, entity.NeedCommit()) + + read, err := Read(def, repo, entity.Id()) + require.NoError(t, err) + + fmt.Println(*read) +} diff --git a/entity/operation_pack.go b/entity/operation_pack.go index 2377167f..0a16dd61 100644 --- a/entity/operation_pack.go +++ b/entity/operation_pack.go @@ -2,6 +2,7 @@ package entity import ( "encoding/json" + "fmt" "strconv" "strings" @@ -11,25 +12,82 @@ import ( "github.com/MichaelMure/git-bug/util/lamport" ) +// TODO: extra data tree +const extraEntryName = "extra" + const opsEntryName = "ops" const versionEntryPrefix = "version-" const createClockEntryPrefix = "create-clock-" const editClockEntryPrefix = "edit-clock-" +const packClockEntryPrefix = "pack-clock-" type operationPack struct { Operations []Operation + // Encode the entity's logical time of creation across all entities of the same type. 
+ // Only exist on the root operationPack CreateTime lamport.Time - EditTime lamport.Time + // Encode the entity's logical time of last edition across all entities of the same type. + // Exist on all operationPack + EditTime lamport.Time + // Encode the operationPack's logical time of creation withing this entity. + // Exist on all operationPack + PackTime lamport.Time } -// func (opp *operationPack) MarshalJSON() ([]byte, error) { -// return json.Marshal(struct { -// Operations []Operation `json:"ops"` -// }{ -// Operations: opp.Operations, -// }) -// } +func (opp operationPack) write(def Definition, repo repository.RepoData) (repository.Hash, error) { + // For different reason, we store the clocks and format version directly in the git tree. + // Version has to be accessible before any attempt to decode to return early with a unique error. + // Clocks could possibly be stored in the git blob but it's nice to separate data and metadata, and + // we are storing something directly in the tree already so why not. + // + // To have a valid Tree, we point the "fake" entries to always the same value, the empty blob. + emptyBlobHash, err := repo.StoreData([]byte{}) + if err != nil { + return "", err + } + + // Write the Ops as a Git blob containing the serialized array + data, err := json.Marshal(struct { + Operations []Operation `json:"ops"` + }{ + Operations: opp.Operations, + }) + if err != nil { + return "", err + } + hash, err := repo.StoreData(data) + if err != nil { + return "", err + } + + // Make a Git tree referencing this blob and encoding the other values: + // - format version + // - clocks + tree := []repository.TreeEntry{ + {ObjectType: repository.Blob, Hash: emptyBlobHash, + Name: fmt.Sprintf(versionEntryPrefix+"%d", def.formatVersion)}, + {ObjectType: repository.Blob, Hash: hash, + Name: opsEntryName}, + {ObjectType: repository.Blob, Hash: emptyBlobHash, + Name: fmt.Sprintf(editClockEntryPrefix+"%d", opp.EditTime)}, + {ObjectType: repository.Blob, Hash: emptyBlobHash, + Name: fmt.Sprintf(packClockEntryPrefix+"%d", opp.PackTime)}, + } + if opp.CreateTime > 0 { + tree = append(tree, repository.TreeEntry{ + ObjectType: repository.Blob, + Hash: emptyBlobHash, + Name: fmt.Sprintf(createClockEntryPrefix+"%d", opp.CreateTime), + }) + } + // Store the tree + return repo.StoreTree(tree) +} + +// readOperationPack read the operationPack encoded in git at the given Tree hash. +// +// Validity of the Lamport clocks is left for the caller to decide. 
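+//
+// The tree is expected to hold the operations as a JSON blob under the "ops"
+// entry, plus empty marker entries whose names encode the metadata, such as
+// "version-1", "edit-clock-4", "pack-clock-2" and, on the root pack only,
+// "create-clock-1" (the numbers here are only examples).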
func readOperationPack(def Definition, repo repository.RepoData, treeHash repository.Hash) (*operationPack, error) { entries, err := repo.ReadTree(treeHash) if err != nil { @@ -37,30 +95,31 @@ func readOperationPack(def Definition, repo repository.RepoData, treeHash reposi } // check the format version first, fail early instead of trying to read something - // var version uint - // for _, entry := range entries { - // if strings.HasPrefix(entry.Name, versionEntryPrefix) { - // v, err := strconv.ParseUint(strings.TrimPrefix(entry.Name, versionEntryPrefix), 10, 64) - // if err != nil { - // return nil, errors.Wrap(err, "can't read format version") - // } - // if v > 1<<12 { - // return nil, fmt.Errorf("format version too big") - // } - // version = uint(v) - // break - // } - // } - // if version == 0 { - // return nil, NewErrUnknowFormat(def.formatVersion) - // } - // if version != def.formatVersion { - // return nil, NewErrInvalidFormat(version, def.formatVersion) - // } + var version uint + for _, entry := range entries { + if strings.HasPrefix(entry.Name, versionEntryPrefix) { + v, err := strconv.ParseUint(strings.TrimPrefix(entry.Name, versionEntryPrefix), 10, 64) + if err != nil { + return nil, errors.Wrap(err, "can't read format version") + } + if v > 1<<12 { + return nil, fmt.Errorf("format version too big") + } + version = uint(v) + break + } + } + if version == 0 { + return nil, NewErrUnknowFormat(def.formatVersion) + } + if version != def.formatVersion { + return nil, NewErrInvalidFormat(version, def.formatVersion) + } var ops []Operation var createTime lamport.Time var editTime lamport.Time + var packTime lamport.Time for _, entry := range entries { if entry.Name == opsEntryName { @@ -73,7 +132,7 @@ func readOperationPack(def Definition, repo repository.RepoData, treeHash reposi if err != nil { return nil, err } - break + continue } if strings.HasPrefix(entry.Name, createClockEntryPrefix) { @@ -82,6 +141,7 @@ func readOperationPack(def Definition, repo repository.RepoData, treeHash reposi return nil, errors.Wrap(err, "can't read creation lamport time") } createTime = lamport.Time(v) + continue } if strings.HasPrefix(entry.Name, editClockEntryPrefix) { @@ -90,6 +150,16 @@ func readOperationPack(def Definition, repo repository.RepoData, treeHash reposi return nil, errors.Wrap(err, "can't read edit lamport time") } editTime = lamport.Time(v) + continue + } + + if strings.HasPrefix(entry.Name, packClockEntryPrefix) { + v, err := strconv.ParseUint(strings.TrimPrefix(entry.Name, packClockEntryPrefix), 10, 64) + if err != nil { + return nil, errors.Wrap(err, "can't read pack lamport time") + } + packTime = lamport.Time(v) + continue } } @@ -97,9 +167,13 @@ func readOperationPack(def Definition, repo repository.RepoData, treeHash reposi Operations: ops, CreateTime: createTime, EditTime: editTime, + PackTime: packTime, }, nil } +// unmarshallOperations delegate the unmarshalling of the Operation's JSON to the decoding +// function provided by the concrete entity. This gives access to the concrete type of each +// Operation. 
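+// A typical implementation switches on a serialized type field to pick the
+// concrete Operation struct (see unmarshaller in entity_test.go for an example).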
func unmarshallOperations(def Definition, data []byte) ([]Operation, error) { aux := struct { Operations []json.RawMessage `json:"ops"` -- cgit From 4ef92efeb905102d37b81fafa0ac2173594ef30a Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Fri, 25 Dec 2020 11:38:01 +0100 Subject: entity: total ordering of operations --- entity/entity.go | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) (limited to 'entity') diff --git a/entity/entity.go b/entity/entity.go index 9ba536d6..a1e8e57e 100644 --- a/entity/entity.go +++ b/entity/entity.go @@ -21,9 +21,7 @@ type Operation interface { Validate() error } -type OperationIterator struct { -} - +// Definition hold the details defining one specialization of an Entity. type Definition struct { // the name of the entity (bug, pull-request, ...) typename string @@ -59,6 +57,11 @@ func Read(def Definition, repo repository.ClockedRepo, id Id) (*Entity, error) { ref := fmt.Sprintf("refs/%s/%s", def.namespace, id.String()) + return read(def, repo, ref) +} + +// read fetch from git and decode an Entity at an arbitrary git reference. +func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, error) { rootHash, err := repo.ResolveRef(ref) if err != nil { return nil, err @@ -180,9 +183,22 @@ func Read(def Definition, repo repository.ClockedRepo, id Id) (*Entity, error) { oppSlice = append(oppSlice, pack) } sort.Slice(oppSlice, func(i, j int) bool { - // TODO: no secondary ordering? - // might be useful for stable ordering - return oppSlice[i].PackTime < oppSlice[i].PackTime + // Primary ordering with the dedicated "pack" Lamport time that encode causality + // within the entity + if oppSlice[i].PackTime != oppSlice[j].PackTime { + return oppSlice[i].PackTime < oppSlice[i].PackTime + } + // We have equal PackTime, which means we had a concurrent edition. We can't tell which exactly + // came first. As a secondary arbitrary ordering, we can use the EditTime. It's unlikely to be + // enough but it can give us an edge to approach what really happened. + if oppSlice[i].EditTime != oppSlice[j].EditTime { + return oppSlice[i].EditTime < oppSlice[j].EditTime + } + // Well, what now? We still need a total ordering, the most stable possible. + // As a last resort, we can order based on a hash of the serialized Operations in the + // operationPack. It doesn't carry much meaning but it's unbiased and hard to abuse. + // This is a lexicographic ordering. 
+ return oppSlice[i].Id < oppSlice[j].Id }) // Now that we ordered the operationPacks, we have the order of the Operations @@ -270,6 +286,7 @@ func (e *Entity) CommitAdNeeded(repo repository.ClockedRepo) error { return nil } +// TODO: support commit signature func (e *Entity) Commit(repo repository.ClockedRepo) error { if !e.NeedCommit() { return fmt.Errorf("can't commit an entity with no pending operation") -- cgit From 8d63c983c982f93cc48d3996d6bd097ddeeb327f Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sun, 3 Jan 2021 23:59:25 +0100 Subject: WIP --- entity/TODO | 8 + entity/dag/common_test.go | 137 ++++++++++++++ entity/dag/entity.go | 389 ++++++++++++++++++++++++++++++++++++++ entity/dag/entity_actions.go | 227 ++++++++++++++++++++++ entity/dag/entity_test.go | 117 ++++++++++++ entity/dag/operation.go | 31 +++ entity/dag/operation_pack.go | 294 ++++++++++++++++++++++++++++ entity/dag/operation_pack_test.go | 44 +++++ entity/doc.go | 8 - entity/entity.go | 348 ---------------------------------- entity/entity_actions.go | 31 --- entity/entity_test.go | 107 ----------- entity/merge.go | 14 +- entity/operation_pack.go | 199 ------------------- entity/refs.go | 2 + 15 files changed, 1257 insertions(+), 699 deletions(-) create mode 100644 entity/TODO create mode 100644 entity/dag/common_test.go create mode 100644 entity/dag/entity.go create mode 100644 entity/dag/entity_actions.go create mode 100644 entity/dag/entity_test.go create mode 100644 entity/dag/operation.go create mode 100644 entity/dag/operation_pack.go create mode 100644 entity/dag/operation_pack_test.go delete mode 100644 entity/doc.go delete mode 100644 entity/entity.go delete mode 100644 entity/entity_actions.go delete mode 100644 entity/entity_test.go delete mode 100644 entity/operation_pack.go (limited to 'entity') diff --git a/entity/TODO b/entity/TODO new file mode 100644 index 00000000..fd3c9710 --- /dev/null +++ b/entity/TODO @@ -0,0 +1,8 @@ +- is the pack Lamport clock really useful vs only topological sort? + - topological order is enforced on the clocks, so what's the point? + - is EditTime equivalent to PackTime? no, avoid the gaps. Is it better? +- how to do commit signature? +- how to avoid id collision between Operations? +- write tests for actions +- migrate Bug to the new structure +- migrate Identity to the new structure? 
\ No newline at end of file diff --git a/entity/dag/common_test.go b/entity/dag/common_test.go new file mode 100644 index 00000000..29f1279e --- /dev/null +++ b/entity/dag/common_test.go @@ -0,0 +1,137 @@ +package dag + +import ( + "encoding/json" + "fmt" + + "github.com/MichaelMure/git-bug/entity" + "github.com/MichaelMure/git-bug/identity" + "github.com/MichaelMure/git-bug/repository" +) + +// This file contains an example dummy entity to be used in the tests + +/* + Operations +*/ + +type op1 struct { + author identity.Interface + + OperationType int `json:"type"` + Field1 string `json:"field_1"` +} + +func newOp1(author identity.Interface, field1 string) *op1 { + return &op1{author: author, OperationType: 1, Field1: field1} +} + +func (o op1) Id() entity.Id { + data, _ := json.Marshal(o) + return entity.DeriveId(data) +} + +func (o op1) Author() identity.Interface { + return o.author +} + +func (o op1) Validate() error { return nil } + +type op2 struct { + author identity.Interface + + OperationType int `json:"type"` + Field2 string `json:"field_2"` +} + +func newOp2(author identity.Interface, field2 string) *op2 { + return &op2{author: author, OperationType: 2, Field2: field2} +} + +func (o op2) Id() entity.Id { + data, _ := json.Marshal(o) + return entity.DeriveId(data) +} + +func (o op2) Author() identity.Interface { + return o.author +} + +func (o op2) Validate() error { return nil } + +func unmarshaler(author identity.Interface, raw json.RawMessage) (Operation, error) { + var t struct { + OperationType int `json:"type"` + } + + if err := json.Unmarshal(raw, &t); err != nil { + return nil, err + } + + switch t.OperationType { + case 1: + op := &op1{} + err := json.Unmarshal(raw, &op) + op.author = author + return op, err + case 2: + op := &op2{} + err := json.Unmarshal(raw, &op) + op.author = author + return op, err + default: + return nil, fmt.Errorf("unknown operation type %v", t.OperationType) + } +} + +/* + Identities + repo + definition +*/ + +func makeTestContext() (repository.ClockedRepo, identity.Interface, identity.Interface, Definition) { + repo := repository.NewMockRepo() + + id1, err := identity.NewIdentity(repo, "name1", "email1") + if err != nil { + panic(err) + } + err = id1.Commit(repo) + if err != nil { + panic(err) + } + id2, err := identity.NewIdentity(repo, "name2", "email2") + if err != nil { + panic(err) + } + err = id2.Commit(repo) + if err != nil { + panic(err) + } + + resolver := identityResolverFunc(func(id entity.Id) (identity.Interface, error) { + switch id { + case id1.Id(): + return id1, nil + case id2.Id(): + return id2, nil + default: + return nil, identity.ErrIdentityNotExist + } + }) + + def := Definition{ + typename: "foo", + namespace: "foos", + operationUnmarshaler: unmarshaler, + identityResolver: resolver, + formatVersion: 1, + } + + return repo, id1, id2, def +} + +type identityResolverFunc func(id entity.Id) (identity.Interface, error) + +func (fn identityResolverFunc) ResolveIdentity(id entity.Id) (identity.Interface, error) { + return fn(id) +} diff --git a/entity/dag/entity.go b/entity/dag/entity.go new file mode 100644 index 00000000..78347fa0 --- /dev/null +++ b/entity/dag/entity.go @@ -0,0 +1,389 @@ +// Package dag contains the base common code to define an entity stored +// in a chain of git objects, supporting actions like Push, Pull and Merge. 
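+//
+// In short: an Entity is an ordered sequence of Operations; Commit() stores the
+// pending operations as one operationPack per git commit, and the resulting
+// commit graph is the DAG that read() walks, checks and re-orders.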
+package dag
+
+import (
+	"encoding/json"
+	"fmt"
+	"sort"
+
+	"github.com/pkg/errors"
+
+	"github.com/MichaelMure/git-bug/entity"
+	"github.com/MichaelMure/git-bug/identity"
+	"github.com/MichaelMure/git-bug/repository"
+	"github.com/MichaelMure/git-bug/util/lamport"
+)
+
+const refsPattern = "refs/%s/%s"
+const creationClockPattern = "%s-create"
+const editClockPattern = "%s-edit"
+
+// Definition hold the details defining one specialization of an Entity.
+type Definition struct {
+	// the name of the entity (bug, pull-request, ...)
+	typename string
+	// the namespace in git (bugs, prs, ...)
+	namespace string
+	// a function decoding a JSON message into an Operation
+	operationUnmarshaler func(author identity.Interface, raw json.RawMessage) (Operation, error)
+	// a function loading an identity.Identity from its Id
+	identityResolver identity.Resolver
+	// the expected format version number, that can be used for data migration/upgrade
+	formatVersion uint
+}
+
+// Entity is a data structure stored in a chain of git objects, supporting actions like Push, Pull and Merge.
+type Entity struct {
+	Definition
+
+	// operations that are already stored in the repository
+	ops []Operation
+	// operations not yet stored in the repository
+	staging []Operation
+
+	// TODO: add here createTime and editTime
+
+	// // TODO: doesn't seem to actually be useful over the topological sort? Timestamps can be generated from graph depth
+	// // TODO: maybe EditTime is better because it could spread ops in consecutive groups on the logical timeline --> avoid interleaving
+	// packClock lamport.Clock
+	lastCommit repository.Hash
+}
+
+// New create an empty Entity
+func New(definition Definition) *Entity {
+	return &Entity{
+		Definition: definition,
+		// packClock:  lamport.NewMemClock(),
+	}
+}
+
+// Read will read and decode a stored Entity from a repository
+func Read(def Definition, repo repository.ClockedRepo, id entity.Id) (*Entity, error) {
+	if err := id.Validate(); err != nil {
+		return nil, errors.Wrap(err, "invalid id")
+	}
+
+	ref := fmt.Sprintf("refs/%s/%s", def.namespace, id.String())
+
+	return read(def, repo, ref)
+}
+
+// read fetch from git and decode an Entity at an arbitrary git reference.
+func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, error) {
+	rootHash, err := repo.ResolveRef(ref)
+	if err != nil {
+		return nil, err
+	}
+
+	// Perform a depth-first search to get a topological order of the DAG where we discover the
+	// parent commits and go back in time up to the chronological root
+
+	stack := make([]repository.Hash, 0, 32)
+	visited := make(map[repository.Hash]struct{})
+	DFSOrder := make([]repository.Commit, 0, 32)
+
+	stack = append(stack, rootHash)
+
+	for len(stack) > 0 {
+		// pop
+		hash := stack[len(stack)-1]
+		stack = stack[:len(stack)-1]
+
+		if _, ok := visited[hash]; ok {
+			continue
+		}
+
+		// mark as visited
+		visited[hash] = struct{}{}
+
+		commit, err := repo.ReadCommit(hash)
+		if err != nil {
+			return nil, err
+		}
+
+		DFSOrder = append(DFSOrder, commit)
+
+		for _, parent := range commit.Parents {
+			stack = append(stack, parent)
+		}
+	}
+
+	// Now, we can reverse this topological order and read the commits in an order where
+	// we are sure to have read all the chronological ancestors when we read a commit.
+
+	// Next step is to:
+	// 1) read the operationPacks
+	// 2) make sure that the clocks' causality respects the DAG topology.
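+	//
+	// (The packs are decoded parent-first; the parent lookups below rely on
+	// that invariant and fail loudly if the traversal order ever breaks it.)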
+
+	oppMap := make(map[repository.Hash]*operationPack)
+	var opsCount int
+	// var packClock = lamport.NewMemClock()
+
+	for i := len(DFSOrder) - 1; i >= 0; i-- {
+		commit := DFSOrder[i]
+		isFirstCommit := i == len(DFSOrder)-1
+		isMerge := len(commit.Parents) > 1
+
+		// Verify DAG structure: single chronological root, so only the root
+		// can have no parents. Said otherwise, the DAG needs to have exactly
+		// one leaf.
+		if !isFirstCommit && len(commit.Parents) == 0 {
+			return nil, fmt.Errorf("multiple leaves in the entity DAG")
+		}
+
+		opp, err := readOperationPack(def, repo, commit)
+		if err != nil {
+			return nil, err
+		}
+
+		err = opp.Validate()
+		if err != nil {
+			return nil, err
+		}
+
+		// Check that the create lamport clock is set (not checked in Validate() as it's optional)
+		if isFirstCommit && opp.CreateTime <= 0 {
+			return nil, fmt.Errorf("creation lamport time not set")
+		}
+
+		// make sure that the lamport clocks causality match the DAG topology
+		for _, parentHash := range commit.Parents {
+			parentPack, ok := oppMap[parentHash]
+			if !ok {
+				panic("DFS failed")
+			}
+
+			if parentPack.EditTime >= opp.EditTime {
+				return nil, fmt.Errorf("lamport clock ordering doesn't match the DAG")
+			}
+
+			// to avoid an attack where clocks are pushed toward the uint64 rollover, make sure
+			// that the clocks don't jump too far in the future.
+			// We ignore merge commits here to allow merging after a long time without breaking anything:
+			// as long as there is one valid chain of small hops, it's fine.
+			if !isMerge && opp.EditTime-parentPack.EditTime > 1_000_000 {
+				return nil, fmt.Errorf("lamport clock jumping too far in the future, likely an attack")
+			}
+
+			// TODO: PackTime is not checked
+		}
+
+		oppMap[commit.Hash] = opp
+		opsCount += len(opp.Operations)
+	}
+
+	// The clocks are fine, we witness them
+	for _, opp := range oppMap {
+		err = repo.Witness(fmt.Sprintf(creationClockPattern, def.namespace), opp.CreateTime)
+		if err != nil {
+			return nil, err
+		}
+		err = repo.Witness(fmt.Sprintf(editClockPattern, def.namespace), opp.EditTime)
+		if err != nil {
+			return nil, err
+		}
+		// err = packClock.Witness(opp.PackTime)
+		// if err != nil {
+		// 	return nil, err
+		// }
+	}
+
+	// Now that we know that the topological order and clocks are fine, we order the operationPacks
+	// based on the logical clocks, entirely ignoring the DAG topology
+
+	oppSlice := make([]*operationPack, 0, len(oppMap))
+	for _, pack := range oppMap {
+		oppSlice = append(oppSlice, pack)
+	}
+	sort.Slice(oppSlice, func(i, j int) bool {
+		// Primary ordering with the dedicated "pack" Lamport time that encodes causality
+		// within the entity
+		// if oppSlice[i].PackTime != oppSlice[j].PackTime {
+		// 	return oppSlice[i].PackTime < oppSlice[j].PackTime
+		// }
+		// We have equal PackTime, which means we had concurrent edits. We can't tell which exactly
+		// came first. As a secondary arbitrary ordering, we can use the EditTime. It's unlikely to be
+		// enough but it can give us an edge to approach what really happened.
+		if oppSlice[i].EditTime != oppSlice[j].EditTime {
+			return oppSlice[i].EditTime < oppSlice[j].EditTime
+		}
+		// Well, what now? We still need a total ordering, the most stable possible.
+		// As a last resort, we can order based on a hash of the serialized Operations in the
+		// operationPack. It doesn't carry much meaning but it's unbiased and hard to abuse.
+		// This is a lexicographic ordering on the stringified ID.
+		return oppSlice[i].Id() < oppSlice[j].Id()
+	})
+
+	// Now that we ordered the operationPacks, we have the order of the Operations
+
+	ops := make([]Operation, 0, opsCount)
+	for _, pack := range oppSlice {
+		for _, operation := range pack.Operations {
+			ops = append(ops, operation)
+		}
+	}
+
+	return &Entity{
+		Definition: def,
+		ops:        ops,
+		// packClock:  packClock,
+		lastCommit: rootHash,
+	}, nil
+}
+
+// Id return the Entity identifier
+func (e *Entity) Id() entity.Id {
+	// id is the id of the first operation
+	return e.FirstOp().Id()
+}
+
+// Validate check if the Entity data is valid
+func (e *Entity) Validate() error {
+	// non-empty
+	if len(e.ops) == 0 && len(e.staging) == 0 {
+		return fmt.Errorf("entity has no operations")
+	}
+
+	// check that each operation is valid
+	for _, op := range e.ops {
+		if err := op.Validate(); err != nil {
+			return err
+		}
+	}
+
+	// check that the staging operations are valid as well
+	for _, op := range e.staging {
+		if err := op.Validate(); err != nil {
+			return err
+		}
+	}
+
+	// Check that there is no colliding operation Id
+	ids := make(map[entity.Id]struct{})
+	for _, op := range e.Operations() {
+		if _, ok := ids[op.Id()]; ok {
+			return fmt.Errorf("id collision: %s", op.Id())
+		}
+		ids[op.Id()] = struct{}{}
+	}
+
+	return nil
+}
+
+// Operations return the ordered operations
+func (e *Entity) Operations() []Operation {
+	return append(e.ops, e.staging...)
+}
+
+// FirstOp lookup for the very first operation of the Entity
+func (e *Entity) FirstOp() Operation {
+	for _, op := range e.ops {
+		return op
+	}
+	for _, op := range e.staging {
+		return op
+	}
+	return nil
+}
+
+// LastOp lookup for the very last operation of the Entity
+func (e *Entity) LastOp() Operation {
+	if len(e.staging) > 0 {
+		return e.staging[len(e.staging)-1]
+	}
+	if len(e.ops) > 0 {
+		return e.ops[len(e.ops)-1]
+	}
+	return nil
+}
+
+// Append add a new Operation to the Entity
+func (e *Entity) Append(op Operation) {
+	e.staging = append(e.staging, op)
+}
+
+// NeedCommit indicate if the in-memory state changed and needs to be committed in the repository
+func (e *Entity) NeedCommit() bool {
+	return len(e.staging) > 0
+}
+
+// CommitAdNeeded execute a Commit only if necessary. This function is useful to avoid getting an error if the Entity
+// is already in sync with the repository.
+func (e *Entity) CommitAdNeeded(repo repository.ClockedRepo) error {
+	if e.NeedCommit() {
+		return e.Commit(repo)
+	}
+	return nil
+}
+
+// Commit write the appended operations in the repository
+// TODO: support commit signature
+func (e *Entity) Commit(repo repository.ClockedRepo) error {
+	if !e.NeedCommit() {
+		return fmt.Errorf("can't commit an entity with no pending operation")
+	}
+
+	if err := e.Validate(); err != nil {
+		return errors.Wrapf(err, "can't commit a %s with invalid data", e.Definition.typename)
+	}
+
+	var author identity.Interface
+	for _, op := range e.staging {
+		if author != nil && op.Author() != author {
+			return fmt.Errorf("operations with different authors")
+		}
+		author = op.Author()
+	}
+
+	// increment the various clocks for this new operationPack
+	// packTime, err := e.packClock.Increment()
+	// if err != nil {
+	// 	return err
+	// }
+	editTime, err := repo.Increment(fmt.Sprintf(editClockPattern, e.namespace))
+	if err != nil {
+		return err
+	}
+	var creationTime lamport.Time
+	if e.lastCommit == "" {
+		creationTime, err = repo.Increment(fmt.Sprintf(creationClockPattern, e.namespace))
+		if err != nil {
+			return err
+		}
+	}
+
+	opp := &operationPack{
+		Author:     author,
+		Operations: e.staging,
+		CreateTime: creationTime,
+		EditTime:   editTime,
+		// PackTime:   packTime,
+	}
+
+	treeHash, err := opp.Write(e.Definition, repo)
+	if err != nil {
+		return err
+	}
+
+	// Write a Git commit referencing the tree, with the previous commit as parent
+	var commitHash repository.Hash
+	if e.lastCommit != "" {
+		commitHash, err = repo.StoreCommit(treeHash, e.lastCommit)
+	} else {
+		commitHash, err = repo.StoreCommit(treeHash)
+	}
+	if err != nil {
+		return err
+	}
+
+	e.lastCommit = commitHash
+	e.ops = append(e.ops, e.staging...)
+	e.staging = nil
+
+	// Create or update the Git reference for this entity
+	// When pushing later, the remote will ensure that this ref update
+	// is fast-forward, that is no data has been overwritten.
+	ref := fmt.Sprintf(refsPattern, e.namespace, e.Id().String())
+	return repo.UpdateRef(ref, commitHash)
+}
diff --git a/entity/dag/entity_actions.go b/entity/dag/entity_actions.go
new file mode 100644
index 00000000..8dcf91e6
--- /dev/null
+++ b/entity/dag/entity_actions.go
@@ -0,0 +1,227 @@
+package dag
+
+import (
+	"fmt"
+
+	"github.com/pkg/errors"
+
+	"github.com/MichaelMure/git-bug/entity"
+	"github.com/MichaelMure/git-bug/repository"
+)
+
+func ListLocalIds(typename string, repo repository.RepoData) ([]entity.Id, error) {
+	refs, err := repo.ListRefs(fmt.Sprintf("refs/%s/", typename))
+	if err != nil {
+		return nil, err
+	}
+	return entity.RefsToIds(refs), nil
+}
+
+// Fetch retrieve updates from a remote.
+// This does not change the local entity state.
+func Fetch(def Definition, repo repository.Repo, remote string) (string, error) {
+	// "refs/<namespace>/*:refs/remotes/<remote>/<namespace>/*"
+	fetchRefSpec := fmt.Sprintf("refs/%s/*:refs/remotes/%s/%s/*",
+		def.namespace, remote, def.namespace)
+
+	return repo.FetchRefs(remote, fetchRefSpec)
+}
+
+// Push update a remote with the local changes
+func Push(def Definition, repo repository.Repo, remote string) (string, error) {
+	// "refs/<namespace>/*:refs/<namespace>/*"
+	refspec := fmt.Sprintf("refs/%s/*:refs/%s/*",
+		def.namespace, def.namespace)
+
+	return repo.PushRefs(remote, refspec)
+}
+
+// Pull will do a Fetch + MergeAll.
+// Contrary to MergeAll, this function will return an error if a merge fails.
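+//
+// Typical usage, assuming a remote named "origin":
+//
+//	if err := Pull(def, repo, "origin"); err != nil {
+//		// the local entity refs were not (or only partially) updated
+//	}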
+func Pull(def Definition, repo repository.ClockedRepo, remote string) error {
+	_, err := Fetch(def, repo, remote)
+	if err != nil {
+		return err
+	}
+
+	for merge := range MergeAll(def, repo, remote) {
+		if merge.Err != nil {
+			return merge.Err
+		}
+		if merge.Status == entity.MergeStatusInvalid {
+			return errors.Errorf("merge failure: %s", merge.Reason)
+		}
+	}
+
+	return nil
+}
+
+func MergeAll(def Definition, repo repository.ClockedRepo, remote string) <-chan entity.MergeResult {
+	out := make(chan entity.MergeResult)
+
+	// no caching for the merge, we load everything from git even if that means multiple
+	// copies of the same entity in memory. The cache layer will intercept the results to
+	// invalidate entities if necessary.
+
+	go func() {
+		defer close(out)
+
+		remoteRefSpec := fmt.Sprintf("refs/remotes/%s/%s/", remote, def.namespace)
+		remoteRefs, err := repo.ListRefs(remoteRefSpec)
+		if err != nil {
+			out <- entity.MergeResult{Err: err}
+			return
+		}
+
+		for _, remoteRef := range remoteRefs {
+			out <- merge(def, repo, remoteRef)
+		}
+	}()
+
+	return out
+}
+
+func merge(def Definition, repo repository.ClockedRepo, remoteRef string) entity.MergeResult {
+	id := entity.RefToId(remoteRef)
+
+	if err := id.Validate(); err != nil {
+		return entity.NewMergeInvalidStatus(id, errors.Wrap(err, "invalid ref").Error())
+	}
+
+	remoteEntity, err := read(def, repo, remoteRef)
+	if err != nil {
+		return entity.NewMergeInvalidStatus(id,
+			errors.Wrapf(err, "remote %s is not readable", def.typename).Error())
+	}
+
+	// Check for error in remote data
+	if err := remoteEntity.Validate(); err != nil {
+		return entity.NewMergeInvalidStatus(id,
+			errors.Wrapf(err, "remote %s data is invalid", def.typename).Error())
+	}
+
+	localRef := fmt.Sprintf("refs/%s/%s", def.namespace, id.String())
+
+	localExist, err := repo.RefExist(localRef)
+	if err != nil {
+		return entity.NewMergeError(err, id)
+	}
+
+	// the entity is not local yet, simply create the reference
+	if !localExist {
+		err := repo.CopyRef(remoteRef, localRef)
+		if err != nil {
+			return entity.NewMergeError(err, id)
+		}
+
+		return entity.NewMergeStatus(entity.MergeStatusNew, id, remoteEntity)
+	}
+
+	// var updated bool
+	// err = repo.MergeRef(localRef, remoteRef, func() repository.Hash {
+	// 	updated = true
+	//
+	// })
+	// if err != nil {
+	// 	return entity.NewMergeError(err, id)
+	// }
+	//
+	// if updated {
+	// 	return entity.NewMergeStatus(entity.MergeStatusUpdated, id, )
+	// } else {
+	// 	return entity.NewMergeStatus(entity.MergeStatusNothing, id, )
+	// }
+
+	localCommit, err := repo.ResolveRef(localRef)
+	if err != nil {
+		return entity.NewMergeError(err, id)
+	}
+
+	remoteCommit, err := repo.ResolveRef(remoteRef)
+	if err != nil {
+		return entity.NewMergeError(err, id)
+	}
+
+	if localCommit == remoteCommit {
+		// nothing to merge
+		return entity.NewMergeStatus(entity.MergeStatusNothing, id, remoteEntity)
+	}
+
+	// fast-forward is possible if the history of the remote ref includes the local commit
+
+	remoteCommits, err := repo.ListCommits(remoteRef)
+	if err != nil {
+		return entity.NewMergeError(err, id)
+	}
+
+	fastForwardPossible := false
+	for _, hash := range remoteCommits {
+		if hash == localCommit {
+			fastForwardPossible = true
+			break
+		}
+	}
+
+	if fastForwardPossible {
+		err = repo.UpdateRef(localRef, remoteCommit)
+		if err != nil {
+			return entity.NewMergeError(err, id)
+		}
+		return entity.NewMergeStatus(entity.MergeStatusUpdated, id, remoteEntity)
+	}
+
+	// fast-forward is not possible, we need to create a merge commit.
+	// For simplicity when reading, and to have clocks that record this change,
and to have clocks that record this change, we store + // an empty operationPack. + // First step is to collect those clocks. + + localEntity, err := read(def, repo, localRef) + if err != nil { + return entity.NewMergeError(err, id) + } + + // err = localEntity.packClock.Witness(remoteEntity.packClock.Time()) + // if err != nil { + // return entity.NewMergeError(err, id) + // } + // + // packTime, err := localEntity.packClock.Increment() + // if err != nil { + // return entity.NewMergeError(err, id) + // } + + editTime, err := repo.Increment(fmt.Sprintf(editClockPattern, def.namespace)) + if err != nil { + return entity.NewMergeError(err, id) + } + + opp := &operationPack{ + Operations: nil, + CreateTime: 0, + EditTime: editTime, + // PackTime: packTime, + } + + treeHash, err := opp.Write(def, repo) + if err != nil { + return entity.NewMergeError(err, id) + } + + // Create the merge commit with two parents + newHash, err := repo.StoreCommit(treeHash, localCommit, remoteCommit) + if err != nil { + return entity.NewMergeError(err, id) + } + + // finally update the ref + err = repo.UpdateRef(localRef, newHash) + if err != nil { + return entity.NewMergeError(err, id) + } + + return entity.NewMergeStatus(entity.MergeStatusUpdated, id, localEntity) +} + +func Remove() error { + panic("") +} diff --git a/entity/dag/entity_test.go b/entity/dag/entity_test.go new file mode 100644 index 00000000..c5c83567 --- /dev/null +++ b/entity/dag/entity_test.go @@ -0,0 +1,117 @@ +package dag + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestWriteRead(t *testing.T) { + repo, id1, id2, def := makeTestContext() + + entity := New(def) + require.False(t, entity.NeedCommit()) + + entity.Append(newOp1(id1, "foo")) + entity.Append(newOp2(id1, "bar")) + + require.True(t, entity.NeedCommit()) + require.NoError(t, entity.CommitAdNeeded(repo)) + require.False(t, entity.NeedCommit()) + + entity.Append(newOp2(id2, "foobar")) + require.True(t, entity.NeedCommit()) + require.NoError(t, entity.CommitAdNeeded(repo)) + require.False(t, entity.NeedCommit()) + + read, err := Read(def, repo, entity.Id()) + require.NoError(t, err) + + assertEqualEntities(t, entity, read) +} + +func assertEqualEntities(t *testing.T, a, b *Entity) { + // testify doesn't support comparing functions and systematically fail if they are not nil + // so we have to set them to nil temporarily + + backOpUnA := a.Definition.operationUnmarshaler + backOpUnB := b.Definition.operationUnmarshaler + + a.Definition.operationUnmarshaler = nil + b.Definition.operationUnmarshaler = nil + + backIdResA := a.Definition.identityResolver + backIdResB := b.Definition.identityResolver + + a.Definition.identityResolver = nil + b.Definition.identityResolver = nil + + defer func() { + a.Definition.operationUnmarshaler = backOpUnA + b.Definition.operationUnmarshaler = backOpUnB + a.Definition.identityResolver = backIdResA + b.Definition.identityResolver = backIdResB + }() + + require.Equal(t, a, b) +} + +// // Merge +// +// merge1 := makeCommit(t, repo) +// merge1 = makeCommit(t, repo, merge1) +// err = repo.UpdateRef("merge1", merge1) +// require.NoError(t, err) +// +// err = repo.UpdateRef("merge2", merge1) +// require.NoError(t, err) +// +// // identical merge +// err = repo.MergeRef("merge1", "merge2") +// require.NoError(t, err) +// +// refMerge1, err := repo.ResolveRef("merge1") +// require.NoError(t, err) +// require.Equal(t, merge1, refMerge1) +// refMerge2, err := repo.ResolveRef("merge2") +// require.NoError(t, err) +// require.Equal(t, 
merge1, refMerge2) +// +// // fast-forward merge +// merge2 := makeCommit(t, repo, merge1) +// merge2 = makeCommit(t, repo, merge2) +// +// err = repo.UpdateRef("merge2", merge2) +// require.NoError(t, err) +// +// err = repo.MergeRef("merge1", "merge2") +// require.NoError(t, err) +// +// refMerge1, err = repo.ResolveRef("merge1") +// require.NoError(t, err) +// require.Equal(t, merge2, refMerge1) +// refMerge2, err = repo.ResolveRef("merge2") +// require.NoError(t, err) +// require.Equal(t, merge2, refMerge2) +// +// // merge commit +// merge1 = makeCommit(t, repo, merge1) +// err = repo.UpdateRef("merge1", merge1) +// require.NoError(t, err) +// +// merge2 = makeCommit(t, repo, merge2) +// err = repo.UpdateRef("merge2", merge2) +// require.NoError(t, err) +// +// err = repo.MergeRef("merge1", "merge2") +// require.NoError(t, err) +// +// refMerge1, err = repo.ResolveRef("merge1") +// require.NoError(t, err) +// require.NotEqual(t, merge1, refMerge1) +// commitRefMerge1, err := repo.ReadCommit(refMerge1) +// require.NoError(t, err) +// require.ElementsMatch(t, commitRefMerge1.Parents, []Hash{merge1, merge2}) +// refMerge2, err = repo.ResolveRef("merge2") +// require.NoError(t, err) +// require.Equal(t, merge2, refMerge2) diff --git a/entity/dag/operation.go b/entity/dag/operation.go new file mode 100644 index 00000000..9fcc055b --- /dev/null +++ b/entity/dag/operation.go @@ -0,0 +1,31 @@ +package dag + +import ( + "github.com/MichaelMure/git-bug/entity" + "github.com/MichaelMure/git-bug/identity" +) + +// Operation is a piece of data defining a change to reflect on the state of an Entity. +// What this Operation or Entity's state looks like is not of the resort of this package as it only deals with the +// data structure and storage. +type Operation interface { + // Id return the Operation identifier + // Some care need to be taken to define a correct Id derivation and enough entropy in the data used to avoid + // collisions. Notably: + // - the Id of the first Operation will be used as the Id of the Entity. Collision need to be avoided across Entities. + // - collisions can also happen within the set of Operations of an Entity. Simple Operation might not have enough + // entropy to yield unique Ids. + // A common way to derive an Id will be to use the DeriveId function on the serialized operation data. + Id() entity.Id + // Validate check if the Operation data is valid + Validate() error + + Author() identity.Interface +} + +type operationBase struct { + author identity.Interface + + // Not serialized. Store the op's id in memory. + id entity.Id +} diff --git a/entity/dag/operation_pack.go b/entity/dag/operation_pack.go new file mode 100644 index 00000000..7cf4ee58 --- /dev/null +++ b/entity/dag/operation_pack.go @@ -0,0 +1,294 @@ +package dag + +import ( + "encoding/json" + "fmt" + "strconv" + "strings" + + "github.com/pkg/errors" + "golang.org/x/crypto/openpgp" + + "github.com/MichaelMure/git-bug/entity" + "github.com/MichaelMure/git-bug/identity" + "github.com/MichaelMure/git-bug/repository" + "github.com/MichaelMure/git-bug/util/lamport" +) + +// TODO: extra data tree +const extraEntryName = "extra" + +const opsEntryName = "ops" +const versionEntryPrefix = "version-" +const createClockEntryPrefix = "create-clock-" +const editClockEntryPrefix = "edit-clock-" +const packClockEntryPrefix = "pack-clock-" + +// operationPack is a wrapper structure to store multiple operations in a single git blob. +// Additionally, it holds and store the metadata for those operations. 
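+//
+// Only the author and the operations are serialized into the git blob (see
+// MarshalJSON below); the Lamport clocks travel in the surrounding git tree
+// instead. The stored JSON looks roughly like this (field content is
+// illustrative, taken from this package's test operations):
+//
+//	{"author":{...},"ops":[{"type":1,"field_1":"foo"},...]}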
+type operationPack struct { + // An identifier, taken from a hash of the serialized Operations. + id entity.Id + + // The author of the Operations. Must be the same author for all the Operations. + Author identity.Interface + // The list of Operation stored in the operationPack + Operations []Operation + // Encode the entity's logical time of creation across all entities of the same type. + // Only exist on the root operationPack + CreateTime lamport.Time + // Encode the entity's logical time of last edition across all entities of the same type. + // Exist on all operationPack + EditTime lamport.Time + // // Encode the operationPack's logical time of creation withing this entity. + // // Exist on all operationPack + // PackTime lamport.Time +} + +func (opp *operationPack) Id() entity.Id { + if opp.id == "" || opp.id == entity.UnsetId { + // This means we are trying to get the opp's Id *before* it has been stored. + // As the Id is computed based on the actual bytes written on the disk, we are going to predict + // those and then get the Id. This is safe as it will be the exact same code writing on disk later. + + data, err := json.Marshal(opp) + if err != nil { + panic(err) + } + opp.id = entity.DeriveId(data) + } + + return opp.id +} + +func (opp *operationPack) MarshalJSON() ([]byte, error) { + return json.Marshal(struct { + Author identity.Interface `json:"author"` + Operations []Operation `json:"ops"` + }{ + Author: opp.Author, + Operations: opp.Operations, + }) +} + +func (opp *operationPack) Validate() error { + if opp.Author == nil { + return fmt.Errorf("missing author") + } + for _, op := range opp.Operations { + if op.Author() != opp.Author { + return fmt.Errorf("operation has different author than the operationPack's") + } + } + if opp.EditTime == 0 { + return fmt.Errorf("lamport edit time is zero") + } + return nil +} + +func (opp *operationPack) Write(def Definition, repo repository.RepoData, parentCommit ...repository.Hash) (repository.Hash, error) { + if err := opp.Validate(); err != nil { + return "", err + } + + // For different reason, we store the clocks and format version directly in the git tree. + // Version has to be accessible before any attempt to decode to return early with a unique error. + // Clocks could possibly be stored in the git blob but it's nice to separate data and metadata, and + // we are storing something directly in the tree already so why not. + // + // To have a valid Tree, we point the "fake" entries to always the same value, the empty blob. 
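+	//
+	// The resulting tree looks roughly like this (the entry names carry the
+	// actual values; the clock values shown here are illustrative):
+	//
+	//	version-1      -> empty blob
+	//	ops            -> blob holding the serialized operations
+	//	edit-clock-4   -> empty blob
+	//	create-clock-1 -> empty blob (only on the root operationPack)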
+ emptyBlobHash, err := repo.StoreData([]byte{}) + if err != nil { + return "", err + } + + // Write the Ops as a Git blob containing the serialized array of operations + data, err := json.Marshal(opp) + if err != nil { + return "", err + } + + // compute the Id while we have the serialized data + opp.id = entity.DeriveId(data) + + hash, err := repo.StoreData(data) + if err != nil { + return "", err + } + + // Make a Git tree referencing this blob and encoding the other values: + // - format version + // - clocks + tree := []repository.TreeEntry{ + {ObjectType: repository.Blob, Hash: emptyBlobHash, + Name: fmt.Sprintf(versionEntryPrefix+"%d", def.formatVersion)}, + {ObjectType: repository.Blob, Hash: hash, + Name: opsEntryName}, + {ObjectType: repository.Blob, Hash: emptyBlobHash, + Name: fmt.Sprintf(editClockEntryPrefix+"%d", opp.EditTime)}, + // {ObjectType: repository.Blob, Hash: emptyBlobHash, + // Name: fmt.Sprintf(packClockEntryPrefix+"%d", opp.PackTime)}, + } + if opp.CreateTime > 0 { + tree = append(tree, repository.TreeEntry{ + ObjectType: repository.Blob, + Hash: emptyBlobHash, + Name: fmt.Sprintf(createClockEntryPrefix+"%d", opp.CreateTime), + }) + } + + // Store the tree + treeHash, err := repo.StoreTree(tree) + if err != nil { + return "", err + } + + // Write a Git commit referencing the tree, with the previous commit as parent + // If we have keys, sign. + var commitHash repository.Hash + + // Sign the commit if we have a key + if opp.Author.SigningKey() != nil { + commitHash, err = repo.StoreSignedCommit(treeHash, opp.Author.SigningKey().PGPEntity(), parentCommit...) + } else { + commitHash, err = repo.StoreCommit(treeHash, parentCommit...) + } + + if err != nil { + return "", err + } + + return commitHash, nil +} + +// readOperationPack read the operationPack encoded in git at the given Tree hash. +// +// Validity of the Lamport clocks is left for the caller to decide. 
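+// As a sketch of the kind of check a caller performs while walking the DAG,
+// for every parent/child pair of packs:
+//
+//	if parentPack.EditTime >= childPack.EditTime {
+//		return fmt.Errorf("lamport clock ordering doesn't match the DAG")
+//	}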
+func readOperationPack(def Definition, repo repository.RepoData, commit repository.Commit) (*operationPack, error) { + entries, err := repo.ReadTree(commit.TreeHash) + if err != nil { + return nil, err + } + + // check the format version first, fail early instead of trying to read something + var version uint + for _, entry := range entries { + if strings.HasPrefix(entry.Name, versionEntryPrefix) { + v, err := strconv.ParseUint(strings.TrimPrefix(entry.Name, versionEntryPrefix), 10, 64) + if err != nil { + return nil, errors.Wrap(err, "can't read format version") + } + if v > 1<<12 { + return nil, fmt.Errorf("format version too big") + } + version = uint(v) + break + } + } + if version == 0 { + return nil, entity.NewErrUnknowFormat(def.formatVersion) + } + if version != def.formatVersion { + return nil, entity.NewErrInvalidFormat(version, def.formatVersion) + } + + var id entity.Id + var author identity.Interface + var ops []Operation + var createTime lamport.Time + var editTime lamport.Time + // var packTime lamport.Time + + for _, entry := range entries { + switch { + case entry.Name == opsEntryName: + data, err := repo.ReadData(entry.Hash) + if err != nil { + return nil, errors.Wrap(err, "failed to read git blob data") + } + ops, author, err = unmarshallPack(def, data) + if err != nil { + return nil, err + } + id = entity.DeriveId(data) + + case strings.HasPrefix(entry.Name, createClockEntryPrefix): + v, err := strconv.ParseUint(strings.TrimPrefix(entry.Name, createClockEntryPrefix), 10, 64) + if err != nil { + return nil, errors.Wrap(err, "can't read creation lamport time") + } + createTime = lamport.Time(v) + + case strings.HasPrefix(entry.Name, editClockEntryPrefix): + v, err := strconv.ParseUint(strings.TrimPrefix(entry.Name, editClockEntryPrefix), 10, 64) + if err != nil { + return nil, errors.Wrap(err, "can't read edit lamport time") + } + editTime = lamport.Time(v) + + // case strings.HasPrefix(entry.Name, packClockEntryPrefix): + // found &= 1 << 3 + // + // v, err := strconv.ParseUint(strings.TrimPrefix(entry.Name, packClockEntryPrefix), 10, 64) + // if err != nil { + // return nil, errors.Wrap(err, "can't read pack lamport time") + // } + // packTime = lamport.Time(v) + } + } + + // Verify signature if we expect one + keys := author.ValidKeysAtTime(fmt.Sprintf(editClockPattern, def.namespace), editTime) + if len(keys) > 0 { + keyring := identity.PGPKeyring(keys) + _, err = openpgp.CheckDetachedSignature(keyring, commit.SignedData, commit.Signature) + if err != nil { + return nil, fmt.Errorf("signature failure: %v", err) + } + } + + return &operationPack{ + id: id, + Author: author, + Operations: ops, + CreateTime: createTime, + EditTime: editTime, + // PackTime: packTime, + }, nil +} + +// unmarshallPack delegate the unmarshalling of the Operation's JSON to the decoding +// function provided by the concrete entity. This gives access to the concrete type of each +// Operation. 
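+//
+// A concrete decoding function typically sniffs a type discriminator first,
+// then decodes into the matching concrete type. Simplified from this
+// package's test helpers:
+//
+//	func unmarshaler(author identity.Interface, raw json.RawMessage) (Operation, error) {
+//		var t struct {
+//			OperationType int `json:"type"`
+//		}
+//		if err := json.Unmarshal(raw, &t); err != nil {
+//			return nil, err
+//		}
+//		switch t.OperationType {
+//		case 1:
+//			op := &op1{}
+//			err := json.Unmarshal(raw, &op)
+//			return op, err
+//		default:
+//			return nil, fmt.Errorf("unknown operation type %v", t.OperationType)
+//		}
+//	}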
+func unmarshallPack(def Definition, data []byte) ([]Operation, identity.Interface, error) { + aux := struct { + Author identity.IdentityStub `json:"author"` + Operations []json.RawMessage `json:"ops"` + }{} + + if err := json.Unmarshal(data, &aux); err != nil { + return nil, nil, err + } + + if aux.Author.Id() == "" || aux.Author.Id() == entity.UnsetId { + return nil, nil, fmt.Errorf("missing author") + } + + author, err := def.identityResolver.ResolveIdentity(aux.Author.Id()) + if err != nil { + return nil, nil, err + } + + ops := make([]Operation, 0, len(aux.Operations)) + + for _, raw := range aux.Operations { + // delegate to specialized unmarshal function + op, err := def.operationUnmarshaler(author, raw) + if err != nil { + return nil, nil, err + } + ops = append(ops, op) + } + + return ops, author, nil +} diff --git a/entity/dag/operation_pack_test.go b/entity/dag/operation_pack_test.go new file mode 100644 index 00000000..ad2a9859 --- /dev/null +++ b/entity/dag/operation_pack_test.go @@ -0,0 +1,44 @@ +package dag + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestOperationPackReadWrite(t *testing.T) { + repo, id1, _, def := makeTestContext() + + opp := &operationPack{ + Author: id1, + Operations: []Operation{ + newOp1(id1, "foo"), + newOp2(id1, "bar"), + }, + CreateTime: 123, + EditTime: 456, + } + + commitHash, err := opp.Write(def, repo) + require.NoError(t, err) + + commit, err := repo.ReadCommit(commitHash) + require.NoError(t, err) + + opp2, err := readOperationPack(def, repo, commit) + require.NoError(t, err) + + require.Equal(t, opp, opp2) + + // make sure we get the same Id with the same data + opp3 := &operationPack{ + Author: id1, + Operations: []Operation{ + newOp1(id1, "foo"), + newOp2(id1, "bar"), + }, + CreateTime: 123, + EditTime: 456, + } + require.Equal(t, opp.Id(), opp3.Id()) +} diff --git a/entity/doc.go b/entity/doc.go deleted file mode 100644 index 4682d545..00000000 --- a/entity/doc.go +++ /dev/null @@ -1,8 +0,0 @@ -// Package entity contains the base common code to define an entity stored -// in a chain of git objects, supporting actions like Push, Pull and Merge. -package entity - -// TODO: Bug and Identity are very similar, right ? I expect that this package -// will eventually hold the common code to define an entity and the related -// helpers, errors and so on. When this work is done, it will become easier -// to add new entities, for example to support pull requests. diff --git a/entity/entity.go b/entity/entity.go deleted file mode 100644 index a1e8e57e..00000000 --- a/entity/entity.go +++ /dev/null @@ -1,348 +0,0 @@ -package entity - -import ( - "encoding/json" - "fmt" - "sort" - - "github.com/pkg/errors" - - "github.com/MichaelMure/git-bug/repository" - "github.com/MichaelMure/git-bug/util/lamport" -) - -const refsPattern = "refs/%s/%s" -const creationClockPattern = "%s-create" -const editClockPattern = "%s-edit" - -type Operation interface { - Id() Id - // MarshalJSON() ([]byte, error) - Validate() error -} - -// Definition hold the details defining one specialization of an Entity. -type Definition struct { - // the name of the entity (bug, pull-request, ...) - typename string - // the namespace in git (bugs, prs, ...) 
- namespace string - // a function decoding a JSON message into an Operation - operationUnmarshaler func(raw json.RawMessage) (Operation, error) - // the expected format version number - formatVersion uint -} - -type Entity struct { - Definition - - ops []Operation - staging []Operation - - packClock lamport.Clock - lastCommit repository.Hash -} - -func New(definition Definition) *Entity { - return &Entity{ - Definition: definition, - packClock: lamport.NewMemClock(), - } -} - -func Read(def Definition, repo repository.ClockedRepo, id Id) (*Entity, error) { - if err := id.Validate(); err != nil { - return nil, errors.Wrap(err, "invalid id") - } - - ref := fmt.Sprintf("refs/%s/%s", def.namespace, id.String()) - - return read(def, repo, ref) -} - -// read fetch from git and decode an Entity at an arbitrary git reference. -func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, error) { - rootHash, err := repo.ResolveRef(ref) - if err != nil { - return nil, err - } - - // Perform a depth-first search to get a topological order of the DAG where we discover the - // parents commit and go back in time up to the chronological root - - stack := make([]repository.Hash, 0, 32) - visited := make(map[repository.Hash]struct{}) - DFSOrder := make([]repository.Commit, 0, 32) - - stack = append(stack, rootHash) - - for len(stack) > 0 { - // pop - hash := stack[len(stack)-1] - stack = stack[:len(stack)-1] - - if _, ok := visited[hash]; ok { - continue - } - - // mark as visited - visited[hash] = struct{}{} - - commit, err := repo.ReadCommit(hash) - if err != nil { - return nil, err - } - - DFSOrder = append(DFSOrder, commit) - - for _, parent := range commit.Parents { - stack = append(stack, parent) - } - } - - // Now, we can reverse this topological order and read the commits in an order where - // we are sure to have read all the chronological ancestors when we read a commit. - - // Next step is to: - // 1) read the operationPacks - // 2) make sure that the clocks causality respect the DAG topology. 
- - oppMap := make(map[repository.Hash]*operationPack) - var opsCount int - var packClock = lamport.NewMemClock() - - for i := len(DFSOrder) - 1; i >= 0; i-- { - commit := DFSOrder[i] - firstCommit := i == len(DFSOrder)-1 - - // Verify DAG structure: single chronological root, so only the root - // can have no parents - if !firstCommit && len(commit.Parents) == 0 { - return nil, fmt.Errorf("multiple root in the entity DAG") - } - - opp, err := readOperationPack(def, repo, commit.TreeHash) - if err != nil { - return nil, err - } - - // Check that the lamport clocks are set - if firstCommit && opp.CreateTime <= 0 { - return nil, fmt.Errorf("creation lamport time not set") - } - if opp.EditTime <= 0 { - return nil, fmt.Errorf("edition lamport time not set") - } - if opp.PackTime <= 0 { - return nil, fmt.Errorf("pack lamport time not set") - } - - // make sure that the lamport clocks causality match the DAG topology - for _, parentHash := range commit.Parents { - parentPack, ok := oppMap[parentHash] - if !ok { - panic("DFS failed") - } - - if parentPack.EditTime >= opp.EditTime { - return nil, fmt.Errorf("lamport clock ordering doesn't match the DAG") - } - - // to avoid an attack where clocks are pushed toward the uint64 rollover, make sure - // that the clocks don't jump too far in the future - if opp.EditTime-parentPack.EditTime > 10_000 { - return nil, fmt.Errorf("lamport clock jumping too far in the future, likely an attack") - } - } - - oppMap[commit.Hash] = opp - opsCount += len(opp.Operations) - } - - // The clocks are fine, we witness them - for _, opp := range oppMap { - err = repo.Witness(fmt.Sprintf(creationClockPattern, def.namespace), opp.CreateTime) - if err != nil { - return nil, err - } - err = repo.Witness(fmt.Sprintf(editClockPattern, def.namespace), opp.EditTime) - if err != nil { - return nil, err - } - err = packClock.Witness(opp.PackTime) - if err != nil { - return nil, err - } - } - - // Now that we know that the topological order and clocks are fine, we order the operationPacks - // based on the logical clocks, entirely ignoring the DAG topology - - oppSlice := make([]*operationPack, 0, len(oppMap)) - for _, pack := range oppMap { - oppSlice = append(oppSlice, pack) - } - sort.Slice(oppSlice, func(i, j int) bool { - // Primary ordering with the dedicated "pack" Lamport time that encode causality - // within the entity - if oppSlice[i].PackTime != oppSlice[j].PackTime { - return oppSlice[i].PackTime < oppSlice[i].PackTime - } - // We have equal PackTime, which means we had a concurrent edition. We can't tell which exactly - // came first. As a secondary arbitrary ordering, we can use the EditTime. It's unlikely to be - // enough but it can give us an edge to approach what really happened. - if oppSlice[i].EditTime != oppSlice[j].EditTime { - return oppSlice[i].EditTime < oppSlice[j].EditTime - } - // Well, what now? We still need a total ordering, the most stable possible. - // As a last resort, we can order based on a hash of the serialized Operations in the - // operationPack. It doesn't carry much meaning but it's unbiased and hard to abuse. - // This is a lexicographic ordering. 
- return oppSlice[i].Id < oppSlice[j].Id - }) - - // Now that we ordered the operationPacks, we have the order of the Operations - - ops := make([]Operation, 0, opsCount) - for _, pack := range oppSlice { - for _, operation := range pack.Operations { - ops = append(ops, operation) - } - } - - return &Entity{ - Definition: def, - ops: ops, - lastCommit: rootHash, - }, nil -} - -// Id return the Entity identifier -func (e *Entity) Id() Id { - // id is the id of the first operation - return e.FirstOp().Id() -} - -func (e *Entity) Validate() error { - // non-empty - if len(e.ops) == 0 && len(e.staging) == 0 { - return fmt.Errorf("entity has no operations") - } - - // check if each operations are valid - for _, op := range e.ops { - if err := op.Validate(); err != nil { - return err - } - } - - // check if staging is valid if needed - for _, op := range e.staging { - if err := op.Validate(); err != nil { - return err - } - } - - // Check that there is no colliding operation's ID - ids := make(map[Id]struct{}) - for _, op := range e.Operations() { - if _, ok := ids[op.Id()]; ok { - return fmt.Errorf("id collision: %s", op.Id()) - } - ids[op.Id()] = struct{}{} - } - - return nil -} - -// return the ordered operations -func (e *Entity) Operations() []Operation { - return append(e.ops, e.staging...) -} - -// Lookup for the very first operation of the Entity. -func (e *Entity) FirstOp() Operation { - for _, op := range e.ops { - return op - } - for _, op := range e.staging { - return op - } - return nil -} - -func (e *Entity) Append(op Operation) { - e.staging = append(e.staging, op) -} - -func (e *Entity) NeedCommit() bool { - return len(e.staging) > 0 -} - -func (e *Entity) CommitAdNeeded(repo repository.ClockedRepo) error { - if e.NeedCommit() { - return e.Commit(repo) - } - return nil -} - -// TODO: support commit signature -func (e *Entity) Commit(repo repository.ClockedRepo) error { - if !e.NeedCommit() { - return fmt.Errorf("can't commit an entity with no pending operation") - } - - if err := e.Validate(); err != nil { - return errors.Wrapf(err, "can't commit a %s with invalid data", e.Definition.typename) - } - - // increment the various clocks for this new operationPack - packTime, err := e.packClock.Increment() - if err != nil { - return err - } - editTime, err := repo.Increment(fmt.Sprintf(editClockPattern, e.namespace)) - if err != nil { - return err - } - var creationTime lamport.Time - if e.lastCommit == "" { - creationTime, err = repo.Increment(fmt.Sprintf(creationClockPattern, e.namespace)) - if err != nil { - return err - } - } - - opp := &operationPack{ - Operations: e.staging, - CreateTime: creationTime, - EditTime: editTime, - PackTime: packTime, - } - - treeHash, err := opp.write(e.Definition, repo) - if err != nil { - return err - } - - // Write a Git commit referencing the tree, with the previous commit as parent - var commitHash repository.Hash - if e.lastCommit != "" { - commitHash, err = repo.StoreCommitWithParent(treeHash, e.lastCommit) - } else { - commitHash, err = repo.StoreCommit(treeHash) - } - if err != nil { - return err - } - - e.lastCommit = commitHash - e.ops = append(e.ops, e.staging...) - e.staging = nil - - // Create or update the Git reference for this entity - // When pushing later, the remote will ensure that this ref update - // is fast-forward, that is no data has been overwritten. 
- ref := fmt.Sprintf(refsPattern, e.namespace, e.Id().String()) - return repo.UpdateRef(ref, commitHash) -} diff --git a/entity/entity_actions.go b/entity/entity_actions.go deleted file mode 100644 index 34e76a62..00000000 --- a/entity/entity_actions.go +++ /dev/null @@ -1,31 +0,0 @@ -package entity - -import ( - "fmt" - - "github.com/MichaelMure/git-bug/repository" -) - -func ListLocalIds(typename string, repo repository.RepoData) ([]Id, error) { - refs, err := repo.ListRefs(fmt.Sprintf("refs/%s/", typename)) - if err != nil { - return nil, err - } - return RefsToIds(refs), nil -} - -func Fetch() { - -} - -func Pull() { - -} - -func Push() { - -} - -func Remove() error { - panic("") -} diff --git a/entity/entity_test.go b/entity/entity_test.go deleted file mode 100644 index 92a53179..00000000 --- a/entity/entity_test.go +++ /dev/null @@ -1,107 +0,0 @@ -package entity - -import ( - "encoding/json" - "fmt" - "testing" - - "github.com/stretchr/testify/require" - - "github.com/MichaelMure/git-bug/repository" -) - -// func TestFoo(t *testing.T) { -// repo, err := repository.OpenGoGitRepo("~/dev/git-bug", nil) -// require.NoError(t, err) -// -// b, err := ReadBug(repo, Id("8b22e548c93a6ed23c31fd4e337c6286c3d1e5c9cae5537bc8e5842e11bd1099")) -// require.NoError(t, err) -// -// fmt.Println(b) -// } - -type op1 struct { - OperationType int `json:"type"` - Field1 string `json:"field_1"` -} - -func newOp1(field1 string) *op1 { - return &op1{OperationType: 1, Field1: field1} -} - -func (o op1) Id() Id { - data, _ := json.Marshal(o) - return DeriveId(data) -} - -func (o op1) Validate() error { return nil } - -type op2 struct { - OperationType int `json:"type"` - Field2 string `json:"field_2"` -} - -func newOp2(field2 string) *op2 { - return &op2{OperationType: 2, Field2: field2} -} - -func (o op2) Id() Id { - data, _ := json.Marshal(o) - return DeriveId(data) -} - -func (o op2) Validate() error { return nil } - -var def = Definition{ - typename: "foo", - namespace: "foos", - operationUnmarshaler: unmarshaller, - formatVersion: 1, -} - -func unmarshaller(raw json.RawMessage) (Operation, error) { - var t struct { - OperationType int `json:"type"` - } - - if err := json.Unmarshal(raw, &t); err != nil { - return nil, err - } - - switch t.OperationType { - case 1: - op := &op1{} - err := json.Unmarshal(raw, &op) - return op, err - case 2: - op := &op2{} - err := json.Unmarshal(raw, &op) - return op, err - default: - return nil, fmt.Errorf("unknown operation type %v", t.OperationType) - } -} - -func TestWriteRead(t *testing.T) { - repo := repository.NewMockRepo() - - entity := New(def) - require.False(t, entity.NeedCommit()) - - entity.Append(newOp1("foo")) - entity.Append(newOp2("bar")) - - require.True(t, entity.NeedCommit()) - require.NoError(t, entity.CommitAdNeeded(repo)) - require.False(t, entity.NeedCommit()) - - entity.Append(newOp2("foobar")) - require.True(t, entity.NeedCommit()) - require.NoError(t, entity.CommitAdNeeded(repo)) - require.False(t, entity.NeedCommit()) - - read, err := Read(def, repo, entity.Id()) - require.NoError(t, err) - - fmt.Println(*read) -} diff --git a/entity/merge.go b/entity/merge.go index 3ce8edac..7d1f3f43 100644 --- a/entity/merge.go +++ b/entity/merge.go @@ -8,14 +8,15 @@ import ( type MergeStatus int const ( - _ MergeStatus = iota - MergeStatusNew - MergeStatusInvalid - MergeStatusUpdated - MergeStatusNothing - MergeStatusError + _ MergeStatus = iota + MergeStatusNew // a new Entity was created locally + MergeStatusInvalid // the remote data is invalid + 
MergeStatusUpdated // a local Entity has been updated + MergeStatusNothing // no changes were made to a local Entity (already up to date) + MergeStatusError // a terminal error happened ) +// MergeResult hold the result of a merge operation on an Entity. type MergeResult struct { // Err is set when a terminal error occur in the process Err error @@ -55,6 +56,7 @@ func NewMergeError(err error, id Id) MergeResult { } } +// TODO: Interface --> *Entity ? func NewMergeStatus(status MergeStatus, id Id, entity Interface) MergeResult { return MergeResult{ Id: id, diff --git a/entity/operation_pack.go b/entity/operation_pack.go deleted file mode 100644 index 0a16dd61..00000000 --- a/entity/operation_pack.go +++ /dev/null @@ -1,199 +0,0 @@ -package entity - -import ( - "encoding/json" - "fmt" - "strconv" - "strings" - - "github.com/pkg/errors" - - "github.com/MichaelMure/git-bug/repository" - "github.com/MichaelMure/git-bug/util/lamport" -) - -// TODO: extra data tree -const extraEntryName = "extra" - -const opsEntryName = "ops" -const versionEntryPrefix = "version-" -const createClockEntryPrefix = "create-clock-" -const editClockEntryPrefix = "edit-clock-" -const packClockEntryPrefix = "pack-clock-" - -type operationPack struct { - Operations []Operation - // Encode the entity's logical time of creation across all entities of the same type. - // Only exist on the root operationPack - CreateTime lamport.Time - // Encode the entity's logical time of last edition across all entities of the same type. - // Exist on all operationPack - EditTime lamport.Time - // Encode the operationPack's logical time of creation withing this entity. - // Exist on all operationPack - PackTime lamport.Time -} - -func (opp operationPack) write(def Definition, repo repository.RepoData) (repository.Hash, error) { - // For different reason, we store the clocks and format version directly in the git tree. - // Version has to be accessible before any attempt to decode to return early with a unique error. - // Clocks could possibly be stored in the git blob but it's nice to separate data and metadata, and - // we are storing something directly in the tree already so why not. - // - // To have a valid Tree, we point the "fake" entries to always the same value, the empty blob. 
- emptyBlobHash, err := repo.StoreData([]byte{}) - if err != nil { - return "", err - } - - // Write the Ops as a Git blob containing the serialized array - data, err := json.Marshal(struct { - Operations []Operation `json:"ops"` - }{ - Operations: opp.Operations, - }) - if err != nil { - return "", err - } - hash, err := repo.StoreData(data) - if err != nil { - return "", err - } - - // Make a Git tree referencing this blob and encoding the other values: - // - format version - // - clocks - tree := []repository.TreeEntry{ - {ObjectType: repository.Blob, Hash: emptyBlobHash, - Name: fmt.Sprintf(versionEntryPrefix+"%d", def.formatVersion)}, - {ObjectType: repository.Blob, Hash: hash, - Name: opsEntryName}, - {ObjectType: repository.Blob, Hash: emptyBlobHash, - Name: fmt.Sprintf(editClockEntryPrefix+"%d", opp.EditTime)}, - {ObjectType: repository.Blob, Hash: emptyBlobHash, - Name: fmt.Sprintf(packClockEntryPrefix+"%d", opp.PackTime)}, - } - if opp.CreateTime > 0 { - tree = append(tree, repository.TreeEntry{ - ObjectType: repository.Blob, - Hash: emptyBlobHash, - Name: fmt.Sprintf(createClockEntryPrefix+"%d", opp.CreateTime), - }) - } - - // Store the tree - return repo.StoreTree(tree) -} - -// readOperationPack read the operationPack encoded in git at the given Tree hash. -// -// Validity of the Lamport clocks is left for the caller to decide. -func readOperationPack(def Definition, repo repository.RepoData, treeHash repository.Hash) (*operationPack, error) { - entries, err := repo.ReadTree(treeHash) - if err != nil { - return nil, err - } - - // check the format version first, fail early instead of trying to read something - var version uint - for _, entry := range entries { - if strings.HasPrefix(entry.Name, versionEntryPrefix) { - v, err := strconv.ParseUint(strings.TrimPrefix(entry.Name, versionEntryPrefix), 10, 64) - if err != nil { - return nil, errors.Wrap(err, "can't read format version") - } - if v > 1<<12 { - return nil, fmt.Errorf("format version too big") - } - version = uint(v) - break - } - } - if version == 0 { - return nil, NewErrUnknowFormat(def.formatVersion) - } - if version != def.formatVersion { - return nil, NewErrInvalidFormat(version, def.formatVersion) - } - - var ops []Operation - var createTime lamport.Time - var editTime lamport.Time - var packTime lamport.Time - - for _, entry := range entries { - if entry.Name == opsEntryName { - data, err := repo.ReadData(entry.Hash) - if err != nil { - return nil, errors.Wrap(err, "failed to read git blob data") - } - - ops, err = unmarshallOperations(def, data) - if err != nil { - return nil, err - } - continue - } - - if strings.HasPrefix(entry.Name, createClockEntryPrefix) { - v, err := strconv.ParseUint(strings.TrimPrefix(entry.Name, createClockEntryPrefix), 10, 64) - if err != nil { - return nil, errors.Wrap(err, "can't read creation lamport time") - } - createTime = lamport.Time(v) - continue - } - - if strings.HasPrefix(entry.Name, editClockEntryPrefix) { - v, err := strconv.ParseUint(strings.TrimPrefix(entry.Name, editClockEntryPrefix), 10, 64) - if err != nil { - return nil, errors.Wrap(err, "can't read edit lamport time") - } - editTime = lamport.Time(v) - continue - } - - if strings.HasPrefix(entry.Name, packClockEntryPrefix) { - v, err := strconv.ParseUint(strings.TrimPrefix(entry.Name, packClockEntryPrefix), 10, 64) - if err != nil { - return nil, errors.Wrap(err, "can't read pack lamport time") - } - packTime = lamport.Time(v) - continue - } - } - - return &operationPack{ - Operations: ops, - CreateTime: 
createTime,
-		EditTime:   editTime,
-		PackTime:   packTime,
-	}, nil
-}
-
-// unmarshallOperations delegate the unmarshalling of the Operation's JSON to the decoding
-// function provided by the concrete entity. This gives access to the concrete type of each
-// Operation.
-func unmarshallOperations(def Definition, data []byte) ([]Operation, error) {
-	aux := struct {
-		Operations []json.RawMessage `json:"ops"`
-	}{}
-
-	if err := json.Unmarshal(data, &aux); err != nil {
-		return nil, err
-	}
-
-	ops := make([]Operation, 0, len(aux.Operations))
-
-	for _, raw := range aux.Operations {
-		// delegate to specialized unmarshal function
-		op, err := def.operationUnmarshaler(raw)
-		if err != nil {
-			return nil, err
-		}
-
-		ops = append(ops, op)
-	}
-
-	return ops, nil
-}
diff --git a/entity/refs.go b/entity/refs.go
index f505dbf0..070d4dba 100644
--- a/entity/refs.go
+++ b/entity/refs.go
@@ -2,6 +2,7 @@ package entity
 
 import "strings"
 
+// RefsToIds parses a slice of git references and returns the corresponding Entity Ids.
 func RefsToIds(refs []string) []Id {
 	ids := make([]Id, len(refs))
 
@@ -12,6 +13,7 @@ func RefsToIds(refs []string) []Id {
 	return ids
 }
 
+// RefToId parses a single git reference and returns the corresponding Entity's Id.
 func RefToId(ref string) Id {
 	split := strings.Split(ref, "/")
 	return Id(split[len(split)-1])
--
cgit

From dc5059bc3372941e2908739831188768335ac50b Mon Sep 17 00:00:00 2001
From: Michael Muré
Date: Sun, 24 Jan 2021 19:45:21 +0100
Subject: entity: more progress on merging and signing

---
 entity/TODO                  |  1 +
 entity/dag/entity.go         | 14 ++-----
 entity/dag/entity_actions.go | 87 +++++++++++++++++++++++++++---------
 entity/dag/operation.go      |  8 ++--
 entity/dag/operation_pack.go | 50 +++++++++++++++++++++--
 entity/merge.go              | 37 +++++++++++------
 6 files changed, 139 insertions(+), 58 deletions(-)

(limited to 'entity')

diff --git a/entity/TODO b/entity/TODO
index fd3c9710..9f33dd09 100644
--- a/entity/TODO
+++ b/entity/TODO
@@ -1,6 +1,7 @@
 - is the pack Lamport clock really useful vs only topological sort?
   - topological order is enforced on the clocks, so what's the point?
   - is EditTime equivalent to PackTime? no, avoid the gaps. Is it better?
+    --> PackTime is contained within a bug and might avoid extreme reordering?
 - how to do commit signature?
 - how to avoid id collision between Operations?
- write tests for actions diff --git a/entity/dag/entity.go b/entity/dag/entity.go index 78347fa0..63d7fc3b 100644 --- a/entity/dag/entity.go +++ b/entity/dag/entity.go @@ -318,7 +318,6 @@ func (e *Entity) CommitAdNeeded(repo repository.ClockedRepo) error { } // Commit write the appended operations in the repository -// TODO: support commit signature func (e *Entity) Commit(repo repository.ClockedRepo) error { if !e.NeedCommit() { return fmt.Errorf("can't commit an entity with no pending operation") @@ -361,18 +360,13 @@ func (e *Entity) Commit(repo repository.ClockedRepo) error { // PackTime: packTime, } - treeHash, err := opp.Write(e.Definition, repo) - if err != nil { - return err - } - - // Write a Git commit referencing the tree, with the previous commit as parent var commitHash repository.Hash - if e.lastCommit != "" { - commitHash, err = repo.StoreCommit(treeHash, e.lastCommit) + if e.lastCommit == "" { + commitHash, err = opp.Write(e.Definition, repo) } else { - commitHash, err = repo.StoreCommit(treeHash) + commitHash, err = opp.Write(e.Definition, repo, e.lastCommit) } + if err != nil { return err } diff --git a/entity/dag/entity_actions.go b/entity/dag/entity_actions.go index 8dcf91e6..83ff7ddc 100644 --- a/entity/dag/entity_actions.go +++ b/entity/dag/entity_actions.go @@ -9,6 +9,7 @@ import ( "github.com/MichaelMure/git-bug/repository" ) +// ListLocalIds list all the available local Entity's Id func ListLocalIds(typename string, repo repository.RepoData) ([]entity.Id, error) { refs, err := repo.ListRefs(fmt.Sprintf("refs/%s/", typename)) if err != nil { @@ -56,6 +57,21 @@ func Pull(def Definition, repo repository.ClockedRepo, remote string) error { return nil } +// MergeAll will merge all the available remote Entity: +// +// Multiple scenario exist: +// 1. if the remote Entity doesn't exist locally, it's created +// --> emit entity.MergeStatusNew +// 2. if the remote and local Entity have the same state, nothing is changed +// --> emit entity.MergeStatusNothing +// 3. if the local Entity has new commits but the remote don't, nothing is changed +// --> emit entity.MergeStatusNothing +// 4. if the remote has new commit, the local bug is updated to match the same history +// (fast-forward update) +// --> emit entity.MergeStatusUpdated +// 5. if both local and remote Entity have new commits (that is, we have a concurrent edition), +// a merge commit with an empty operationPack is created to join both branch and form a DAG. +// --> emit entity.MergeStatusUpdated func MergeAll(def Definition, repo repository.ClockedRepo, remote string) <-chan entity.MergeResult { out := make(chan entity.MergeResult) @@ -81,6 +97,8 @@ func MergeAll(def Definition, repo repository.ClockedRepo, remote string) <-chan return out } +// merge perform a merge to make sure a local Entity is up to date. +// See MergeAll for more details. 
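+//
+// A typical consumer of MergeAll drains the channel and dispatches on the
+// status (a sketch; the remote name is illustrative):
+//
+//	for result := range MergeAll(def, repo, "origin") {
+//		switch result.Status {
+//		case entity.MergeStatusError:
+//			// inspect result.Err
+//		case entity.MergeStatusInvalid:
+//			// reject the remote data, see result.Reason
+//		default:
+//			// MergeStatusNew, MergeStatusUpdated or MergeStatusNothing
+//		}
+//	}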
func merge(def Definition, repo repository.ClockedRepo, remoteRef string) entity.MergeResult { id := entity.RefToId(remoteRef) @@ -102,36 +120,24 @@ func merge(def Definition, repo repository.ClockedRepo, remoteRef string) entity localRef := fmt.Sprintf("refs/%s/%s", def.namespace, id.String()) + // SCENARIO 1 + // if the remote Entity doesn't exist locally, it's created + localExist, err := repo.RefExist(localRef) if err != nil { return entity.NewMergeError(err, id) } - // the bug is not local yet, simply create the reference if !localExist { + // the bug is not local yet, simply create the reference err := repo.CopyRef(remoteRef, localRef) if err != nil { return entity.NewMergeError(err, id) } - return entity.NewMergeStatus(entity.MergeStatusNew, id, remoteEntity) + return entity.NewMergeNewStatus(id, remoteEntity) } - // var updated bool - // err = repo.MergeRef(localRef, remoteRef, func() repository.Hash { - // updated = true - // - // }) - // if err != nil { - // return entity.NewMergeError(err, id) - // } - // - // if updated { - // return entity.NewMergeStatus(entity.MergeStatusUpdated, id, ) - // } else { - // return entity.NewMergeStatus(entity.MergeStatusNothing, id, ) - // } - localCommit, err := repo.ResolveRef(localRef) if err != nil { return entity.NewMergeError(err, id) @@ -142,18 +148,38 @@ func merge(def Definition, repo repository.ClockedRepo, remoteRef string) entity return entity.NewMergeError(err, id) } + // SCENARIO 2 + // if the remote and local Entity have the same state, nothing is changed + if localCommit == remoteCommit { // nothing to merge - return entity.NewMergeStatus(entity.MergeStatusNothing, id, remoteEntity) + return entity.NewMergeNothingStatus(id) } - // fast-forward is possible if otherRef include ref + // SCENARIO 3 + // if the local Entity has new commits but the remote don't, nothing is changed + + localCommits, err := repo.ListCommits(localRef) + if err != nil { + return entity.NewMergeError(err, id) + } + + for _, hash := range localCommits { + if hash == localCommit { + return entity.NewMergeNothingStatus(id) + } + } + + // SCENARIO 4 + // if the remote has new commit, the local bug is updated to match the same history + // (fast-forward update) remoteCommits, err := repo.ListCommits(remoteRef) if err != nil { return entity.NewMergeError(err, id) } + // fast-forward is possible if otherRef include ref fastForwardPossible := false for _, hash := range remoteCommits { if hash == localCommit { @@ -167,9 +193,13 @@ func merge(def Definition, repo repository.ClockedRepo, remoteRef string) entity if err != nil { return entity.NewMergeError(err, id) } - return entity.NewMergeStatus(entity.MergeStatusUpdated, id, remoteEntity) + return entity.NewMergeUpdatedStatus(id, remoteEntity) } + // SCENARIO 5 + // if both local and remote Entity have new commits (that is, we have a concurrent edition), + // a merge commit with an empty operationPack is created to join both branch and form a DAG. + // fast-forward is not possible, we need to create a merge commit // For simplicity when reading and to have clocks that record this change, we store // an empty operationPack. 
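// Schematically, the entity history after such a concurrent edition looks
// like this (left to right, M being the merge commit that carries the empty
// operationPack):
//
//	*---*---*-------M   <- refs/<namespace>/<id> (local)
//	     \         /
//	      *---*---*     <- refs/remotes/<remote>/<namespace>/<id>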
@@ -180,6 +210,7 @@ func merge(def Definition, repo repository.ClockedRepo, remoteRef string) entity return entity.NewMergeError(err, id) } + // TODO: pack clock // err = localEntity.packClock.Witness(remoteEntity.packClock.Time()) // if err != nil { // return entity.NewMergeError(err, id) @@ -199,27 +230,25 @@ func merge(def Definition, repo repository.ClockedRepo, remoteRef string) entity Operations: nil, CreateTime: 0, EditTime: editTime, + // TODO: pack clock // PackTime: packTime, } - treeHash, err := opp.Write(def, repo) - if err != nil { - return entity.NewMergeError(err, id) - } - - // Create the merge commit with two parents - newHash, err := repo.StoreCommit(treeHash, localCommit, remoteCommit) + commitHash, err := opp.Write(def, repo, localCommit, remoteCommit) if err != nil { return entity.NewMergeError(err, id) } // finally update the ref - err = repo.UpdateRef(localRef, newHash) + err = repo.UpdateRef(localRef, commitHash) if err != nil { return entity.NewMergeError(err, id) } - return entity.NewMergeStatus(entity.MergeStatusUpdated, id, localEntity) + // Note: we don't need to update localEntity state (lastCommit, operations...) as we + // discard it entirely anyway. + + return entity.NewMergeUpdatedStatus(id, localEntity) } func Remove() error { diff --git a/entity/dag/operation.go b/entity/dag/operation.go index 9fcc055b..86e2f7d7 100644 --- a/entity/dag/operation.go +++ b/entity/dag/operation.go @@ -12,17 +12,19 @@ type Operation interface { // Id return the Operation identifier // Some care need to be taken to define a correct Id derivation and enough entropy in the data used to avoid // collisions. Notably: - // - the Id of the first Operation will be used as the Id of the Entity. Collision need to be avoided across Entities. + // - the Id of the first Operation will be used as the Id of the Entity. Collision need to be avoided across entities of the same type + // (example: no collision within the "bug" namespace). // - collisions can also happen within the set of Operations of an Entity. Simple Operation might not have enough - // entropy to yield unique Ids. + // entropy to yield unique Ids (example: two "close" operation within the same second, same author). // A common way to derive an Id will be to use the DeriveId function on the serialized operation data. Id() entity.Id // Validate check if the Operation data is valid Validate() error - + // Author returns the author of this operation Author() identity.Interface } +// TODO: remove? type operationBase struct { author identity.Interface diff --git a/entity/dag/operation_pack.go b/entity/dag/operation_pack.go index 7cf4ee58..ebacdbd9 100644 --- a/entity/dag/operation_pack.go +++ b/entity/dag/operation_pack.go @@ -86,7 +86,10 @@ func (opp *operationPack) Validate() error { return nil } -func (opp *operationPack) Write(def Definition, repo repository.RepoData, parentCommit ...repository.Hash) (repository.Hash, error) { +// Write write the OperationPack in git, with zero, one or more parent commits. +// If the repository has a keypair able to sign (that is, with a private key), the resulting commit is signed with that key. +// Return the hash of the created commit. 
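+//
+// The number of parents reflects the situation, as used elsewhere in this
+// package (localHead/remoteHead are illustrative names):
+//
+//	opp.Write(def, repo)                        // root pack of a new entity
+//	opp.Write(def, repo, lastCommit)            // regular append to an entity
+//	opp.Write(def, repo, localHead, remoteHead) // merge commit joining two branches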
+func (opp *operationPack) Write(def Definition, repo repository.Repo, parentCommit ...repository.Hash) (repository.Hash, error) { if err := opp.Validate(); err != nil { return "", err } @@ -148,8 +151,13 @@ func (opp *operationPack) Write(def Definition, repo repository.RepoData, parent var commitHash repository.Hash // Sign the commit if we have a key - if opp.Author.SigningKey() != nil { - commitHash, err = repo.StoreSignedCommit(treeHash, opp.Author.SigningKey().PGPEntity(), parentCommit...) + signingKey, err := opp.Author.SigningKey(repo) + if err != nil { + return "", err + } + + if signingKey != nil { + commitHash, err = repo.StoreSignedCommit(treeHash, signingKey.PGPEntity(), parentCommit...) } else { commitHash, err = repo.StoreCommit(treeHash, parentCommit...) } @@ -240,7 +248,7 @@ func readOperationPack(def Definition, repo repository.RepoData, commit reposito // Verify signature if we expect one keys := author.ValidKeysAtTime(fmt.Sprintf(editClockPattern, def.namespace), editTime) if len(keys) > 0 { - keyring := identity.PGPKeyring(keys) + keyring := PGPKeyring(keys) _, err = openpgp.CheckDetachedSignature(keyring, commit.SignedData, commit.Signature) if err != nil { return nil, fmt.Errorf("signature failure: %v", err) @@ -292,3 +300,37 @@ func unmarshallPack(def Definition, data []byte) ([]Operation, identity.Interfac return ops, author, nil } + +var _ openpgp.KeyRing = &PGPKeyring{} + +// PGPKeyring implement a openpgp.KeyRing from an slice of Key +type PGPKeyring []*identity.Key + +func (pk PGPKeyring) KeysById(id uint64) []openpgp.Key { + var result []openpgp.Key + for _, key := range pk { + if key.Public().KeyId == id { + result = append(result, openpgp.Key{ + PublicKey: key.Public(), + PrivateKey: key.Private(), + }) + } + } + return result +} + +func (pk PGPKeyring) KeysByIdUsage(id uint64, requiredUsage byte) []openpgp.Key { + // the only usage we care about is the ability to sign, which all keys should already be capable of + return pk.KeysById(id) +} + +func (pk PGPKeyring) DecryptionKeys() []openpgp.Key { + result := make([]openpgp.Key, len(pk)) + for i, key := range pk { + result[i] = openpgp.Key{ + PublicKey: key.Public(), + PrivateKey: key.Private(), + } + } + return result +} diff --git a/entity/merge.go b/entity/merge.go index 7d1f3f43..1b68b4de 100644 --- a/entity/merge.go +++ b/entity/merge.go @@ -24,10 +24,10 @@ type MergeResult struct { Id Id Status MergeStatus - // Only set for invalid status + // Only set for Invalid status Reason string - // Not set for invalid status + // Only set for New or Updated status Entity Interface } @@ -48,29 +48,42 @@ func (mr MergeResult) String() string { } } -func NewMergeError(err error, id Id) MergeResult { +// TODO: Interface --> *Entity ? +func NewMergeNewStatus(id Id, entity Interface) MergeResult { return MergeResult{ - Err: err, Id: id, - Status: MergeStatusError, + Status: MergeStatusNew, + Entity: entity, } } -// TODO: Interface --> *Entity ? 
-func NewMergeStatus(status MergeStatus, id Id, entity Interface) MergeResult { +func NewMergeInvalidStatus(id Id, reason string) MergeResult { return MergeResult{ Id: id, - Status: status, + Status: MergeStatusInvalid, + Reason: reason, + } +} - // Entity is not set for an invalid merge result +func NewMergeUpdatedStatus(id Id, entity Interface) MergeResult { + return MergeResult{ + Id: id, + Status: MergeStatusUpdated, Entity: entity, } } -func NewMergeInvalidStatus(id Id, reason string) MergeResult { +func NewMergeNothingStatus(id Id) MergeResult { return MergeResult{ Id: id, - Status: MergeStatusInvalid, - Reason: reason, + Status: MergeStatusNothing, + } +} + +func NewMergeError(err error, id Id) MergeResult { + return MergeResult{ + Id: id, + Status: MergeStatusError, + Err: err, } } -- cgit From fe4237df3c62bd6dfd1f385893295f93072d0e51 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Mon, 25 Jan 2021 12:39:34 +0100 Subject: entity: readAll and more testing --- entity/dag/common_test.go | 38 ++++++++++--- entity/dag/entity.go | 48 ++++++++++++++++- entity/dag/entity_actions.go | 8 +-- entity/dag/entity_actions_test.go | 110 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 190 insertions(+), 14 deletions(-) create mode 100644 entity/dag/entity_actions_test.go (limited to 'entity') diff --git a/entity/dag/common_test.go b/entity/dag/common_test.go index 29f1279e..b822fc79 100644 --- a/entity/dag/common_test.go +++ b/entity/dag/common_test.go @@ -26,16 +26,16 @@ func newOp1(author identity.Interface, field1 string) *op1 { return &op1{author: author, OperationType: 1, Field1: field1} } -func (o op1) Id() entity.Id { +func (o *op1) Id() entity.Id { data, _ := json.Marshal(o) return entity.DeriveId(data) } -func (o op1) Author() identity.Interface { +func (o *op1) Author() identity.Interface { return o.author } -func (o op1) Validate() error { return nil } +func (o *op1) Validate() error { return nil } type op2 struct { author identity.Interface @@ -48,16 +48,16 @@ func newOp2(author identity.Interface, field2 string) *op2 { return &op2{author: author, OperationType: 2, Field2: field2} } -func (o op2) Id() entity.Id { +func (o *op2) Id() entity.Id { data, _ := json.Marshal(o) return entity.DeriveId(data) } -func (o op2) Author() identity.Interface { +func (o *op2) Author() identity.Interface { return o.author } -func (o op2) Validate() error { return nil } +func (o *op2) Validate() error { return nil } func unmarshaler(author identity.Interface, raw json.RawMessage) (Operation, error) { var t struct { @@ -90,7 +90,31 @@ func unmarshaler(author identity.Interface, raw json.RawMessage) (Operation, err func makeTestContext() (repository.ClockedRepo, identity.Interface, identity.Interface, Definition) { repo := repository.NewMockRepo() + id1, id2, def := makeTestContextInternal(repo) + return repo, id1, id2, def +} + +func makeTestContextRemote() (repository.ClockedRepo, repository.ClockedRepo, repository.ClockedRepo, identity.Interface, identity.Interface, Definition) { + repoA := repository.CreateGoGitTestRepo(false) + repoB := repository.CreateGoGitTestRepo(false) + remote := repository.CreateGoGitTestRepo(true) + + err := repoA.AddRemote("origin", remote.GetLocalRemote()) + if err != nil { + panic(err) + } + err = repoB.AddRemote("origin", remote.GetLocalRemote()) + if err != nil { + panic(err) + } + + id1, id2, def := makeTestContextInternal(repoA) + + return repoA, repoB, remote, id1, id2, def +} + +func makeTestContextInternal(repo repository.ClockedRepo) (identity.Interface, 
identity.Interface, Definition) { id1, err := identity.NewIdentity(repo, "name1", "email1") if err != nil { panic(err) @@ -127,7 +151,7 @@ func makeTestContext() (repository.ClockedRepo, identity.Interface, identity.Int formatVersion: 1, } - return repo, id1, id2, def + return id1, id2, def } type identityResolverFunc func(id entity.Id) (identity.Interface, error) diff --git a/entity/dag/entity.go b/entity/dag/entity.go index 63d7fc3b..d3f5b482 100644 --- a/entity/dag/entity.go +++ b/entity/dag/entity.go @@ -58,7 +58,7 @@ func New(definition Definition) *Entity { } } -// Read will read and decode a stored Entity from a repository +// Read will read and decode a stored local Entity from a repository func Read(def Definition, repo repository.ClockedRepo, id entity.Id) (*Entity, error) { if err := id.Validate(); err != nil { return nil, errors.Wrap(err, "invalid id") @@ -69,6 +69,17 @@ func Read(def Definition, repo repository.ClockedRepo, id entity.Id) (*Entity, e return read(def, repo, ref) } +// readRemote will read and decode a stored remote Entity from a repository +func readRemote(def Definition, repo repository.ClockedRepo, remote string, id entity.Id) (*Entity, error) { + if err := id.Validate(); err != nil { + return nil, errors.Wrap(err, "invalid id") + } + + ref := fmt.Sprintf("refs/remotes/%s/%s/%s", def.namespace, remote, id.String()) + + return read(def, repo, ref) +} + // read fetch from git and decode an Entity at an arbitrary git reference. func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, error) { rootHash, err := repo.ResolveRef(ref) @@ -232,6 +243,41 @@ func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, err }, nil } +type StreamedEntity struct { + Entity *Entity + Err error +} + +// ReadAll read and parse all local Entity +func ReadAll(def Definition, repo repository.ClockedRepo) <-chan StreamedEntity { + out := make(chan StreamedEntity) + + go func() { + defer close(out) + + refPrefix := fmt.Sprintf("refs/%s/", def.namespace) + + refs, err := repo.ListRefs(refPrefix) + if err != nil { + out <- StreamedEntity{Err: err} + return + } + + for _, ref := range refs { + e, err := read(def, repo, ref) + + if err != nil { + out <- StreamedEntity{Err: err} + return + } + + out <- StreamedEntity{Entity: e} + } + }() + + return out +} + // Id return the Entity identifier func (e *Entity) Id() entity.Id { // id is the id of the first operation diff --git a/entity/dag/entity_actions.go b/entity/dag/entity_actions.go index 83ff7ddc..db3a545c 100644 --- a/entity/dag/entity_actions.go +++ b/entity/dag/entity_actions.go @@ -10,8 +10,8 @@ import ( ) // ListLocalIds list all the available local Entity's Id -func ListLocalIds(typename string, repo repository.RepoData) ([]entity.Id, error) { - refs, err := repo.ListRefs(fmt.Sprintf("refs/%s/", typename)) +func ListLocalIds(def Definition, repo repository.RepoData) ([]entity.Id, error) { + refs, err := repo.ListRefs(fmt.Sprintf("refs/%s/", def.namespace)) if err != nil { return nil, err } @@ -75,10 +75,6 @@ func Pull(def Definition, repo repository.ClockedRepo, remote string) error { func MergeAll(def Definition, repo repository.ClockedRepo, remote string) <-chan entity.MergeResult { out := make(chan entity.MergeResult) - // no caching for the merge, we load everything from git even if that means multiple - // copy of the same entity in memory. The cache layer will intercept the results to - // invalidate entities if necessary. 
- go func() { defer close(out) diff --git a/entity/dag/entity_actions_test.go b/entity/dag/entity_actions_test.go new file mode 100644 index 00000000..6cc544b6 --- /dev/null +++ b/entity/dag/entity_actions_test.go @@ -0,0 +1,110 @@ +package dag + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/MichaelMure/git-bug/identity" + "github.com/MichaelMure/git-bug/repository" +) + +func allEntities(t testing.TB, bugs <-chan StreamedEntity) []*Entity { + var result []*Entity + for streamed := range bugs { + if streamed.Err != nil { + t.Fatal(streamed.Err) + } + result = append(result, streamed.Entity) + } + return result +} + +func TestPushPull(t *testing.T) { + repoA, repoB, remote, id1, id2, def := makeTestContextRemote() + defer repository.CleanupTestRepos(repoA, repoB, remote) + + // distribute the identities + _, err := identity.Push(repoA, "origin") + require.NoError(t, err) + err = identity.Pull(repoB, "origin") + require.NoError(t, err) + + // A --> remote --> B + entity := New(def) + entity.Append(newOp1(id1, "foo")) + + err = entity.Commit(repoA) + require.NoError(t, err) + + _, err = Push(def, repoA, "origin") + require.NoError(t, err) + + err = Pull(def, repoB, "origin") + require.NoError(t, err) + + entities := allEntities(t, ReadAll(def, repoB)) + require.Len(t, entities, 1) + + // B --> remote --> A + entity = New(def) + entity.Append(newOp2(id2, "bar")) + + err = entity.Commit(repoB) + require.NoError(t, err) + + _, err = Push(def, repoB, "origin") + require.NoError(t, err) + + err = Pull(def, repoA, "origin") + require.NoError(t, err) + + entities = allEntities(t, ReadAll(def, repoB)) + require.Len(t, entities, 2) +} + +func TestListLocalIds(t *testing.T) { + repoA, repoB, remote, id1, id2, def := makeTestContextRemote() + defer repository.CleanupTestRepos(repoA, repoB, remote) + + // distribute the identities + _, err := identity.Push(repoA, "origin") + require.NoError(t, err) + err = identity.Pull(repoB, "origin") + require.NoError(t, err) + + // A --> remote --> B + entity := New(def) + entity.Append(newOp1(id1, "foo")) + err = entity.Commit(repoA) + require.NoError(t, err) + + entity = New(def) + entity.Append(newOp2(id2, "bar")) + err = entity.Commit(repoA) + require.NoError(t, err) + + listLocalIds(t, def, repoA, 2) + listLocalIds(t, def, repoB, 0) + + _, err = Push(def, repoA, "origin") + require.NoError(t, err) + + _, err = Fetch(def, repoB, "origin") + require.NoError(t, err) + + listLocalIds(t, def, repoA, 2) + listLocalIds(t, def, repoB, 0) + + err = Pull(def, repoB, "origin") + require.NoError(t, err) + + listLocalIds(t, def, repoA, 2) + listLocalIds(t, def, repoB, 2) +} + +func listLocalIds(t *testing.T, def Definition, repo repository.RepoData, expectedCount int) { + ids, err := ListLocalIds(def, repo) + require.NoError(t, err) + require.Len(t, ids, expectedCount) +} -- cgit From e35c7c4d170d1b682992c95f1c14772158501015 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Fri, 5 Feb 2021 11:18:38 +0100 Subject: entity: more testing and bug fixing --- entity/dag/common_test.go | 28 +++--- entity/dag/entity_actions.go | 12 +-- entity/dag/entity_actions_test.go | 181 ++++++++++++++++++++++++++++++-------- 3 files changed, 164 insertions(+), 57 deletions(-) (limited to 'entity') diff --git a/entity/dag/common_test.go b/entity/dag/common_test.go index b822fc79..05d85898 100644 --- a/entity/dag/common_test.go +++ b/entity/dag/common_test.go @@ -3,6 +3,9 @@ package dag import ( "encoding/json" "fmt" + "testing" + + 
"github.com/stretchr/testify/require" "github.com/MichaelMure/git-bug/entity" "github.com/MichaelMure/git-bug/identity" @@ -94,23 +97,28 @@ func makeTestContext() (repository.ClockedRepo, identity.Interface, identity.Int return repo, id1, id2, def } -func makeTestContextRemote() (repository.ClockedRepo, repository.ClockedRepo, repository.ClockedRepo, identity.Interface, identity.Interface, Definition) { +func makeTestContextRemote(t *testing.T) (repository.ClockedRepo, repository.ClockedRepo, repository.ClockedRepo, identity.Interface, identity.Interface, Definition) { repoA := repository.CreateGoGitTestRepo(false) repoB := repository.CreateGoGitTestRepo(false) remote := repository.CreateGoGitTestRepo(true) - err := repoA.AddRemote("origin", remote.GetLocalRemote()) - if err != nil { - panic(err) - } - - err = repoB.AddRemote("origin", remote.GetLocalRemote()) - if err != nil { - panic(err) - } + err := repoA.AddRemote("remote", remote.GetLocalRemote()) + require.NoError(t, err) + err = repoA.AddRemote("repoB", repoB.GetLocalRemote()) + require.NoError(t, err) + err = repoB.AddRemote("remote", remote.GetLocalRemote()) + require.NoError(t, err) + err = repoB.AddRemote("repoA", repoA.GetLocalRemote()) + require.NoError(t, err) id1, id2, def := makeTestContextInternal(repoA) + // distribute the identities + _, err = identity.Push(repoA, "remote") + require.NoError(t, err) + err = identity.Pull(repoB, "remote") + require.NoError(t, err) + return repoA, repoB, remote, id1, id2, def } diff --git a/entity/dag/entity_actions.go b/entity/dag/entity_actions.go index db3a545c..edc47d52 100644 --- a/entity/dag/entity_actions.go +++ b/entity/dag/entity_actions.go @@ -21,20 +21,12 @@ func ListLocalIds(def Definition, repo repository.RepoData) ([]entity.Id, error) // Fetch retrieve updates from a remote // This does not change the local entity state func Fetch(def Definition, repo repository.Repo, remote string) (string, error) { - // "refs//*:refs/remotes///*" - fetchRefSpec := fmt.Sprintf("refs/%s/*:refs/remotes/%s/%s/*", - def.namespace, remote, def.namespace) - - return repo.FetchRefs(remote, fetchRefSpec) + return repo.FetchRefs(remote, def.namespace) } // Push update a remote with the local changes func Push(def Definition, repo repository.Repo, remote string) (string, error) { - // "refs//*:refs//*" - refspec := fmt.Sprintf("refs/%s/*:refs/%s/*", - def.namespace, def.namespace) - - return repo.PushRefs(remote, refspec) + return repo.PushRefs(remote, def.namespace) } // Pull will do a Fetch + MergeAll diff --git a/entity/dag/entity_actions_test.go b/entity/dag/entity_actions_test.go index 6cc544b6..d7717056 100644 --- a/entity/dag/entity_actions_test.go +++ b/entity/dag/entity_actions_test.go @@ -1,62 +1,58 @@ package dag import ( + "sort" "testing" "github.com/stretchr/testify/require" - "github.com/MichaelMure/git-bug/identity" + "github.com/MichaelMure/git-bug/entity" "github.com/MichaelMure/git-bug/repository" ) func allEntities(t testing.TB, bugs <-chan StreamedEntity) []*Entity { + t.Helper() + var result []*Entity for streamed := range bugs { - if streamed.Err != nil { - t.Fatal(streamed.Err) - } + require.NoError(t, streamed.Err) + result = append(result, streamed.Entity) } return result } func TestPushPull(t *testing.T) { - repoA, repoB, remote, id1, id2, def := makeTestContextRemote() + repoA, repoB, remote, id1, id2, def := makeTestContextRemote(t) defer repository.CleanupTestRepos(repoA, repoB, remote) - // distribute the identities - _, err := identity.Push(repoA, "origin") - 
require.NoError(t, err) - err = identity.Pull(repoB, "origin") - require.NoError(t, err) - // A --> remote --> B - entity := New(def) - entity.Append(newOp1(id1, "foo")) + e := New(def) + e.Append(newOp1(id1, "foo")) - err = entity.Commit(repoA) + err := e.Commit(repoA) require.NoError(t, err) - _, err = Push(def, repoA, "origin") + _, err = Push(def, repoA, "remote") require.NoError(t, err) - err = Pull(def, repoB, "origin") + err = Pull(def, repoB, "remote") require.NoError(t, err) entities := allEntities(t, ReadAll(def, repoB)) require.Len(t, entities, 1) // B --> remote --> A - entity = New(def) - entity.Append(newOp2(id2, "bar")) + e = New(def) + e.Append(newOp2(id2, "bar")) - err = entity.Commit(repoB) + err = e.Commit(repoB) require.NoError(t, err) - _, err = Push(def, repoB, "origin") + _, err = Push(def, repoB, "remote") require.NoError(t, err) - err = Pull(def, repoA, "origin") + err = Pull(def, repoA, "remote") require.NoError(t, err) entities = allEntities(t, ReadAll(def, repoB)) @@ -64,39 +60,33 @@ func TestPushPull(t *testing.T) { } func TestListLocalIds(t *testing.T) { - repoA, repoB, remote, id1, id2, def := makeTestContextRemote() + repoA, repoB, remote, id1, id2, def := makeTestContextRemote(t) defer repository.CleanupTestRepos(repoA, repoB, remote) - // distribute the identities - _, err := identity.Push(repoA, "origin") - require.NoError(t, err) - err = identity.Pull(repoB, "origin") - require.NoError(t, err) - // A --> remote --> B - entity := New(def) - entity.Append(newOp1(id1, "foo")) - err = entity.Commit(repoA) + e := New(def) + e.Append(newOp1(id1, "foo")) + err := e.Commit(repoA) require.NoError(t, err) - entity = New(def) - entity.Append(newOp2(id2, "bar")) - err = entity.Commit(repoA) + e = New(def) + e.Append(newOp2(id2, "bar")) + err = e.Commit(repoA) require.NoError(t, err) listLocalIds(t, def, repoA, 2) listLocalIds(t, def, repoB, 0) - _, err = Push(def, repoA, "origin") + _, err = Push(def, repoA, "remote") require.NoError(t, err) - _, err = Fetch(def, repoB, "origin") + _, err = Fetch(def, repoB, "remote") require.NoError(t, err) listLocalIds(t, def, repoA, 2) listLocalIds(t, def, repoB, 0) - err = Pull(def, repoB, "origin") + err = Pull(def, repoB, "remote") require.NoError(t, err) listLocalIds(t, def, repoA, 2) @@ -108,3 +98,120 @@ func listLocalIds(t *testing.T, def Definition, repo repository.RepoData, expect require.NoError(t, err) require.Len(t, ids, expectedCount) } + +func assertMergeResults(t *testing.T, expected []entity.MergeResult, results <-chan entity.MergeResult) { + t.Helper() + + var allResults []entity.MergeResult + for result := range results { + allResults = append(allResults, result) + } + + require.Equal(t, len(expected), len(allResults)) + + sort.Slice(allResults, func(i, j int) bool { + return allResults[i].Id < allResults[j].Id + }) + sort.Slice(expected, func(i, j int) bool { + return expected[i].Id < expected[j].Id + }) + + for i, result := range allResults { + require.NoError(t, result.Err) + + require.Equal(t, expected[i].Id, result.Id) + require.Equal(t, expected[i].Status, result.Status) + + switch result.Status { + case entity.MergeStatusNew, entity.MergeStatusUpdated: + require.NotNil(t, result.Entity) + require.Equal(t, expected[i].Id, result.Entity.Id()) + } + + i++ + } +} + +func TestMerge(t *testing.T) { + repoA, repoB, remote, id1, id2, def := makeTestContextRemote(t) + defer repository.CleanupTestRepos(repoA, repoB, remote) + + // SCENARIO 1 + // if the remote Entity doesn't exist locally, it's created + + // 2 
entities in repoA + push to remote + e1 := New(def) + e1.Append(newOp1(id1, "foo")) + err := e1.Commit(repoA) + require.NoError(t, err) + + e2 := New(def) + e2.Append(newOp2(id2, "bar")) + err = e2.Commit(repoA) + require.NoError(t, err) + + _, err = Push(def, repoA, "remote") + require.NoError(t, err) + + // repoB: fetch + merge from remote + + _, err = Fetch(def, repoB, "remote") + require.NoError(t, err) + + results := MergeAll(def, repoB, "remote") + + assertMergeResults(t, []entity.MergeResult{ + { + Id: e1.Id(), + Status: entity.MergeStatusNew, + }, + { + Id: e2.Id(), + Status: entity.MergeStatusNew, + }, + }, results) + + // SCENARIO 2 + // if the remote and local Entity have the same state, nothing is changed + + results = MergeAll(def, repoB, "remote") + + assertMergeResults(t, []entity.MergeResult{ + { + Id: e1.Id(), + Status: entity.MergeStatusNothing, + }, + { + Id: e2.Id(), + Status: entity.MergeStatusNothing, + }, + }, results) + + // SCENARIO 3 + // if the local Entity has new commits but the remote don't, nothing is changed + + e1.Append(newOp1(id1, "barbar")) + err = e1.Commit(repoA) + require.NoError(t, err) + + e2.Append(newOp2(id2, "barbarbar")) + err = e2.Commit(repoA) + require.NoError(t, err) + + results = MergeAll(def, repoA, "remote") + + assertMergeResults(t, []entity.MergeResult{ + { + Id: e1.Id(), + Status: entity.MergeStatusNothing, + }, + { + Id: e2.Id(), + Status: entity.MergeStatusNothing, + }, + }, results) + + // SCENARIO 4 + // if the remote has new commit, the local bug is updated to match the same history + // (fast-forward update) +} -- cgit From 32c55a4985cf897774e508b13c3e63b1935d1470 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sun, 7 Feb 2021 13:52:04 +0100 Subject: entity: use BFS instead of DFS to get the proper topological order --- entity/dag/entity.go | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) (limited to 'entity') diff --git a/entity/dag/entity.go b/entity/dag/entity.go index d3f5b482..273e6ad1 100644 --- a/entity/dag/entity.go +++ b/entity/dag/entity.go @@ -87,36 +87,34 @@ func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, err return nil, err } - // Perform a depth-first search to get a topological order of the DAG where we discover the + // Perform a breadth-first search to get a topological order of the DAG where we discover the // parents commit and go back in time up to the chronological root - stack := make([]repository.Hash, 0, 32) + queue := make([]repository.Hash, 0, 32) visited := make(map[repository.Hash]struct{}) - DFSOrder := make([]repository.Commit, 0, 32) + BFSOrder := make([]repository.Commit, 0, 32) - stack = append(stack, rootHash) + queue = append(queue, rootHash) + visited[rootHash] = struct{}{} - for len(stack) > 0 { + for len(queue) > 0 { // pop - hash := stack[len(stack)-1] - stack = stack[:len(stack)-1] - - if _, ok := visited[hash]; ok { - continue - } - - // mark as visited - visited[hash] = struct{}{} + hash := queue[0] + queue = queue[1:] commit, err := repo.ReadCommit(hash) if err != nil { return nil, err } - DFSOrder = append(DFSOrder, commit) + BFSOrder = append(BFSOrder, commit) for _, parent := range commit.Parents { - stack = append(stack, parent) + if _, ok := visited[parent]; !ok { + queue = append(queue, parent) + // mark as visited + visited[parent] = struct{}{} + } } } @@ -131,9 +129,9 @@ func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, err var opsCount int // var packClock = lamport.NewMemClock() 
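// Editor's note (annotation, not part of the patch): two things changed in
// the walk above. First, the stack became a queue, so commits are visited
// level by level starting from the head; the replay loop below iterates
// BFSOrder in reverse to start from the chronological root. Second,
// "visited" is now marked at enqueue time rather than at pop time, so a
// commit reachable through several branches can no longer sit in the queue
// more than once.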
- for i := len(DFSOrder) - 1; i >= 0; i-- { - commit := DFSOrder[i] - isFirstCommit := i == len(DFSOrder)-1 + for i := len(BFSOrder) - 1; i >= 0; i-- { + commit := BFSOrder[i] + isFirstCommit := i == len(BFSOrder)-1 isMerge := len(commit.Parents) > 1 // Verify DAG structure: single chronological root, so only the root -- cgit From 26a4b0332e0f0a52026ac6e333e0bbd78a588171 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sun, 7 Feb 2021 13:54:03 +0100 Subject: entity: test all merge scenario --- entity/dag/entity_actions.go | 14 +-- entity/dag/entity_actions_test.go | 214 ++++++++++++++++++++++++++++++++++---- 2 files changed, 200 insertions(+), 28 deletions(-) (limited to 'entity') diff --git a/entity/dag/entity_actions.go b/entity/dag/entity_actions.go index edc47d52..fe912557 100644 --- a/entity/dag/entity_actions.go +++ b/entity/dag/entity_actions.go @@ -6,6 +6,7 @@ import ( "github.com/pkg/errors" "github.com/MichaelMure/git-bug/entity" + "github.com/MichaelMure/git-bug/identity" "github.com/MichaelMure/git-bug/repository" ) @@ -31,13 +32,13 @@ func Push(def Definition, repo repository.Repo, remote string) (string, error) { // Pull will do a Fetch + MergeAll // Contrary to MergeAll, this function will return an error if a merge fail. -func Pull(def Definition, repo repository.ClockedRepo, remote string) error { +func Pull(def Definition, repo repository.ClockedRepo, remote string, author identity.Interface) error { _, err := Fetch(def, repo, remote) if err != nil { return err } - for merge := range MergeAll(def, repo, remote) { + for merge := range MergeAll(def, repo, remote, author) { if merge.Err != nil { return merge.Err } @@ -64,7 +65,7 @@ func Pull(def Definition, repo repository.ClockedRepo, remote string) error { // 5. if both local and remote Entity have new commits (that is, we have a concurrent edition), // a merge commit with an empty operationPack is created to join both branch and form a DAG. // --> emit entity.MergeStatusUpdated -func MergeAll(def Definition, repo repository.ClockedRepo, remote string) <-chan entity.MergeResult { +func MergeAll(def Definition, repo repository.ClockedRepo, remote string, author identity.Interface) <-chan entity.MergeResult { out := make(chan entity.MergeResult) go func() { @@ -78,7 +79,7 @@ func MergeAll(def Definition, repo repository.ClockedRepo, remote string) <-chan } for _, remoteRef := range remoteRefs { - out <- merge(def, repo, remoteRef) + out <- merge(def, repo, remoteRef, author) } }() @@ -87,7 +88,7 @@ func MergeAll(def Definition, repo repository.ClockedRepo, remote string) <-chan // merge perform a merge to make sure a local Entity is up to date. // See MergeAll for more details. 
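// Editor's note (annotation, not part of the patch): the five cases that the
// MergeAll documentation above spells out reduce to this decision table,
// which merge() below implements:
//
//	remote ref unreadable or invalid   -> MergeStatusInvalid
//	no local ref yet                   -> copy the remote ref, MergeStatusNew
//	same commit on both sides          -> MergeStatusNothing
//	remote commit already in local     -> MergeStatusNothing
//	local commit already in remote     -> fast-forward, MergeStatusUpdated
//	histories diverged                 -> merge commit (empty pack), MergeStatusUpdated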
-func merge(def Definition, repo repository.ClockedRepo, remoteRef string) entity.MergeResult { +func merge(def Definition, repo repository.ClockedRepo, remoteRef string, author identity.Interface) entity.MergeResult { id := entity.RefToId(remoteRef) if err := id.Validate(); err != nil { @@ -153,7 +154,7 @@ func merge(def Definition, repo repository.ClockedRepo, remoteRef string) entity } for _, hash := range localCommits { - if hash == localCommit { + if hash == remoteCommit { return entity.NewMergeNothingStatus(id) } } @@ -215,6 +216,7 @@ func merge(def Definition, repo repository.ClockedRepo, remoteRef string) entity } opp := &operationPack{ + Author: author, Operations: nil, CreateTime: 0, EditTime: editTime, diff --git a/entity/dag/entity_actions_test.go b/entity/dag/entity_actions_test.go index d7717056..78baf41f 100644 --- a/entity/dag/entity_actions_test.go +++ b/entity/dag/entity_actions_test.go @@ -2,6 +2,7 @@ package dag import ( "sort" + "strings" "testing" "github.com/stretchr/testify/require" @@ -36,7 +37,7 @@ func TestPushPull(t *testing.T) { _, err = Push(def, repoA, "remote") require.NoError(t, err) - err = Pull(def, repoB, "remote") + err = Pull(def, repoB, "remote", id1) require.NoError(t, err) entities := allEntities(t, ReadAll(def, repoB)) @@ -52,7 +53,7 @@ func TestPushPull(t *testing.T) { _, err = Push(def, repoB, "remote") require.NoError(t, err) - err = Pull(def, repoA, "remote") + err = Pull(def, repoA, "remote", id1) require.NoError(t, err) entities = allEntities(t, ReadAll(def, repoB)) @@ -86,7 +87,7 @@ func TestListLocalIds(t *testing.T) { listLocalIds(t, def, repoA, 2) listLocalIds(t, def, repoB, 0) - err = Pull(def, repoB, "remote") + err = Pull(def, repoB, "remote", id1) require.NoError(t, err) listLocalIds(t, def, repoA, 2) @@ -132,6 +133,78 @@ func assertMergeResults(t *testing.T, expected []entity.MergeResult, results <-c } } +func assertEqualRefs(t *testing.T, repoA, repoB repository.RepoData, prefix string) { + t.Helper() + + refsA, err := repoA.ListRefs("") + require.NoError(t, err) + + var refsAFiltered []string + for _, ref := range refsA { + if strings.HasPrefix(ref, prefix) { + refsAFiltered = append(refsAFiltered, ref) + } + } + + refsB, err := repoB.ListRefs("") + require.NoError(t, err) + + var refsBFiltered []string + for _, ref := range refsB { + if strings.HasPrefix(ref, prefix) { + refsBFiltered = append(refsBFiltered, ref) + } + } + + require.NotEmpty(t, refsAFiltered) + require.Equal(t, refsAFiltered, refsBFiltered) + + for _, ref := range refsAFiltered { + commitA, err := repoA.ResolveRef(ref) + require.NoError(t, err) + commitB, err := repoB.ResolveRef(ref) + require.NoError(t, err) + + require.Equal(t, commitA, commitB) + } +} + +func assertNotEqualRefs(t *testing.T, repoA, repoB repository.RepoData, prefix string) { + t.Helper() + + refsA, err := repoA.ListRefs("") + require.NoError(t, err) + + var refsAFiltered []string + for _, ref := range refsA { + if strings.HasPrefix(ref, prefix) { + refsAFiltered = append(refsAFiltered, ref) + } + } + + refsB, err := repoB.ListRefs("") + require.NoError(t, err) + + var refsBFiltered []string + for _, ref := range refsB { + if strings.HasPrefix(ref, prefix) { + refsBFiltered = append(refsBFiltered, ref) + } + } + + require.NotEmpty(t, refsAFiltered) + require.Equal(t, refsAFiltered, refsBFiltered) + + for _, ref := range refsAFiltered { + commitA, err := repoA.ResolveRef(ref) + require.NoError(t, err) + commitB, err := repoB.ResolveRef(ref) + require.NoError(t, err) + + require.NotEqual(t, 
commitA, commitB) + } +} + func TestMerge(t *testing.T) { repoA, repoB, remote, id1, id2, def := makeTestContextRemote(t) defer repository.CleanupTestRepos(repoA, repoB, remote) @@ -140,14 +213,14 @@ func TestMerge(t *testing.T) { // if the remote Entity doesn't exist locally, it's created // 2 entities in repoA + push to remote - e1 := New(def) - e1.Append(newOp1(id1, "foo")) - err := e1.Commit(repoA) + e1A := New(def) + e1A.Append(newOp1(id1, "foo")) + err := e1A.Commit(repoA) require.NoError(t, err) - e2 := New(def) - e2.Append(newOp2(id2, "bar")) - err = e2.Commit(repoA) + e2A := New(def) + e2A.Append(newOp2(id2, "bar")) + err = e2A.Commit(repoA) require.NoError(t, err) _, err = Push(def, repoA, "remote") @@ -158,60 +231,157 @@ func TestMerge(t *testing.T) { _, err = Fetch(def, repoB, "remote") require.NoError(t, err) - results := MergeAll(def, repoB, "remote") + results := MergeAll(def, repoB, "remote", id1) assertMergeResults(t, []entity.MergeResult{ { - Id: e1.Id(), + Id: e1A.Id(), Status: entity.MergeStatusNew, }, { - Id: e2.Id(), + Id: e2A.Id(), Status: entity.MergeStatusNew, }, }, results) + assertEqualRefs(t, repoA, repoB, "refs/"+def.namespace) + // SCENARIO 2 // if the remote and local Entity have the same state, nothing is changed - results = MergeAll(def, repoB, "remote") + results = MergeAll(def, repoB, "remote", id1) assertMergeResults(t, []entity.MergeResult{ { - Id: e1.Id(), + Id: e1A.Id(), Status: entity.MergeStatusNothing, }, { - Id: e2.Id(), + Id: e2A.Id(), Status: entity.MergeStatusNothing, }, }, results) + assertEqualRefs(t, repoA, repoB, "refs/"+def.namespace) + // SCENARIO 3 // if the local Entity has new commits but the remote don't, nothing is changed - e1.Append(newOp1(id1, "barbar")) - err = e1.Commit(repoA) + e1A.Append(newOp1(id1, "barbar")) + err = e1A.Commit(repoA) require.NoError(t, err) - e2.Append(newOp2(id2, "barbarbar")) - err = e2.Commit(repoA) + e2A.Append(newOp2(id2, "barbarbar")) + err = e2A.Commit(repoA) require.NoError(t, err) - results = MergeAll(def, repoA, "remote") + results = MergeAll(def, repoA, "remote", id1) assertMergeResults(t, []entity.MergeResult{ { - Id: e1.Id(), + Id: e1A.Id(), Status: entity.MergeStatusNothing, }, { - Id: e2.Id(), + Id: e2A.Id(), Status: entity.MergeStatusNothing, }, }, results) + assertNotEqualRefs(t, repoA, repoB, "refs/"+def.namespace) + // SCENARIO 4 // if the remote has new commit, the local bug is updated to match the same history // (fast-forward update) + + _, err = Push(def, repoA, "remote") + require.NoError(t, err) + + _, err = Fetch(def, repoB, "remote") + require.NoError(t, err) + + results = MergeAll(def, repoB, "remote", id1) + + assertMergeResults(t, []entity.MergeResult{ + { + Id: e1A.Id(), + Status: entity.MergeStatusUpdated, + }, + { + Id: e2A.Id(), + Status: entity.MergeStatusUpdated, + }, + }, results) + + assertEqualRefs(t, repoA, repoB, "refs/"+def.namespace) + + // SCENARIO 5 + // if both local and remote Entity have new commits (that is, we have a concurrent edition), + // a merge commit with an empty operationPack is created to join both branch and form a DAG. 
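// Editor's note (annotation, not part of the patch): the history this
// scenario builds looks roughly like this, with M the merge commit carrying
// an empty operationPack:
//
//	repoA:  ...---o---o---.
//	                       \
//	                        M
//	                       /
//	repoB:  ...---o---o---'
//
// The first MergeAll below creates M on repoB only (hence the NotEqual refs
// assertion); pushing it back lets repoA fast-forward, after which both
// repositories hold identical refs.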
+ + e1A.Append(newOp1(id1, "barbarfoo")) + err = e1A.Commit(repoA) + require.NoError(t, err) + + e2A.Append(newOp2(id2, "barbarbarfoo")) + err = e2A.Commit(repoA) + require.NoError(t, err) + + e1B, err := Read(def, repoB, e1A.Id()) + require.NoError(t, err) + + e2B, err := Read(def, repoB, e2A.Id()) + require.NoError(t, err) + + e1B.Append(newOp1(id1, "barbarfoofoo")) + err = e1B.Commit(repoB) + require.NoError(t, err) + + e2B.Append(newOp2(id2, "barbarbarfoofoo")) + err = e2B.Commit(repoB) + require.NoError(t, err) + + _, err = Push(def, repoA, "remote") + require.NoError(t, err) + + _, err = Fetch(def, repoB, "remote") + require.NoError(t, err) + + results = MergeAll(def, repoB, "remote", id1) + + assertMergeResults(t, []entity.MergeResult{ + { + Id: e1A.Id(), + Status: entity.MergeStatusUpdated, + }, + { + Id: e2A.Id(), + Status: entity.MergeStatusUpdated, + }, + }, results) + + assertNotEqualRefs(t, repoA, repoB, "refs/"+def.namespace) + + _, err = Push(def, repoB, "remote") + require.NoError(t, err) + + _, err = Fetch(def, repoA, "remote") + require.NoError(t, err) + + results = MergeAll(def, repoA, "remote", id1) + + assertMergeResults(t, []entity.MergeResult{ + { + Id: e1A.Id(), + Status: entity.MergeStatusUpdated, + }, + { + Id: e2A.Id(), + Status: entity.MergeStatusUpdated, + }, + }, results) + + // make sure that the graphs become stable over multiple repo, due to the + // fast-forward + assertEqualRefs(t, repoA, repoB, "refs/"+def.namespace) } -- cgit From 2bdb1b60ff83de157f1a0d9ed42555d96b945fa6 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Tue, 9 Feb 2021 10:46:33 +0100 Subject: entity: working commit signatures --- entity/dag/operation_pack_test.go | 44 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'entity') diff --git a/entity/dag/operation_pack_test.go b/entity/dag/operation_pack_test.go index ad2a9859..ac979776 100644 --- a/entity/dag/operation_pack_test.go +++ b/entity/dag/operation_pack_test.go @@ -4,6 +4,8 @@ import ( "testing" "github.com/stretchr/testify/require" + + "github.com/MichaelMure/git-bug/identity" ) func TestOperationPackReadWrite(t *testing.T) { @@ -42,3 +44,45 @@ func TestOperationPackReadWrite(t *testing.T) { } require.Equal(t, opp.Id(), opp3.Id()) } + +func TestOperationPackSignedReadWrite(t *testing.T) { + repo, id1, _, def := makeTestContext() + + err := id1.(*identity.Identity).Mutate(repo, func(orig *identity.Mutator) { + orig.Keys = append(orig.Keys, identity.GenerateKey()) + }) + require.NoError(t, err) + + opp := &operationPack{ + Author: id1, + Operations: []Operation{ + newOp1(id1, "foo"), + newOp2(id1, "bar"), + }, + CreateTime: 123, + EditTime: 456, + } + + commitHash, err := opp.Write(def, repo) + require.NoError(t, err) + + commit, err := repo.ReadCommit(commitHash) + require.NoError(t, err) + + opp2, err := readOperationPack(def, repo, commit) + require.NoError(t, err) + + require.Equal(t, opp, opp2) + + // make sure we get the same Id with the same data + opp3 := &operationPack{ + Author: id1, + Operations: []Operation{ + newOp1(id1, "foo"), + newOp2(id1, "bar"), + }, + CreateTime: 123, + EditTime: 456, + } + require.Equal(t, opp.Id(), opp3.Id()) +} -- cgit From f74166914c344329f08823770982f12966c79a77 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Tue, 9 Feb 2021 15:47:07 +0100 Subject: entity: remove the pack lamport time that doesn't bring anything actually --- entity/dag/entity.go | 32 +++-------------------- entity/dag/entity_actions.go | 13 ---------- entity/dag/entity_test.go | 60 
-------------------------------------------- entity/dag/operation_pack.go | 17 ------------- 4 files changed, 4 insertions(+), 118 deletions(-) (limited to 'entity') diff --git a/entity/dag/entity.go b/entity/dag/entity.go index 273e6ad1..3f4dfcb4 100644 --- a/entity/dag/entity.go +++ b/entity/dag/entity.go @@ -44,9 +44,6 @@ type Entity struct { // TODO: add here createTime and editTime - // // TODO: doesn't seems to actually be useful over the topological sort ? Timestamp can be generated from graph depth - // // TODO: maybe EditTime is better because it could spread ops in consecutive groups on the logical timeline --> avoid interleaving - // packClock lamport.Clock lastCommit repository.Hash } @@ -54,7 +51,6 @@ type Entity struct { func New(definition Definition) *Entity { return &Entity{ Definition: definition, - // packClock: lamport.NewMemClock(), } } @@ -127,7 +123,6 @@ func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, err oppMap := make(map[repository.Hash]*operationPack) var opsCount int - // var packClock = lamport.NewMemClock() for i := len(BFSOrder) - 1; i >= 0; i-- { commit := BFSOrder[i] @@ -174,8 +169,6 @@ func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, err if !isMerge && opp.EditTime-parentPack.EditTime > 1_000_000 { return nil, fmt.Errorf("lamport clock jumping too far in the future, likely an attack") } - - // TODO: PackTime is not checked } oppMap[commit.Hash] = opp @@ -192,10 +185,6 @@ func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, err if err != nil { return nil, err } - // err = packClock.Witness(opp.PackTime) - // if err != nil { - // return nil, err - // } } // Now that we know that the topological order and clocks are fine, we order the operationPacks @@ -206,19 +195,13 @@ func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, err oppSlice = append(oppSlice, pack) } sort.Slice(oppSlice, func(i, j int) bool { - // Primary ordering with the dedicated "pack" Lamport time that encode causality - // within the entity - // if oppSlice[i].PackTime != oppSlice[j].PackTime { - // return oppSlice[i].PackTime < oppSlice[i].PackTime - // } - // We have equal PackTime, which means we had a concurrent edition. We can't tell which exactly - // came first. As a secondary arbitrary ordering, we can use the EditTime. It's unlikely to be - // enough but it can give us an edge to approach what really happened. + // Primary ordering with the EditTime. if oppSlice[i].EditTime != oppSlice[j].EditTime { return oppSlice[i].EditTime < oppSlice[j].EditTime } - // Well, what now? We still need a total ordering and the most stable possible. - // As a last resort, we can order based on a hash of the serialized Operations in the + // We have equal EditTime, which means we have concurrent edition over different machines and we + // can't tell which one came first. So, what now? We still need a total ordering and the most stable possible. + // As a secondary ordering, we can order based on a hash of the serialized Operations in the // operationPack. It doesn't carry much meaning but it's unbiased and hard to abuse. // This is a lexicographic ordering on the stringified ID. 
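// Editor's note (annotation, not part of the patch): since the Id is a hash
// of the serialized operationPack, this tiebreak evaluates identically on
// every machine, which is what keeps the replay order deterministic even
// for concurrent edits.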
return oppSlice[i].Id() < oppSlice[j].Id() @@ -236,7 +219,6 @@ func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, err return &Entity{ Definition: def, ops: ops, - // packClock: packClock, lastCommit: rootHash, }, nil } @@ -379,11 +361,6 @@ func (e *Entity) Commit(repo repository.ClockedRepo) error { author = op.Author() } - // increment the various clocks for this new operationPack - // packTime, err := e.packClock.Increment() - // if err != nil { - // return err - // } editTime, err := repo.Increment(fmt.Sprintf(editClockPattern, e.namespace)) if err != nil { return err @@ -401,7 +378,6 @@ func (e *Entity) Commit(repo repository.ClockedRepo) error { Operations: e.staging, CreateTime: creationTime, EditTime: editTime, - // PackTime: packTime, } var commitHash repository.Hash diff --git a/entity/dag/entity_actions.go b/entity/dag/entity_actions.go index fe912557..6f6fe45c 100644 --- a/entity/dag/entity_actions.go +++ b/entity/dag/entity_actions.go @@ -199,17 +199,6 @@ func merge(def Definition, repo repository.ClockedRepo, remoteRef string, author return entity.NewMergeError(err, id) } - // TODO: pack clock - // err = localEntity.packClock.Witness(remoteEntity.packClock.Time()) - // if err != nil { - // return entity.NewMergeError(err, id) - // } - // - // packTime, err := localEntity.packClock.Increment() - // if err != nil { - // return entity.NewMergeError(err, id) - // } - editTime, err := repo.Increment(fmt.Sprintf(editClockPattern, def.namespace)) if err != nil { return entity.NewMergeError(err, id) @@ -220,8 +209,6 @@ func merge(def Definition, repo repository.ClockedRepo, remoteRef string, author Operations: nil, CreateTime: 0, EditTime: editTime, - // TODO: pack clock - // PackTime: packTime, } commitHash, err := opp.Write(def, repo, localCommit, remoteCommit) diff --git a/entity/dag/entity_test.go b/entity/dag/entity_test.go index c5c83567..012c87aa 100644 --- a/entity/dag/entity_test.go +++ b/entity/dag/entity_test.go @@ -55,63 +55,3 @@ func assertEqualEntities(t *testing.T, a, b *Entity) { require.Equal(t, a, b) } - -// // Merge -// -// merge1 := makeCommit(t, repo) -// merge1 = makeCommit(t, repo, merge1) -// err = repo.UpdateRef("merge1", merge1) -// require.NoError(t, err) -// -// err = repo.UpdateRef("merge2", merge1) -// require.NoError(t, err) -// -// // identical merge -// err = repo.MergeRef("merge1", "merge2") -// require.NoError(t, err) -// -// refMerge1, err := repo.ResolveRef("merge1") -// require.NoError(t, err) -// require.Equal(t, merge1, refMerge1) -// refMerge2, err := repo.ResolveRef("merge2") -// require.NoError(t, err) -// require.Equal(t, merge1, refMerge2) -// -// // fast-forward merge -// merge2 := makeCommit(t, repo, merge1) -// merge2 = makeCommit(t, repo, merge2) -// -// err = repo.UpdateRef("merge2", merge2) -// require.NoError(t, err) -// -// err = repo.MergeRef("merge1", "merge2") -// require.NoError(t, err) -// -// refMerge1, err = repo.ResolveRef("merge1") -// require.NoError(t, err) -// require.Equal(t, merge2, refMerge1) -// refMerge2, err = repo.ResolveRef("merge2") -// require.NoError(t, err) -// require.Equal(t, merge2, refMerge2) -// -// // merge commit -// merge1 = makeCommit(t, repo, merge1) -// err = repo.UpdateRef("merge1", merge1) -// require.NoError(t, err) -// -// merge2 = makeCommit(t, repo, merge2) -// err = repo.UpdateRef("merge2", merge2) -// require.NoError(t, err) -// -// err = repo.MergeRef("merge1", "merge2") -// require.NoError(t, err) -// -// refMerge1, err = repo.ResolveRef("merge1") -// 
require.NoError(t, err) -// require.NotEqual(t, merge1, refMerge1) -// commitRefMerge1, err := repo.ReadCommit(refMerge1) -// require.NoError(t, err) -// require.ElementsMatch(t, commitRefMerge1.Parents, []Hash{merge1, merge2}) -// refMerge2, err = repo.ResolveRef("merge2") -// require.NoError(t, err) -// require.Equal(t, merge2, refMerge2) diff --git a/entity/dag/operation_pack.go b/entity/dag/operation_pack.go index ebacdbd9..959b1ae0 100644 --- a/entity/dag/operation_pack.go +++ b/entity/dag/operation_pack.go @@ -22,7 +22,6 @@ const opsEntryName = "ops" const versionEntryPrefix = "version-" const createClockEntryPrefix = "create-clock-" const editClockEntryPrefix = "edit-clock-" -const packClockEntryPrefix = "pack-clock-" // operationPack is a wrapper structure to store multiple operations in a single git blob. // Additionally, it holds and store the metadata for those operations. @@ -40,9 +39,6 @@ type operationPack struct { // Encode the entity's logical time of last edition across all entities of the same type. // Exist on all operationPack EditTime lamport.Time - // // Encode the operationPack's logical time of creation withing this entity. - // // Exist on all operationPack - // PackTime lamport.Time } func (opp *operationPack) Id() entity.Id { @@ -129,8 +125,6 @@ func (opp *operationPack) Write(def Definition, repo repository.Repo, parentComm Name: opsEntryName}, {ObjectType: repository.Blob, Hash: emptyBlobHash, Name: fmt.Sprintf(editClockEntryPrefix+"%d", opp.EditTime)}, - // {ObjectType: repository.Blob, Hash: emptyBlobHash, - // Name: fmt.Sprintf(packClockEntryPrefix+"%d", opp.PackTime)}, } if opp.CreateTime > 0 { tree = append(tree, repository.TreeEntry{ @@ -205,7 +199,6 @@ func readOperationPack(def Definition, repo repository.RepoData, commit reposito var ops []Operation var createTime lamport.Time var editTime lamport.Time - // var packTime lamport.Time for _, entry := range entries { switch { @@ -233,15 +226,6 @@ func readOperationPack(def Definition, repo repository.RepoData, commit reposito return nil, errors.Wrap(err, "can't read edit lamport time") } editTime = lamport.Time(v) - - // case strings.HasPrefix(entry.Name, packClockEntryPrefix): - // found &= 1 << 3 - // - // v, err := strconv.ParseUint(strings.TrimPrefix(entry.Name, packClockEntryPrefix), 10, 64) - // if err != nil { - // return nil, errors.Wrap(err, "can't read pack lamport time") - // } - // packTime = lamport.Time(v) } } @@ -261,7 +245,6 @@ func readOperationPack(def Definition, repo repository.RepoData, commit reposito Operations: ops, CreateTime: createTime, EditTime: editTime, - // PackTime: packTime, }, nil } -- cgit From ef05c15f87468e0f4f1c688b0b9359cee2181c68 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Wed, 10 Feb 2021 18:22:21 +0100 Subject: entity: implement remove --- entity/dag/entity_actions.go | 28 ++++++++++++++++++++++++++-- entity/dag/entity_actions_test.go | 25 +++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 2 deletions(-) (limited to 'entity') diff --git a/entity/dag/entity_actions.go b/entity/dag/entity_actions.go index 6f6fe45c..fa50473c 100644 --- a/entity/dag/entity_actions.go +++ b/entity/dag/entity_actions.go @@ -228,6 +228,30 @@ func merge(def Definition, repo repository.ClockedRepo, remoteRef string, author return entity.NewMergeUpdatedStatus(id, localEntity) } -func Remove() error { - panic("") +// Remove delete an Entity. +// Remove is idempotent. 
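// Editor's note (annotation, not part of the patch): for an entity <id> in a
// namespace "foos" with a single remote "origin", the refs deleted below are
// (names illustrative):
//
//	refs/foos/<id>
//	refs/remotes/origin/foos/<id>
//
// Idempotency rests on RemoveRef succeeding when the ref is already gone,
// which TestRemove further down exercises by calling Remove twice.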
+func Remove(def Definition, repo repository.ClockedRepo, id entity.Id) error { + var matches []string + + ref := fmt.Sprintf("refs/%s/%s", def.namespace, id.String()) + matches = append(matches, ref) + + remotes, err := repo.GetRemotes() + if err != nil { + return err + } + + for remote := range remotes { + ref = fmt.Sprintf("refs/remotes/%s/%s/%s", remote, def.namespace, id.String()) + matches = append(matches, ref) + } + + for _, ref = range matches { + err = repo.RemoveRef(ref) + if err != nil { + return err + } + } + + return nil } diff --git a/entity/dag/entity_actions_test.go b/entity/dag/entity_actions_test.go index 78baf41f..79afe525 100644 --- a/entity/dag/entity_actions_test.go +++ b/entity/dag/entity_actions_test.go @@ -385,3 +385,28 @@ func TestMerge(t *testing.T) { // fast-forward assertEqualRefs(t, repoA, repoB, "refs/"+def.namespace) } + +func TestRemove(t *testing.T) { + repoA, repoB, remote, id1, _, def := makeTestContextRemote(t) + defer repository.CleanupTestRepos(repoA, repoB, remote) + + e := New(def) + e.Append(newOp1(id1, "foo")) + require.NoError(t, e.Commit(repoA)) + + _, err := Push(def, repoA, "remote") + require.NoError(t, err) + + err = Remove(def, repoA, e.Id()) + require.NoError(t, err) + + _, err = Read(def, repoA, e.Id()) + require.Error(t, err) + + _, err = readRemote(def, repoA, "remote", e.Id()) + require.Error(t, err) + + // Remove is idempotent + err = Remove(def, repoA, e.Id()) + require.NoError(t, err) +} -- cgit From 59e9981161acea461a3ef9d386f20e23e78d8433 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Thu, 11 Feb 2021 09:51:32 +0100 Subject: entity: expose create and edit lamport clocks --- entity/dag/entity.go | 51 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 13 deletions(-) (limited to 'entity') diff --git a/entity/dag/entity.go b/entity/dag/entity.go index 3f4dfcb4..d92b386e 100644 --- a/entity/dag/entity.go +++ b/entity/dag/entity.go @@ -35,6 +35,12 @@ type Definition struct { // Entity is a data structure stored in a chain of git objects, supporting actions like Push, Pull and Merge. type Entity struct { + // A Lamport clock is a logical clock that allow to order event + // inside a distributed system. 
+ // It must be the first field in this struct due to https://github.com/golang/go/issues/36606 + createTime lamport.Time + editTime lamport.Time + Definition // operations that are already stored in the repository @@ -42,8 +48,6 @@ type Entity struct { // operations not yet stored in the repository staging []Operation - // TODO: add here createTime and editTime - lastCommit repository.Hash } @@ -210,16 +214,26 @@ func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, err // Now that we ordered the operationPacks, we have the order of the Operations ops := make([]Operation, 0, opsCount) + var createTime lamport.Time + var editTime lamport.Time for _, pack := range oppSlice { for _, operation := range pack.Operations { ops = append(ops, operation) } + if pack.CreateTime > createTime { + createTime = pack.CreateTime + } + if pack.EditTime > editTime { + editTime = pack.EditTime + } } return &Entity{ Definition: def, ops: ops, lastCommit: rootHash, + createTime: createTime, + editTime: editTime, }, nil } @@ -349,7 +363,8 @@ func (e *Entity) Commit(repo repository.ClockedRepo) error { return fmt.Errorf("can't commit an entity with no pending operation") } - if err := e.Validate(); err != nil { + err := e.Validate() + if err != nil { return errors.Wrapf(err, "can't commit a %s with invalid data", e.Definition.typename) } @@ -361,23 +376,23 @@ func (e *Entity) Commit(repo repository.ClockedRepo) error { author = op.Author() } - editTime, err := repo.Increment(fmt.Sprintf(editClockPattern, e.namespace)) + e.editTime, err = repo.Increment(fmt.Sprintf(editClockPattern, e.namespace)) if err != nil { return err } - var creationTime lamport.Time - if e.lastCommit == "" { - creationTime, err = repo.Increment(fmt.Sprintf(creationClockPattern, e.namespace)) - if err != nil { - return err - } - } opp := &operationPack{ Author: author, Operations: e.staging, - CreateTime: creationTime, - EditTime: editTime, + EditTime: e.editTime, + } + + if e.lastCommit == "" { + e.createTime, err = repo.Increment(fmt.Sprintf(creationClockPattern, e.namespace)) + if err != nil { + return err + } + opp.CreateTime = e.createTime } var commitHash repository.Hash @@ -401,3 +416,13 @@ func (e *Entity) Commit(repo repository.ClockedRepo) error { ref := fmt.Sprintf(refsPattern, e.namespace, e.Id().String()) return repo.UpdateRef(ref, commitHash) } + +// CreateLamportTime return the Lamport time of creation +func (e *Entity) CreateLamportTime() lamport.Time { + return e.createTime +} + +// EditLamportTime return the Lamport time of the last edition +func (e *Entity) EditLamportTime() lamport.Time { + return e.editTime +} -- cgit From 71e22d9f6e49ce0c3bc3b177323b17652a1c45a2 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Thu, 11 Feb 2021 09:52:09 +0100 Subject: entity: clock loader --- entity/dag/clock.go | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 entity/dag/clock.go (limited to 'entity') diff --git a/entity/dag/clock.go b/entity/dag/clock.go new file mode 100644 index 00000000..fa944b33 --- /dev/null +++ b/entity/dag/clock.go @@ -0,0 +1,41 @@ +package dag + +import ( + "fmt" + + "github.com/MichaelMure/git-bug/identity" + "github.com/MichaelMure/git-bug/repository" +) + +// ClockLoader is the repository.ClockLoader for Entity +func ClockLoader(defs ...Definition) repository.ClockLoader { + clocks := make([]string, len(defs)*2) + for _, def := range defs { + clocks = append(clocks, fmt.Sprintf(creationClockPattern, def.namespace)) + clocks = 
append(clocks, fmt.Sprintf(editClockPattern, def.namespace)) + } + + return repository.ClockLoader{ + Clocks: clocks, + Witnesser: func(repo repository.ClockedRepo) error { + // We don't care about the actual identity so an IdentityStub will do + resolver := identity.NewStubResolver() + + for _, def := range defs { + // override the resolver + def := def + def.identityResolver = resolver + + // we actually just need to read all entities, + // as that will create and update the clocks + // TODO: concurrent loading to be faster? + for b := range ReadAll(def, repo) { + if b.Err != nil { + return b.Err + } + } + } + return nil + }, + } +} -- cgit From 94f06cd54defa73f5e8b79345597279e454c78e6 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sun, 14 Feb 2021 10:02:01 +0100 Subject: entity: pass the identity resolver instead of defining it once Having the resolver in Definition doesn't actually work well as the resolver is very situational. --- entity/dag/clock.go | 6 +----- entity/dag/common_test.go | 17 ++++++++--------- entity/dag/entity.go | 18 ++++++++---------- entity/dag/entity_actions.go | 17 ++++++++++------- entity/dag/entity_actions_test.go | 36 ++++++++++++++++++------------------ entity/dag/operation_pack.go | 8 ++++---- entity/dag/operation_pack_test.go | 8 ++++---- 7 files changed, 53 insertions(+), 57 deletions(-) (limited to 'entity') diff --git a/entity/dag/clock.go b/entity/dag/clock.go index fa944b33..c9d2b94b 100644 --- a/entity/dag/clock.go +++ b/entity/dag/clock.go @@ -22,14 +22,10 @@ func ClockLoader(defs ...Definition) repository.ClockLoader { resolver := identity.NewStubResolver() for _, def := range defs { - // override the resolver - def := def - def.identityResolver = resolver - // we actually just need to read all entities, // as that will create and update the clocks // TODO: concurrent loading to be faster? 
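// Editor's note (annotation, not part of the patch): a repository
// implementation is expected to run Witnesser once at open time, so the
// per-namespace create/edit clocks are seeded before any new entity is
// committed. Illustrative only, with an assumed definition value:
//
//	loader := ClockLoader(someDefinition)
//	if err := loader.Witnesser(repo); err != nil { /* handle */ }
//
// One catch worth flagging: make([]string, len(defs)*2) above sets the
// length, not the capacity, so Clocks begins with empty-string entries;
// make([]string, 0, len(defs)*2) was probably intended.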
- for b := range ReadAll(def, repo) { + for b := range ReadAll(def, repo, resolver) { if b.Err != nil { return b.Err } diff --git a/entity/dag/common_test.go b/entity/dag/common_test.go index 05d85898..0ddbca47 100644 --- a/entity/dag/common_test.go +++ b/entity/dag/common_test.go @@ -91,13 +91,13 @@ func unmarshaler(author identity.Interface, raw json.RawMessage) (Operation, err Identities + repo + definition */ -func makeTestContext() (repository.ClockedRepo, identity.Interface, identity.Interface, Definition) { +func makeTestContext() (repository.ClockedRepo, identity.Interface, identity.Interface, identity.Resolver, Definition) { repo := repository.NewMockRepo() - id1, id2, def := makeTestContextInternal(repo) - return repo, id1, id2, def + id1, id2, resolver, def := makeTestContextInternal(repo) + return repo, id1, id2, resolver, def } -func makeTestContextRemote(t *testing.T) (repository.ClockedRepo, repository.ClockedRepo, repository.ClockedRepo, identity.Interface, identity.Interface, Definition) { +func makeTestContextRemote(t *testing.T) (repository.ClockedRepo, repository.ClockedRepo, repository.ClockedRepo, identity.Interface, identity.Interface, identity.Resolver, Definition) { repoA := repository.CreateGoGitTestRepo(false) repoB := repository.CreateGoGitTestRepo(false) remote := repository.CreateGoGitTestRepo(true) @@ -111,7 +111,7 @@ func makeTestContextRemote(t *testing.T) (repository.ClockedRepo, repository.Clo err = repoB.AddRemote("repoA", repoA.GetLocalRemote()) require.NoError(t, err) - id1, id2, def := makeTestContextInternal(repoA) + id1, id2, resolver, def := makeTestContextInternal(repoA) // distribute the identities _, err = identity.Push(repoA, "remote") @@ -119,10 +119,10 @@ func makeTestContextRemote(t *testing.T) (repository.ClockedRepo, repository.Clo err = identity.Pull(repoB, "remote") require.NoError(t, err) - return repoA, repoB, remote, id1, id2, def + return repoA, repoB, remote, id1, id2, resolver, def } -func makeTestContextInternal(repo repository.ClockedRepo) (identity.Interface, identity.Interface, Definition) { +func makeTestContextInternal(repo repository.ClockedRepo) (identity.Interface, identity.Interface, identity.Resolver, Definition) { id1, err := identity.NewIdentity(repo, "name1", "email1") if err != nil { panic(err) @@ -155,11 +155,10 @@ func makeTestContextInternal(repo repository.ClockedRepo) (identity.Interface, i typename: "foo", namespace: "foos", operationUnmarshaler: unmarshaler, - identityResolver: resolver, formatVersion: 1, } - return id1, id2, def + return id1, id2, resolver, def } type identityResolverFunc func(id entity.Id) (identity.Interface, error) diff --git a/entity/dag/entity.go b/entity/dag/entity.go index d92b386e..09576d28 100644 --- a/entity/dag/entity.go +++ b/entity/dag/entity.go @@ -27,8 +27,6 @@ type Definition struct { namespace string // a function decoding a JSON message into an Operation operationUnmarshaler func(author identity.Interface, raw json.RawMessage) (Operation, error) - // a function loading an identity.Identity from its Id - identityResolver identity.Resolver // the expected format version number, that can be used for data migration/upgrade formatVersion uint } @@ -59,29 +57,29 @@ func New(definition Definition) *Entity { } // Read will read and decode a stored local Entity from a repository -func Read(def Definition, repo repository.ClockedRepo, id entity.Id) (*Entity, error) { +func Read(def Definition, repo repository.ClockedRepo, resolver identity.Resolver, id entity.Id) (*Entity, error) { if 
err := id.Validate(); err != nil { return nil, errors.Wrap(err, "invalid id") } ref := fmt.Sprintf("refs/%s/%s", def.namespace, id.String()) - return read(def, repo, ref) + return read(def, repo, resolver, ref) } // readRemote will read and decode a stored remote Entity from a repository -func readRemote(def Definition, repo repository.ClockedRepo, remote string, id entity.Id) (*Entity, error) { +func readRemote(def Definition, repo repository.ClockedRepo, resolver identity.Resolver, remote string, id entity.Id) (*Entity, error) { if err := id.Validate(); err != nil { return nil, errors.Wrap(err, "invalid id") } ref := fmt.Sprintf("refs/remotes/%s/%s/%s", def.namespace, remote, id.String()) - return read(def, repo, ref) + return read(def, repo, resolver, ref) } // read fetch from git and decode an Entity at an arbitrary git reference. -func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, error) { +func read(def Definition, repo repository.ClockedRepo, resolver identity.Resolver, ref string) (*Entity, error) { rootHash, err := repo.ResolveRef(ref) if err != nil { return nil, err @@ -140,7 +138,7 @@ func read(def Definition, repo repository.ClockedRepo, ref string) (*Entity, err return nil, fmt.Errorf("multiple leafs in the entity DAG") } - opp, err := readOperationPack(def, repo, commit) + opp, err := readOperationPack(def, repo, resolver, commit) if err != nil { return nil, err } @@ -243,7 +241,7 @@ type StreamedEntity struct { } // ReadAll read and parse all local Entity -func ReadAll(def Definition, repo repository.ClockedRepo) <-chan StreamedEntity { +func ReadAll(def Definition, repo repository.ClockedRepo, resolver identity.Resolver) <-chan StreamedEntity { out := make(chan StreamedEntity) go func() { @@ -258,7 +256,7 @@ func ReadAll(def Definition, repo repository.ClockedRepo) <-chan StreamedEntity } for _, ref := range refs { - e, err := read(def, repo, ref) + e, err := read(def, repo, resolver, ref) if err != nil { out <- StreamedEntity{Err: err} diff --git a/entity/dag/entity_actions.go b/entity/dag/entity_actions.go index fa50473c..707c93aa 100644 --- a/entity/dag/entity_actions.go +++ b/entity/dag/entity_actions.go @@ -32,13 +32,13 @@ func Push(def Definition, repo repository.Repo, remote string) (string, error) { // Pull will do a Fetch + MergeAll // Contrary to MergeAll, this function will return an error if a merge fail. -func Pull(def Definition, repo repository.ClockedRepo, remote string, author identity.Interface) error { +func Pull(def Definition, repo repository.ClockedRepo, resolver identity.Resolver, remote string, author identity.Interface) error { _, err := Fetch(def, repo, remote) if err != nil { return err } - for merge := range MergeAll(def, repo, remote, author) { + for merge := range MergeAll(def, repo, resolver, remote, author) { if merge.Err != nil { return merge.Err } @@ -65,7 +65,10 @@ func Pull(def Definition, repo repository.ClockedRepo, remote string, author ide // 5. if both local and remote Entity have new commits (that is, we have a concurrent edition), // a merge commit with an empty operationPack is created to join both branch and form a DAG. // --> emit entity.MergeStatusUpdated -func MergeAll(def Definition, repo repository.ClockedRepo, remote string, author identity.Interface) <-chan entity.MergeResult { +// +// Note: an author is necessary for the case where a merge commit is created, as this commit will +// have an author and may be signed if a signing key is available. 
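// Editor's note (annotation, not part of the patch): the new author
// parameter only comes into play when a merge commit is actually created; it
// becomes that commit's author and allows signing when the identity carries
// a key. A caller sketch, assuming a helper that returns the configured user
// identity (the exact helper is not shown in this patch):
//
//	author, err := identity.GetUserIdentity(repo)
//	if err != nil { /* handle */ }
//	for result := range MergeAll(def, repo, resolver, "origin", author) {
//		fmt.Println(result)
//	}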
+func MergeAll(def Definition, repo repository.ClockedRepo, resolver identity.Resolver, remote string, author identity.Interface) <-chan entity.MergeResult { out := make(chan entity.MergeResult) go func() { @@ -79,7 +82,7 @@ func MergeAll(def Definition, repo repository.ClockedRepo, remote string, author } for _, remoteRef := range remoteRefs { - out <- merge(def, repo, remoteRef, author) + out <- merge(def, repo, resolver, remoteRef, author) } }() @@ -88,14 +91,14 @@ func MergeAll(def Definition, repo repository.ClockedRepo, remote string, author // merge perform a merge to make sure a local Entity is up to date. // See MergeAll for more details. -func merge(def Definition, repo repository.ClockedRepo, remoteRef string, author identity.Interface) entity.MergeResult { +func merge(def Definition, repo repository.ClockedRepo, resolver identity.Resolver, remoteRef string, author identity.Interface) entity.MergeResult { id := entity.RefToId(remoteRef) if err := id.Validate(); err != nil { return entity.NewMergeInvalidStatus(id, errors.Wrap(err, "invalid ref").Error()) } - remoteEntity, err := read(def, repo, remoteRef) + remoteEntity, err := read(def, repo, resolver, remoteRef) if err != nil { return entity.NewMergeInvalidStatus(id, errors.Wrapf(err, "remote %s is not readable", def.typename).Error()) @@ -194,7 +197,7 @@ func merge(def Definition, repo repository.ClockedRepo, remoteRef string, author // an empty operationPack. // First step is to collect those clocks. - localEntity, err := read(def, repo, localRef) + localEntity, err := read(def, repo, resolver, localRef) if err != nil { return entity.NewMergeError(err, id) } diff --git a/entity/dag/entity_actions_test.go b/entity/dag/entity_actions_test.go index 79afe525..848d6468 100644 --- a/entity/dag/entity_actions_test.go +++ b/entity/dag/entity_actions_test.go @@ -24,7 +24,7 @@ func allEntities(t testing.TB, bugs <-chan StreamedEntity) []*Entity { } func TestPushPull(t *testing.T) { - repoA, repoB, remote, id1, id2, def := makeTestContextRemote(t) + repoA, repoB, remote, id1, id2, resolver, def := makeTestContextRemote(t) defer repository.CleanupTestRepos(repoA, repoB, remote) // A --> remote --> B @@ -37,10 +37,10 @@ func TestPushPull(t *testing.T) { _, err = Push(def, repoA, "remote") require.NoError(t, err) - err = Pull(def, repoB, "remote", id1) + err = Pull(def, repoB, resolver, "remote", id1) require.NoError(t, err) - entities := allEntities(t, ReadAll(def, repoB)) + entities := allEntities(t, ReadAll(def, repoB, resolver)) require.Len(t, entities, 1) // B --> remote --> A @@ -53,15 +53,15 @@ func TestPushPull(t *testing.T) { _, err = Push(def, repoB, "remote") require.NoError(t, err) - err = Pull(def, repoA, "remote", id1) + err = Pull(def, repoA, resolver, "remote", id1) require.NoError(t, err) - entities = allEntities(t, ReadAll(def, repoB)) + entities = allEntities(t, ReadAll(def, repoB, resolver)) require.Len(t, entities, 2) } func TestListLocalIds(t *testing.T) { - repoA, repoB, remote, id1, id2, def := makeTestContextRemote(t) + repoA, repoB, remote, id1, id2, resolver, def := makeTestContextRemote(t) defer repository.CleanupTestRepos(repoA, repoB, remote) // A --> remote --> B @@ -87,7 +87,7 @@ func TestListLocalIds(t *testing.T) { listLocalIds(t, def, repoA, 2) listLocalIds(t, def, repoB, 0) - err = Pull(def, repoB, "remote", id1) + err = Pull(def, repoB, resolver, "remote", id1) require.NoError(t, err) listLocalIds(t, def, repoA, 2) @@ -206,7 +206,7 @@ func assertNotEqualRefs(t *testing.T, repoA, repoB 
repository.RepoData, prefix s } func TestMerge(t *testing.T) { - repoA, repoB, remote, id1, id2, def := makeTestContextRemote(t) + repoA, repoB, remote, id1, id2, resolver, def := makeTestContextRemote(t) defer repository.CleanupTestRepos(repoA, repoB, remote) // SCENARIO 1 @@ -231,7 +231,7 @@ func TestMerge(t *testing.T) { _, err = Fetch(def, repoB, "remote") require.NoError(t, err) - results := MergeAll(def, repoB, "remote", id1) + results := MergeAll(def, repoB, resolver, "remote", id1) assertMergeResults(t, []entity.MergeResult{ { @@ -249,7 +249,7 @@ func TestMerge(t *testing.T) { // SCENARIO 2 // if the remote and local Entity have the same state, nothing is changed - results = MergeAll(def, repoB, "remote", id1) + results = MergeAll(def, repoB, resolver, "remote", id1) assertMergeResults(t, []entity.MergeResult{ { @@ -275,7 +275,7 @@ func TestMerge(t *testing.T) { err = e2A.Commit(repoA) require.NoError(t, err) - results = MergeAll(def, repoA, "remote", id1) + results = MergeAll(def, repoA, resolver, "remote", id1) assertMergeResults(t, []entity.MergeResult{ { @@ -300,7 +300,7 @@ func TestMerge(t *testing.T) { _, err = Fetch(def, repoB, "remote") require.NoError(t, err) - results = MergeAll(def, repoB, "remote", id1) + results = MergeAll(def, repoB, resolver, "remote", id1) assertMergeResults(t, []entity.MergeResult{ { @@ -327,10 +327,10 @@ func TestMerge(t *testing.T) { err = e2A.Commit(repoA) require.NoError(t, err) - e1B, err := Read(def, repoB, e1A.Id()) + e1B, err := Read(def, repoB, resolver, e1A.Id()) require.NoError(t, err) - e2B, err := Read(def, repoB, e2A.Id()) + e2B, err := Read(def, repoB, resolver, e2A.Id()) require.NoError(t, err) e1B.Append(newOp1(id1, "barbarfoofoo")) @@ -347,7 +347,7 @@ func TestMerge(t *testing.T) { _, err = Fetch(def, repoB, "remote") require.NoError(t, err) - results = MergeAll(def, repoB, "remote", id1) + results = MergeAll(def, repoB, resolver, "remote", id1) assertMergeResults(t, []entity.MergeResult{ { @@ -387,7 +387,7 @@ func TestMerge(t *testing.T) { } func TestRemove(t *testing.T) { - repoA, repoB, remote, id1, _, def := makeTestContextRemote(t) + repoA, repoB, remote, id1, _, resolver, def := makeTestContextRemote(t) defer repository.CleanupTestRepos(repoA, repoB, remote) e := New(def) @@ -400,10 +400,10 @@ func TestRemove(t *testing.T) { err = Remove(def, repoA, e.Id()) require.NoError(t, err) - _, err = Read(def, repoA, e.Id()) + _, err = Read(def, repoA, resolver, e.Id()) require.Error(t, err) - _, err = readRemote(def, repoA, "remote", e.Id()) + _, err = readRemote(def, repoA, resolver, "remote", e.Id()) require.Error(t, err) // Remove is idempotent diff --git a/entity/dag/operation_pack.go b/entity/dag/operation_pack.go index 959b1ae0..d6bce9f2 100644 --- a/entity/dag/operation_pack.go +++ b/entity/dag/operation_pack.go @@ -166,7 +166,7 @@ func (opp *operationPack) Write(def Definition, repo repository.Repo, parentComm // readOperationPack read the operationPack encoded in git at the given Tree hash. // // Validity of the Lamport clocks is left for the caller to decide. 
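// Editor's note (annotation, not part of the patch): per the constants and
// the Write method earlier in the series, the tree decoded below is laid out
// roughly as:
//
//	ops                blob holding the JSON-encoded author and operations
//	version-<v>        empty blob; the entry name encodes the format version
//	create-clock-<t>   empty blob; present only on an entity's first pack
//	edit-clock-<t>     empty blob; present on every pack
//
// Packing the clocks into entry names means they are covered by the tree
// hash itself, with no extra blob reads needed to recover them.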
-func readOperationPack(def Definition, repo repository.RepoData, commit repository.Commit) (*operationPack, error) { +func readOperationPack(def Definition, repo repository.RepoData, resolver identity.Resolver, commit repository.Commit) (*operationPack, error) { entries, err := repo.ReadTree(commit.TreeHash) if err != nil { return nil, err @@ -207,7 +207,7 @@ func readOperationPack(def Definition, repo repository.RepoData, commit reposito if err != nil { return nil, errors.Wrap(err, "failed to read git blob data") } - ops, author, err = unmarshallPack(def, data) + ops, author, err = unmarshallPack(def, resolver, data) if err != nil { return nil, err } @@ -251,7 +251,7 @@ func readOperationPack(def Definition, repo repository.RepoData, commit reposito // unmarshallPack delegate the unmarshalling of the Operation's JSON to the decoding // function provided by the concrete entity. This gives access to the concrete type of each // Operation. -func unmarshallPack(def Definition, data []byte) ([]Operation, identity.Interface, error) { +func unmarshallPack(def Definition, resolver identity.Resolver, data []byte) ([]Operation, identity.Interface, error) { aux := struct { Author identity.IdentityStub `json:"author"` Operations []json.RawMessage `json:"ops"` @@ -265,7 +265,7 @@ func unmarshallPack(def Definition, data []byte) ([]Operation, identity.Interfac return nil, nil, fmt.Errorf("missing author") } - author, err := def.identityResolver.ResolveIdentity(aux.Author.Id()) + author, err := resolver.ResolveIdentity(aux.Author.Id()) if err != nil { return nil, nil, err } diff --git a/entity/dag/operation_pack_test.go b/entity/dag/operation_pack_test.go index ac979776..a12382af 100644 --- a/entity/dag/operation_pack_test.go +++ b/entity/dag/operation_pack_test.go @@ -9,7 +9,7 @@ import ( ) func TestOperationPackReadWrite(t *testing.T) { - repo, id1, _, def := makeTestContext() + repo, id1, _, resolver, def := makeTestContext() opp := &operationPack{ Author: id1, @@ -27,7 +27,7 @@ func TestOperationPackReadWrite(t *testing.T) { commit, err := repo.ReadCommit(commitHash) require.NoError(t, err) - opp2, err := readOperationPack(def, repo, commit) + opp2, err := readOperationPack(def, repo, resolver, commit) require.NoError(t, err) require.Equal(t, opp, opp2) @@ -46,7 +46,7 @@ func TestOperationPackReadWrite(t *testing.T) { } func TestOperationPackSignedReadWrite(t *testing.T) { - repo, id1, _, def := makeTestContext() + repo, id1, _, resolver, def := makeTestContext() err := id1.(*identity.Identity).Mutate(repo, func(orig *identity.Mutator) { orig.Keys = append(orig.Keys, identity.GenerateKey()) @@ -69,7 +69,7 @@ func TestOperationPackSignedReadWrite(t *testing.T) { commit, err := repo.ReadCommit(commitHash) require.NoError(t, err) - opp2, err := readOperationPack(def, repo, commit) + opp2, err := readOperationPack(def, repo, resolver, commit) require.NoError(t, err) require.Equal(t, opp, opp2) -- cgit From 99b9dd84cb4b0cfd3eb1fd50b07c8b826eb52d19 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sun, 14 Feb 2021 10:06:16 +0100 Subject: entity: support different author in staging operations --- entity/dag/clock.go | 4 +- entity/dag/common_test.go | 8 ++-- entity/dag/entity.go | 99 +++++++++++++++++++++------------------ entity/dag/entity_actions.go | 20 ++++---- entity/dag/entity_actions_test.go | 14 +++--- entity/dag/entity_test.go | 47 ++++++++++++------- entity/dag/operation.go | 8 ---- entity/dag/operation_pack.go | 14 +++--- 8 files changed, 113 insertions(+), 101 deletions(-) (limited to 'entity') 
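
Note: the key change in this patch is the Commit() rewrite below, which splits e.staging into consecutive runs sharing the same author and writes one operationPack per run. As a minimal standalone sketch of that grouping pass (the op type and field names here are illustrative stand-ins, not the git-bug API):

    package main

    import "fmt"

    // op stands in for a staged Operation; only the author matters here.
    type op struct {
    	author string
    	name   string
    }

    // chunkByAuthor splits staged operations into consecutive runs that
    // share an author, mirroring the loop added to Commit() below: each
    // run becomes one operationPack with a single Author field.
    func chunkByAuthor(staging []op) [][]op {
    	var chunks [][]op
    	for len(staging) > 0 {
    		author := staging[0].author
    		i := 1
    		for i < len(staging) && staging[i].author == author {
    			i++
    		}
    		chunks = append(chunks, staging[:i])
    		staging = staging[i:]
    	}
    	return chunks
    }

    func main() {
    	staged := []op{
    		{"alice", "create"}, {"alice", "edit"},
    		{"bob", "comment"}, {"alice", "close"},
    	}
    	for _, chunk := range chunkByAuthor(staged) {
    		fmt.Printf("%s: %d op(s)\n", chunk[0].author, len(chunk))
    	}
    	// alice: 2 op(s)
    	// bob: 1 op(s)
    	// alice: 1 op(s)
    }

Note also that the real code compares op.Author().Id() rather than raw interface values, which is why operationPack.Validate() is changed the same way in this patch.
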
diff --git a/entity/dag/clock.go b/entity/dag/clock.go index c9d2b94b..dc9bb72d 100644 --- a/entity/dag/clock.go +++ b/entity/dag/clock.go @@ -11,8 +11,8 @@ import ( func ClockLoader(defs ...Definition) repository.ClockLoader { clocks := make([]string, len(defs)*2) for _, def := range defs { - clocks = append(clocks, fmt.Sprintf(creationClockPattern, def.namespace)) - clocks = append(clocks, fmt.Sprintf(editClockPattern, def.namespace)) + clocks = append(clocks, fmt.Sprintf(creationClockPattern, def.Namespace)) + clocks = append(clocks, fmt.Sprintf(editClockPattern, def.Namespace)) } return repository.ClockLoader{ diff --git a/entity/dag/common_test.go b/entity/dag/common_test.go index 0ddbca47..fa15cd1f 100644 --- a/entity/dag/common_test.go +++ b/entity/dag/common_test.go @@ -152,10 +152,10 @@ func makeTestContextInternal(repo repository.ClockedRepo) (identity.Interface, i }) def := Definition{ - typename: "foo", - namespace: "foos", - operationUnmarshaler: unmarshaler, - formatVersion: 1, + Typename: "foo", + Namespace: "foos", + OperationUnmarshaler: unmarshaler, + FormatVersion: 1, } return id1, id2, resolver, def diff --git a/entity/dag/entity.go b/entity/dag/entity.go index 09576d28..196280a8 100644 --- a/entity/dag/entity.go +++ b/entity/dag/entity.go @@ -22,13 +22,13 @@ const editClockPattern = "%s-edit" // Definition hold the details defining one specialization of an Entity. type Definition struct { // the name of the entity (bug, pull-request, ...) - typename string - // the namespace in git (bugs, prs, ...) - namespace string + Typename string + // the Namespace in git (bugs, prs, ...) + Namespace string // a function decoding a JSON message into an Operation - operationUnmarshaler func(author identity.Interface, raw json.RawMessage) (Operation, error) + OperationUnmarshaler func(author identity.Interface, raw json.RawMessage) (Operation, error) // the expected format version number, that can be used for data migration/upgrade - formatVersion uint + FormatVersion uint } // Entity is a data structure stored in a chain of git objects, supporting actions like Push, Pull and Merge. 
@@ -62,7 +62,7 @@ func Read(def Definition, repo repository.ClockedRepo, resolver identity.Resolve return nil, errors.Wrap(err, "invalid id") } - ref := fmt.Sprintf("refs/%s/%s", def.namespace, id.String()) + ref := fmt.Sprintf("refs/%s/%s", def.Namespace, id.String()) return read(def, repo, resolver, ref) } @@ -73,7 +73,7 @@ func readRemote(def Definition, repo repository.ClockedRepo, resolver identity.R return nil, errors.Wrap(err, "invalid id") } - ref := fmt.Sprintf("refs/remotes/%s/%s/%s", def.namespace, remote, id.String()) + ref := fmt.Sprintf("refs/remotes/%s/%s/%s", def.Namespace, remote, id.String()) return read(def, repo, resolver, ref) } @@ -179,11 +179,11 @@ func read(def Definition, repo repository.ClockedRepo, resolver identity.Resolve // The clocks are fine, we witness them for _, opp := range oppMap { - err = repo.Witness(fmt.Sprintf(creationClockPattern, def.namespace), opp.CreateTime) + err = repo.Witness(fmt.Sprintf(creationClockPattern, def.Namespace), opp.CreateTime) if err != nil { return nil, err } - err = repo.Witness(fmt.Sprintf(editClockPattern, def.namespace), opp.EditTime) + err = repo.Witness(fmt.Sprintf(editClockPattern, def.Namespace), opp.EditTime) if err != nil { return nil, err } @@ -247,7 +247,7 @@ func ReadAll(def Definition, repo repository.ClockedRepo, resolver identity.Reso go func() { defer close(out) - refPrefix := fmt.Sprintf("refs/%s/", def.namespace) + refPrefix := fmt.Sprintf("refs/%s/", def.Namespace) refs, err := repo.ListRefs(refPrefix) if err != nil { @@ -346,9 +346,9 @@ func (e *Entity) NeedCommit() bool { return len(e.staging) > 0 } -// CommitAdNeeded execute a Commit only if necessary. This function is useful to avoid getting an error if the Entity +// CommitAsNeeded execute a Commit only if necessary. This function is useful to avoid getting an error if the Entity // is already in sync with the repository. 
-func (e *Entity) CommitAdNeeded(repo repository.ClockedRepo) error { +func (e *Entity) CommitAsNeeded(repo repository.ClockedRepo) error { if e.NeedCommit() { return e.Commit(repo) } @@ -363,56 +363,65 @@ func (e *Entity) Commit(repo repository.ClockedRepo) error { err := e.Validate() if err != nil { - return errors.Wrapf(err, "can't commit a %s with invalid data", e.Definition.typename) + return errors.Wrapf(err, "can't commit a %s with invalid data", e.Definition.Typename) } - var author identity.Interface - for _, op := range e.staging { - if author != nil && op.Author() != author { - return fmt.Errorf("operations with different author") + for len(e.staging) > 0 { + var author identity.Interface + var toCommit []Operation + + // Split into chunks with the same author + for len(e.staging) > 0 { + op := e.staging[0] + if author != nil && op.Author().Id() != author.Id() { + break + } + author = e.staging[0].Author() + toCommit = append(toCommit, op) + e.staging = e.staging[1:] } - author = op.Author() - } - e.editTime, err = repo.Increment(fmt.Sprintf(editClockPattern, e.namespace)) - if err != nil { - return err - } + e.editTime, err = repo.Increment(fmt.Sprintf(editClockPattern, e.Namespace)) + if err != nil { + return err + } - opp := &operationPack{ - Author: author, - Operations: e.staging, - EditTime: e.editTime, - } + opp := &operationPack{ + Author: author, + Operations: toCommit, + EditTime: e.editTime, + } - if e.lastCommit == "" { - e.createTime, err = repo.Increment(fmt.Sprintf(creationClockPattern, e.namespace)) + if e.lastCommit == "" { + e.createTime, err = repo.Increment(fmt.Sprintf(creationClockPattern, e.Namespace)) + if err != nil { + return err + } + opp.CreateTime = e.createTime + } + + var parentCommit []repository.Hash + if e.lastCommit != "" { + parentCommit = []repository.Hash{e.lastCommit} + } + + commitHash, err := opp.Write(e.Definition, repo, parentCommit...) if err != nil { return err } - opp.CreateTime = e.createTime - } - var commitHash repository.Hash - if e.lastCommit == "" { - commitHash, err = opp.Write(e.Definition, repo) - } else { - commitHash, err = opp.Write(e.Definition, repo, e.lastCommit) - } - - if err != nil { - return err + e.lastCommit = commitHash + e.ops = append(e.ops, toCommit...) } - e.lastCommit = commitHash - e.ops = append(e.ops, e.staging...) + // not strictly necessary but make equality testing easier in tests e.staging = nil // Create or update the Git reference for this entity // When pushing later, the remote will ensure that this ref update // is fast-forward, that is no data has been overwritten. 
- ref := fmt.Sprintf(refsPattern, e.namespace, e.Id().String()) - return repo.UpdateRef(ref, commitHash) + ref := fmt.Sprintf(refsPattern, e.Namespace, e.Id().String()) + return repo.UpdateRef(ref, e.lastCommit) } // CreateLamportTime return the Lamport time of creation diff --git a/entity/dag/entity_actions.go b/entity/dag/entity_actions.go index 707c93aa..2926e992 100644 --- a/entity/dag/entity_actions.go +++ b/entity/dag/entity_actions.go @@ -12,7 +12,7 @@ import ( // ListLocalIds list all the available local Entity's Id func ListLocalIds(def Definition, repo repository.RepoData) ([]entity.Id, error) { - refs, err := repo.ListRefs(fmt.Sprintf("refs/%s/", def.namespace)) + refs, err := repo.ListRefs(fmt.Sprintf("refs/%s/", def.Namespace)) if err != nil { return nil, err } @@ -22,12 +22,12 @@ func ListLocalIds(def Definition, repo repository.RepoData) ([]entity.Id, error) // Fetch retrieve updates from a remote // This does not change the local entity state func Fetch(def Definition, repo repository.Repo, remote string) (string, error) { - return repo.FetchRefs(remote, def.namespace) + return repo.FetchRefs(remote, def.Namespace) } // Push update a remote with the local changes func Push(def Definition, repo repository.Repo, remote string) (string, error) { - return repo.PushRefs(remote, def.namespace) + return repo.PushRefs(remote, def.Namespace) } // Pull will do a Fetch + MergeAll @@ -74,7 +74,7 @@ func MergeAll(def Definition, repo repository.ClockedRepo, resolver identity.Res go func() { defer close(out) - remoteRefSpec := fmt.Sprintf("refs/remotes/%s/%s/", remote, def.namespace) + remoteRefSpec := fmt.Sprintf("refs/remotes/%s/%s/", remote, def.Namespace) remoteRefs, err := repo.ListRefs(remoteRefSpec) if err != nil { out <- entity.MergeResult{Err: err} @@ -101,16 +101,16 @@ func merge(def Definition, repo repository.ClockedRepo, resolver identity.Resolv remoteEntity, err := read(def, repo, resolver, remoteRef) if err != nil { return entity.NewMergeInvalidStatus(id, - errors.Wrapf(err, "remote %s is not readable", def.typename).Error()) + errors.Wrapf(err, "remote %s is not readable", def.Typename).Error()) } // Check for error in remote data if err := remoteEntity.Validate(); err != nil { return entity.NewMergeInvalidStatus(id, - errors.Wrapf(err, "remote %s data is invalid", def.typename).Error()) + errors.Wrapf(err, "remote %s data is invalid", def.Typename).Error()) } - localRef := fmt.Sprintf("refs/%s/%s", def.namespace, id.String()) + localRef := fmt.Sprintf("refs/%s/%s", def.Namespace, id.String()) // SCENARIO 1 // if the remote Entity doesn't exist locally, it's created @@ -202,7 +202,7 @@ func merge(def Definition, repo repository.ClockedRepo, resolver identity.Resolv return entity.NewMergeError(err, id) } - editTime, err := repo.Increment(fmt.Sprintf(editClockPattern, def.namespace)) + editTime, err := repo.Increment(fmt.Sprintf(editClockPattern, def.Namespace)) if err != nil { return entity.NewMergeError(err, id) } @@ -236,7 +236,7 @@ func merge(def Definition, repo repository.ClockedRepo, resolver identity.Resolv func Remove(def Definition, repo repository.ClockedRepo, id entity.Id) error { var matches []string - ref := fmt.Sprintf("refs/%s/%s", def.namespace, id.String()) + ref := fmt.Sprintf("refs/%s/%s", def.Namespace, id.String()) matches = append(matches, ref) remotes, err := repo.GetRemotes() @@ -245,7 +245,7 @@ func Remove(def Definition, repo repository.ClockedRepo, id entity.Id) error { } for remote := range remotes { - ref = fmt.Sprintf("refs/remotes/%s/%s/%s", 
remote, def.namespace, id.String()) + ref = fmt.Sprintf("refs/remotes/%s/%s/%s", remote, def.Namespace, id.String()) matches = append(matches, ref) } diff --git a/entity/dag/entity_actions_test.go b/entity/dag/entity_actions_test.go index 848d6468..402f459c 100644 --- a/entity/dag/entity_actions_test.go +++ b/entity/dag/entity_actions_test.go @@ -244,7 +244,7 @@ func TestMerge(t *testing.T) { }, }, results) - assertEqualRefs(t, repoA, repoB, "refs/"+def.namespace) + assertEqualRefs(t, repoA, repoB, "refs/"+def.Namespace) // SCENARIO 2 // if the remote and local Entity have the same state, nothing is changed @@ -262,7 +262,7 @@ func TestMerge(t *testing.T) { }, }, results) - assertEqualRefs(t, repoA, repoB, "refs/"+def.namespace) + assertEqualRefs(t, repoA, repoB, "refs/"+def.Namespace) // SCENARIO 3 // if the local Entity has new commits but the remote don't, nothing is changed @@ -288,7 +288,7 @@ func TestMerge(t *testing.T) { }, }, results) - assertNotEqualRefs(t, repoA, repoB, "refs/"+def.namespace) + assertNotEqualRefs(t, repoA, repoB, "refs/"+def.Namespace) // SCENARIO 4 // if the remote has new commit, the local bug is updated to match the same history @@ -313,7 +313,7 @@ func TestMerge(t *testing.T) { }, }, results) - assertEqualRefs(t, repoA, repoB, "refs/"+def.namespace) + assertEqualRefs(t, repoA, repoB, "refs/"+def.Namespace) // SCENARIO 5 // if both local and remote Entity have new commits (that is, we have a concurrent edition), @@ -360,7 +360,7 @@ func TestMerge(t *testing.T) { }, }, results) - assertNotEqualRefs(t, repoA, repoB, "refs/"+def.namespace) + assertNotEqualRefs(t, repoA, repoB, "refs/"+def.Namespace) _, err = Push(def, repoB, "remote") require.NoError(t, err) @@ -368,7 +368,7 @@ func TestMerge(t *testing.T) { _, err = Fetch(def, repoA, "remote") require.NoError(t, err) - results = MergeAll(def, repoA, "remote", id1) + results = MergeAll(def, repoA, resolver, "remote", id1) assertMergeResults(t, []entity.MergeResult{ { @@ -383,7 +383,7 @@ func TestMerge(t *testing.T) { // make sure that the graphs become stable over multiple repo, due to the // fast-forward - assertEqualRefs(t, repoA, repoB, "refs/"+def.namespace) + assertEqualRefs(t, repoA, repoB, "refs/"+def.Namespace) } func TestRemove(t *testing.T) { diff --git a/entity/dag/entity_test.go b/entity/dag/entity_test.go index 012c87aa..6d621bbe 100644 --- a/entity/dag/entity_test.go +++ b/entity/dag/entity_test.go @@ -7,7 +7,7 @@ import ( ) func TestWriteRead(t *testing.T) { - repo, id1, id2, def := makeTestContext() + repo, id1, id2, resolver, def := makeTestContext() entity := New(def) require.False(t, entity.NeedCommit()) @@ -16,15 +16,34 @@ func TestWriteRead(t *testing.T) { entity.Append(newOp2(id1, "bar")) require.True(t, entity.NeedCommit()) - require.NoError(t, entity.CommitAdNeeded(repo)) + require.NoError(t, entity.CommitAsNeeded(repo)) require.False(t, entity.NeedCommit()) entity.Append(newOp2(id2, "foobar")) require.True(t, entity.NeedCommit()) - require.NoError(t, entity.CommitAdNeeded(repo)) + require.NoError(t, entity.CommitAsNeeded(repo)) require.False(t, entity.NeedCommit()) - read, err := Read(def, repo, entity.Id()) + read, err := Read(def, repo, resolver, entity.Id()) + require.NoError(t, err) + + assertEqualEntities(t, entity, read) +} + +func TestWriteReadMultipleAuthor(t *testing.T) { + repo, id1, id2, resolver, def := makeTestContext() + + entity := New(def) + + entity.Append(newOp1(id1, "foo")) + entity.Append(newOp2(id2, "bar")) + + require.NoError(t, entity.CommitAsNeeded(repo)) + + 
entity.Append(newOp2(id1, "foobar")) + require.NoError(t, entity.CommitAsNeeded(repo)) + + read, err := Read(def, repo, resolver, entity.Id()) require.NoError(t, err) assertEqualEntities(t, entity, read) @@ -34,23 +53,15 @@ func assertEqualEntities(t *testing.T, a, b *Entity) { // testify doesn't support comparing functions and systematically fail if they are not nil // so we have to set them to nil temporarily - backOpUnA := a.Definition.operationUnmarshaler - backOpUnB := b.Definition.operationUnmarshaler - - a.Definition.operationUnmarshaler = nil - b.Definition.operationUnmarshaler = nil - - backIdResA := a.Definition.identityResolver - backIdResB := b.Definition.identityResolver + backOpUnA := a.Definition.OperationUnmarshaler + backOpUnB := b.Definition.OperationUnmarshaler - a.Definition.identityResolver = nil - b.Definition.identityResolver = nil + a.Definition.OperationUnmarshaler = nil + b.Definition.OperationUnmarshaler = nil defer func() { - a.Definition.operationUnmarshaler = backOpUnA - b.Definition.operationUnmarshaler = backOpUnB - a.Definition.identityResolver = backIdResA - b.Definition.identityResolver = backIdResB + a.Definition.OperationUnmarshaler = backOpUnA + b.Definition.OperationUnmarshaler = backOpUnB }() require.Equal(t, a, b) diff --git a/entity/dag/operation.go b/entity/dag/operation.go index 86e2f7d7..b0a78de6 100644 --- a/entity/dag/operation.go +++ b/entity/dag/operation.go @@ -23,11 +23,3 @@ type Operation interface { // Author returns the author of this operation Author() identity.Interface } - -// TODO: remove? -type operationBase struct { - author identity.Interface - - // Not serialized. Store the op's id in memory. - id entity.Id -} diff --git a/entity/dag/operation_pack.go b/entity/dag/operation_pack.go index d6bce9f2..00cf2557 100644 --- a/entity/dag/operation_pack.go +++ b/entity/dag/operation_pack.go @@ -72,7 +72,7 @@ func (opp *operationPack) Validate() error { return fmt.Errorf("missing author") } for _, op := range opp.Operations { - if op.Author() != opp.Author { + if op.Author().Id() != opp.Author.Id() { return fmt.Errorf("operation has different author than the operationPack's") } } @@ -120,7 +120,7 @@ func (opp *operationPack) Write(def Definition, repo repository.Repo, parentComm // - clocks tree := []repository.TreeEntry{ {ObjectType: repository.Blob, Hash: emptyBlobHash, - Name: fmt.Sprintf(versionEntryPrefix+"%d", def.formatVersion)}, + Name: fmt.Sprintf(versionEntryPrefix+"%d", def.FormatVersion)}, {ObjectType: repository.Blob, Hash: hash, Name: opsEntryName}, {ObjectType: repository.Blob, Hash: emptyBlobHash, @@ -188,10 +188,10 @@ func readOperationPack(def Definition, repo repository.RepoData, resolver identi } } if version == 0 { - return nil, entity.NewErrUnknowFormat(def.formatVersion) + return nil, entity.NewErrUnknowFormat(def.FormatVersion) } - if version != def.formatVersion { - return nil, entity.NewErrInvalidFormat(version, def.formatVersion) + if version != def.FormatVersion { + return nil, entity.NewErrInvalidFormat(version, def.FormatVersion) } var id entity.Id @@ -230,7 +230,7 @@ func readOperationPack(def Definition, repo repository.RepoData, resolver identi } // Verify signature if we expect one - keys := author.ValidKeysAtTime(fmt.Sprintf(editClockPattern, def.namespace), editTime) + keys := author.ValidKeysAtTime(fmt.Sprintf(editClockPattern, def.Namespace), editTime) if len(keys) > 0 { keyring := PGPKeyring(keys) _, err = openpgp.CheckDetachedSignature(keyring, commit.SignedData, commit.Signature) @@ -274,7 +274,7 
@@ func unmarshallPack(def Definition, resolver identity.Resolver, data []byte) ([] for _, raw := range aux.Operations { // delegate to specialized unmarshal function - op, err := def.operationUnmarshaler(author, raw) + op, err := def.OperationUnmarshaler(author, raw) if err != nil { return nil, nil, err } -- cgit From 3f6ef50883492f77995a7e27872d0b5ae17b9d6a Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sun, 14 Feb 2021 11:36:32 +0100 Subject: bug: migrate to the DAG entity structure! --- entity/TODO | 9 --------- entity/merge.go | 6 ++++-- 2 files changed, 4 insertions(+), 11 deletions(-) delete mode 100644 entity/TODO (limited to 'entity') diff --git a/entity/TODO b/entity/TODO deleted file mode 100644 index 9f33dd09..00000000 --- a/entity/TODO +++ /dev/null @@ -1,9 +0,0 @@ -- is the pack Lamport clock really useful vs only topological sort? - - topological order is enforced on the clocks, so what's the point? - - is EditTime equivalent to PackTime? no, avoid the gaps. Is it better? - --> PackTime is contained within a bug and might avoid extreme reordering? -- how to do commit signature? -- how to avoid id collision between Operations? -- write tests for actions -- migrate Bug to the new structure -- migrate Identity to the new structure? \ No newline at end of file diff --git a/entity/merge.go b/entity/merge.go index 1b68b4de..0661b7fc 100644 --- a/entity/merge.go +++ b/entity/merge.go @@ -42,13 +42,15 @@ func (mr MergeResult) String() string { case MergeStatusNothing: return "nothing to do" case MergeStatusError: - return fmt.Sprintf("merge error on %s: %s", mr.Id, mr.Err.Error()) + if mr.Id != "" { + return fmt.Sprintf("merge error on %s: %s", mr.Id, mr.Err.Error()) + } + return fmt.Sprintf("merge error: %s", mr.Err.Error()) default: panic("unknown merge status") } } -// TODO: Interface --> *Entity ? 
func NewMergeNewStatus(id Id, entity Interface) MergeResult { return MergeResult{ Id: id, -- cgit From 4b9862e239deb939c87be2b02970a7bfe2996e13 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sun, 14 Feb 2021 12:13:33 +0100 Subject: entity: make sure merge commit don't have operations --- entity/dag/entity.go | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'entity') diff --git a/entity/dag/entity.go b/entity/dag/entity.go index 196280a8..c4368514 100644 --- a/entity/dag/entity.go +++ b/entity/dag/entity.go @@ -148,6 +148,10 @@ func read(def Definition, repo repository.ClockedRepo, resolver identity.Resolve return nil, err } + if isMerge && len(opp.Operations) > 0 { + return nil, fmt.Errorf("merge commit cannot have operations") + } + // Check that the create lamport clock is set (not checked in Validate() as it's optional) if isFirstCommit && opp.CreateTime <= 0 { return nil, fmt.Errorf("creation lamport time not set") -- cgit From d0d7be8db010e2c68c98d0a34387e4fac0c4d6ee Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sun, 14 Feb 2021 12:14:03 +0100 Subject: minor cleanups --- entity/err.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'entity') diff --git a/entity/err.go b/entity/err.go index 9222e4da..9f7f5a1a 100644 --- a/entity/err.go +++ b/entity/err.go @@ -52,7 +52,7 @@ func NewErrUnknowFormat(expected uint) *ErrInvalidFormat { func (e ErrInvalidFormat) Error() string { if e.version == 0 { - return fmt.Sprintf("unreadable data, expected format version %v", e.expected) + return fmt.Sprintf("unreadable data, you likely have an outdated repository format, please use https://github.com/MichaelMure/git-bug-migration to upgrade to format version %v", e.expected) } if e.version < e.expected { return fmt.Sprintf("outdated repository format %v, please use https://github.com/MichaelMure/git-bug-migration to upgrade to format version %v", e.version, e.expected) -- cgit From 1ced77af1a4bdbaa212a74bf0c56b2b81cdc5bd2 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sun, 14 Feb 2021 12:24:40 +0100 Subject: fix merge --- entity/dag/operation_pack.go | 2 +- entity/err.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'entity') diff --git a/entity/dag/operation_pack.go b/entity/dag/operation_pack.go index 00cf2557..a436fd33 100644 --- a/entity/dag/operation_pack.go +++ b/entity/dag/operation_pack.go @@ -188,7 +188,7 @@ func readOperationPack(def Definition, repo repository.RepoData, resolver identi } } if version == 0 { - return nil, entity.NewErrUnknowFormat(def.FormatVersion) + return nil, entity.NewErrUnknownFormat(def.FormatVersion) } if version != def.FormatVersion { return nil, entity.NewErrInvalidFormat(version, def.FormatVersion) diff --git a/entity/err.go b/entity/err.go index 9f7f5a1a..408e27b4 100644 --- a/entity/err.go +++ b/entity/err.go @@ -43,7 +43,7 @@ func NewErrInvalidFormat(version uint, expected uint) *ErrInvalidFormat { } } -func NewErrUnknowFormat(expected uint) *ErrInvalidFormat { +func NewErrUnknownFormat(expected uint) *ErrInvalidFormat { return &ErrInvalidFormat{ version: 0, expected: expected, -- cgit From 45e540c178533ef9aab01b1c3e782bc63061e313 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sun, 14 Feb 2021 12:38:09 +0100 Subject: bug: wrap dag.Entity into a full Bug in MergeAll --- entity/dag/entity_actions_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'entity') diff --git a/entity/dag/entity_actions_test.go b/entity/dag/entity_actions_test.go index 402f459c..45e69c7d 100644 --- 
a/entity/dag/entity_actions_test.go +++ b/entity/dag/entity_actions_test.go @@ -23,7 +23,7 @@ func allEntities(t testing.TB, bugs <-chan StreamedEntity) []*Entity { return result } -func TestPushPull(t *testing.T) { +func TestEntityPushPull(t *testing.T) { repoA, repoB, remote, id1, id2, resolver, def := makeTestContextRemote(t) defer repository.CleanupTestRepos(repoA, repoB, remote) -- cgit From f1d4a19af81fcc05ae9d90e018ff141f6521335a Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sun, 14 Mar 2021 18:39:04 +0100 Subject: bug: nonce on all operation to prevent id collision --- entity/dag/operation.go | 16 +++++++++++++--- entity/id.go | 2 +- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'entity') diff --git a/entity/dag/operation.go b/entity/dag/operation.go index b0a78de6..94974a82 100644 --- a/entity/dag/operation.go +++ b/entity/dag/operation.go @@ -10,13 +10,23 @@ import ( // data structure and storage. type Operation interface { // Id return the Operation identifier + // // Some care need to be taken to define a correct Id derivation and enough entropy in the data used to avoid // collisions. Notably: - // - the Id of the first Operation will be used as the Id of the Entity. Collision need to be avoided across entities of the same type - // (example: no collision within the "bug" namespace). + // - the Id of the first Operation will be used as the Id of the Entity. Collision need to be avoided across entities + // of the same type (example: no collision within the "bug" namespace). // - collisions can also happen within the set of Operations of an Entity. Simple Operation might not have enough // entropy to yield unique Ids (example: two "close" operation within the same second, same author). - // A common way to derive an Id will be to use the DeriveId function on the serialized operation data. + // If this is a concern, it is recommended to include a piece of random data in the operation's data, to guarantee + // a minimal amount of entropy and avoid collision. + // + // Author's note: I tried to find a clever way around that inelegance (stuffing random useless data into the stored + // structure is not exactly elegant) but I failed to find a proper way. Essentially, anything that would reuse some + // other data (parent operation's Id, lamport clock) or the graph structure (depth) impose that the Id would only + // make sense in the context of the graph and yield some deep coupling between Entity and Operation. This in turn + // make the whole thing even less elegant. + // + // A common way to derive an Id will be to use the entity.DeriveId() function on the serialized operation data. Id() entity.Id // Validate check if the Operation data is valid Validate() error diff --git a/entity/id.go b/entity/id.go index b602452e..c8dbdb94 100644 --- a/entity/id.go +++ b/entity/id.go @@ -18,7 +18,7 @@ const UnsetId = Id("unset") // Id is an identifier for an entity or part of an entity type Id string -// DeriveId generate an Id from some data, taken from a root part of the entity. +// DeriveId generate an Id from the serialization of the object or part of the object. func DeriveId(data []byte) Id { // My understanding is that sha256 is enough to prevent collision (git use that, so ...?) // If you read this code, I'd be happy to be schooled. 
-- cgit From 214abe4dea1984086e45d1399538fb12aa010642 Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sat, 20 Feb 2021 15:48:44 +0100 Subject: WIP operation with files --- entity/dag/common_test.go | 15 ++++++++----- entity/dag/operation.go | 9 ++++++++ entity/dag/operation_pack.go | 45 ++++++++++++++++++++++++++++++++++++--- entity/dag/operation_pack_test.go | 20 +++++++++++++++-- 4 files changed, 79 insertions(+), 10 deletions(-) (limited to 'entity') diff --git a/entity/dag/common_test.go b/entity/dag/common_test.go index fa15cd1f..1898451d 100644 --- a/entity/dag/common_test.go +++ b/entity/dag/common_test.go @@ -23,10 +23,11 @@ type op1 struct { OperationType int `json:"type"` Field1 string `json:"field_1"` + Files []repository.Hash } -func newOp1(author identity.Interface, field1 string) *op1 { - return &op1{author: author, OperationType: 1, Field1: field1} +func newOp1(author identity.Interface, field1 string, files ...repository.Hash) *op1 { + return &op1{author: author, OperationType: 1, Field1: field1, Files: files} } func (o *op1) Id() entity.Id { @@ -34,11 +35,15 @@ func (o *op1) Id() entity.Id { return entity.DeriveId(data) } +func (o *op1) Validate() error { return nil } + func (o *op1) Author() identity.Interface { return o.author } -func (o *op1) Validate() error { return nil } +func (o *op1) GetFiles() []repository.Hash { + return o.Files +} type op2 struct { author identity.Interface @@ -56,12 +61,12 @@ func (o *op2) Id() entity.Id { return entity.DeriveId(data) } +func (o *op2) Validate() error { return nil } + func (o *op2) Author() identity.Interface { return o.author } -func (o *op2) Validate() error { return nil } - func unmarshaler(author identity.Interface, raw json.RawMessage) (Operation, error) { var t struct { OperationType int `json:"type"` diff --git a/entity/dag/operation.go b/entity/dag/operation.go index 94974a82..1bfb3d3d 100644 --- a/entity/dag/operation.go +++ b/entity/dag/operation.go @@ -3,6 +3,7 @@ package dag import ( "github.com/MichaelMure/git-bug/entity" "github.com/MichaelMure/git-bug/identity" + "github.com/MichaelMure/git-bug/repository" ) // Operation is a piece of data defining a change to reflect on the state of an Entity. @@ -33,3 +34,11 @@ type Operation interface { // Author returns the author of this operation Author() identity.Interface } + +// OperationWithFiles is an extended Operation that has files dependency, stored in git. 
+type OperationWithFiles interface { + Operation + + // GetFiles return the files needed by this operation + GetFiles() []repository.Hash +} diff --git a/entity/dag/operation_pack.go b/entity/dag/operation_pack.go index a436fd33..72063c60 100644 --- a/entity/dag/operation_pack.go +++ b/entity/dag/operation_pack.go @@ -15,10 +15,8 @@ import ( "github.com/MichaelMure/git-bug/util/lamport" ) -// TODO: extra data tree -const extraEntryName = "extra" - const opsEntryName = "ops" +const extraEntryName = "extra" const versionEntryPrefix = "version-" const createClockEntryPrefix = "create-clock-" const editClockEntryPrefix = "edit-clock-" @@ -118,6 +116,7 @@ func (opp *operationPack) Write(def Definition, repo repository.Repo, parentComm // Make a Git tree referencing this blob and encoding the other values: // - format version // - clocks + // - extra data tree := []repository.TreeEntry{ {ObjectType: repository.Blob, Hash: emptyBlobHash, Name: fmt.Sprintf(versionEntryPrefix+"%d", def.FormatVersion)}, @@ -133,6 +132,17 @@ func (opp *operationPack) Write(def Definition, repo repository.Repo, parentComm Name: fmt.Sprintf(createClockEntryPrefix+"%d", opp.CreateTime), }) } + if extraTree := opp.makeExtraTree(); len(extraTree) > 0 { + extraTreeHash, err := repo.StoreTree(extraTree) + if err != nil { + return "", err + } + tree = append(tree, repository.TreeEntry{ + ObjectType: repository.Tree, + Hash: extraTreeHash, + Name: extraEntryName, + }) + } // Store the tree treeHash, err := repo.StoreTree(tree) @@ -163,6 +173,35 @@ func (opp *operationPack) Write(def Definition, repo repository.Repo, parentComm return commitHash, nil } +func (opp *operationPack) makeExtraTree() []repository.TreeEntry { + var tree []repository.TreeEntry + counter := 0 + added := make(map[repository.Hash]interface{}) + + for _, ops := range opp.Operations { + ops, ok := ops.(OperationWithFiles) + if !ok { + continue + } + + for _, file := range ops.GetFiles() { + if _, has := added[file]; !has { + tree = append(tree, repository.TreeEntry{ + ObjectType: repository.Blob, + Hash: file, + // The name is not important here, we only need to + // reference the blob. + Name: fmt.Sprintf("file%d", counter), + }) + counter++ + added[file] = struct{}{} + } + } + } + + return tree +} + // readOperationPack read the operationPack encoded in git at the given Tree hash. // // Validity of the Lamport clocks is left for the caller to decide. 
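
Note: makeExtraTree above stores each referenced blob only once, under a throwaway name, so the blobs stay reachable from the pack's commit without being part of the operations blob itself. A toy reproduction of that deduplication (plain strings instead of repository.Hash, purely illustrative):

    package main

    import "fmt"

    // extraEntries reproduces makeExtraTree's dedup: every blob hash
    // referenced by any operation is listed once, named file0, file1, ...
    // The names are meaningless; the entries only keep the blobs
    // reachable in git.
    func extraEntries(opFiles [][]string) []string {
    	var entries []string
    	added := make(map[string]struct{})
    	counter := 0
    	for _, files := range opFiles {
    		for _, hash := range files {
    			if _, has := added[hash]; has {
    				continue
    			}
    			entries = append(entries, fmt.Sprintf("file%d -> %s", counter, hash))
    			added[hash] = struct{}{}
    			counter++
    		}
    	}
    	return entries
    }

    func main() {
    	// Two operations referencing overlapping blobs, as in the test below.
    	fmt.Println(extraEntries([][]string{{"blobA", "blobB"}, {"blobB"}}))
    	// [file0 -> blobA file1 -> blobB]
    }
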
diff --git a/entity/dag/operation_pack_test.go b/entity/dag/operation_pack_test.go index a12382af..0fe98dc7 100644 --- a/entity/dag/operation_pack_test.go +++ b/entity/dag/operation_pack_test.go @@ -1,6 +1,7 @@ package dag import ( + "math/rand" "testing" "github.com/stretchr/testify/require" @@ -11,10 +12,16 @@ import ( func TestOperationPackReadWrite(t *testing.T) { repo, id1, _, resolver, def := makeTestContext() + blobHash1, err := repo.StoreData(randomData()) + require.NoError(t, err) + + blobHash2, err := repo.StoreData(randomData()) + require.NoError(t, err) + opp := &operationPack{ Author: id1, Operations: []Operation{ - newOp1(id1, "foo"), + newOp1(id1, "foo", blobHash1, blobHash2), newOp2(id1, "bar"), }, CreateTime: 123, @@ -36,7 +43,7 @@ func TestOperationPackReadWrite(t *testing.T) { opp3 := &operationPack{ Author: id1, Operations: []Operation{ - newOp1(id1, "foo"), + newOp1(id1, "foo", blobHash1, blobHash2), newOp2(id1, "bar"), }, CreateTime: 123, @@ -86,3 +93,12 @@ func TestOperationPackSignedReadWrite(t *testing.T) { } require.Equal(t, opp.Id(), opp3.Id()) } + +func randomData() []byte { + var letterRunes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + b := make([]byte, 32) + for i := range b { + b[i] = letterRunes[rand.Intn(len(letterRunes))] + } + return b +} -- cgit From 5215634d0dca37c545904fbc8a12ddd9b8eb72df Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sun, 21 Mar 2021 18:22:04 +0100 Subject: entity: add support for storing files --- entity/dag/common_test.go | 6 ++-- entity/dag/operation_pack_test.go | 71 ++++++++++++++++++++++++++++++++++----- 2 files changed, 66 insertions(+), 11 deletions(-) (limited to 'entity') diff --git a/entity/dag/common_test.go b/entity/dag/common_test.go index 1898451d..25289b76 100644 --- a/entity/dag/common_test.go +++ b/entity/dag/common_test.go @@ -21,9 +21,9 @@ import ( type op1 struct { author identity.Interface - OperationType int `json:"type"` - Field1 string `json:"field_1"` - Files []repository.Hash + OperationType int `json:"type"` + Field1 string `json:"field_1"` + Files []repository.Hash `json:"files"` } func newOp1(author identity.Interface, field1 string, files ...repository.Hash) *op1 { diff --git a/entity/dag/operation_pack_test.go b/entity/dag/operation_pack_test.go index 0fe98dc7..73960800 100644 --- a/entity/dag/operation_pack_test.go +++ b/entity/dag/operation_pack_test.go @@ -7,21 +7,16 @@ import ( "github.com/stretchr/testify/require" "github.com/MichaelMure/git-bug/identity" + "github.com/MichaelMure/git-bug/repository" ) func TestOperationPackReadWrite(t *testing.T) { repo, id1, _, resolver, def := makeTestContext() - blobHash1, err := repo.StoreData(randomData()) - require.NoError(t, err) - - blobHash2, err := repo.StoreData(randomData()) - require.NoError(t, err) - opp := &operationPack{ Author: id1, Operations: []Operation{ - newOp1(id1, "foo", blobHash1, blobHash2), + newOp1(id1, "foo"), newOp2(id1, "bar"), }, CreateTime: 123, @@ -43,7 +38,7 @@ func TestOperationPackReadWrite(t *testing.T) { opp3 := &operationPack{ Author: id1, Operations: []Operation{ - newOp1(id1, "foo", blobHash1, blobHash2), + newOp1(id1, "foo"), newOp2(id1, "bar"), }, CreateTime: 123, @@ -94,6 +89,66 @@ func TestOperationPackSignedReadWrite(t *testing.T) { require.Equal(t, opp.Id(), opp3.Id()) } +func TestOperationPackFiles(t *testing.T) { + repo, id1, _, resolver, def := makeTestContext() + + blobHash1, err := repo.StoreData(randomData()) + require.NoError(t, err) + + blobHash2, err := repo.StoreData(randomData()) + 
require.NoError(t, err) + + opp := &operationPack{ + Author: id1, + Operations: []Operation{ + newOp1(id1, "foo", blobHash1, blobHash2), + newOp1(id1, "foo", blobHash2), + }, + CreateTime: 123, + EditTime: 456, + } + + commitHash, err := opp.Write(def, repo) + require.NoError(t, err) + + commit, err := repo.ReadCommit(commitHash) + require.NoError(t, err) + + opp2, err := readOperationPack(def, repo, resolver, commit) + require.NoError(t, err) + + require.Equal(t, opp, opp2) + + require.ElementsMatch(t, opp2.Operations[0].(OperationWithFiles).GetFiles(), []repository.Hash{ + blobHash1, + blobHash2, + }) + require.ElementsMatch(t, opp2.Operations[1].(OperationWithFiles).GetFiles(), []repository.Hash{ + blobHash2, + }) + + tree, err := repo.ReadTree(commit.TreeHash) + require.NoError(t, err) + + extraTreeHash, ok := repository.SearchTreeEntry(tree, extraEntryName) + require.True(t, ok) + + extraTree, err := repo.ReadTree(extraTreeHash.Hash) + require.NoError(t, err) + require.ElementsMatch(t, extraTree, []repository.TreeEntry{ + { + ObjectType: repository.Blob, + Hash: blobHash1, + Name: "file0", + }, + { + ObjectType: repository.Blob, + Hash: blobHash2, + Name: "file1", + }, + }) +} + func randomData() []byte { var letterRunes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" b := make([]byte, 32) -- cgit From cb9b06551ddc1fae33046733f79ede20f8d09f9a Mon Sep 17 00:00:00 2001 From: Michael Muré Date: Sun, 4 Apr 2021 11:23:04 +0200 Subject: entity: more comments --- entity/dag/operation.go | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'entity') diff --git a/entity/dag/operation.go b/entity/dag/operation.go index 1bfb3d3d..a320859f 100644 --- a/entity/dag/operation.go +++ b/entity/dag/operation.go @@ -40,5 +40,9 @@ type OperationWithFiles interface { Operation // GetFiles return the files needed by this operation + // This implies that the Operation maintain and store internally the references to those files. This is how + // this information is read later, when loading from storage. + // For example, an operation that has a text value referencing some files would maintain a mapping (text ref --> + // hash). GetFiles() []repository.Hash } -- cgit
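
Note: to make the GetFiles() contract documented above concrete, here is a sketch of an operation that owns its file references. The operation type is hypothetical and a local Hash alias stands in for repository.Hash; only the shape of the contract is taken from the comment above.

    package main

    import "fmt"

    // Hash stands in for repository.Hash in this sketch.
    type Hash string

    // addCommentOp is a hypothetical operation whose text refers to
    // attached files. The operation itself records the blob hashes, so
    // that when the operationPack is written, GetFiles() can hand them
    // to the extra tree and keep the blobs reachable.
    type addCommentOp struct {
    	Text  string `json:"text"`
    	Files []Hash `json:"files"`
    }

    // GetFiles satisfies the OperationWithFiles contract: the operation
    // maintains the text -> hash mapping internally and reports it here.
    func (o *addCommentOp) GetFiles() []Hash { return o.Files }

    func main() {
    	op := &addCommentOp{Text: "see attached screenshot", Files: []Hash{"deadbeef"}}
    	fmt.Println(op.GetFiles()) // [deadbeef]
    }
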