diff options
author | Máximo Cuadros <mcuadros@gmail.com> | 2017-02-21 16:04:37 +0100 |
---|---|---|
committer | Máximo Cuadros <mcuadros@gmail.com> | 2017-02-21 16:04:37 +0100 |
commit | 790fbdaddc3c9a434f2ad97d9eb56db9b6c99495 (patch) | |
tree | 3ddb2f430ee3c958f0650cb7db6a9df44b1e1361 /utils/merkletrie | |
parent | ed288b30de1ac3dcb3ce675c4b9af89eb4e6fcba (diff) | |
parent | 0b8b8da617d5a077f282e57d0300dc106a604236 (diff) | |
download | go-git-790fbdaddc3c9a434f2ad97d9eb56db9b6c99495.tar.gz |
rebase master
Diffstat (limited to 'utils/merkletrie')
-rw-r--r-- | utils/merkletrie/change.go | 149 | ||||
-rw-r--r-- | utils/merkletrie/change_test.go | 76 | ||||
-rw-r--r-- | utils/merkletrie/difftree.go | 404 | ||||
-rw-r--r-- | utils/merkletrie/difftree_test.go | 429 | ||||
-rw-r--r-- | utils/merkletrie/doubleiter.go | 187 |
5 files changed, 1245 insertions, 0 deletions
diff --git a/utils/merkletrie/change.go b/utils/merkletrie/change.go new file mode 100644 index 0000000..e6c99f6 --- /dev/null +++ b/utils/merkletrie/change.go @@ -0,0 +1,149 @@ +package merkletrie + +import ( + "fmt" + "io" + + "srcd.works/go-git.v4/utils/merkletrie/noder" +) + +// Action values represent the kind of things a Change can represent: +// insertion, deletions or modifications of files. +type Action int + +// The set of possible actions in a change. +const ( + _ Action = iota + Insert + Delete + Modify +) + +// String returns the action as a human readable text. +func (a Action) String() string { + switch a { + case Insert: + return "Insert" + case Delete: + return "Delete" + case Modify: + return "Modify" + default: + panic(fmt.Sprintf("unsupported action: %d", a)) + } +} + +// A Change value represent how a noder has change between to merkletries. +type Change struct { + // The noder before the change or nil if it was inserted. + From noder.Path + // The noder after the change or nil if it was deleted. + To noder.Path +} + +// Action is convenience method that returns what Action c represents. +func (c *Change) Action() (Action, error) { + if c.From == nil && c.To == nil { + return Action(0), fmt.Errorf("malformed change: nil from and to") + } + if c.From == nil { + return Insert, nil + } + if c.To == nil { + return Delete, nil + } + + return Modify, nil +} + +// NewInsert returns a new Change representing the insertion of n. +func NewInsert(n noder.Path) Change { return Change{To: n} } + +// NewDelete returns a new Change representing the deletion of n. +func NewDelete(n noder.Path) Change { return Change{From: n} } + +// NewModify returns a new Change representing that a has been modified and +// it is now b. +func NewModify(a, b noder.Path) Change { + return Change{ + From: a, + To: b, + } +} + +// String returns a single change in human readable form, using the +// format: '<' + action + space + path + '>'. The contents of the file +// before or after the change are not included in this format. +// +// Example: inserting a file at the path a/b/c.txt will return "<Insert +// a/b/c.txt>". +func (c Change) String() string { + action, err := c.Action() + if err != nil { + panic(err) + } + + var path string + if action == Delete { + path = c.From.String() + } else { + path = c.To.String() + } + + return fmt.Sprintf("<%s %s>", action, path) +} + +// Changes is a list of changes between to merkletries. +type Changes []Change + +// NewChanges returns an empty list of changes. +func NewChanges() Changes { + return Changes{} +} + +// Add adds the change c to the list of changes. +func (l *Changes) Add(c Change) { + *l = append(*l, c) +} + +// AddRecursiveInsert adds the required changes to insert all the +// file-like noders found in root, recursively. +func (l *Changes) AddRecursiveInsert(root noder.Path) error { + return l.addRecursive(root, NewInsert) +} + +// AddRecursiveDelete adds the required changes to delete all the +// file-like noders found in root, recursively. +func (l *Changes) AddRecursiveDelete(root noder.Path) error { + return l.addRecursive(root, NewDelete) +} + +type noderToChangeFn func(noder.Path) Change // NewInsert or NewDelete + +func (l *Changes) addRecursive(root noder.Path, ctor noderToChangeFn) error { + if !root.IsDir() { + l.Add(ctor(root)) + return nil + } + + i, err := NewIterFromPath(root) + if err != nil { + return err + } + + var current noder.Path + for { + if current, err = i.Step(); err != nil { + if err == io.EOF { + break + } + return err + } + if current.IsDir() { + continue + } + l.Add(ctor(current)) + } + + return nil +} diff --git a/utils/merkletrie/change_test.go b/utils/merkletrie/change_test.go new file mode 100644 index 0000000..4f908ce --- /dev/null +++ b/utils/merkletrie/change_test.go @@ -0,0 +1,76 @@ +package merkletrie_test + +import ( + "srcd.works/go-git.v4/utils/merkletrie" + "srcd.works/go-git.v4/utils/merkletrie/internal/fsnoder" + "srcd.works/go-git.v4/utils/merkletrie/noder" + + . "gopkg.in/check.v1" +) + +type ChangeSuite struct{} + +var _ = Suite(&ChangeSuite{}) + +func (s *ChangeSuite) TestActionString(c *C) { + action := merkletrie.Insert + c.Assert(action.String(), Equals, "Insert") + + action = merkletrie.Delete + c.Assert(action.String(), Equals, "Delete") + + action = merkletrie.Modify + c.Assert(action.String(), Equals, "Modify") +} + +func (s *ChangeSuite) TestUnsupportedAction(c *C) { + a := merkletrie.Action(42) + c.Assert(a.String, PanicMatches, "unsupported action.*") +} + +func (s ChangeSuite) TestNewInsert(c *C) { + tree, err := fsnoder.New("(a(b(z<>)))") + c.Assert(err, IsNil) + path := find(c, tree, "z") + change := merkletrie.NewInsert(path) + c.Assert(change.String(), Equals, "<Insert a/b/z>") + + shortPath := noder.Path([]noder.Noder{path.Last()}) + change = merkletrie.NewInsert(shortPath) + c.Assert(change.String(), Equals, "<Insert z>") +} + +func (s ChangeSuite) TestNewDelete(c *C) { + tree, err := fsnoder.New("(a(b(z<>)))") + c.Assert(err, IsNil) + path := find(c, tree, "z") + change := merkletrie.NewDelete(path) + c.Assert(change.String(), Equals, "<Delete a/b/z>") + + shortPath := noder.Path([]noder.Noder{path.Last()}) + change = merkletrie.NewDelete(shortPath) + c.Assert(change.String(), Equals, "<Delete z>") +} + +func (s ChangeSuite) TestNewModify(c *C) { + tree1, err := fsnoder.New("(a(b(z<>)))") + c.Assert(err, IsNil) + path1 := find(c, tree1, "z") + + tree2, err := fsnoder.New("(a(b(z<1>)))") + c.Assert(err, IsNil) + path2 := find(c, tree2, "z") + + change := merkletrie.NewModify(path1, path2) + c.Assert(change.String(), Equals, "<Modify a/b/z>") + + shortPath1 := noder.Path([]noder.Noder{path1.Last()}) + shortPath2 := noder.Path([]noder.Noder{path2.Last()}) + change = merkletrie.NewModify(shortPath1, shortPath2) + c.Assert(change.String(), Equals, "<Modify z>") +} + +func (s ChangeSuite) TestMalformedChange(c *C) { + change := merkletrie.Change{} + c.Assert(change.String, PanicMatches, "malformed change.*") +} diff --git a/utils/merkletrie/difftree.go b/utils/merkletrie/difftree.go new file mode 100644 index 0000000..1a626cd --- /dev/null +++ b/utils/merkletrie/difftree.go @@ -0,0 +1,404 @@ +package merkletrie + +// The focus of this difftree implementation is to save time by +// skipping whole directories if their hash is the same in both +// trees. +// +// The diff algorithm implemented here is based on the doubleiter +// type defined in this same package; we will iterate over both +// trees at the same time, while comparing the current noders in +// each iterator. Depending on how they differ we will output the +// corresponding chages and move the iterators further over both +// trees. +// +// The table bellow show all the possible comparison results, along +// with what changes should we produce and how to advance the +// iterators. +// +// The table is implemented by the switches in this function, +// diffTwoNodes, diffTwoNodesSameName and diffTwoDirs. +// +// Many Bothans died to bring us this information, make sure you +// understand the table before modifying this code. + +// # Cases +// +// When comparing noders in both trees you will found yourself in +// one of 169 possible cases, but if we ignore moves, we can +// simplify a lot the search space into the following table: +// +// - "-": nothing, no file or directory +// - a<>: an empty file named "a". +// - a<1>: a file named "a", with "1" as its contents. +// - a<2>: a file named "a", with "2" as its contents. +// - a(): an empty dir named "a". +// - a(...): a dir named "a", with some files and/or dirs inside (possibly +// empty). +// - a(;;;): a dir named "a", with some other files and/or dirs inside +// (possibly empty), which different from the ones in "a(...)". +// +// \ to - a<> a<1> a<2> a() a(...) a(;;;) +// from \ +// - 00 01 02 03 04 05 06 +// a<> 10 11 12 13 14 15 16 +// a<1> 20 21 22 23 24 25 26 +// a<2> 30 31 32 33 34 35 36 +// a() 40 41 42 43 44 45 46 +// a(...) 50 51 52 53 54 55 56 +// a(;;;) 60 61 62 63 64 65 66 +// +// Every (from, to) combination in the table is a special case, but +// some of them can be merged into some more general cases, for +// instance 11 and 22 can be merged into the general case: both +// noders are equal. +// +// Here is a full list of all the cases that are similar and how to +// merge them together into more general cases. Each general case +// is labeled wiht an uppercase letter for further reference, and it +// is followed by the pseudocode of the checks you have to perfrom +// on both noders to see if you are in such a case, the actions to +// perform (i.e. what changes to output) and how to advance the +// iterators of each tree to continue the comparison process. +// +// ## A. Impossible: 00 +// +// ## B. Same thing on both sides: 11, 22, 33, 44, 55, 66 +// - check: `SameName() && SameHash()` +// - action: do nothing. +// - advance: `FromNext(); ToNext()` +// +// ### C. To was created: 01, 02, 03, 04, 05, 06 +// - check: `DifferentName() && ToBeforeFrom()` +// - action: inserRecursively(to) +// - advance: `ToNext()` +// +// ### D. From was deleted: 10, 20, 30, 40, 50, 60 +// - check: `DifferentName() && FromBeforeTo()` +// - action: `DeleteRecursively(from)` +// - advance: `FromNext()` +// +// ### E. Empty file to file with contents: 12, 13 +// - check: `SameName() && DifferentHash() && FromIsFile() && +// ToIsFile() && FromIsEmpty()` +// - action: `modifyFile(from, to)` +// - advance: `FromNext()` or `FromStep()` +// +// ### E'. file with contents to empty file: 21, 31 +// - check: `SameName() && DifferentHash() && FromIsFile() && +// ToIsFile() && ToIsEmpty()` +// - action: `modifyFile(from, to)` +// - advance: `FromNext()` or `FromStep()` +// +// ### F. empty file to empty dir with the same name: 14 +// - check: `SameName() && FromIsFile() && FromIsEmpty() && +// ToIsDir() && ToIsEmpty()` +// - action: `DeleteFile(from); InsertEmptyDir(to)` +// - advance: `FromNext(); ToNext()` +// +// ### F'. empty dir to empty file of the same name: 41 +// - check: `SameName() && FromIsDir() && FromIsEmpty && +// ToIsFile() && ToIsEmpty()` +// - action: `DeleteEmptyDir(from); InsertFile(to)` +// - advance: `FromNext(); ToNext()` or step for any of them. +// +// ### G. empty file to non-empty dir of the same name: 15, 16 +// - check: `SameName() && FromIsFile() && ToIsDir() && +// FromIsEmpty() && ToIsNotEmpty()` +// - action: `DeleteFile(from); InsertDirRecursively(to)` +// - advance: `FromNext(); ToNext()` +// +// ### G'. non-empty dir to empty file of the same name: 51, 61 +// - check: `SameName() && FromIsDir() && FromIsNotEmpty() && +// ToIsFile() && FromIsEmpty()` +// - action: `DeleteDirRecursively(from); InsertFile(to)` +// - advance: `FromNext(); ToNext()` +// +// ### H. modify file contents: 23, 32 +// - check: `SameName() && FromIsFile() && ToIsFile() && +// FromIsNotEmpty() && ToIsNotEmpty()` +// - action: `ModifyFile(from, to)` +// - advance: `FromNext(); ToNext()` +// +// ### I. file with contents to empty dir: 24, 34 +// - check: `SameName() && DifferentHash() && FromIsFile() && +// FromIsNotEmpty() && ToIsDir() && ToIsEmpty()` +// - action: `DeleteFile(from); InsertEmptyDir(to)` +// - advance: `FromNext(); ToNext()` +// +// ### I'. empty dir to file with contents: 42, 43 +// - check: `SameName() && DifferentHash() && FromIsDir() && +// FromIsEmpty() && ToIsFile() && ToIsEmpty()` +// - action: `DeleteDir(from); InsertFile(to)` +// - advance: `FromNext(); ToNext()` +// +// ### J. file with contents to dir with contetns: 25, 26, 35, 36 +// - check: `SameName() && DifferentHash() && FromIsFile() && +// FromIsNotEmpty() && ToIsDir() && ToIsNotEmpty()` +// - action: `DeleteFile(from); InsertDirRecursively(to)` +// - advance: `FromNext(); ToNext()` +// +// ### J'. dir with contetns to file with contents: 52, 62, 53, 63 +// - check: `SameName() && DifferentHash() && FromIsDir() && +// FromIsNotEmpty() && ToIsFile() && ToIsNotEmpty()` +// - action: `DeleteDirRecursively(from); InsertFile(to)` +// - advance: `FromNext(); ToNext()` +// +// ### K. empty dir to dir with contents: 45, 46 +// - check: `SameName() && DifferentHash() && FromIsDir() && +// FromIsEmpty() && ToIsDir() && ToIsNotEmpty()` +// - action: `InsertChildrenRecursively(to)` +// - advance: `FromNext(); ToNext()` +// +// ### K'. dir with contents to empty dir: 54, 64 +// - check: `SameName() && DifferentHash() && FromIsDir() && +// FromIsEmpty() && ToIsDir() && ToIsNotEmpty()` +// - action: `DeleteChildrenRecursively(from)` +// - advance: `FromNext(); ToNext()` +// +// ### L. dir with contents to dir with different contents: 56, 65 +// - check: `SameName() && DifferentHash() && FromIsDir() && +// FromIsNotEmpty() && ToIsDir() && ToIsNotEmpty()` +// - action: nothing +// - advance: `FromStep(); ToStep()` +// +// + +// All these cases can be further simplified by a truth table +// reduction process, in which we gather similar checks together to +// make the final code easier to read and understand. +// +// The first 6 columns are the outputs of the checks to perform on +// both noders. I have labeled them 1 to 6, this is what they mean: +// +// 1: SameName() +// 2: SameHash() +// 3: FromIsDir() +// 4: ToIsDir() +// 5: FromIsEmpty() +// 6: ToIsEmpty() +// +// The from and to columns are a fsnoder example of the elements +// that you will find on each tree under the specified comparison +// results (columns 1 to 6). +// +// The type column identifies the case we are into, from the list above. +// +// The type' column identifies the new set of reduced cases, using +// lowercase letters, and they are explained after the table. +// +// The last column is the set of actions and advances for each case. +// +// "---" means impossible except in case of hash collision. +// +// advance meaning: +// - NN: from.Next(); to.Next() +// - SS: from.Step(); to.Step() +// +// 1 2 3 4 5 6 | from | to |type|type'|action ; advance +// ------------+--------+--------+----+------------------------------------ +// 0 0 0 0 0 0 | | | | | if !SameName() { +// . | | | | | if FromBeforeTo() { +// . | | | D | d | delete(from); from.Next() +// . | | | | | } else { +// . | | | C | c | insert(to); to.Next() +// . | | | | | } +// 0 1 1 1 1 1 | | | | | } +// 1 0 0 0 0 0 | a<1> | a<2> | H | e | modify(from, to); NN +// 1 0 0 0 0 1 | a<1> | a<> | E' | e | modify(from, to); NN +// 1 0 0 0 1 0 | a<> | a<1> | E | e | modify(from, to); NN +// 1 0 0 0 1 1 | ---- | ---- | | e | +// 1 0 0 1 0 0 | a<1> | a(...) | J | f | delete(from); insert(to); NN +// 1 0 0 1 0 1 | a<1> | a() | I | f | delete(from); insert(to); NN +// 1 0 0 1 1 0 | a<> | a(...) | G | f | delete(from); insert(to); NN +// 1 0 0 1 1 1 | a<> | a() | F | f | delete(from); insert(to); NN +// 1 0 1 0 0 0 | a(...) | a<1> | J' | f | delete(from); insert(to); NN +// 1 0 1 0 0 1 | a(...) | a<> | G' | f | delete(from); insert(to); NN +// 1 0 1 0 1 0 | a() | a<1> | I' | f | delete(from); insert(to); NN +// 1 0 1 0 1 1 | a() | a<> | F' | f | delete(from); insert(to); NN +// 1 0 1 1 0 0 | a(...) | a(;;;) | L | g | nothing; SS +// 1 0 1 1 0 1 | a(...) | a() | K' | h | deleteChidren(from); NN +// 1 0 1 1 1 0 | a() | a(...) | K | i | insertChildren(to); NN +// 1 0 1 1 1 1 | ---- | ---- | | | +// 1 1 0 0 0 0 | a<1> | a<1> | B | b | nothing; NN +// 1 1 0 0 0 1 | ---- | ---- | | b | +// 1 1 0 0 1 0 | ---- | ---- | | b | +// 1 1 0 0 1 1 | a<> | a<> | B | b | nothing; NN +// 1 1 0 1 0 0 | ---- | ---- | | b | +// 1 1 0 1 0 1 | ---- | ---- | | b | +// 1 1 0 1 1 0 | ---- | ---- | | b | +// 1 1 0 1 1 1 | ---- | ---- | | b | +// 1 1 1 0 0 0 | ---- | ---- | | b | +// 1 1 1 0 0 1 | ---- | ---- | | b | +// 1 1 1 0 1 0 | ---- | ---- | | b | +// 1 1 1 0 1 1 | ---- | ---- | | b | +// 1 1 1 1 0 0 | a(...) | a(...) | B | b | nothing; NN +// 1 1 1 1 0 1 | ---- | ---- | | b | +// 1 1 1 1 1 0 | ---- | ---- | | b | +// 1 1 1 1 1 1 | a() | a() | B | b | nothing; NN +// +// c and d: +// if !SameName() +// d if FromBeforeTo() +// c else +// b: SameName) && sameHash() +// e: SameName() && !sameHash() && BothAreFiles() +// f: SameName() && !sameHash() && FileAndDir() +// g: SameName() && !sameHash() && BothAreDirs() && NoneIsEmpty +// i: SameName() && !sameHash() && BothAreDirs() && FromIsEmpty +// h: else of i + +import ( + "fmt" + "strings" + + "srcd.works/go-git.v4/utils/merkletrie/noder" +) + +// DiffTree calculates the list of changes between two merkletries. It +// uses the provided hashEqual callback to compare noders. +func DiffTree(fromTree, toTree noder.Noder, + hashEqual noder.Equal) (Changes, error) { + ret := NewChanges() + + ii, err := newDoubleIter(fromTree, toTree, hashEqual) + if err != nil { + return nil, err + } + + for { + from := ii.from.current + to := ii.to.current + + switch r := ii.remaining(); r { + case noMoreNoders: + return ret, nil + case onlyFromRemains: + if err = ret.AddRecursiveDelete(from); err != nil { + return nil, err + } + if err = ii.nextFrom(); err != nil { + return nil, err + } + case onlyToRemains: + if err = ret.AddRecursiveInsert(to); err != nil { + return nil, err + } + if err = ii.nextTo(); err != nil { + return nil, err + } + case bothHaveNodes: + if err = diffNodes(&ret, ii); err != nil { + return nil, err + } + default: + panic(fmt.Sprintf("unknown remaining value: %d", r)) + } + } +} + +func diffNodes(changes *Changes, ii *doubleIter) error { + from := ii.from.current + to := ii.to.current + var err error + + // compare their full paths as strings + switch strings.Compare(from.String(), to.String()) { + case -1: + if err = changes.AddRecursiveDelete(from); err != nil { + return err + } + if err = ii.nextFrom(); err != nil { + return err + } + case 1: + if err = changes.AddRecursiveInsert(to); err != nil { + return err + } + if err = ii.nextTo(); err != nil { + return err + } + default: + if err := diffNodesSameName(changes, ii); err != nil { + return err + } + } + + return nil +} + +func diffNodesSameName(changes *Changes, ii *doubleIter) error { + from := ii.from.current + to := ii.to.current + + status, err := ii.compare() + if err != nil { + return err + } + + switch { + case status.sameHash: + // do nothing + if err = ii.nextBoth(); err != nil { + return err + } + case status.bothAreFiles: + changes.Add(NewModify(from, to)) + if err = ii.nextBoth(); err != nil { + return err + } + case status.fileAndDir: + if err = changes.AddRecursiveDelete(from); err != nil { + return err + } + if err = changes.AddRecursiveInsert(to); err != nil { + return err + } + if err = ii.nextBoth(); err != nil { + return err + } + case status.bothAreDirs: + if err = diffDirs(changes, ii); err != nil { + return err + } + default: + return fmt.Errorf("bad status from double iterator") + } + + return nil +} + +func diffDirs(changes *Changes, ii *doubleIter) error { + from := ii.from.current + to := ii.to.current + + status, err := ii.compare() + if err != nil { + return err + } + + switch { + case status.fromIsEmptyDir: + if err = changes.AddRecursiveInsert(to); err != nil { + return err + } + if err = ii.nextBoth(); err != nil { + return err + } + case status.toIsEmptyDir: + if err = changes.AddRecursiveDelete(from); err != nil { + return err + } + if err = ii.nextBoth(); err != nil { + return err + } + case !status.fromIsEmptyDir && !status.toIsEmptyDir: + // do nothing + if err = ii.stepBoth(); err != nil { + return err + } + default: + return fmt.Errorf("both dirs are empty but has different hash") + } + + return nil +} diff --git a/utils/merkletrie/difftree_test.go b/utils/merkletrie/difftree_test.go new file mode 100644 index 0000000..6a193af --- /dev/null +++ b/utils/merkletrie/difftree_test.go @@ -0,0 +1,429 @@ +package merkletrie_test + +import ( + "bytes" + "fmt" + "reflect" + "sort" + "strings" + "testing" + "unicode" + + "srcd.works/go-git.v4/utils/merkletrie" + "srcd.works/go-git.v4/utils/merkletrie/internal/fsnoder" + + . "gopkg.in/check.v1" +) + +func Test(t *testing.T) { TestingT(t) } + +type DiffTreeSuite struct{} + +var _ = Suite(&DiffTreeSuite{}) + +type diffTreeTest struct { + from string + to string + expected string +} + +func (t diffTreeTest) innerRun(c *C, context string, reverse bool) { + comment := Commentf("\n%s", context) + if reverse { + comment = Commentf("%s [REVERSED]", comment.CheckCommentString()) + } + + a, err := fsnoder.New(t.from) + c.Assert(err, IsNil, comment) + comment = Commentf("%s\n\t from = %s", comment.CheckCommentString(), a) + + b, err := fsnoder.New(t.to) + c.Assert(err, IsNil, comment) + comment = Commentf("%s\n\t to = %s", comment.CheckCommentString(), b) + + expected, err := newChangesFromString(t.expected) + c.Assert(err, IsNil, comment) + + if reverse { + a, b = b, a + expected = expected.reverse() + } + comment = Commentf("%s\n\texpected = %s", comment.CheckCommentString(), expected) + + results, err := merkletrie.DiffTree(a, b, fsnoder.HashEqual) + c.Assert(err, IsNil, comment) + + obtained, err := newChanges(results) + c.Assert(err, IsNil, comment) + + comment = Commentf("%s\n\tobtained = %s", comment.CheckCommentString(), obtained) + + c.Assert(obtained, changesEquals, expected, comment) +} + +func (t diffTreeTest) run(c *C, context string) { + t.innerRun(c, context, false) + t.innerRun(c, context, true) +} + +type change struct { + merkletrie.Action + path string +} + +func (c change) String() string { + return fmt.Sprintf("<%s %s>", c.Action, c.path) +} + +func (c change) reverse() change { + ret := change{ + path: c.path, + } + + switch c.Action { + case merkletrie.Insert: + ret.Action = merkletrie.Delete + case merkletrie.Delete: + ret.Action = merkletrie.Insert + case merkletrie.Modify: + ret.Action = merkletrie.Modify + default: + panic(fmt.Sprintf("unknown action type: %d", c.Action)) + } + + return ret +} + +type changes []change + +func newChanges(original merkletrie.Changes) (changes, error) { + ret := make(changes, len(original)) + for i, c := range original { + action, err := c.Action() + if err != nil { + return nil, err + } + switch action { + case merkletrie.Insert: + ret[i] = change{ + Action: merkletrie.Insert, + path: c.To.String(), + } + case merkletrie.Delete: + ret[i] = change{ + Action: merkletrie.Delete, + path: c.From.String(), + } + case merkletrie.Modify: + ret[i] = change{ + Action: merkletrie.Modify, + path: c.From.String(), + } + default: + panic(fmt.Sprintf("unsupported action %d", c.Action)) + } + } + + return ret, nil +} + +func newChangesFromString(s string) (changes, error) { + ret := make([]change, 0) + + s = strings.TrimSpace(s) + s = removeDuplicatedSpace(s) + s = turnSpaceIntoLiteralSpace(s) + + if s == "" { + return ret, nil + } + + for _, chunk := range strings.Split(s, " ") { + change := change{ + path: string(chunk[1:]), + } + + switch chunk[0] { + case '+': + change.Action = merkletrie.Insert + case '-': + change.Action = merkletrie.Delete + case '*': + change.Action = merkletrie.Modify + default: + panic(fmt.Sprintf("unsupported action descriptor %q", chunk[0])) + } + + ret = append(ret, change) + } + + return ret, nil +} + +func removeDuplicatedSpace(s string) string { + var buf bytes.Buffer + + var lastWasSpace, currentIsSpace bool + for _, r := range s { + currentIsSpace = unicode.IsSpace(r) + + if lastWasSpace && currentIsSpace { + continue + } + lastWasSpace = currentIsSpace + + buf.WriteRune(r) + } + + return buf.String() +} + +func turnSpaceIntoLiteralSpace(s string) string { + return strings.Map( + func(r rune) rune { + if unicode.IsSpace(r) { + return ' ' + } + return r + }, s) +} + +func (cc changes) Len() int { return len(cc) } +func (cc changes) Swap(i, j int) { cc[i], cc[j] = cc[j], cc[i] } +func (cc changes) Less(i, j int) bool { return strings.Compare(cc[i].String(), cc[j].String()) < 0 } + +func (cc changes) equals(other changes) bool { + sort.Sort(cc) + sort.Sort(other) + return reflect.DeepEqual(cc, other) +} + +func (cc changes) String() string { + var buf bytes.Buffer + fmt.Fprintf(&buf, "len(%d) [", len(cc)) + sep := "" + for _, c := range cc { + fmt.Fprintf(&buf, "%s%s", sep, c) + sep = ", " + } + buf.WriteByte(']') + return buf.String() +} + +func (cc changes) reverse() changes { + ret := make(changes, len(cc)) + for i, c := range cc { + ret[i] = c.reverse() + } + + return ret +} + +type changesEqualsChecker struct { + *CheckerInfo +} + +var changesEquals Checker = &changesEqualsChecker{ + &CheckerInfo{Name: "changesEquals", Params: []string{"obtained", "expected"}}, +} + +func (checker *changesEqualsChecker) Check(params []interface{}, names []string) (result bool, error string) { + a, ok := params[0].(changes) + if !ok { + return false, "first parameter must be a changes" + } + b, ok := params[1].(changes) + if !ok { + return false, "second parameter must be a changes" + } + + return a.equals(b), "" +} + +func do(c *C, list []diffTreeTest) { + for i, t := range list { + t.run(c, fmt.Sprintf("test #%d:", i)) + } +} + +func (s *DiffTreeSuite) TestEmptyVsEmpty(c *C) { + do(c, []diffTreeTest{ + {"()", "()", ""}, + {"A()", "A()", ""}, + {"A()", "()", ""}, + {"A()", "B()", ""}, + }) +} + +func (s *DiffTreeSuite) TestBasicCases(c *C) { + do(c, []diffTreeTest{ + {"()", "()", ""}, + {"()", "(a<>)", "+a"}, + {"()", "(a<1>)", "+a"}, + {"()", "(a())", ""}, + {"()", "(a(b()))", ""}, + {"()", "(a(b<>))", "+a/b"}, + {"()", "(a(b<1>))", "+a/b"}, + {"(a<>)", "(a<>)", ""}, + {"(a<>)", "(a<1>)", "*a"}, + {"(a<>)", "(a())", "-a"}, + {"(a<>)", "(a(b()))", "-a"}, + {"(a<>)", "(a(b<>))", "-a +a/b"}, + {"(a<>)", "(a(b<1>))", "-a +a/b"}, + {"(a<>)", "(c())", "-a"}, + {"(a<>)", "(c(b()))", "-a"}, + {"(a<>)", "(c(b<>))", "-a +c/b"}, + {"(a<>)", "(c(b<1>))", "-a +c/b"}, + {"(a<>)", "(c(a()))", "-a"}, + {"(a<>)", "(c(a<>))", "-a +c/a"}, + {"(a<>)", "(c(a<1>))", "-a +c/a"}, + {"(a<1>)", "(a<1>)", ""}, + {"(a<1>)", "(a<2>)", "*a"}, + {"(a<1>)", "(b<1>)", "-a +b"}, + {"(a<1>)", "(b<2>)", "-a +b"}, + {"(a<1>)", "(a())", "-a"}, + {"(a<1>)", "(a(b()))", "-a"}, + {"(a<1>)", "(a(b<>))", "-a +a/b"}, + {"(a<1>)", "(a(b<1>))", "-a +a/b"}, + {"(a<1>)", "(a(b<2>))", "-a +a/b"}, + {"(a<1>)", "(c())", "-a"}, + {"(a<1>)", "(c(b()))", "-a"}, + {"(a<1>)", "(c(b<>))", "-a +c/b"}, + {"(a<1>)", "(c(b<1>))", "-a +c/b"}, + {"(a<1>)", "(c(b<2>))", "-a +c/b"}, + {"(a<1>)", "(c())", "-a"}, + {"(a<1>)", "(c(a()))", "-a"}, + {"(a<1>)", "(c(a<>))", "-a +c/a"}, + {"(a<1>)", "(c(a<1>))", "-a +c/a"}, + {"(a<1>)", "(c(a<2>))", "-a +c/a"}, + {"(a())", "(a())", ""}, + {"(a())", "(b())", ""}, + {"(a())", "(a(b()))", ""}, + {"(a())", "(b(a()))", ""}, + {"(a())", "(a(b<>))", "+a/b"}, + {"(a())", "(a(b<1>))", "+a/b"}, + {"(a())", "(b(a<>))", "+b/a"}, + {"(a())", "(b(a<1>))", "+b/a"}, + }) +} + +func (s *DiffTreeSuite) TestHorizontals(c *C) { + do(c, []diffTreeTest{ + {"()", "(a<> b<>)", "+a +b"}, + {"()", "(a<> b<1>)", "+a +b"}, + {"()", "(a<> b())", "+a"}, + {"()", "(a() b<>)", "+b"}, + {"()", "(a<1> b<>)", "+a +b"}, + {"()", "(a<1> b<1>)", "+a +b"}, + {"()", "(a<1> b<2>)", "+a +b"}, + {"()", "(a<1> b())", "+a"}, + {"()", "(a() b<1>)", "+b"}, + {"()", "(a() b())", ""}, + {"()", "(a<> b<> c<> d<>)", "+a +b +c +d"}, + {"()", "(a<> b<1> c() d<> e<2> f())", "+a +b +d +e"}, + }) +} + +func (s *DiffTreeSuite) TestVerticals(c *C) { + do(c, []diffTreeTest{ + {"()", "(z<>)", "+z"}, + {"()", "(a(z<>))", "+a/z"}, + {"()", "(a(b(z<>)))", "+a/b/z"}, + {"()", "(a(b(c(z<>))))", "+a/b/c/z"}, + {"()", "(a(b(c(d(z<>)))))", "+a/b/c/d/z"}, + {"()", "(a(b(c(d(z<1>)))))", "+a/b/c/d/z"}, + }) +} + +func (s *DiffTreeSuite) TestSingleInserts(c *C) { + do(c, []diffTreeTest{ + {"()", "(z<>)", "+z"}, + {"(a())", "(a(z<>))", "+a/z"}, + {"(a())", "(a(b(z<>)))", "+a/b/z"}, + {"(a(b(c())))", "(a(b(c(z<>))))", "+a/b/c/z"}, + {"(a<> b<> c<>)", "(a<> b<> c<> z<>)", "+z"}, + {"(a(b<> c<> d<>))", "(a(b<> c<> d<> z<>))", "+a/z"}, + {"(a(b(c<> d<> e<>)))", "(a(b(c<> d<> e<> z<>)))", "+a/b/z"}, + {"(a(b<>) f<>)", "(a(b<>) f<> z<>)", "+z"}, + {"(a(b<>) f<>)", "(a(b<> z<>) f<>)", "+a/z"}, + }) +} + +func (s *DiffTreeSuite) TestDebug(c *C) { + do(c, []diffTreeTest{ + {"(a(b<>) f<>)", "(a(b<> z<>) f<>)", "+a/z"}, + }) +} + +// root +// / | \ +// / | ---- +// f d h -------- +// /\ / \ | +// e a j b/ g +// | / \ | +// l n k icm +// | +// o +// | +// p/ +func (s *DiffTreeSuite) TestCrazy(c *C) { + crazy := "(f(e(l<1>) a(n(o(p())) k<1>)) d<1> h(j(i<1> c<2> m<>) b() g<>))" + do(c, []diffTreeTest{ + { + crazy, + "()", + "-d -f/e/l -f/a/k -h/j/i -h/j/c -h/j/m -h/g", + }, { + crazy, + crazy, + "", + }, { + crazy, + "(d<1>)", + "-f/e/l -f/a/k -h/j/i -h/j/c -h/j/m -h/g", + }, { + crazy, + "(d<1> h(b() g<>))", + "-f/e/l -f/a/k -h/j/i -h/j/c -h/j/m", + }, { + crazy, + "(d<1> f(e(l()) a()) h(b() g<>))", + "-f/e/l -f/a/k -h/j/i -h/j/c -h/j/m", + }, { + crazy, + "(d<1> f(e(l<1>) a()) h(b() g<>))", + "-f/a/k -h/j/i -h/j/c -h/j/m", + }, { + crazy, + "(d<2> f(e(l<2>) a(s(t<1>))) h(b() g<> r<> j(i<> c<3> m<>)))", + "+f/a/s/t +h/r -f/a/k *d *f/e/l *h/j/c *h/j/i", + }, { + crazy, + "(f(e(l<2>) a(n(o(p<1>)) k<>)) h(j(i<1> c<2> m<>) b() g<>))", + "*f/e/l +f/a/n/o/p *f/a/k -d", + }, { + crazy, + "(f(e(l<1>) a(n(o(p(r<1>))) k<1>)) d<1> h(j(i<1> c<2> b() m<>) g<1>))", + "+f/a/n/o/p/r *h/g", + }, + }) +} + +func (s *DiffTreeSuite) TestSameNames(c *C) { + do(c, []diffTreeTest{ + { + "(a(a(a<>)))", + "(a(a(a<1>)))", + "*a/a/a", + }, { + "(a(b(a<>)))", + "(a(b(a<>)) b(a<>))", + "+b/a", + }, { + "(a(b(a<>)))", + "(a(b()) b(a<>))", + "-a/b/a +b/a", + }, + }) +} diff --git a/utils/merkletrie/doubleiter.go b/utils/merkletrie/doubleiter.go new file mode 100644 index 0000000..6f5e538 --- /dev/null +++ b/utils/merkletrie/doubleiter.go @@ -0,0 +1,187 @@ +package merkletrie + +import ( + "fmt" + "io" + + "srcd.works/go-git.v4/utils/merkletrie/noder" +) + +// A doubleIter is a convenience type to keep track of the current +// noders in two merkletries that are going to be iterated in parallel. +// It has methods for: +// +// - iterating over the merkletries, both at the same time or +// individually: nextFrom, nextTo, nextBoth, stepBoth +// +// - checking if there are noders left in one or both of them with the +// remaining method and its associated returned type. +// +// - comparing the current noders of both merkletries in several ways, +// with the compare method and its associated returned type. +type doubleIter struct { + from struct { + iter *Iter + current noder.Path // nil if no more nodes + } + to struct { + iter *Iter + current noder.Path // nil if no more nodes + } + hashEqual noder.Equal +} + +// NewdoubleIter returns a new doubleIter for the merkletries "from" and +// "to". The hashEqual callback function will be used by the doubleIter +// to compare the hash of the noders in the merkletries. The doubleIter +// will be initialized to the first elements in each merkletrie if any. +func newDoubleIter(from, to noder.Noder, hashEqual noder.Equal) ( + *doubleIter, error) { + var ii doubleIter + var err error + + if ii.from.iter, err = NewIter(from); err != nil { + return nil, fmt.Errorf("from: %s", err) + } + if ii.from.current, err = ii.from.iter.Next(); turnEOFIntoNil(err) != nil { + return nil, fmt.Errorf("from: %s", err) + } + + if ii.to.iter, err = NewIter(to); err != nil { + return nil, fmt.Errorf("to: %s", err) + } + if ii.to.current, err = ii.to.iter.Next(); turnEOFIntoNil(err) != nil { + return nil, fmt.Errorf("to: %s", err) + } + + ii.hashEqual = hashEqual + + return &ii, nil +} + +func turnEOFIntoNil(e error) error { + if e != nil && e != io.EOF { + return e + } + return nil +} + +// NextBoth makes d advance to the next noder in both merkletries. If +// any of them is a directory, it skips its contents. +func (d *doubleIter) nextBoth() error { + if err := d.nextFrom(); err != nil { + return err + } + if err := d.nextTo(); err != nil { + return err + } + + return nil +} + +// NextFrom makes d advance to the next noder in the "from" merkletrie, +// skipping its contents if it is a directory. +func (d *doubleIter) nextFrom() (err error) { + d.from.current, err = d.from.iter.Next() + return turnEOFIntoNil(err) +} + +// NextTo makes d advance to the next noder in the "to" merkletrie, +// skipping its contents if it is a directory. +func (d *doubleIter) nextTo() (err error) { + d.to.current, err = d.to.iter.Next() + return turnEOFIntoNil(err) +} + +// StepBoth makes d advance to the next noder in both merkletries, +// getting deeper into directories if that is the case. +func (d *doubleIter) stepBoth() (err error) { + if d.from.current, err = d.from.iter.Step(); turnEOFIntoNil(err) != nil { + return err + } + if d.to.current, err = d.to.iter.Step(); turnEOFIntoNil(err) != nil { + return err + } + return nil +} + +// Remaining returns if there are no more noders in the tree, if both +// have noders or if one of them doesn't. +func (d *doubleIter) remaining() remaining { + if d.from.current == nil && d.to.current == nil { + return noMoreNoders + } + + if d.from.current == nil && d.to.current != nil { + return onlyToRemains + } + + if d.from.current != nil && d.to.current == nil { + return onlyFromRemains + } + + return bothHaveNodes +} + +// Remaining values tells you whether both trees still have noders, or +// only one of them or none of them. +type remaining int + +const ( + noMoreNoders remaining = iota + onlyToRemains + onlyFromRemains + bothHaveNodes +) + +// Compare returns the comparison between the current elements in the +// merkletries. +func (d *doubleIter) compare() (s comparison, err error) { + s.sameHash = d.hashEqual(d.from.current, d.to.current) + + fromIsDir := d.from.current.IsDir() + toIsDir := d.to.current.IsDir() + + s.bothAreDirs = fromIsDir && toIsDir + s.bothAreFiles = !fromIsDir && !toIsDir + s.fileAndDir = !s.bothAreDirs && !s.bothAreFiles + + fromNumChildren, err := d.from.current.NumChildren() + if err != nil { + return comparison{}, fmt.Errorf("from: %s", err) + } + + toNumChildren, err := d.to.current.NumChildren() + if err != nil { + return comparison{}, fmt.Errorf("to: %s", err) + } + + s.fromIsEmptyDir = fromIsDir && fromNumChildren == 0 + s.toIsEmptyDir = toIsDir && toNumChildren == 0 + + return +} + +// Answers to a lot of questions you can ask about how to noders are +// equal or different. +type comparison struct { + // the following are only valid if both nodes have the same name + // (i.e. nameComparison == 0) + + // Do both nodes have the same hash? + sameHash bool + // Are both nodes files? + bothAreFiles bool + + // the following are only valid if any of the noders are dirs, + // this is, if !bothAreFiles + + // Is one a file and the other a dir? + fileAndDir bool + // Are both nodes dirs? + bothAreDirs bool + // Is the from node an empty dir? + fromIsEmptyDir bool + // Is the to Node an empty dir? + toIsEmptyDir bool +} |