aboutsummaryrefslogtreecommitdiffstats
path: root/utils/merkletrie
diff options
context:
space:
mode:
Diffstat (limited to 'utils/merkletrie')
-rw-r--r--utils/merkletrie/change.go149
-rw-r--r--utils/merkletrie/change_test.go76
-rw-r--r--utils/merkletrie/difftree.go404
-rw-r--r--utils/merkletrie/difftree_test.go429
-rw-r--r--utils/merkletrie/doubleiter.go187
5 files changed, 1245 insertions, 0 deletions
diff --git a/utils/merkletrie/change.go b/utils/merkletrie/change.go
new file mode 100644
index 0000000..e6c99f6
--- /dev/null
+++ b/utils/merkletrie/change.go
@@ -0,0 +1,149 @@
+package merkletrie
+
+import (
+ "fmt"
+ "io"
+
+ "srcd.works/go-git.v4/utils/merkletrie/noder"
+)
+
+// Action values represent the kind of things a Change can represent:
+// insertion, deletions or modifications of files.
+type Action int
+
+// The set of possible actions in a change.
+const (
+ _ Action = iota
+ Insert
+ Delete
+ Modify
+)
+
+// String returns the action as a human readable text.
+func (a Action) String() string {
+ switch a {
+ case Insert:
+ return "Insert"
+ case Delete:
+ return "Delete"
+ case Modify:
+ return "Modify"
+ default:
+ panic(fmt.Sprintf("unsupported action: %d", a))
+ }
+}
+
+// A Change value represent how a noder has change between to merkletries.
+type Change struct {
+ // The noder before the change or nil if it was inserted.
+ From noder.Path
+ // The noder after the change or nil if it was deleted.
+ To noder.Path
+}
+
+// Action is convenience method that returns what Action c represents.
+func (c *Change) Action() (Action, error) {
+ if c.From == nil && c.To == nil {
+ return Action(0), fmt.Errorf("malformed change: nil from and to")
+ }
+ if c.From == nil {
+ return Insert, nil
+ }
+ if c.To == nil {
+ return Delete, nil
+ }
+
+ return Modify, nil
+}
+
+// NewInsert returns a new Change representing the insertion of n.
+func NewInsert(n noder.Path) Change { return Change{To: n} }
+
+// NewDelete returns a new Change representing the deletion of n.
+func NewDelete(n noder.Path) Change { return Change{From: n} }
+
+// NewModify returns a new Change representing that a has been modified and
+// it is now b.
+func NewModify(a, b noder.Path) Change {
+ return Change{
+ From: a,
+ To: b,
+ }
+}
+
+// String returns a single change in human readable form, using the
+// format: '<' + action + space + path + '>'. The contents of the file
+// before or after the change are not included in this format.
+//
+// Example: inserting a file at the path a/b/c.txt will return "<Insert
+// a/b/c.txt>".
+func (c Change) String() string {
+ action, err := c.Action()
+ if err != nil {
+ panic(err)
+ }
+
+ var path string
+ if action == Delete {
+ path = c.From.String()
+ } else {
+ path = c.To.String()
+ }
+
+ return fmt.Sprintf("<%s %s>", action, path)
+}
+
+// Changes is a list of changes between to merkletries.
+type Changes []Change
+
+// NewChanges returns an empty list of changes.
+func NewChanges() Changes {
+ return Changes{}
+}
+
+// Add adds the change c to the list of changes.
+func (l *Changes) Add(c Change) {
+ *l = append(*l, c)
+}
+
+// AddRecursiveInsert adds the required changes to insert all the
+// file-like noders found in root, recursively.
+func (l *Changes) AddRecursiveInsert(root noder.Path) error {
+ return l.addRecursive(root, NewInsert)
+}
+
+// AddRecursiveDelete adds the required changes to delete all the
+// file-like noders found in root, recursively.
+func (l *Changes) AddRecursiveDelete(root noder.Path) error {
+ return l.addRecursive(root, NewDelete)
+}
+
+type noderToChangeFn func(noder.Path) Change // NewInsert or NewDelete
+
+func (l *Changes) addRecursive(root noder.Path, ctor noderToChangeFn) error {
+ if !root.IsDir() {
+ l.Add(ctor(root))
+ return nil
+ }
+
+ i, err := NewIterFromPath(root)
+ if err != nil {
+ return err
+ }
+
+ var current noder.Path
+ for {
+ if current, err = i.Step(); err != nil {
+ if err == io.EOF {
+ break
+ }
+ return err
+ }
+ if current.IsDir() {
+ continue
+ }
+ l.Add(ctor(current))
+ }
+
+ return nil
+}
diff --git a/utils/merkletrie/change_test.go b/utils/merkletrie/change_test.go
new file mode 100644
index 0000000..4f908ce
--- /dev/null
+++ b/utils/merkletrie/change_test.go
@@ -0,0 +1,76 @@
+package merkletrie_test
+
+import (
+ "srcd.works/go-git.v4/utils/merkletrie"
+ "srcd.works/go-git.v4/utils/merkletrie/internal/fsnoder"
+ "srcd.works/go-git.v4/utils/merkletrie/noder"
+
+ . "gopkg.in/check.v1"
+)
+
+type ChangeSuite struct{}
+
+var _ = Suite(&ChangeSuite{})
+
+func (s *ChangeSuite) TestActionString(c *C) {
+ action := merkletrie.Insert
+ c.Assert(action.String(), Equals, "Insert")
+
+ action = merkletrie.Delete
+ c.Assert(action.String(), Equals, "Delete")
+
+ action = merkletrie.Modify
+ c.Assert(action.String(), Equals, "Modify")
+}
+
+func (s *ChangeSuite) TestUnsupportedAction(c *C) {
+ a := merkletrie.Action(42)
+ c.Assert(a.String, PanicMatches, "unsupported action.*")
+}
+
+func (s ChangeSuite) TestNewInsert(c *C) {
+ tree, err := fsnoder.New("(a(b(z<>)))")
+ c.Assert(err, IsNil)
+ path := find(c, tree, "z")
+ change := merkletrie.NewInsert(path)
+ c.Assert(change.String(), Equals, "<Insert a/b/z>")
+
+ shortPath := noder.Path([]noder.Noder{path.Last()})
+ change = merkletrie.NewInsert(shortPath)
+ c.Assert(change.String(), Equals, "<Insert z>")
+}
+
+func (s ChangeSuite) TestNewDelete(c *C) {
+ tree, err := fsnoder.New("(a(b(z<>)))")
+ c.Assert(err, IsNil)
+ path := find(c, tree, "z")
+ change := merkletrie.NewDelete(path)
+ c.Assert(change.String(), Equals, "<Delete a/b/z>")
+
+ shortPath := noder.Path([]noder.Noder{path.Last()})
+ change = merkletrie.NewDelete(shortPath)
+ c.Assert(change.String(), Equals, "<Delete z>")
+}
+
+func (s ChangeSuite) TestNewModify(c *C) {
+ tree1, err := fsnoder.New("(a(b(z<>)))")
+ c.Assert(err, IsNil)
+ path1 := find(c, tree1, "z")
+
+ tree2, err := fsnoder.New("(a(b(z<1>)))")
+ c.Assert(err, IsNil)
+ path2 := find(c, tree2, "z")
+
+ change := merkletrie.NewModify(path1, path2)
+ c.Assert(change.String(), Equals, "<Modify a/b/z>")
+
+ shortPath1 := noder.Path([]noder.Noder{path1.Last()})
+ shortPath2 := noder.Path([]noder.Noder{path2.Last()})
+ change = merkletrie.NewModify(shortPath1, shortPath2)
+ c.Assert(change.String(), Equals, "<Modify z>")
+}
+
+func (s ChangeSuite) TestMalformedChange(c *C) {
+ change := merkletrie.Change{}
+ c.Assert(change.String, PanicMatches, "malformed change.*")
+}
diff --git a/utils/merkletrie/difftree.go b/utils/merkletrie/difftree.go
new file mode 100644
index 0000000..1a626cd
--- /dev/null
+++ b/utils/merkletrie/difftree.go
@@ -0,0 +1,404 @@
+package merkletrie
+
+// The focus of this difftree implementation is to save time by
+// skipping whole directories if their hash is the same in both
+// trees.
+//
+// The diff algorithm implemented here is based on the doubleiter
+// type defined in this same package; we will iterate over both
+// trees at the same time, while comparing the current noders in
+// each iterator. Depending on how they differ we will output the
+// corresponding chages and move the iterators further over both
+// trees.
+//
+// The table bellow show all the possible comparison results, along
+// with what changes should we produce and how to advance the
+// iterators.
+//
+// The table is implemented by the switches in this function,
+// diffTwoNodes, diffTwoNodesSameName and diffTwoDirs.
+//
+// Many Bothans died to bring us this information, make sure you
+// understand the table before modifying this code.
+
+// # Cases
+//
+// When comparing noders in both trees you will found yourself in
+// one of 169 possible cases, but if we ignore moves, we can
+// simplify a lot the search space into the following table:
+//
+// - "-": nothing, no file or directory
+// - a<>: an empty file named "a".
+// - a<1>: a file named "a", with "1" as its contents.
+// - a<2>: a file named "a", with "2" as its contents.
+// - a(): an empty dir named "a".
+// - a(...): a dir named "a", with some files and/or dirs inside (possibly
+// empty).
+// - a(;;;): a dir named "a", with some other files and/or dirs inside
+// (possibly empty), which different from the ones in "a(...)".
+//
+// \ to - a<> a<1> a<2> a() a(...) a(;;;)
+// from \
+// - 00 01 02 03 04 05 06
+// a<> 10 11 12 13 14 15 16
+// a<1> 20 21 22 23 24 25 26
+// a<2> 30 31 32 33 34 35 36
+// a() 40 41 42 43 44 45 46
+// a(...) 50 51 52 53 54 55 56
+// a(;;;) 60 61 62 63 64 65 66
+//
+// Every (from, to) combination in the table is a special case, but
+// some of them can be merged into some more general cases, for
+// instance 11 and 22 can be merged into the general case: both
+// noders are equal.
+//
+// Here is a full list of all the cases that are similar and how to
+// merge them together into more general cases. Each general case
+// is labeled wiht an uppercase letter for further reference, and it
+// is followed by the pseudocode of the checks you have to perfrom
+// on both noders to see if you are in such a case, the actions to
+// perform (i.e. what changes to output) and how to advance the
+// iterators of each tree to continue the comparison process.
+//
+// ## A. Impossible: 00
+//
+// ## B. Same thing on both sides: 11, 22, 33, 44, 55, 66
+// - check: `SameName() && SameHash()`
+// - action: do nothing.
+// - advance: `FromNext(); ToNext()`
+//
+// ### C. To was created: 01, 02, 03, 04, 05, 06
+// - check: `DifferentName() && ToBeforeFrom()`
+// - action: inserRecursively(to)
+// - advance: `ToNext()`
+//
+// ### D. From was deleted: 10, 20, 30, 40, 50, 60
+// - check: `DifferentName() && FromBeforeTo()`
+// - action: `DeleteRecursively(from)`
+// - advance: `FromNext()`
+//
+// ### E. Empty file to file with contents: 12, 13
+// - check: `SameName() && DifferentHash() && FromIsFile() &&
+// ToIsFile() && FromIsEmpty()`
+// - action: `modifyFile(from, to)`
+// - advance: `FromNext()` or `FromStep()`
+//
+// ### E'. file with contents to empty file: 21, 31
+// - check: `SameName() && DifferentHash() && FromIsFile() &&
+// ToIsFile() && ToIsEmpty()`
+// - action: `modifyFile(from, to)`
+// - advance: `FromNext()` or `FromStep()`
+//
+// ### F. empty file to empty dir with the same name: 14
+// - check: `SameName() && FromIsFile() && FromIsEmpty() &&
+// ToIsDir() && ToIsEmpty()`
+// - action: `DeleteFile(from); InsertEmptyDir(to)`
+// - advance: `FromNext(); ToNext()`
+//
+// ### F'. empty dir to empty file of the same name: 41
+// - check: `SameName() && FromIsDir() && FromIsEmpty &&
+// ToIsFile() && ToIsEmpty()`
+// - action: `DeleteEmptyDir(from); InsertFile(to)`
+// - advance: `FromNext(); ToNext()` or step for any of them.
+//
+// ### G. empty file to non-empty dir of the same name: 15, 16
+// - check: `SameName() && FromIsFile() && ToIsDir() &&
+// FromIsEmpty() && ToIsNotEmpty()`
+// - action: `DeleteFile(from); InsertDirRecursively(to)`
+// - advance: `FromNext(); ToNext()`
+//
+// ### G'. non-empty dir to empty file of the same name: 51, 61
+// - check: `SameName() && FromIsDir() && FromIsNotEmpty() &&
+// ToIsFile() && FromIsEmpty()`
+// - action: `DeleteDirRecursively(from); InsertFile(to)`
+// - advance: `FromNext(); ToNext()`
+//
+// ### H. modify file contents: 23, 32
+// - check: `SameName() && FromIsFile() && ToIsFile() &&
+// FromIsNotEmpty() && ToIsNotEmpty()`
+// - action: `ModifyFile(from, to)`
+// - advance: `FromNext(); ToNext()`
+//
+// ### I. file with contents to empty dir: 24, 34
+// - check: `SameName() && DifferentHash() && FromIsFile() &&
+// FromIsNotEmpty() && ToIsDir() && ToIsEmpty()`
+// - action: `DeleteFile(from); InsertEmptyDir(to)`
+// - advance: `FromNext(); ToNext()`
+//
+// ### I'. empty dir to file with contents: 42, 43
+// - check: `SameName() && DifferentHash() && FromIsDir() &&
+// FromIsEmpty() && ToIsFile() && ToIsEmpty()`
+// - action: `DeleteDir(from); InsertFile(to)`
+// - advance: `FromNext(); ToNext()`
+//
+// ### J. file with contents to dir with contetns: 25, 26, 35, 36
+// - check: `SameName() && DifferentHash() && FromIsFile() &&
+// FromIsNotEmpty() && ToIsDir() && ToIsNotEmpty()`
+// - action: `DeleteFile(from); InsertDirRecursively(to)`
+// - advance: `FromNext(); ToNext()`
+//
+// ### J'. dir with contetns to file with contents: 52, 62, 53, 63
+// - check: `SameName() && DifferentHash() && FromIsDir() &&
+// FromIsNotEmpty() && ToIsFile() && ToIsNotEmpty()`
+// - action: `DeleteDirRecursively(from); InsertFile(to)`
+// - advance: `FromNext(); ToNext()`
+//
+// ### K. empty dir to dir with contents: 45, 46
+// - check: `SameName() && DifferentHash() && FromIsDir() &&
+// FromIsEmpty() && ToIsDir() && ToIsNotEmpty()`
+// - action: `InsertChildrenRecursively(to)`
+// - advance: `FromNext(); ToNext()`
+//
+// ### K'. dir with contents to empty dir: 54, 64
+// - check: `SameName() && DifferentHash() && FromIsDir() &&
+// FromIsEmpty() && ToIsDir() && ToIsNotEmpty()`
+// - action: `DeleteChildrenRecursively(from)`
+// - advance: `FromNext(); ToNext()`
+//
+// ### L. dir with contents to dir with different contents: 56, 65
+// - check: `SameName() && DifferentHash() && FromIsDir() &&
+// FromIsNotEmpty() && ToIsDir() && ToIsNotEmpty()`
+// - action: nothing
+// - advance: `FromStep(); ToStep()`
+//
+//
+
+// All these cases can be further simplified by a truth table
+// reduction process, in which we gather similar checks together to
+// make the final code easier to read and understand.
+//
+// The first 6 columns are the outputs of the checks to perform on
+// both noders. I have labeled them 1 to 6, this is what they mean:
+//
+// 1: SameName()
+// 2: SameHash()
+// 3: FromIsDir()
+// 4: ToIsDir()
+// 5: FromIsEmpty()
+// 6: ToIsEmpty()
+//
+// The from and to columns are a fsnoder example of the elements
+// that you will find on each tree under the specified comparison
+// results (columns 1 to 6).
+//
+// The type column identifies the case we are into, from the list above.
+//
+// The type' column identifies the new set of reduced cases, using
+// lowercase letters, and they are explained after the table.
+//
+// The last column is the set of actions and advances for each case.
+//
+// "---" means impossible except in case of hash collision.
+//
+// advance meaning:
+// - NN: from.Next(); to.Next()
+// - SS: from.Step(); to.Step()
+//
+// 1 2 3 4 5 6 | from | to |type|type'|action ; advance
+// ------------+--------+--------+----+------------------------------------
+// 0 0 0 0 0 0 | | | | | if !SameName() {
+// . | | | | | if FromBeforeTo() {
+// . | | | D | d | delete(from); from.Next()
+// . | | | | | } else {
+// . | | | C | c | insert(to); to.Next()
+// . | | | | | }
+// 0 1 1 1 1 1 | | | | | }
+// 1 0 0 0 0 0 | a<1> | a<2> | H | e | modify(from, to); NN
+// 1 0 0 0 0 1 | a<1> | a<> | E' | e | modify(from, to); NN
+// 1 0 0 0 1 0 | a<> | a<1> | E | e | modify(from, to); NN
+// 1 0 0 0 1 1 | ---- | ---- | | e |
+// 1 0 0 1 0 0 | a<1> | a(...) | J | f | delete(from); insert(to); NN
+// 1 0 0 1 0 1 | a<1> | a() | I | f | delete(from); insert(to); NN
+// 1 0 0 1 1 0 | a<> | a(...) | G | f | delete(from); insert(to); NN
+// 1 0 0 1 1 1 | a<> | a() | F | f | delete(from); insert(to); NN
+// 1 0 1 0 0 0 | a(...) | a<1> | J' | f | delete(from); insert(to); NN
+// 1 0 1 0 0 1 | a(...) | a<> | G' | f | delete(from); insert(to); NN
+// 1 0 1 0 1 0 | a() | a<1> | I' | f | delete(from); insert(to); NN
+// 1 0 1 0 1 1 | a() | a<> | F' | f | delete(from); insert(to); NN
+// 1 0 1 1 0 0 | a(...) | a(;;;) | L | g | nothing; SS
+// 1 0 1 1 0 1 | a(...) | a() | K' | h | deleteChidren(from); NN
+// 1 0 1 1 1 0 | a() | a(...) | K | i | insertChildren(to); NN
+// 1 0 1 1 1 1 | ---- | ---- | | |
+// 1 1 0 0 0 0 | a<1> | a<1> | B | b | nothing; NN
+// 1 1 0 0 0 1 | ---- | ---- | | b |
+// 1 1 0 0 1 0 | ---- | ---- | | b |
+// 1 1 0 0 1 1 | a<> | a<> | B | b | nothing; NN
+// 1 1 0 1 0 0 | ---- | ---- | | b |
+// 1 1 0 1 0 1 | ---- | ---- | | b |
+// 1 1 0 1 1 0 | ---- | ---- | | b |
+// 1 1 0 1 1 1 | ---- | ---- | | b |
+// 1 1 1 0 0 0 | ---- | ---- | | b |
+// 1 1 1 0 0 1 | ---- | ---- | | b |
+// 1 1 1 0 1 0 | ---- | ---- | | b |
+// 1 1 1 0 1 1 | ---- | ---- | | b |
+// 1 1 1 1 0 0 | a(...) | a(...) | B | b | nothing; NN
+// 1 1 1 1 0 1 | ---- | ---- | | b |
+// 1 1 1 1 1 0 | ---- | ---- | | b |
+// 1 1 1 1 1 1 | a() | a() | B | b | nothing; NN
+//
+// c and d:
+// if !SameName()
+// d if FromBeforeTo()
+// c else
+// b: SameName) && sameHash()
+// e: SameName() && !sameHash() && BothAreFiles()
+// f: SameName() && !sameHash() && FileAndDir()
+// g: SameName() && !sameHash() && BothAreDirs() && NoneIsEmpty
+// i: SameName() && !sameHash() && BothAreDirs() && FromIsEmpty
+// h: else of i
+
+import (
+ "fmt"
+ "strings"
+
+ "srcd.works/go-git.v4/utils/merkletrie/noder"
+)
+
+// DiffTree calculates the list of changes between two merkletries. It
+// uses the provided hashEqual callback to compare noders.
+func DiffTree(fromTree, toTree noder.Noder,
+ hashEqual noder.Equal) (Changes, error) {
+ ret := NewChanges()
+
+ ii, err := newDoubleIter(fromTree, toTree, hashEqual)
+ if err != nil {
+ return nil, err
+ }
+
+ for {
+ from := ii.from.current
+ to := ii.to.current
+
+ switch r := ii.remaining(); r {
+ case noMoreNoders:
+ return ret, nil
+ case onlyFromRemains:
+ if err = ret.AddRecursiveDelete(from); err != nil {
+ return nil, err
+ }
+ if err = ii.nextFrom(); err != nil {
+ return nil, err
+ }
+ case onlyToRemains:
+ if err = ret.AddRecursiveInsert(to); err != nil {
+ return nil, err
+ }
+ if err = ii.nextTo(); err != nil {
+ return nil, err
+ }
+ case bothHaveNodes:
+ if err = diffNodes(&ret, ii); err != nil {
+ return nil, err
+ }
+ default:
+ panic(fmt.Sprintf("unknown remaining value: %d", r))
+ }
+ }
+}
+
+func diffNodes(changes *Changes, ii *doubleIter) error {
+ from := ii.from.current
+ to := ii.to.current
+ var err error
+
+ // compare their full paths as strings
+ switch strings.Compare(from.String(), to.String()) {
+ case -1:
+ if err = changes.AddRecursiveDelete(from); err != nil {
+ return err
+ }
+ if err = ii.nextFrom(); err != nil {
+ return err
+ }
+ case 1:
+ if err = changes.AddRecursiveInsert(to); err != nil {
+ return err
+ }
+ if err = ii.nextTo(); err != nil {
+ return err
+ }
+ default:
+ if err := diffNodesSameName(changes, ii); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func diffNodesSameName(changes *Changes, ii *doubleIter) error {
+ from := ii.from.current
+ to := ii.to.current
+
+ status, err := ii.compare()
+ if err != nil {
+ return err
+ }
+
+ switch {
+ case status.sameHash:
+ // do nothing
+ if err = ii.nextBoth(); err != nil {
+ return err
+ }
+ case status.bothAreFiles:
+ changes.Add(NewModify(from, to))
+ if err = ii.nextBoth(); err != nil {
+ return err
+ }
+ case status.fileAndDir:
+ if err = changes.AddRecursiveDelete(from); err != nil {
+ return err
+ }
+ if err = changes.AddRecursiveInsert(to); err != nil {
+ return err
+ }
+ if err = ii.nextBoth(); err != nil {
+ return err
+ }
+ case status.bothAreDirs:
+ if err = diffDirs(changes, ii); err != nil {
+ return err
+ }
+ default:
+ return fmt.Errorf("bad status from double iterator")
+ }
+
+ return nil
+}
+
+func diffDirs(changes *Changes, ii *doubleIter) error {
+ from := ii.from.current
+ to := ii.to.current
+
+ status, err := ii.compare()
+ if err != nil {
+ return err
+ }
+
+ switch {
+ case status.fromIsEmptyDir:
+ if err = changes.AddRecursiveInsert(to); err != nil {
+ return err
+ }
+ if err = ii.nextBoth(); err != nil {
+ return err
+ }
+ case status.toIsEmptyDir:
+ if err = changes.AddRecursiveDelete(from); err != nil {
+ return err
+ }
+ if err = ii.nextBoth(); err != nil {
+ return err
+ }
+ case !status.fromIsEmptyDir && !status.toIsEmptyDir:
+ // do nothing
+ if err = ii.stepBoth(); err != nil {
+ return err
+ }
+ default:
+ return fmt.Errorf("both dirs are empty but has different hash")
+ }
+
+ return nil
+}
diff --git a/utils/merkletrie/difftree_test.go b/utils/merkletrie/difftree_test.go
new file mode 100644
index 0000000..6a193af
--- /dev/null
+++ b/utils/merkletrie/difftree_test.go
@@ -0,0 +1,429 @@
+package merkletrie_test
+
+import (
+ "bytes"
+ "fmt"
+ "reflect"
+ "sort"
+ "strings"
+ "testing"
+ "unicode"
+
+ "srcd.works/go-git.v4/utils/merkletrie"
+ "srcd.works/go-git.v4/utils/merkletrie/internal/fsnoder"
+
+ . "gopkg.in/check.v1"
+)
+
+func Test(t *testing.T) { TestingT(t) }
+
+type DiffTreeSuite struct{}
+
+var _ = Suite(&DiffTreeSuite{})
+
+type diffTreeTest struct {
+ from string
+ to string
+ expected string
+}
+
+func (t diffTreeTest) innerRun(c *C, context string, reverse bool) {
+ comment := Commentf("\n%s", context)
+ if reverse {
+ comment = Commentf("%s [REVERSED]", comment.CheckCommentString())
+ }
+
+ a, err := fsnoder.New(t.from)
+ c.Assert(err, IsNil, comment)
+ comment = Commentf("%s\n\t from = %s", comment.CheckCommentString(), a)
+
+ b, err := fsnoder.New(t.to)
+ c.Assert(err, IsNil, comment)
+ comment = Commentf("%s\n\t to = %s", comment.CheckCommentString(), b)
+
+ expected, err := newChangesFromString(t.expected)
+ c.Assert(err, IsNil, comment)
+
+ if reverse {
+ a, b = b, a
+ expected = expected.reverse()
+ }
+ comment = Commentf("%s\n\texpected = %s", comment.CheckCommentString(), expected)
+
+ results, err := merkletrie.DiffTree(a, b, fsnoder.HashEqual)
+ c.Assert(err, IsNil, comment)
+
+ obtained, err := newChanges(results)
+ c.Assert(err, IsNil, comment)
+
+ comment = Commentf("%s\n\tobtained = %s", comment.CheckCommentString(), obtained)
+
+ c.Assert(obtained, changesEquals, expected, comment)
+}
+
+func (t diffTreeTest) run(c *C, context string) {
+ t.innerRun(c, context, false)
+ t.innerRun(c, context, true)
+}
+
+type change struct {
+ merkletrie.Action
+ path string
+}
+
+func (c change) String() string {
+ return fmt.Sprintf("<%s %s>", c.Action, c.path)
+}
+
+func (c change) reverse() change {
+ ret := change{
+ path: c.path,
+ }
+
+ switch c.Action {
+ case merkletrie.Insert:
+ ret.Action = merkletrie.Delete
+ case merkletrie.Delete:
+ ret.Action = merkletrie.Insert
+ case merkletrie.Modify:
+ ret.Action = merkletrie.Modify
+ default:
+ panic(fmt.Sprintf("unknown action type: %d", c.Action))
+ }
+
+ return ret
+}
+
+type changes []change
+
+func newChanges(original merkletrie.Changes) (changes, error) {
+ ret := make(changes, len(original))
+ for i, c := range original {
+ action, err := c.Action()
+ if err != nil {
+ return nil, err
+ }
+ switch action {
+ case merkletrie.Insert:
+ ret[i] = change{
+ Action: merkletrie.Insert,
+ path: c.To.String(),
+ }
+ case merkletrie.Delete:
+ ret[i] = change{
+ Action: merkletrie.Delete,
+ path: c.From.String(),
+ }
+ case merkletrie.Modify:
+ ret[i] = change{
+ Action: merkletrie.Modify,
+ path: c.From.String(),
+ }
+ default:
+ panic(fmt.Sprintf("unsupported action %d", c.Action))
+ }
+ }
+
+ return ret, nil
+}
+
+func newChangesFromString(s string) (changes, error) {
+ ret := make([]change, 0)
+
+ s = strings.TrimSpace(s)
+ s = removeDuplicatedSpace(s)
+ s = turnSpaceIntoLiteralSpace(s)
+
+ if s == "" {
+ return ret, nil
+ }
+
+ for _, chunk := range strings.Split(s, " ") {
+ change := change{
+ path: string(chunk[1:]),
+ }
+
+ switch chunk[0] {
+ case '+':
+ change.Action = merkletrie.Insert
+ case '-':
+ change.Action = merkletrie.Delete
+ case '*':
+ change.Action = merkletrie.Modify
+ default:
+ panic(fmt.Sprintf("unsupported action descriptor %q", chunk[0]))
+ }
+
+ ret = append(ret, change)
+ }
+
+ return ret, nil
+}
+
+func removeDuplicatedSpace(s string) string {
+ var buf bytes.Buffer
+
+ var lastWasSpace, currentIsSpace bool
+ for _, r := range s {
+ currentIsSpace = unicode.IsSpace(r)
+
+ if lastWasSpace && currentIsSpace {
+ continue
+ }
+ lastWasSpace = currentIsSpace
+
+ buf.WriteRune(r)
+ }
+
+ return buf.String()
+}
+
+func turnSpaceIntoLiteralSpace(s string) string {
+ return strings.Map(
+ func(r rune) rune {
+ if unicode.IsSpace(r) {
+ return ' '
+ }
+ return r
+ }, s)
+}
+
+func (cc changes) Len() int { return len(cc) }
+func (cc changes) Swap(i, j int) { cc[i], cc[j] = cc[j], cc[i] }
+func (cc changes) Less(i, j int) bool { return strings.Compare(cc[i].String(), cc[j].String()) < 0 }
+
+func (cc changes) equals(other changes) bool {
+ sort.Sort(cc)
+ sort.Sort(other)
+ return reflect.DeepEqual(cc, other)
+}
+
+func (cc changes) String() string {
+ var buf bytes.Buffer
+ fmt.Fprintf(&buf, "len(%d) [", len(cc))
+ sep := ""
+ for _, c := range cc {
+ fmt.Fprintf(&buf, "%s%s", sep, c)
+ sep = ", "
+ }
+ buf.WriteByte(']')
+ return buf.String()
+}
+
+func (cc changes) reverse() changes {
+ ret := make(changes, len(cc))
+ for i, c := range cc {
+ ret[i] = c.reverse()
+ }
+
+ return ret
+}
+
+type changesEqualsChecker struct {
+ *CheckerInfo
+}
+
+var changesEquals Checker = &changesEqualsChecker{
+ &CheckerInfo{Name: "changesEquals", Params: []string{"obtained", "expected"}},
+}
+
+func (checker *changesEqualsChecker) Check(params []interface{}, names []string) (result bool, error string) {
+ a, ok := params[0].(changes)
+ if !ok {
+ return false, "first parameter must be a changes"
+ }
+ b, ok := params[1].(changes)
+ if !ok {
+ return false, "second parameter must be a changes"
+ }
+
+ return a.equals(b), ""
+}
+
+func do(c *C, list []diffTreeTest) {
+ for i, t := range list {
+ t.run(c, fmt.Sprintf("test #%d:", i))
+ }
+}
+
+func (s *DiffTreeSuite) TestEmptyVsEmpty(c *C) {
+ do(c, []diffTreeTest{
+ {"()", "()", ""},
+ {"A()", "A()", ""},
+ {"A()", "()", ""},
+ {"A()", "B()", ""},
+ })
+}
+
+func (s *DiffTreeSuite) TestBasicCases(c *C) {
+ do(c, []diffTreeTest{
+ {"()", "()", ""},
+ {"()", "(a<>)", "+a"},
+ {"()", "(a<1>)", "+a"},
+ {"()", "(a())", ""},
+ {"()", "(a(b()))", ""},
+ {"()", "(a(b<>))", "+a/b"},
+ {"()", "(a(b<1>))", "+a/b"},
+ {"(a<>)", "(a<>)", ""},
+ {"(a<>)", "(a<1>)", "*a"},
+ {"(a<>)", "(a())", "-a"},
+ {"(a<>)", "(a(b()))", "-a"},
+ {"(a<>)", "(a(b<>))", "-a +a/b"},
+ {"(a<>)", "(a(b<1>))", "-a +a/b"},
+ {"(a<>)", "(c())", "-a"},
+ {"(a<>)", "(c(b()))", "-a"},
+ {"(a<>)", "(c(b<>))", "-a +c/b"},
+ {"(a<>)", "(c(b<1>))", "-a +c/b"},
+ {"(a<>)", "(c(a()))", "-a"},
+ {"(a<>)", "(c(a<>))", "-a +c/a"},
+ {"(a<>)", "(c(a<1>))", "-a +c/a"},
+ {"(a<1>)", "(a<1>)", ""},
+ {"(a<1>)", "(a<2>)", "*a"},
+ {"(a<1>)", "(b<1>)", "-a +b"},
+ {"(a<1>)", "(b<2>)", "-a +b"},
+ {"(a<1>)", "(a())", "-a"},
+ {"(a<1>)", "(a(b()))", "-a"},
+ {"(a<1>)", "(a(b<>))", "-a +a/b"},
+ {"(a<1>)", "(a(b<1>))", "-a +a/b"},
+ {"(a<1>)", "(a(b<2>))", "-a +a/b"},
+ {"(a<1>)", "(c())", "-a"},
+ {"(a<1>)", "(c(b()))", "-a"},
+ {"(a<1>)", "(c(b<>))", "-a +c/b"},
+ {"(a<1>)", "(c(b<1>))", "-a +c/b"},
+ {"(a<1>)", "(c(b<2>))", "-a +c/b"},
+ {"(a<1>)", "(c())", "-a"},
+ {"(a<1>)", "(c(a()))", "-a"},
+ {"(a<1>)", "(c(a<>))", "-a +c/a"},
+ {"(a<1>)", "(c(a<1>))", "-a +c/a"},
+ {"(a<1>)", "(c(a<2>))", "-a +c/a"},
+ {"(a())", "(a())", ""},
+ {"(a())", "(b())", ""},
+ {"(a())", "(a(b()))", ""},
+ {"(a())", "(b(a()))", ""},
+ {"(a())", "(a(b<>))", "+a/b"},
+ {"(a())", "(a(b<1>))", "+a/b"},
+ {"(a())", "(b(a<>))", "+b/a"},
+ {"(a())", "(b(a<1>))", "+b/a"},
+ })
+}
+
+func (s *DiffTreeSuite) TestHorizontals(c *C) {
+ do(c, []diffTreeTest{
+ {"()", "(a<> b<>)", "+a +b"},
+ {"()", "(a<> b<1>)", "+a +b"},
+ {"()", "(a<> b())", "+a"},
+ {"()", "(a() b<>)", "+b"},
+ {"()", "(a<1> b<>)", "+a +b"},
+ {"()", "(a<1> b<1>)", "+a +b"},
+ {"()", "(a<1> b<2>)", "+a +b"},
+ {"()", "(a<1> b())", "+a"},
+ {"()", "(a() b<1>)", "+b"},
+ {"()", "(a() b())", ""},
+ {"()", "(a<> b<> c<> d<>)", "+a +b +c +d"},
+ {"()", "(a<> b<1> c() d<> e<2> f())", "+a +b +d +e"},
+ })
+}
+
+func (s *DiffTreeSuite) TestVerticals(c *C) {
+ do(c, []diffTreeTest{
+ {"()", "(z<>)", "+z"},
+ {"()", "(a(z<>))", "+a/z"},
+ {"()", "(a(b(z<>)))", "+a/b/z"},
+ {"()", "(a(b(c(z<>))))", "+a/b/c/z"},
+ {"()", "(a(b(c(d(z<>)))))", "+a/b/c/d/z"},
+ {"()", "(a(b(c(d(z<1>)))))", "+a/b/c/d/z"},
+ })
+}
+
+func (s *DiffTreeSuite) TestSingleInserts(c *C) {
+ do(c, []diffTreeTest{
+ {"()", "(z<>)", "+z"},
+ {"(a())", "(a(z<>))", "+a/z"},
+ {"(a())", "(a(b(z<>)))", "+a/b/z"},
+ {"(a(b(c())))", "(a(b(c(z<>))))", "+a/b/c/z"},
+ {"(a<> b<> c<>)", "(a<> b<> c<> z<>)", "+z"},
+ {"(a(b<> c<> d<>))", "(a(b<> c<> d<> z<>))", "+a/z"},
+ {"(a(b(c<> d<> e<>)))", "(a(b(c<> d<> e<> z<>)))", "+a/b/z"},
+ {"(a(b<>) f<>)", "(a(b<>) f<> z<>)", "+z"},
+ {"(a(b<>) f<>)", "(a(b<> z<>) f<>)", "+a/z"},
+ })
+}
+
+func (s *DiffTreeSuite) TestDebug(c *C) {
+ do(c, []diffTreeTest{
+ {"(a(b<>) f<>)", "(a(b<> z<>) f<>)", "+a/z"},
+ })
+}
+
+// root
+// / | \
+// / | ----
+// f d h --------
+// /\ / \ |
+// e a j b/ g
+// | / \ |
+// l n k icm
+// |
+// o
+// |
+// p/
+func (s *DiffTreeSuite) TestCrazy(c *C) {
+ crazy := "(f(e(l<1>) a(n(o(p())) k<1>)) d<1> h(j(i<1> c<2> m<>) b() g<>))"
+ do(c, []diffTreeTest{
+ {
+ crazy,
+ "()",
+ "-d -f/e/l -f/a/k -h/j/i -h/j/c -h/j/m -h/g",
+ }, {
+ crazy,
+ crazy,
+ "",
+ }, {
+ crazy,
+ "(d<1>)",
+ "-f/e/l -f/a/k -h/j/i -h/j/c -h/j/m -h/g",
+ }, {
+ crazy,
+ "(d<1> h(b() g<>))",
+ "-f/e/l -f/a/k -h/j/i -h/j/c -h/j/m",
+ }, {
+ crazy,
+ "(d<1> f(e(l()) a()) h(b() g<>))",
+ "-f/e/l -f/a/k -h/j/i -h/j/c -h/j/m",
+ }, {
+ crazy,
+ "(d<1> f(e(l<1>) a()) h(b() g<>))",
+ "-f/a/k -h/j/i -h/j/c -h/j/m",
+ }, {
+ crazy,
+ "(d<2> f(e(l<2>) a(s(t<1>))) h(b() g<> r<> j(i<> c<3> m<>)))",
+ "+f/a/s/t +h/r -f/a/k *d *f/e/l *h/j/c *h/j/i",
+ }, {
+ crazy,
+ "(f(e(l<2>) a(n(o(p<1>)) k<>)) h(j(i<1> c<2> m<>) b() g<>))",
+ "*f/e/l +f/a/n/o/p *f/a/k -d",
+ }, {
+ crazy,
+ "(f(e(l<1>) a(n(o(p(r<1>))) k<1>)) d<1> h(j(i<1> c<2> b() m<>) g<1>))",
+ "+f/a/n/o/p/r *h/g",
+ },
+ })
+}
+
+func (s *DiffTreeSuite) TestSameNames(c *C) {
+ do(c, []diffTreeTest{
+ {
+ "(a(a(a<>)))",
+ "(a(a(a<1>)))",
+ "*a/a/a",
+ }, {
+ "(a(b(a<>)))",
+ "(a(b(a<>)) b(a<>))",
+ "+b/a",
+ }, {
+ "(a(b(a<>)))",
+ "(a(b()) b(a<>))",
+ "-a/b/a +b/a",
+ },
+ })
+}
diff --git a/utils/merkletrie/doubleiter.go b/utils/merkletrie/doubleiter.go
new file mode 100644
index 0000000..6f5e538
--- /dev/null
+++ b/utils/merkletrie/doubleiter.go
@@ -0,0 +1,187 @@
+package merkletrie
+
+import (
+ "fmt"
+ "io"
+
+ "srcd.works/go-git.v4/utils/merkletrie/noder"
+)
+
+// A doubleIter is a convenience type to keep track of the current
+// noders in two merkletries that are going to be iterated in parallel.
+// It has methods for:
+//
+// - iterating over the merkletries, both at the same time or
+// individually: nextFrom, nextTo, nextBoth, stepBoth
+//
+// - checking if there are noders left in one or both of them with the
+// remaining method and its associated returned type.
+//
+// - comparing the current noders of both merkletries in several ways,
+// with the compare method and its associated returned type.
+type doubleIter struct {
+ from struct {
+ iter *Iter
+ current noder.Path // nil if no more nodes
+ }
+ to struct {
+ iter *Iter
+ current noder.Path // nil if no more nodes
+ }
+ hashEqual noder.Equal
+}
+
+// NewdoubleIter returns a new doubleIter for the merkletries "from" and
+// "to". The hashEqual callback function will be used by the doubleIter
+// to compare the hash of the noders in the merkletries. The doubleIter
+// will be initialized to the first elements in each merkletrie if any.
+func newDoubleIter(from, to noder.Noder, hashEqual noder.Equal) (
+ *doubleIter, error) {
+ var ii doubleIter
+ var err error
+
+ if ii.from.iter, err = NewIter(from); err != nil {
+ return nil, fmt.Errorf("from: %s", err)
+ }
+ if ii.from.current, err = ii.from.iter.Next(); turnEOFIntoNil(err) != nil {
+ return nil, fmt.Errorf("from: %s", err)
+ }
+
+ if ii.to.iter, err = NewIter(to); err != nil {
+ return nil, fmt.Errorf("to: %s", err)
+ }
+ if ii.to.current, err = ii.to.iter.Next(); turnEOFIntoNil(err) != nil {
+ return nil, fmt.Errorf("to: %s", err)
+ }
+
+ ii.hashEqual = hashEqual
+
+ return &ii, nil
+}
+
+func turnEOFIntoNil(e error) error {
+ if e != nil && e != io.EOF {
+ return e
+ }
+ return nil
+}
+
+// NextBoth makes d advance to the next noder in both merkletries. If
+// any of them is a directory, it skips its contents.
+func (d *doubleIter) nextBoth() error {
+ if err := d.nextFrom(); err != nil {
+ return err
+ }
+ if err := d.nextTo(); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// NextFrom makes d advance to the next noder in the "from" merkletrie,
+// skipping its contents if it is a directory.
+func (d *doubleIter) nextFrom() (err error) {
+ d.from.current, err = d.from.iter.Next()
+ return turnEOFIntoNil(err)
+}
+
+// NextTo makes d advance to the next noder in the "to" merkletrie,
+// skipping its contents if it is a directory.
+func (d *doubleIter) nextTo() (err error) {
+ d.to.current, err = d.to.iter.Next()
+ return turnEOFIntoNil(err)
+}
+
+// StepBoth makes d advance to the next noder in both merkletries,
+// getting deeper into directories if that is the case.
+func (d *doubleIter) stepBoth() (err error) {
+ if d.from.current, err = d.from.iter.Step(); turnEOFIntoNil(err) != nil {
+ return err
+ }
+ if d.to.current, err = d.to.iter.Step(); turnEOFIntoNil(err) != nil {
+ return err
+ }
+ return nil
+}
+
+// Remaining returns if there are no more noders in the tree, if both
+// have noders or if one of them doesn't.
+func (d *doubleIter) remaining() remaining {
+ if d.from.current == nil && d.to.current == nil {
+ return noMoreNoders
+ }
+
+ if d.from.current == nil && d.to.current != nil {
+ return onlyToRemains
+ }
+
+ if d.from.current != nil && d.to.current == nil {
+ return onlyFromRemains
+ }
+
+ return bothHaveNodes
+}
+
+// Remaining values tells you whether both trees still have noders, or
+// only one of them or none of them.
+type remaining int
+
+const (
+ noMoreNoders remaining = iota
+ onlyToRemains
+ onlyFromRemains
+ bothHaveNodes
+)
+
+// Compare returns the comparison between the current elements in the
+// merkletries.
+func (d *doubleIter) compare() (s comparison, err error) {
+ s.sameHash = d.hashEqual(d.from.current, d.to.current)
+
+ fromIsDir := d.from.current.IsDir()
+ toIsDir := d.to.current.IsDir()
+
+ s.bothAreDirs = fromIsDir && toIsDir
+ s.bothAreFiles = !fromIsDir && !toIsDir
+ s.fileAndDir = !s.bothAreDirs && !s.bothAreFiles
+
+ fromNumChildren, err := d.from.current.NumChildren()
+ if err != nil {
+ return comparison{}, fmt.Errorf("from: %s", err)
+ }
+
+ toNumChildren, err := d.to.current.NumChildren()
+ if err != nil {
+ return comparison{}, fmt.Errorf("to: %s", err)
+ }
+
+ s.fromIsEmptyDir = fromIsDir && fromNumChildren == 0
+ s.toIsEmptyDir = toIsDir && toNumChildren == 0
+
+ return
+}
+
+// Answers to a lot of questions you can ask about how to noders are
+// equal or different.
+type comparison struct {
+ // the following are only valid if both nodes have the same name
+ // (i.e. nameComparison == 0)
+
+ // Do both nodes have the same hash?
+ sameHash bool
+ // Are both nodes files?
+ bothAreFiles bool
+
+ // the following are only valid if any of the noders are dirs,
+ // this is, if !bothAreFiles
+
+ // Is one a file and the other a dir?
+ fileAndDir bool
+ // Are both nodes dirs?
+ bothAreDirs bool
+ // Is the from node an empty dir?
+ fromIsEmptyDir bool
+ // Is the to Node an empty dir?
+ toIsEmptyDir bool
+}