aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--utils/merkletrie/change.go149
-rw-r--r--utils/merkletrie/change_test.go65
-rw-r--r--utils/merkletrie/difftree.go404
-rw-r--r--utils/merkletrie/difftree_test.go429
-rw-r--r--utils/merkletrie/doc.go22
-rw-r--r--utils/merkletrie/doubleiter.go187
-rw-r--r--utils/merkletrie/iter_test.go3
7 files changed, 1246 insertions, 13 deletions
diff --git a/utils/merkletrie/change.go b/utils/merkletrie/change.go
new file mode 100644
index 0000000..cacf658
--- /dev/null
+++ b/utils/merkletrie/change.go
@@ -0,0 +1,149 @@
+package merkletrie
+
+import (
+ "fmt"
+ "io"
+
+ "srcd.works/go-git.v4/utils/merkletrie/noder"
+)
+
+// Action values represent the kind of things a Change can represent:
+// insertion, deletions or modifications of files.
+type Action int
+
+// The set of possible actions in a change.
+const (
+ _ = iota
+ Insert
+ Delete
+ Modify
+)
+
+// String returns the action as a human readable text.
+func (a Action) String() string {
+ switch a {
+ case Insert:
+ return "Insert"
+ case Delete:
+ return "Delete"
+ case Modify:
+ return "Modify"
+ default:
+ panic(fmt.Sprintf("unsupported action: %d", a))
+ }
+}
+
+// A Change value represent how a noder has change between to merkletries.
+type Change struct {
+ // The noder before the change or nil if it was inserted.
+ From noder.Path
+ // The noder after the change or nil if it was deleted.
+ To noder.Path
+}
+
+// Action is convenience method that returns what Action c represents.
+func (c *Change) Action() (Action, error) {
+ if c.From == nil && c.To == nil {
+ return Action(0), fmt.Errorf("malformed change: nil from and to")
+ }
+ if c.From == nil {
+ return Insert, nil
+ }
+ if c.To == nil {
+ return Delete, nil
+ }
+
+ return Modify, nil
+}
+
+// NewInsert returns a new Change representing the insertion of n.
+func NewInsert(n noder.Path) Change { return Change{To: n} }
+
+// NewDelete returns a new Change representing the deletion of n.
+func NewDelete(n noder.Path) Change { return Change{From: n} }
+
+// NewModify returns a new Change representing that a has been modified and
+// it is now b.
+func NewModify(a, b noder.Path) Change {
+ return Change{
+ From: a,
+ To: b,
+ }
+}
+
+// String returns a single change in human readable form, using the
+// format: '<' + action + space + path + '>'. The contents of the file
+// before or after the change are not included in this format.
+//
+// Example: inserting a file at the path a/b/c.txt will return "<Insert
+// a/b/c.txt>".
+func (c Change) String() string {
+ action, err := c.Action()
+ if err != nil {
+ panic(err)
+ }
+
+ var path string
+ if action == Delete {
+ path = c.From.String()
+ } else {
+ path = c.To.String()
+ }
+
+ return fmt.Sprintf("<%s %s>", action, path)
+}
+
+// Changes is a list of changes between to merkletries.
+type Changes []Change
+
+// NewChanges returns an empty list of changes.
+func NewChanges() Changes {
+ return Changes{}
+}
+
+// Add adds the change c to the list of changes.
+func (l *Changes) Add(c Change) {
+ *l = append(*l, c)
+}
+
+// AddRecursiveInsert adds the required changes to insert all the
+// file-like noders found in root, recursively.
+func (l *Changes) AddRecursiveInsert(root noder.Path) error {
+ return l.addRecursive(root, NewInsert)
+}
+
+// AddRecursiveDelete adds the required changes to delete all the
+// file-like noders found in root, recursively.
+func (l *Changes) AddRecursiveDelete(root noder.Path) error {
+ return l.addRecursive(root, NewDelete)
+}
+
+type noderToChangeFn func(noder.Path) Change // NewInsert or NewDelete
+
+func (l *Changes) addRecursive(root noder.Path, ctor noderToChangeFn) error {
+ if !root.IsDir() {
+ l.Add(ctor(root))
+ return nil
+ }
+
+ i, err := NewIterFromPath(root)
+ if err != nil {
+ return err
+ }
+
+ var current noder.Path
+ for {
+ if current, err = i.Step(); err != nil {
+ if err == io.EOF {
+ break
+ }
+ return err
+ }
+ if current.IsDir() {
+ continue
+ }
+ l.Add(ctor(current))
+ }
+
+ return nil
+}
diff --git a/utils/merkletrie/change_test.go b/utils/merkletrie/change_test.go
new file mode 100644
index 0000000..c899ff9
--- /dev/null
+++ b/utils/merkletrie/change_test.go
@@ -0,0 +1,65 @@
+package merkletrie_test
+
+import (
+ "srcd.works/go-git.v4/utils/merkletrie"
+ "srcd.works/go-git.v4/utils/merkletrie/internal/fsnoder"
+ "srcd.works/go-git.v4/utils/merkletrie/noder"
+
+ . "gopkg.in/check.v1"
+)
+
+type ChangeSuite struct{}
+
+var _ = Suite(&ChangeSuite{})
+
+func (s *ChangeSuite) TestUnsupportedAction(c *C) {
+ a := merkletrie.Action(42)
+ c.Assert(a.String, PanicMatches, "unsupported action.*")
+}
+
+func (s ChangeSuite) TestNewInsert(c *C) {
+ tree, err := fsnoder.New("(a(b(z<>)))")
+ c.Assert(err, IsNil)
+ path := find(c, tree, "z")
+ change := merkletrie.NewInsert(path)
+ c.Assert(change.String(), Equals, "<Insert a/b/z>")
+
+ shortPath := noder.Path([]noder.Noder{path.Last()})
+ change = merkletrie.NewInsert(shortPath)
+ c.Assert(change.String(), Equals, "<Insert z>")
+}
+
+func (s ChangeSuite) TestNewDelete(c *C) {
+ tree, err := fsnoder.New("(a(b(z<>)))")
+ c.Assert(err, IsNil)
+ path := find(c, tree, "z")
+ change := merkletrie.NewDelete(path)
+ c.Assert(change.String(), Equals, "<Delete a/b/z>")
+
+ shortPath := noder.Path([]noder.Noder{path.Last()})
+ change = merkletrie.NewDelete(shortPath)
+ c.Assert(change.String(), Equals, "<Delete z>")
+}
+
+func (s ChangeSuite) TestNewModify(c *C) {
+ tree1, err := fsnoder.New("(a(b(z<>)))")
+ c.Assert(err, IsNil)
+ path1 := find(c, tree1, "z")
+
+ tree2, err := fsnoder.New("(a(b(z<1>)))")
+ c.Assert(err, IsNil)
+ path2 := find(c, tree2, "z")
+
+ change := merkletrie.NewModify(path1, path2)
+ c.Assert(change.String(), Equals, "<Modify a/b/z>")
+
+ shortPath1 := noder.Path([]noder.Noder{path1.Last()})
+ shortPath2 := noder.Path([]noder.Noder{path2.Last()})
+ change = merkletrie.NewModify(shortPath1, shortPath2)
+ c.Assert(change.String(), Equals, "<Modify z>")
+}
+
+func (s ChangeSuite) TestMalformedChange(c *C) {
+ change := merkletrie.Change{}
+ c.Assert(change.String, PanicMatches, "malformed change.*")
+}
diff --git a/utils/merkletrie/difftree.go b/utils/merkletrie/difftree.go
new file mode 100644
index 0000000..1a626cd
--- /dev/null
+++ b/utils/merkletrie/difftree.go
@@ -0,0 +1,404 @@
+package merkletrie
+
+// The focus of this difftree implementation is to save time by
+// skipping whole directories if their hash is the same in both
+// trees.
+//
+// The diff algorithm implemented here is based on the doubleiter
+// type defined in this same package; we will iterate over both
+// trees at the same time, while comparing the current noders in
+// each iterator. Depending on how they differ we will output the
+// corresponding chages and move the iterators further over both
+// trees.
+//
+// The table bellow show all the possible comparison results, along
+// with what changes should we produce and how to advance the
+// iterators.
+//
+// The table is implemented by the switches in this function,
+// diffTwoNodes, diffTwoNodesSameName and diffTwoDirs.
+//
+// Many Bothans died to bring us this information, make sure you
+// understand the table before modifying this code.
+
+// # Cases
+//
+// When comparing noders in both trees you will found yourself in
+// one of 169 possible cases, but if we ignore moves, we can
+// simplify a lot the search space into the following table:
+//
+// - "-": nothing, no file or directory
+// - a<>: an empty file named "a".
+// - a<1>: a file named "a", with "1" as its contents.
+// - a<2>: a file named "a", with "2" as its contents.
+// - a(): an empty dir named "a".
+// - a(...): a dir named "a", with some files and/or dirs inside (possibly
+// empty).
+// - a(;;;): a dir named "a", with some other files and/or dirs inside
+// (possibly empty), which different from the ones in "a(...)".
+//
+// \ to - a<> a<1> a<2> a() a(...) a(;;;)
+// from \
+// - 00 01 02 03 04 05 06
+// a<> 10 11 12 13 14 15 16
+// a<1> 20 21 22 23 24 25 26
+// a<2> 30 31 32 33 34 35 36
+// a() 40 41 42 43 44 45 46
+// a(...) 50 51 52 53 54 55 56
+// a(;;;) 60 61 62 63 64 65 66
+//
+// Every (from, to) combination in the table is a special case, but
+// some of them can be merged into some more general cases, for
+// instance 11 and 22 can be merged into the general case: both
+// noders are equal.
+//
+// Here is a full list of all the cases that are similar and how to
+// merge them together into more general cases. Each general case
+// is labeled wiht an uppercase letter for further reference, and it
+// is followed by the pseudocode of the checks you have to perfrom
+// on both noders to see if you are in such a case, the actions to
+// perform (i.e. what changes to output) and how to advance the
+// iterators of each tree to continue the comparison process.
+//
+// ## A. Impossible: 00
+//
+// ## B. Same thing on both sides: 11, 22, 33, 44, 55, 66
+// - check: `SameName() && SameHash()`
+// - action: do nothing.
+// - advance: `FromNext(); ToNext()`
+//
+// ### C. To was created: 01, 02, 03, 04, 05, 06
+// - check: `DifferentName() && ToBeforeFrom()`
+// - action: inserRecursively(to)
+// - advance: `ToNext()`
+//
+// ### D. From was deleted: 10, 20, 30, 40, 50, 60
+// - check: `DifferentName() && FromBeforeTo()`
+// - action: `DeleteRecursively(from)`
+// - advance: `FromNext()`
+//
+// ### E. Empty file to file with contents: 12, 13
+// - check: `SameName() && DifferentHash() && FromIsFile() &&
+// ToIsFile() && FromIsEmpty()`
+// - action: `modifyFile(from, to)`
+// - advance: `FromNext()` or `FromStep()`
+//
+// ### E'. file with contents to empty file: 21, 31
+// - check: `SameName() && DifferentHash() && FromIsFile() &&
+// ToIsFile() && ToIsEmpty()`
+// - action: `modifyFile(from, to)`
+// - advance: `FromNext()` or `FromStep()`
+//
+// ### F. empty file to empty dir with the same name: 14
+// - check: `SameName() && FromIsFile() && FromIsEmpty() &&
+// ToIsDir() && ToIsEmpty()`
+// - action: `DeleteFile(from); InsertEmptyDir(to)`
+// - advance: `FromNext(); ToNext()`
+//
+// ### F'. empty dir to empty file of the same name: 41
+// - check: `SameName() && FromIsDir() && FromIsEmpty &&
+// ToIsFile() && ToIsEmpty()`
+// - action: `DeleteEmptyDir(from); InsertFile(to)`
+// - advance: `FromNext(); ToNext()` or step for any of them.
+//
+// ### G. empty file to non-empty dir of the same name: 15, 16
+// - check: `SameName() && FromIsFile() && ToIsDir() &&
+// FromIsEmpty() && ToIsNotEmpty()`
+// - action: `DeleteFile(from); InsertDirRecursively(to)`
+// - advance: `FromNext(); ToNext()`
+//
+// ### G'. non-empty dir to empty file of the same name: 51, 61
+// - check: `SameName() && FromIsDir() && FromIsNotEmpty() &&
+// ToIsFile() && FromIsEmpty()`
+// - action: `DeleteDirRecursively(from); InsertFile(to)`
+// - advance: `FromNext(); ToNext()`
+//
+// ### H. modify file contents: 23, 32
+// - check: `SameName() && FromIsFile() && ToIsFile() &&
+// FromIsNotEmpty() && ToIsNotEmpty()`
+// - action: `ModifyFile(from, to)`
+// - advance: `FromNext(); ToNext()`
+//
+// ### I. file with contents to empty dir: 24, 34
+// - check: `SameName() && DifferentHash() && FromIsFile() &&
+// FromIsNotEmpty() && ToIsDir() && ToIsEmpty()`
+// - action: `DeleteFile(from); InsertEmptyDir(to)`
+// - advance: `FromNext(); ToNext()`
+//
+// ### I'. empty dir to file with contents: 42, 43
+// - check: `SameName() && DifferentHash() && FromIsDir() &&
+// FromIsEmpty() && ToIsFile() && ToIsEmpty()`
+// - action: `DeleteDir(from); InsertFile(to)`
+// - advance: `FromNext(); ToNext()`
+//
+// ### J. file with contents to dir with contetns: 25, 26, 35, 36
+// - check: `SameName() && DifferentHash() && FromIsFile() &&
+// FromIsNotEmpty() && ToIsDir() && ToIsNotEmpty()`
+// - action: `DeleteFile(from); InsertDirRecursively(to)`
+// - advance: `FromNext(); ToNext()`
+//
+// ### J'. dir with contetns to file with contents: 52, 62, 53, 63
+// - check: `SameName() && DifferentHash() && FromIsDir() &&
+// FromIsNotEmpty() && ToIsFile() && ToIsNotEmpty()`
+// - action: `DeleteDirRecursively(from); InsertFile(to)`
+// - advance: `FromNext(); ToNext()`
+//
+// ### K. empty dir to dir with contents: 45, 46
+// - check: `SameName() && DifferentHash() && FromIsDir() &&
+// FromIsEmpty() && ToIsDir() && ToIsNotEmpty()`
+// - action: `InsertChildrenRecursively(to)`
+// - advance: `FromNext(); ToNext()`
+//
+// ### K'. dir with contents to empty dir: 54, 64
+// - check: `SameName() && DifferentHash() && FromIsDir() &&
+// FromIsEmpty() && ToIsDir() && ToIsNotEmpty()`
+// - action: `DeleteChildrenRecursively(from)`
+// - advance: `FromNext(); ToNext()`
+//
+// ### L. dir with contents to dir with different contents: 56, 65
+// - check: `SameName() && DifferentHash() && FromIsDir() &&
+// FromIsNotEmpty() && ToIsDir() && ToIsNotEmpty()`
+// - action: nothing
+// - advance: `FromStep(); ToStep()`
+//
+//
+
+// All these cases can be further simplified by a truth table
+// reduction process, in which we gather similar checks together to
+// make the final code easier to read and understand.
+//
+// The first 6 columns are the outputs of the checks to perform on
+// both noders. I have labeled them 1 to 6, this is what they mean:
+//
+// 1: SameName()
+// 2: SameHash()
+// 3: FromIsDir()
+// 4: ToIsDir()
+// 5: FromIsEmpty()
+// 6: ToIsEmpty()
+//
+// The from and to columns are a fsnoder example of the elements
+// that you will find on each tree under the specified comparison
+// results (columns 1 to 6).
+//
+// The type column identifies the case we are into, from the list above.
+//
+// The type' column identifies the new set of reduced cases, using
+// lowercase letters, and they are explained after the table.
+//
+// The last column is the set of actions and advances for each case.
+//
+// "---" means impossible except in case of hash collision.
+//
+// advance meaning:
+// - NN: from.Next(); to.Next()
+// - SS: from.Step(); to.Step()
+//
+// 1 2 3 4 5 6 | from | to |type|type'|action ; advance
+// ------------+--------+--------+----+------------------------------------
+// 0 0 0 0 0 0 | | | | | if !SameName() {
+// . | | | | | if FromBeforeTo() {
+// . | | | D | d | delete(from); from.Next()
+// . | | | | | } else {
+// . | | | C | c | insert(to); to.Next()
+// . | | | | | }
+// 0 1 1 1 1 1 | | | | | }
+// 1 0 0 0 0 0 | a<1> | a<2> | H | e | modify(from, to); NN
+// 1 0 0 0 0 1 | a<1> | a<> | E' | e | modify(from, to); NN
+// 1 0 0 0 1 0 | a<> | a<1> | E | e | modify(from, to); NN
+// 1 0 0 0 1 1 | ---- | ---- | | e |
+// 1 0 0 1 0 0 | a<1> | a(...) | J | f | delete(from); insert(to); NN
+// 1 0 0 1 0 1 | a<1> | a() | I | f | delete(from); insert(to); NN
+// 1 0 0 1 1 0 | a<> | a(...) | G | f | delete(from); insert(to); NN
+// 1 0 0 1 1 1 | a<> | a() | F | f | delete(from); insert(to); NN
+// 1 0 1 0 0 0 | a(...) | a<1> | J' | f | delete(from); insert(to); NN
+// 1 0 1 0 0 1 | a(...) | a<> | G' | f | delete(from); insert(to); NN
+// 1 0 1 0 1 0 | a() | a<1> | I' | f | delete(from); insert(to); NN
+// 1 0 1 0 1 1 | a() | a<> | F' | f | delete(from); insert(to); NN
+// 1 0 1 1 0 0 | a(...) | a(;;;) | L | g | nothing; SS
+// 1 0 1 1 0 1 | a(...) | a() | K' | h | deleteChidren(from); NN
+// 1 0 1 1 1 0 | a() | a(...) | K | i | insertChildren(to); NN
+// 1 0 1 1 1 1 | ---- | ---- | | |
+// 1 1 0 0 0 0 | a<1> | a<1> | B | b | nothing; NN
+// 1 1 0 0 0 1 | ---- | ---- | | b |
+// 1 1 0 0 1 0 | ---- | ---- | | b |
+// 1 1 0 0 1 1 | a<> | a<> | B | b | nothing; NN
+// 1 1 0 1 0 0 | ---- | ---- | | b |
+// 1 1 0 1 0 1 | ---- | ---- | | b |
+// 1 1 0 1 1 0 | ---- | ---- | | b |
+// 1 1 0 1 1 1 | ---- | ---- | | b |
+// 1 1 1 0 0 0 | ---- | ---- | | b |
+// 1 1 1 0 0 1 | ---- | ---- | | b |
+// 1 1 1 0 1 0 | ---- | ---- | | b |
+// 1 1 1 0 1 1 | ---- | ---- | | b |
+// 1 1 1 1 0 0 | a(...) | a(...) | B | b | nothing; NN
+// 1 1 1 1 0 1 | ---- | ---- | | b |
+// 1 1 1 1 1 0 | ---- | ---- | | b |
+// 1 1 1 1 1 1 | a() | a() | B | b | nothing; NN
+//
+// c and d:
+// if !SameName()
+// d if FromBeforeTo()
+// c else
+// b: SameName) && sameHash()
+// e: SameName() && !sameHash() && BothAreFiles()
+// f: SameName() && !sameHash() && FileAndDir()
+// g: SameName() && !sameHash() && BothAreDirs() && NoneIsEmpty
+// i: SameName() && !sameHash() && BothAreDirs() && FromIsEmpty
+// h: else of i
+
+import (
+ "fmt"
+ "strings"
+
+ "srcd.works/go-git.v4/utils/merkletrie/noder"
+)
+
+// DiffTree calculates the list of changes between two merkletries. It
+// uses the provided hashEqual callback to compare noders.
+func DiffTree(fromTree, toTree noder.Noder,
+ hashEqual noder.Equal) (Changes, error) {
+ ret := NewChanges()
+
+ ii, err := newDoubleIter(fromTree, toTree, hashEqual)
+ if err != nil {
+ return nil, err
+ }
+
+ for {
+ from := ii.from.current
+ to := ii.to.current
+
+ switch r := ii.remaining(); r {
+ case noMoreNoders:
+ return ret, nil
+ case onlyFromRemains:
+ if err = ret.AddRecursiveDelete(from); err != nil {
+ return nil, err
+ }
+ if err = ii.nextFrom(); err != nil {
+ return nil, err
+ }
+ case onlyToRemains:
+ if err = ret.AddRecursiveInsert(to); err != nil {
+ return nil, err
+ }
+ if err = ii.nextTo(); err != nil {
+ return nil, err
+ }
+ case bothHaveNodes:
+ if err = diffNodes(&ret, ii); err != nil {
+ return nil, err
+ }
+ default:
+ panic(fmt.Sprintf("unknown remaining value: %d", r))
+ }
+ }
+}
+
+func diffNodes(changes *Changes, ii *doubleIter) error {
+ from := ii.from.current
+ to := ii.to.current
+ var err error
+
+ // compare their full paths as strings
+ switch strings.Compare(from.String(), to.String()) {
+ case -1:
+ if err = changes.AddRecursiveDelete(from); err != nil {
+ return err
+ }
+ if err = ii.nextFrom(); err != nil {
+ return err
+ }
+ case 1:
+ if err = changes.AddRecursiveInsert(to); err != nil {
+ return err
+ }
+ if err = ii.nextTo(); err != nil {
+ return err
+ }
+ default:
+ if err := diffNodesSameName(changes, ii); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func diffNodesSameName(changes *Changes, ii *doubleIter) error {
+ from := ii.from.current
+ to := ii.to.current
+
+ status, err := ii.compare()
+ if err != nil {
+ return err
+ }
+
+ switch {
+ case status.sameHash:
+ // do nothing
+ if err = ii.nextBoth(); err != nil {
+ return err
+ }
+ case status.bothAreFiles:
+ changes.Add(NewModify(from, to))
+ if err = ii.nextBoth(); err != nil {
+ return err
+ }
+ case status.fileAndDir:
+ if err = changes.AddRecursiveDelete(from); err != nil {
+ return err
+ }
+ if err = changes.AddRecursiveInsert(to); err != nil {
+ return err
+ }
+ if err = ii.nextBoth(); err != nil {
+ return err
+ }
+ case status.bothAreDirs:
+ if err = diffDirs(changes, ii); err != nil {
+ return err
+ }
+ default:
+ return fmt.Errorf("bad status from double iterator")
+ }
+
+ return nil
+}
+
+func diffDirs(changes *Changes, ii *doubleIter) error {
+ from := ii.from.current
+ to := ii.to.current
+
+ status, err := ii.compare()
+ if err != nil {
+ return err
+ }
+
+ switch {
+ case status.fromIsEmptyDir:
+ if err = changes.AddRecursiveInsert(to); err != nil {
+ return err
+ }
+ if err = ii.nextBoth(); err != nil {
+ return err
+ }
+ case status.toIsEmptyDir:
+ if err = changes.AddRecursiveDelete(from); err != nil {
+ return err
+ }
+ if err = ii.nextBoth(); err != nil {
+ return err
+ }
+ case !status.fromIsEmptyDir && !status.toIsEmptyDir:
+ // do nothing
+ if err = ii.stepBoth(); err != nil {
+ return err
+ }
+ default:
+ return fmt.Errorf("both dirs are empty but has different hash")
+ }
+
+ return nil
+}
diff --git a/utils/merkletrie/difftree_test.go b/utils/merkletrie/difftree_test.go
new file mode 100644
index 0000000..6a193af
--- /dev/null
+++ b/utils/merkletrie/difftree_test.go
@@ -0,0 +1,429 @@
+package merkletrie_test
+
+import (
+ "bytes"
+ "fmt"
+ "reflect"
+ "sort"
+ "strings"
+ "testing"
+ "unicode"
+
+ "srcd.works/go-git.v4/utils/merkletrie"
+ "srcd.works/go-git.v4/utils/merkletrie/internal/fsnoder"
+
+ . "gopkg.in/check.v1"
+)
+
+func Test(t *testing.T) { TestingT(t) }
+
+type DiffTreeSuite struct{}
+
+var _ = Suite(&DiffTreeSuite{})
+
+type diffTreeTest struct {
+ from string
+ to string
+ expected string
+}
+
+func (t diffTreeTest) innerRun(c *C, context string, reverse bool) {
+ comment := Commentf("\n%s", context)
+ if reverse {
+ comment = Commentf("%s [REVERSED]", comment.CheckCommentString())
+ }
+
+ a, err := fsnoder.New(t.from)
+ c.Assert(err, IsNil, comment)
+ comment = Commentf("%s\n\t from = %s", comment.CheckCommentString(), a)
+
+ b, err := fsnoder.New(t.to)
+ c.Assert(err, IsNil, comment)
+ comment = Commentf("%s\n\t to = %s", comment.CheckCommentString(), b)
+
+ expected, err := newChangesFromString(t.expected)
+ c.Assert(err, IsNil, comment)
+
+ if reverse {
+ a, b = b, a
+ expected = expected.reverse()
+ }
+ comment = Commentf("%s\n\texpected = %s", comment.CheckCommentString(), expected)
+
+ results, err := merkletrie.DiffTree(a, b, fsnoder.HashEqual)
+ c.Assert(err, IsNil, comment)
+
+ obtained, err := newChanges(results)
+ c.Assert(err, IsNil, comment)
+
+ comment = Commentf("%s\n\tobtained = %s", comment.CheckCommentString(), obtained)
+
+ c.Assert(obtained, changesEquals, expected, comment)
+}
+
+func (t diffTreeTest) run(c *C, context string) {
+ t.innerRun(c, context, false)
+ t.innerRun(c, context, true)
+}
+
+type change struct {
+ merkletrie.Action
+ path string
+}
+
+func (c change) String() string {
+ return fmt.Sprintf("<%s %s>", c.Action, c.path)
+}
+
+func (c change) reverse() change {
+ ret := change{
+ path: c.path,
+ }
+
+ switch c.Action {
+ case merkletrie.Insert:
+ ret.Action = merkletrie.Delete
+ case merkletrie.Delete:
+ ret.Action = merkletrie.Insert
+ case merkletrie.Modify:
+ ret.Action = merkletrie.Modify
+ default:
+ panic(fmt.Sprintf("unknown action type: %d", c.Action))
+ }
+
+ return ret
+}
+
+type changes []change
+
+func newChanges(original merkletrie.Changes) (changes, error) {
+ ret := make(changes, len(original))
+ for i, c := range original {
+ action, err := c.Action()
+ if err != nil {
+ return nil, err
+ }
+ switch action {
+ case merkletrie.Insert:
+ ret[i] = change{
+ Action: merkletrie.Insert,
+ path: c.To.String(),
+ }
+ case merkletrie.Delete:
+ ret[i] = change{
+ Action: merkletrie.Delete,
+ path: c.From.String(),
+ }
+ case merkletrie.Modify:
+ ret[i] = change{
+ Action: merkletrie.Modify,
+ path: c.From.String(),
+ }
+ default:
+ panic(fmt.Sprintf("unsupported action %d", c.Action))
+ }
+ }
+
+ return ret, nil
+}
+
+func newChangesFromString(s string) (changes, error) {
+ ret := make([]change, 0)
+
+ s = strings.TrimSpace(s)
+ s = removeDuplicatedSpace(s)
+ s = turnSpaceIntoLiteralSpace(s)
+
+ if s == "" {
+ return ret, nil
+ }
+
+ for _, chunk := range strings.Split(s, " ") {
+ change := change{
+ path: string(chunk[1:]),
+ }
+
+ switch chunk[0] {
+ case '+':
+ change.Action = merkletrie.Insert
+ case '-':
+ change.Action = merkletrie.Delete
+ case '*':
+ change.Action = merkletrie.Modify
+ default:
+ panic(fmt.Sprintf("unsupported action descriptor %q", chunk[0]))
+ }
+
+ ret = append(ret, change)
+ }
+
+ return ret, nil
+}
+
+func removeDuplicatedSpace(s string) string {
+ var buf bytes.Buffer
+
+ var lastWasSpace, currentIsSpace bool
+ for _, r := range s {
+ currentIsSpace = unicode.IsSpace(r)
+
+ if lastWasSpace && currentIsSpace {
+ continue
+ }
+ lastWasSpace = currentIsSpace
+
+ buf.WriteRune(r)
+ }
+
+ return buf.String()
+}
+
+func turnSpaceIntoLiteralSpace(s string) string {
+ return strings.Map(
+ func(r rune) rune {
+ if unicode.IsSpace(r) {
+ return ' '
+ }
+ return r
+ }, s)
+}
+
+func (cc changes) Len() int { return len(cc) }
+func (cc changes) Swap(i, j int) { cc[i], cc[j] = cc[j], cc[i] }
+func (cc changes) Less(i, j int) bool { return strings.Compare(cc[i].String(), cc[j].String()) < 0 }
+
+func (cc changes) equals(other changes) bool {
+ sort.Sort(cc)
+ sort.Sort(other)
+ return reflect.DeepEqual(cc, other)
+}
+
+func (cc changes) String() string {
+ var buf bytes.Buffer
+ fmt.Fprintf(&buf, "len(%d) [", len(cc))
+ sep := ""
+ for _, c := range cc {
+ fmt.Fprintf(&buf, "%s%s", sep, c)
+ sep = ", "
+ }
+ buf.WriteByte(']')
+ return buf.String()
+}
+
+func (cc changes) reverse() changes {
+ ret := make(changes, len(cc))
+ for i, c := range cc {
+ ret[i] = c.reverse()
+ }
+
+ return ret
+}
+
+type changesEqualsChecker struct {
+ *CheckerInfo
+}
+
+var changesEquals Checker = &changesEqualsChecker{
+ &CheckerInfo{Name: "changesEquals", Params: []string{"obtained", "expected"}},
+}
+
+func (checker *changesEqualsChecker) Check(params []interface{}, names []string) (result bool, error string) {
+ a, ok := params[0].(changes)
+ if !ok {
+ return false, "first parameter must be a changes"
+ }
+ b, ok := params[1].(changes)
+ if !ok {
+ return false, "second parameter must be a changes"
+ }
+
+ return a.equals(b), ""
+}
+
+func do(c *C, list []diffTreeTest) {
+ for i, t := range list {
+ t.run(c, fmt.Sprintf("test #%d:", i))
+ }
+}
+
+func (s *DiffTreeSuite) TestEmptyVsEmpty(c *C) {
+ do(c, []diffTreeTest{
+ {"()", "()", ""},
+ {"A()", "A()", ""},
+ {"A()", "()", ""},
+ {"A()", "B()", ""},
+ })
+}
+
+func (s *DiffTreeSuite) TestBasicCases(c *C) {
+ do(c, []diffTreeTest{
+ {"()", "()", ""},
+ {"()", "(a<>)", "+a"},
+ {"()", "(a<1>)", "+a"},
+ {"()", "(a())", ""},
+ {"()", "(a(b()))", ""},
+ {"()", "(a(b<>))", "+a/b"},
+ {"()", "(a(b<1>))", "+a/b"},
+ {"(a<>)", "(a<>)", ""},
+ {"(a<>)", "(a<1>)", "*a"},
+ {"(a<>)", "(a())", "-a"},
+ {"(a<>)", "(a(b()))", "-a"},
+ {"(a<>)", "(a(b<>))", "-a +a/b"},
+ {"(a<>)", "(a(b<1>))", "-a +a/b"},
+ {"(a<>)", "(c())", "-a"},
+ {"(a<>)", "(c(b()))", "-a"},
+ {"(a<>)", "(c(b<>))", "-a +c/b"},
+ {"(a<>)", "(c(b<1>))", "-a +c/b"},
+ {"(a<>)", "(c(a()))", "-a"},
+ {"(a<>)", "(c(a<>))", "-a +c/a"},
+ {"(a<>)", "(c(a<1>))", "-a +c/a"},
+ {"(a<1>)", "(a<1>)", ""},
+ {"(a<1>)", "(a<2>)", "*a"},
+ {"(a<1>)", "(b<1>)", "-a +b"},
+ {"(a<1>)", "(b<2>)", "-a +b"},
+ {"(a<1>)", "(a())", "-a"},
+ {"(a<1>)", "(a(b()))", "-a"},
+ {"(a<1>)", "(a(b<>))", "-a +a/b"},
+ {"(a<1>)", "(a(b<1>))", "-a +a/b"},
+ {"(a<1>)", "(a(b<2>))", "-a +a/b"},
+ {"(a<1>)", "(c())", "-a"},
+ {"(a<1>)", "(c(b()))", "-a"},
+ {"(a<1>)", "(c(b<>))", "-a +c/b"},
+ {"(a<1>)", "(c(b<1>))", "-a +c/b"},
+ {"(a<1>)", "(c(b<2>))", "-a +c/b"},
+ {"(a<1>)", "(c())", "-a"},
+ {"(a<1>)", "(c(a()))", "-a"},
+ {"(a<1>)", "(c(a<>))", "-a +c/a"},
+ {"(a<1>)", "(c(a<1>))", "-a +c/a"},
+ {"(a<1>)", "(c(a<2>))", "-a +c/a"},
+ {"(a())", "(a())", ""},
+ {"(a())", "(b())", ""},
+ {"(a())", "(a(b()))", ""},
+ {"(a())", "(b(a()))", ""},
+ {"(a())", "(a(b<>))", "+a/b"},
+ {"(a())", "(a(b<1>))", "+a/b"},
+ {"(a())", "(b(a<>))", "+b/a"},
+ {"(a())", "(b(a<1>))", "+b/a"},
+ })
+}
+
+func (s *DiffTreeSuite) TestHorizontals(c *C) {
+ do(c, []diffTreeTest{
+ {"()", "(a<> b<>)", "+a +b"},
+ {"()", "(a<> b<1>)", "+a +b"},
+ {"()", "(a<> b())", "+a"},
+ {"()", "(a() b<>)", "+b"},
+ {"()", "(a<1> b<>)", "+a +b"},
+ {"()", "(a<1> b<1>)", "+a +b"},
+ {"()", "(a<1> b<2>)", "+a +b"},
+ {"()", "(a<1> b())", "+a"},
+ {"()", "(a() b<1>)", "+b"},
+ {"()", "(a() b())", ""},
+ {"()", "(a<> b<> c<> d<>)", "+a +b +c +d"},
+ {"()", "(a<> b<1> c() d<> e<2> f())", "+a +b +d +e"},
+ })
+}
+
+func (s *DiffTreeSuite) TestVerticals(c *C) {
+ do(c, []diffTreeTest{
+ {"()", "(z<>)", "+z"},
+ {"()", "(a(z<>))", "+a/z"},
+ {"()", "(a(b(z<>)))", "+a/b/z"},
+ {"()", "(a(b(c(z<>))))", "+a/b/c/z"},
+ {"()", "(a(b(c(d(z<>)))))", "+a/b/c/d/z"},
+ {"()", "(a(b(c(d(z<1>)))))", "+a/b/c/d/z"},
+ })
+}
+
+func (s *DiffTreeSuite) TestSingleInserts(c *C) {
+ do(c, []diffTreeTest{
+ {"()", "(z<>)", "+z"},
+ {"(a())", "(a(z<>))", "+a/z"},
+ {"(a())", "(a(b(z<>)))", "+a/b/z"},
+ {"(a(b(c())))", "(a(b(c(z<>))))", "+a/b/c/z"},
+ {"(a<> b<> c<>)", "(a<> b<> c<> z<>)", "+z"},
+ {"(a(b<> c<> d<>))", "(a(b<> c<> d<> z<>))", "+a/z"},
+ {"(a(b(c<> d<> e<>)))", "(a(b(c<> d<> e<> z<>)))", "+a/b/z"},
+ {"(a(b<>) f<>)", "(a(b<>) f<> z<>)", "+z"},
+ {"(a(b<>) f<>)", "(a(b<> z<>) f<>)", "+a/z"},
+ })
+}
+
+func (s *DiffTreeSuite) TestDebug(c *C) {
+ do(c, []diffTreeTest{
+ {"(a(b<>) f<>)", "(a(b<> z<>) f<>)", "+a/z"},
+ })
+}
+
+// root
+// / | \
+// / | ----
+// f d h --------
+// /\ / \ |
+// e a j b/ g
+// | / \ |
+// l n k icm
+// |
+// o
+// |
+// p/
+func (s *DiffTreeSuite) TestCrazy(c *C) {
+ crazy := "(f(e(l<1>) a(n(o(p())) k<1>)) d<1> h(j(i<1> c<2> m<>) b() g<>))"
+ do(c, []diffTreeTest{
+ {
+ crazy,
+ "()",
+ "-d -f/e/l -f/a/k -h/j/i -h/j/c -h/j/m -h/g",
+ }, {
+ crazy,
+ crazy,
+ "",
+ }, {
+ crazy,
+ "(d<1>)",
+ "-f/e/l -f/a/k -h/j/i -h/j/c -h/j/m -h/g",
+ }, {
+ crazy,
+ "(d<1> h(b() g<>))",
+ "-f/e/l -f/a/k -h/j/i -h/j/c -h/j/m",
+ }, {
+ crazy,
+ "(d<1> f(e(l()) a()) h(b() g<>))",
+ "-f/e/l -f/a/k -h/j/i -h/j/c -h/j/m",
+ }, {
+ crazy,
+ "(d<1> f(e(l<1>) a()) h(b() g<>))",
+ "-f/a/k -h/j/i -h/j/c -h/j/m",
+ }, {
+ crazy,
+ "(d<2> f(e(l<2>) a(s(t<1>))) h(b() g<> r<> j(i<> c<3> m<>)))",
+ "+f/a/s/t +h/r -f/a/k *d *f/e/l *h/j/c *h/j/i",
+ }, {
+ crazy,
+ "(f(e(l<2>) a(n(o(p<1>)) k<>)) h(j(i<1> c<2> m<>) b() g<>))",
+ "*f/e/l +f/a/n/o/p *f/a/k -d",
+ }, {
+ crazy,
+ "(f(e(l<1>) a(n(o(p(r<1>))) k<1>)) d<1> h(j(i<1> c<2> b() m<>) g<1>))",
+ "+f/a/n/o/p/r *h/g",
+ },
+ })
+}
+
+func (s *DiffTreeSuite) TestSameNames(c *C) {
+ do(c, []diffTreeTest{
+ {
+ "(a(a(a<>)))",
+ "(a(a(a<1>)))",
+ "*a/a/a",
+ }, {
+ "(a(b(a<>)))",
+ "(a(b(a<>)) b(a<>))",
+ "+b/a",
+ }, {
+ "(a(b(a<>)))",
+ "(a(b()) b(a<>))",
+ "-a/b/a +b/a",
+ },
+ })
+}
diff --git a/utils/merkletrie/doc.go b/utils/merkletrie/doc.go
index 28ece3e..5204024 100644
--- a/utils/merkletrie/doc.go
+++ b/utils/merkletrie/doc.go
@@ -1,20 +1,11 @@
/*
Package merkletrie provides support for n-ary trees that are at the same
-time Merkle trees and Radix trees (tries), and provides an efficient
-tree comparison algorithm for them.
+time Merkle trees and Radix trees (tries).
Git trees are Radix n-ary trees in virtue of the names of their
tree entries. At the same time, git trees are Merkle trees thanks to
their hashes.
-When comparing git trees, the simple approach of alphabetically sorting
-their elements and comparing the resulting lists is too slow as it
-depends linearly on the number of files in the trees: When a directory
-has lots of files but none of them has been modified, this approach is
-very expensive. We can do better by prunning whole directories that
-have not change, just by looking at their hashes. This package provides
-the tools to do exactly that.
-
This package defines Merkle tries as nodes that should have:
- a hash: the Merkle part of the Merkle trie
@@ -28,5 +19,16 @@ their children, which is good for testing purposes.
Nodes in the Merkle trie are abstracted by the Noder interface. The
intended use is that git trees implements this interface, either
directly or using a simple wrapper.
+
+This package provides an iterator for merkletries that can skip whole
+directory-like noders and an efficient merkletrie comparison algorithm.
+
+When comparing git trees, the simple approach of alphabetically sorting
+their elements and comparing the resulting lists is too slow as it
+depends linearly on the number of files in the trees: When a directory
+has lots of files but none of them has been modified, this approach is
+very expensive. We can do better by prunning whole directories that
+have not change, just by looking at their hashes. This package provides
+the tools to do exactly that.
*/
package merkletrie
diff --git a/utils/merkletrie/doubleiter.go b/utils/merkletrie/doubleiter.go
new file mode 100644
index 0000000..6f5e538
--- /dev/null
+++ b/utils/merkletrie/doubleiter.go
@@ -0,0 +1,187 @@
+package merkletrie
+
+import (
+ "fmt"
+ "io"
+
+ "srcd.works/go-git.v4/utils/merkletrie/noder"
+)
+
+// A doubleIter is a convenience type to keep track of the current
+// noders in two merkletries that are going to be iterated in parallel.
+// It has methods for:
+//
+// - iterating over the merkletries, both at the same time or
+// individually: nextFrom, nextTo, nextBoth, stepBoth
+//
+// - checking if there are noders left in one or both of them with the
+// remaining method and its associated returned type.
+//
+// - comparing the current noders of both merkletries in several ways,
+// with the compare method and its associated returned type.
+type doubleIter struct {
+ from struct {
+ iter *Iter
+ current noder.Path // nil if no more nodes
+ }
+ to struct {
+ iter *Iter
+ current noder.Path // nil if no more nodes
+ }
+ hashEqual noder.Equal
+}
+
+// NewdoubleIter returns a new doubleIter for the merkletries "from" and
+// "to". The hashEqual callback function will be used by the doubleIter
+// to compare the hash of the noders in the merkletries. The doubleIter
+// will be initialized to the first elements in each merkletrie if any.
+func newDoubleIter(from, to noder.Noder, hashEqual noder.Equal) (
+ *doubleIter, error) {
+ var ii doubleIter
+ var err error
+
+ if ii.from.iter, err = NewIter(from); err != nil {
+ return nil, fmt.Errorf("from: %s", err)
+ }
+ if ii.from.current, err = ii.from.iter.Next(); turnEOFIntoNil(err) != nil {
+ return nil, fmt.Errorf("from: %s", err)
+ }
+
+ if ii.to.iter, err = NewIter(to); err != nil {
+ return nil, fmt.Errorf("to: %s", err)
+ }
+ if ii.to.current, err = ii.to.iter.Next(); turnEOFIntoNil(err) != nil {
+ return nil, fmt.Errorf("to: %s", err)
+ }
+
+ ii.hashEqual = hashEqual
+
+ return &ii, nil
+}
+
+func turnEOFIntoNil(e error) error {
+ if e != nil && e != io.EOF {
+ return e
+ }
+ return nil
+}
+
+// NextBoth makes d advance to the next noder in both merkletries. If
+// any of them is a directory, it skips its contents.
+func (d *doubleIter) nextBoth() error {
+ if err := d.nextFrom(); err != nil {
+ return err
+ }
+ if err := d.nextTo(); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// NextFrom makes d advance to the next noder in the "from" merkletrie,
+// skipping its contents if it is a directory.
+func (d *doubleIter) nextFrom() (err error) {
+ d.from.current, err = d.from.iter.Next()
+ return turnEOFIntoNil(err)
+}
+
+// NextTo makes d advance to the next noder in the "to" merkletrie,
+// skipping its contents if it is a directory.
+func (d *doubleIter) nextTo() (err error) {
+ d.to.current, err = d.to.iter.Next()
+ return turnEOFIntoNil(err)
+}
+
+// StepBoth makes d advance to the next noder in both merkletries,
+// getting deeper into directories if that is the case.
+func (d *doubleIter) stepBoth() (err error) {
+ if d.from.current, err = d.from.iter.Step(); turnEOFIntoNil(err) != nil {
+ return err
+ }
+ if d.to.current, err = d.to.iter.Step(); turnEOFIntoNil(err) != nil {
+ return err
+ }
+ return nil
+}
+
+// Remaining returns if there are no more noders in the tree, if both
+// have noders or if one of them doesn't.
+func (d *doubleIter) remaining() remaining {
+ if d.from.current == nil && d.to.current == nil {
+ return noMoreNoders
+ }
+
+ if d.from.current == nil && d.to.current != nil {
+ return onlyToRemains
+ }
+
+ if d.from.current != nil && d.to.current == nil {
+ return onlyFromRemains
+ }
+
+ return bothHaveNodes
+}
+
+// Remaining values tells you whether both trees still have noders, or
+// only one of them or none of them.
+type remaining int
+
+const (
+ noMoreNoders remaining = iota
+ onlyToRemains
+ onlyFromRemains
+ bothHaveNodes
+)
+
+// Compare returns the comparison between the current elements in the
+// merkletries.
+func (d *doubleIter) compare() (s comparison, err error) {
+ s.sameHash = d.hashEqual(d.from.current, d.to.current)
+
+ fromIsDir := d.from.current.IsDir()
+ toIsDir := d.to.current.IsDir()
+
+ s.bothAreDirs = fromIsDir && toIsDir
+ s.bothAreFiles = !fromIsDir && !toIsDir
+ s.fileAndDir = !s.bothAreDirs && !s.bothAreFiles
+
+ fromNumChildren, err := d.from.current.NumChildren()
+ if err != nil {
+ return comparison{}, fmt.Errorf("from: %s", err)
+ }
+
+ toNumChildren, err := d.to.current.NumChildren()
+ if err != nil {
+ return comparison{}, fmt.Errorf("to: %s", err)
+ }
+
+ s.fromIsEmptyDir = fromIsDir && fromNumChildren == 0
+ s.toIsEmptyDir = toIsDir && toNumChildren == 0
+
+ return
+}
+
+// Answers to a lot of questions you can ask about how to noders are
+// equal or different.
+type comparison struct {
+ // the following are only valid if both nodes have the same name
+ // (i.e. nameComparison == 0)
+
+ // Do both nodes have the same hash?
+ sameHash bool
+ // Are both nodes files?
+ bothAreFiles bool
+
+ // the following are only valid if any of the noders are dirs,
+ // this is, if !bothAreFiles
+
+ // Is one a file and the other a dir?
+ fileAndDir bool
+ // Are both nodes dirs?
+ bothAreDirs bool
+ // Is the from node an empty dir?
+ fromIsEmptyDir bool
+ // Is the to Node an empty dir?
+ toIsEmptyDir bool
+}
diff --git a/utils/merkletrie/iter_test.go b/utils/merkletrie/iter_test.go
index 52d567a..c1900da 100644
--- a/utils/merkletrie/iter_test.go
+++ b/utils/merkletrie/iter_test.go
@@ -4,7 +4,6 @@ import (
"fmt"
"io"
"strings"
- "testing"
"srcd.works/go-git.v4/utils/merkletrie"
"srcd.works/go-git.v4/utils/merkletrie/internal/fsnoder"
@@ -13,8 +12,6 @@ import (
. "gopkg.in/check.v1"
)
-func Test(t *testing.T) { TestingT(t) }
-
type IterSuite struct{}
var _ = Suite(&IterSuite{})