From e80d7b7267ba9f2057f259be331c4de927a60ecb Mon Sep 17 00:00:00 2001 From: Alberto Cortés Date: Mon, 30 Jan 2017 21:34:33 +0100 Subject: delete old noder, create a new one in utils (#241) --- plumbing/difftree/internal/merkletrie/doc.go | 30 -- plumbing/difftree/internal/merkletrie/frame.go | 79 ----- .../difftree/internal/merkletrie/frame_test.go | 69 ---- plumbing/difftree/internal/merkletrie/iter.go | 167 ---------- .../internal/merkletrie/iter_fixtures_test.go | 330 ------------------- plumbing/difftree/internal/merkletrie/iter_test.go | 176 ---------- plumbing/difftree/internal/merkletrie/node.go | 65 ---- plumbing/difftree/internal/merkletrie/node_test.go | 68 ---- plumbing/difftree/internal/merkletrie/noder.go | 20 -- utils/merkletrie/doc.go | 32 ++ utils/merkletrie/internal/fsnoder/dir.go | 140 ++++++++ utils/merkletrie/internal/fsnoder/dir_test.go | 364 +++++++++++++++++++++ utils/merkletrie/internal/fsnoder/doc.go | 50 +++ utils/merkletrie/internal/fsnoder/file.go | 72 ++++ utils/merkletrie/internal/fsnoder/file_test.go | 67 ++++ utils/merkletrie/internal/fsnoder/new.go | 192 +++++++++++ utils/merkletrie/internal/fsnoder/new_test.go | 337 +++++++++++++++++++ utils/merkletrie/noder/noder.go | 59 ++++ utils/merkletrie/noder/noder_test.go | 94 ++++++ utils/merkletrie/noder/path.go | 59 ++++ 20 files changed, 1466 insertions(+), 1004 deletions(-) delete mode 100644 plumbing/difftree/internal/merkletrie/doc.go delete mode 100644 plumbing/difftree/internal/merkletrie/frame.go delete mode 100644 plumbing/difftree/internal/merkletrie/frame_test.go delete mode 100644 plumbing/difftree/internal/merkletrie/iter.go delete mode 100644 plumbing/difftree/internal/merkletrie/iter_fixtures_test.go delete mode 100644 plumbing/difftree/internal/merkletrie/iter_test.go delete mode 100644 plumbing/difftree/internal/merkletrie/node.go delete mode 100644 plumbing/difftree/internal/merkletrie/node_test.go delete mode 100644 plumbing/difftree/internal/merkletrie/noder.go create mode 100644 utils/merkletrie/doc.go create mode 100644 utils/merkletrie/internal/fsnoder/dir.go create mode 100644 utils/merkletrie/internal/fsnoder/dir_test.go create mode 100644 utils/merkletrie/internal/fsnoder/doc.go create mode 100644 utils/merkletrie/internal/fsnoder/file.go create mode 100644 utils/merkletrie/internal/fsnoder/file_test.go create mode 100644 utils/merkletrie/internal/fsnoder/new.go create mode 100644 utils/merkletrie/internal/fsnoder/new_test.go create mode 100644 utils/merkletrie/noder/noder.go create mode 100644 utils/merkletrie/noder/noder_test.go create mode 100644 utils/merkletrie/noder/path.go diff --git a/plumbing/difftree/internal/merkletrie/doc.go b/plumbing/difftree/internal/merkletrie/doc.go deleted file mode 100644 index f8c3b2f..0000000 --- a/plumbing/difftree/internal/merkletrie/doc.go +++ /dev/null @@ -1,30 +0,0 @@ -package merkletrie - -/* -Package merkletrie gives support for n-ary trees that are at the same -time Merkle trees and Radix trees, and provides an efficient tree -comparison algorithm for them. - -Git trees are Radix n-ary trees in virtue of the names of their -tree entries. At the same time, git trees are Merkle trees thanks to -their hashes. - -When comparing git trees, the simple approach of alphabetically sorting -their elements and comparing the resulting lists is not enough as it -depends linearly on the number of files in the trees: When a directory -has lots of files but none of them has been modified, this approach is -very expensive. We can do better by prunning whole directories that -have not change, by just by looking at their hashes. This package -provides the tools to do exactly that. - -This package defines Radix-Merkle trees as nodes that should have: -- a hash: the Merkle part of the Radix-Merkle tree -- a key: the Radix part of the Radix-Merkle tree - -The Merkle hash condition is not enforced by this package though. This -means that node hashes doesn't have to take into account the hashes of -their children, which is good for testing purposes. - -Nodes in the Radix-Merkle tree are abstracted by the Noder interface. -The intended use is that git.Tree implements this interface. -*/ diff --git a/plumbing/difftree/internal/merkletrie/frame.go b/plumbing/difftree/internal/merkletrie/frame.go deleted file mode 100644 index a40c6ad..0000000 --- a/plumbing/difftree/internal/merkletrie/frame.go +++ /dev/null @@ -1,79 +0,0 @@ -package merkletrie - -import ( - "bytes" - "fmt" -) - -const sep = "/" - -// A frame represents siblings in a trie, along with the path to get to -// them. For example the frame for the node with key `b` in this trie: -// -// a -// / \ -// / \ -// / \ -// b c -// /|\ / \ -// y z x d e -// | -// g -// -// would be: -// -// f := frame{ -// base: "a/b", // path to the siblings -// stack: []Node{z, y, x} // in reverse alphabetical order -// } -type frame struct { - base string // absolute key of their parents - stack []Noder // siblings, sorted in reverse alphabetical order by key -} - -// newFrame returns a frame for the children of a node n. -func newFrame(parentAbsoluteKey string, n Noder) *frame { - return &frame{ - base: parentAbsoluteKey + sep + n.Key(), - stack: n.Children(), - } -} - -func (f *frame) String() string { - var buf bytes.Buffer - _, _ = buf.WriteString(fmt.Sprintf("base=%q, stack=[", f.base)) - - sep := "" - for _, e := range f.stack { - _, _ = buf.WriteString(sep) - sep = ", " - _, _ = buf.WriteString(fmt.Sprintf("%q", e.Key())) - } - - _ = buf.WriteByte(']') - - return buf.String() -} - -func (f *frame) top() (Noder, bool) { - if len(f.stack) == 0 { - return nil, false - } - - top := len(f.stack) - 1 - - return f.stack[top], true -} - -func (f *frame) pop() (Noder, bool) { - if len(f.stack) == 0 { - return nil, false - } - - top := len(f.stack) - 1 - ret := f.stack[top] - f.stack[top] = nil - f.stack = f.stack[:top] - - return ret, true -} diff --git a/plumbing/difftree/internal/merkletrie/frame_test.go b/plumbing/difftree/internal/merkletrie/frame_test.go deleted file mode 100644 index 0ef036a..0000000 --- a/plumbing/difftree/internal/merkletrie/frame_test.go +++ /dev/null @@ -1,69 +0,0 @@ -package merkletrie - -import . "gopkg.in/check.v1" - -type FrameSuite struct{} - -var _ = Suite(&FrameSuite{}) - -func (s *FrameSuite) TestNewFrameFromLeaf(c *C) { - n := newNode( - []byte("hash"), - "key", - []*node{}, - ) - - frame := newFrame("foo", n) - - expectedString := `base="foo/key", stack=[]` - c.Assert(frame.String(), Equals, expectedString) - - obtainedTopNode, obtainedTopOK := frame.top() - c.Assert(obtainedTopNode, IsNil) - c.Assert(obtainedTopOK, Equals, false) - - obtainedPopNode, obtainedPopOK := frame.top() - c.Assert(obtainedPopNode, IsNil) - c.Assert(obtainedPopOK, Equals, false) -} - -func (s *FrameSuite) TestNewFrameFromParent(c *C) { - leaf0 := newNode([]byte("leaf0 hash"), "leaf0 key", []*node{}) - leaf1 := newNode([]byte("leaf1 hash"), "leaf1 key", []*node{}) - leaf2 := newNode([]byte("leaf2 hash"), "leaf2 key", []*node{}) - leaf3 := newNode([]byte("leaf3 hash"), "leaf3 key", []*node{}) - parent := newNode( - []byte("parent hash"), - "parent key", - []*node{leaf3, leaf0, leaf2, leaf1}, // not alphabetically sorted - ) - - frame := newFrame("foo", parent) - - expectedString := `base="foo/parent key", stack=["leaf3 key", "leaf2 key", "leaf1 key", "leaf0 key"]` - c.Assert(frame.String(), Equals, expectedString) - - checkTopAndPop(c, frame, leaf0, true) - checkTopAndPop(c, frame, leaf1, true) - checkTopAndPop(c, frame, leaf2, true) - checkTopAndPop(c, frame, leaf3, true) - checkTopAndPop(c, frame, nil, false) -} - -func checkTopAndPop(c *C, f *frame, expectedNode *node, expectedOK bool) { - n, ok := f.top() - if expectedNode == nil { - c.Assert(n, IsNil) - } else { - c.Assert(n, DeepEquals, expectedNode) - } - c.Assert(ok, Equals, expectedOK) - - n, ok = f.pop() - if expectedNode == nil { - c.Assert(n, IsNil) - } else { - c.Assert(n, DeepEquals, expectedNode) - } - c.Assert(ok, Equals, expectedOK) -} diff --git a/plumbing/difftree/internal/merkletrie/iter.go b/plumbing/difftree/internal/merkletrie/iter.go deleted file mode 100644 index 417dbb3..0000000 --- a/plumbing/difftree/internal/merkletrie/iter.go +++ /dev/null @@ -1,167 +0,0 @@ -package merkletrie - -// Iter is a radix tree iterator that will traverse the trie in -// depth-first pre-order. Entries are traversed in (case-sensitive) -// alphabetical order for each level. -// -// This is the kind of traversal you will expect when listing -// ordinary files and directories recursively, for example: -// -// Trie Traversal order -// ---- --------------- -// . -// / | \ a -// / | \ b -// b a z ===> b/a -// / \ b/c -// c a z -// -// -// The Step method will return the next item, the Next method will do -// the same but without descending deeper into the tree (i.e. skipping -// the contents of "directories"). -// -// The name of the type and its methods are based on the well known "next" -// and "step" operations, quite common in debuggers, like gdb. -type Iter struct { - // tells if the iteration has started. - hasStarted bool - // Each level of the tree is represented as a frame, this stack - // keeps track of the frames wrapping the current iterator position. - // The iterator will "step" into a node by adding its frame to the - // stack, or go to the next element at the same level by poping the - // current frame. - frameStack []*frame -} - -// NewIter returns a new iterator for the trie with its root at n. -func NewIter(n Noder) *Iter { - ret := &Iter{} - ret.push(newFrame("", n)) - - return ret -} - -func (iter *Iter) top() (*frame, bool) { - if len(iter.frameStack) == 0 { - return nil, false - } - - top := len(iter.frameStack) - 1 - - return iter.frameStack[top], true -} - -func (iter *Iter) pop() (*frame, bool) { - if len(iter.frameStack) == 0 { - return nil, false - } - - top := len(iter.frameStack) - 1 - ret := iter.frameStack[top] - iter.frameStack[top] = nil - iter.frameStack = iter.frameStack[:top] - - return ret, true -} - -func (iter *Iter) push(f *frame) { - iter.frameStack = append(iter.frameStack, f) -} - -const ( - descend = true - dontDescend = false -) - -// Next returns the next node without descending deeper into the tree -// and true. If there are no more entries it returns nil and false. -func (iter *Iter) Next() (Noder, bool) { - return iter.advance(dontDescend) -} - -// Step returns the next node in the tree, descending deeper into it if -// needed. If there are no more nodes in the tree, it returns nil and -// false. -func (iter *Iter) Step() (Noder, bool) { - return iter.advance(descend) -} - -// advances the iterator in whatever direction you want: descend or -// dontDescend. -func (iter *Iter) advance(mustDescend bool) (Noder, bool) { - node, ok := iter.current() - if !ok { - return nil, false - } - - // The first time we just return the current node. - if !iter.hasStarted { - iter.hasStarted = true - return node, ok - } - // following advances will involve dropping already seen nodes - // or getting into their children - - ignoreChildren := node.NumChildren() == 0 || !mustDescend - if ignoreChildren { - // if we must ignore the current node children, just drop - // it and find the next one in the existing frames. - _ = iter.drop() - node, ok = iter.current() - return node, ok - } - - // if we must descend into the current's node children, drop the - // parent and add a new frame with its children. - _ = iter.drop() - iter.push(newFrame(node.Key(), node)) - node, _ = iter.current() - - return node, true -} - -// returns the current frame and the current node (i.e. the ones at the -// top of their respective stacks. -func (iter *Iter) current() (Noder, bool) { - f, ok := iter.top() - if !ok { - return nil, false - } - - n, ok := f.top() - if !ok { - return nil, false - } - - return n, true -} - -// removes the current node and all the frames that become empty as a -// consequence of this action. It returns true if something was dropped, -// and false if there were no more nodes in the iterator. -func (iter *Iter) drop() bool { - frame, ok := iter.top() - if !ok { - return false - } - - _, ok = frame.pop() - if !ok { - return false - } - - for { // remove empty frames - if len(frame.stack) != 0 { - break - } - - _, _ = iter.pop() - frame, ok = iter.top() - if !ok { - break - } - } - - return true -} diff --git a/plumbing/difftree/internal/merkletrie/iter_fixtures_test.go b/plumbing/difftree/internal/merkletrie/iter_fixtures_test.go deleted file mode 100644 index 20bddaf..0000000 --- a/plumbing/difftree/internal/merkletrie/iter_fixtures_test.go +++ /dev/null @@ -1,330 +0,0 @@ -package merkletrie - -// this files contains fixtures for testing the Iter. -// -// - iter... functions returns iterators for newly created trees -// for example: -// -// + iterLeaf returns an iterator for simple tree with just the root. -// -// + iter2Horizontal returns an iterator for a tree with 2 nodes, both -// childs of the root. -// -// - runs... contains sets of tests, indexed by a string that helps -// to understand each test: "nsn" means next, then step, then next -// again. The test also contains the expected keys of the nodes you -// will get when calling the operations over the correspoding trees: -// Example: runs2HorizontalSorted with iter2HorizontalSorted and so on. - -func iterLeaf() *Iter { - root := newNode(hash, "root", empty) - return NewIter(root) -} - -var runs0 = map[string][]test{ - "nn": {{next, ""}, {next, ""}}, - "ns": {{next, ""}, {step, ""}}, - "sn": {{step, ""}, {next, ""}}, - "ss": {{step, ""}, {step, ""}}, -} - -// root -// | -// a -func iter1() *Iter { - a := newNode(hash, "a", empty) - root := newNode(hash, "root", []*node{a}) - return NewIter(root) -} - -var runs1 = map[string][]test{ - "nn": {{next, "a"}, {next, ""}}, - "ns": {{next, "a"}, {step, ""}}, - "sn": {{step, "a"}, {next, ""}}, - "ss": {{step, "a"}, {step, ""}}, -} - -// root -// / \ -// a b -func iter2HorizontalSorted() *Iter { - a := newNode(hash, "a", empty) - b := newNode(hash, "b", empty) - root := newNode(hash, "root", []*node{a, b}) - return NewIter(root) -} - -// root -// / \ -// b a -func iter2HorizontalReverse() *Iter { - a := newNode(hash, "a", empty) - b := newNode(hash, "b", empty) - root := newNode(hash, "root", []*node{b, a}) - return NewIter(root) -} - -var runs2Horizontal = map[string][]test{ - "nnn": {{next, "a"}, {next, "b"}, {next, ""}}, - "nns": {{next, "a"}, {next, "b"}, {step, ""}}, - "nsn": {{next, "a"}, {step, "b"}, {next, ""}}, - "nss": {{next, "a"}, {step, "b"}, {step, ""}}, - "snn": {{step, "a"}, {next, "b"}, {next, ""}}, - "sns": {{step, "a"}, {next, "b"}, {step, ""}}, - "ssn": {{step, "a"}, {step, "b"}, {next, ""}}, - "sss": {{step, "a"}, {step, "b"}, {step, ""}}, -} - -// root -// | -// a -// | -// b -func iter2VerticalSorted() *Iter { - b := newNode(hash, "b", empty) - a := newNode(hash, "a", []*node{b}) - root := newNode(hash, "root", []*node{a}) - return NewIter(root) -} - -var runs2VerticalSorted = map[string][]test{ - "nnn": {{next, "a"}, {next, ""}, {next, ""}}, - "nns": {{next, "a"}, {next, ""}, {step, ""}}, - "nsn": {{next, "a"}, {step, "b"}, {next, ""}}, - "nss": {{next, "a"}, {step, "b"}, {step, ""}}, - "snn": {{step, "a"}, {next, ""}, {next, ""}}, - "sns": {{step, "a"}, {next, ""}, {step, ""}}, - "ssn": {{step, "a"}, {step, "b"}, {next, ""}}, - "sss": {{step, "a"}, {step, "b"}, {step, ""}}, -} - -// root -// | -// b -// | -// a -func iter2VerticalReverse() *Iter { - a := newNode(hash, "a", empty) - b := newNode(hash, "b", []*node{a}) - root := newNode(hash, "root", []*node{b}) - return NewIter(root) -} - -var runs2VerticalReverse = map[string][]test{ - "nnn": {{next, "b"}, {next, ""}, {next, ""}}, - "nns": {{next, "b"}, {next, ""}, {step, ""}}, - "nsn": {{next, "b"}, {step, "a"}, {next, ""}}, - "nss": {{next, "b"}, {step, "a"}, {step, ""}}, - "snn": {{step, "b"}, {next, ""}, {next, ""}}, - "sns": {{step, "b"}, {next, ""}, {step, ""}}, - "ssn": {{step, "b"}, {step, "a"}, {next, ""}}, - "sss": {{step, "b"}, {step, "a"}, {step, ""}}, -} - -// root -// /|\ -// c a b -func iter3Horizontal() *Iter { - a := newNode(hash, "a", empty) - b := newNode(hash, "b", empty) - c := newNode(hash, "c", empty) - root := newNode(hash, "root", []*node{c, a, b}) - return NewIter(root) -} - -var runs3Horizontal = map[string][]test{ - "nnnn": {{next, "a"}, {next, "b"}, {next, "c"}, {next, ""}}, - "nnns": {{next, "a"}, {next, "b"}, {next, "c"}, {step, ""}}, - "nnsn": {{next, "a"}, {next, "b"}, {step, "c"}, {next, ""}}, - "nnss": {{next, "a"}, {next, "b"}, {step, "c"}, {step, ""}}, - "nsnn": {{next, "a"}, {step, "b"}, {next, "c"}, {next, ""}}, - "nsns": {{next, "a"}, {step, "b"}, {next, "c"}, {step, ""}}, - "nssn": {{next, "a"}, {step, "b"}, {step, "c"}, {next, ""}}, - "nsss": {{next, "a"}, {step, "b"}, {step, "c"}, {step, ""}}, - "snnn": {{step, "a"}, {next, "b"}, {next, "c"}, {next, ""}}, - "snns": {{step, "a"}, {next, "b"}, {next, "c"}, {step, ""}}, - "snsn": {{step, "a"}, {next, "b"}, {step, "c"}, {next, ""}}, - "snss": {{step, "a"}, {next, "b"}, {step, "c"}, {step, ""}}, - "ssnn": {{step, "a"}, {step, "b"}, {next, "c"}, {next, ""}}, - "ssns": {{step, "a"}, {step, "b"}, {next, "c"}, {step, ""}}, - "sssn": {{step, "a"}, {step, "b"}, {step, "c"}, {next, ""}}, - "ssss": {{step, "a"}, {step, "b"}, {step, "c"}, {step, ""}}, -} - -// root -// | -// b -// | -// c -// | -// a -func iter3Vertical() *Iter { - a := newNode(hash, "a", empty) - c := newNode(hash, "c", []*node{a}) - b := newNode(hash, "b", []*node{c}) - root := newNode(hash, "root", []*node{b}) - return NewIter(root) -} - -var runs3Vertical = map[string][]test{ - "nnnn": {{next, "b"}, {next, ""}, {next, ""}, {next, ""}}, - "nnns": {{next, "b"}, {next, ""}, {next, ""}, {step, ""}}, - "nnsn": {{next, "b"}, {next, ""}, {step, ""}, {next, ""}}, - "nnss": {{next, "b"}, {next, ""}, {step, ""}, {step, ""}}, - "nsnn": {{next, "b"}, {step, "c"}, {next, ""}, {next, ""}}, - "nsns": {{next, "b"}, {step, "c"}, {next, ""}, {step, ""}}, - "nssn": {{next, "b"}, {step, "c"}, {step, "a"}, {next, ""}}, - "nsss": {{next, "b"}, {step, "c"}, {step, "a"}, {step, ""}}, - "snnn": {{step, "b"}, {next, ""}, {next, ""}, {next, ""}}, - "snns": {{step, "b"}, {next, ""}, {next, ""}, {step, ""}}, - "snsn": {{step, "b"}, {next, ""}, {step, ""}, {next, ""}}, - "snss": {{step, "b"}, {next, ""}, {step, ""}, {step, ""}}, - "ssnn": {{step, "b"}, {step, "c"}, {next, ""}, {next, ""}}, - "ssns": {{step, "b"}, {step, "c"}, {next, ""}, {step, ""}}, - "sssn": {{step, "b"}, {step, "c"}, {step, "a"}, {next, ""}}, - "ssss": {{step, "b"}, {step, "c"}, {step, "a"}, {step, ""}}, -} - -// root -// / \ -// c a -// | -// b -func iter3Mix1() *Iter { - a := newNode(hash, "a", empty) - b := newNode(hash, "b", empty) - c := newNode(hash, "c", []*node{b}) - root := newNode(hash, "root", []*node{c, a}) - return NewIter(root) -} - -var runs3Mix1 = map[string][]test{ - "nnnn": {{next, "a"}, {next, "c"}, {next, ""}, {next, ""}}, - "nnns": {{next, "a"}, {next, "c"}, {next, ""}, {step, ""}}, - "nnsn": {{next, "a"}, {next, "c"}, {step, "b"}, {next, ""}}, - "nnss": {{next, "a"}, {next, "c"}, {step, "b"}, {step, ""}}, - "nsnn": {{next, "a"}, {step, "c"}, {next, ""}, {next, ""}}, - "nsns": {{next, "a"}, {step, "c"}, {next, ""}, {step, ""}}, - "nssn": {{next, "a"}, {step, "c"}, {step, "b"}, {next, ""}}, - "nsss": {{next, "a"}, {step, "c"}, {step, "b"}, {step, ""}}, - "snnn": {{step, "a"}, {next, "c"}, {next, ""}, {next, ""}}, - "snns": {{step, "a"}, {next, "c"}, {next, ""}, {step, ""}}, - "snsn": {{step, "a"}, {next, "c"}, {step, "b"}, {next, ""}}, - "snss": {{step, "a"}, {next, "c"}, {step, "b"}, {step, ""}}, - "ssnn": {{step, "a"}, {step, "c"}, {next, ""}, {next, ""}}, - "ssns": {{step, "a"}, {step, "c"}, {next, ""}, {step, ""}}, - "sssn": {{step, "a"}, {step, "c"}, {step, "b"}, {next, ""}}, - "ssss": {{step, "a"}, {step, "c"}, {step, "b"}, {step, ""}}, -} - -// root -// / \ -// b a -// | -// c -func iter3Mix2() *Iter { - b := newNode(hash, "b", empty) - c := newNode(hash, "c", empty) - a := newNode(hash, "a", []*node{c}) - root := newNode(hash, "root", []*node{b, a}) - return NewIter(root) -} - -var runs3Mix2 = map[string][]test{ - "nnnn": {{next, "a"}, {next, "b"}, {next, ""}, {next, ""}}, - "nnns": {{next, "a"}, {next, "b"}, {next, ""}, {step, ""}}, - "nnsn": {{next, "a"}, {next, "b"}, {step, ""}, {next, ""}}, - "nnss": {{next, "a"}, {next, "b"}, {step, ""}, {step, ""}}, - "nsnn": {{next, "a"}, {step, "c"}, {next, "b"}, {next, ""}}, - "nsns": {{next, "a"}, {step, "c"}, {next, "b"}, {step, ""}}, - "nssn": {{next, "a"}, {step, "c"}, {step, "b"}, {next, ""}}, - "nsss": {{next, "a"}, {step, "c"}, {step, "b"}, {step, ""}}, - "snnn": {{step, "a"}, {next, "b"}, {next, ""}, {next, ""}}, - "snns": {{step, "a"}, {next, "b"}, {next, ""}, {step, ""}}, - "snsn": {{step, "a"}, {next, "b"}, {step, ""}, {next, ""}}, - "snss": {{step, "a"}, {next, "b"}, {step, ""}, {step, ""}}, - "ssnn": {{step, "a"}, {step, "c"}, {next, "b"}, {next, ""}}, - "ssns": {{step, "a"}, {step, "c"}, {next, "b"}, {step, ""}}, - "sssn": {{step, "a"}, {step, "c"}, {step, "b"}, {next, ""}}, - "ssss": {{step, "a"}, {step, "c"}, {step, "b"}, {step, ""}}, -} - -// root -// / | \ -// / | ---- -// f d h -------- -// /\ / \ | -// e a j b g -// | / \ | -// l n k icm -// | -// o -// | -// p -func iterCrazy() *Iter { - l := newNode(hash, "l", empty) - e := newNode(hash, "e", []*node{l}) - - p := newNode(hash, "p", empty) - o := newNode(hash, "o", []*node{p}) - n := newNode(hash, "n", []*node{o}) - k := newNode(hash, "k", empty) - a := newNode(hash, "a", []*node{n, k}) - f := newNode(hash, "f", []*node{e, a}) - - d := newNode(hash, "d", empty) - - i := newNode(hash, "i", empty) - c := newNode(hash, "c", empty) - m := newNode(hash, "m", empty) - j := newNode(hash, "j", []*node{i, c, m}) - b := newNode(hash, "b", empty) - g := newNode(hash, "g", empty) - h := newNode(hash, "h", []*node{j, b, g}) - - root := newNode(hash, "root", []*node{f, d, h}) - return NewIter(root) -} - -var ( - n = next - s = step -) - -var runsCrazy = map[string][]test{ - "nn nn n": {{n, "d"}, {n, "f"}, {n, "h"}, {n, ""}, {n, ""}}, - "nn nn s": {{n, "d"}, {n, "f"}, {n, "h"}, {n, ""}, {s, ""}}, - "nn ns n": {{n, "d"}, {n, "f"}, {n, "h"}, {s, "b"}, {n, "g"}}, - "nn ns s": {{n, "d"}, {n, "f"}, {n, "h"}, {s, "b"}, {s, "g"}}, - "nn sn n": {{n, "d"}, {n, "f"}, {s, "a"}, {n, "e"}, {n, "h"}}, - "nn sn s": {{n, "d"}, {n, "f"}, {s, "a"}, {n, "e"}, {s, "l"}}, - "nn ss n": {{n, "d"}, {n, "f"}, {s, "a"}, {s, "k"}, {n, "n"}}, - "nn ss s": {{n, "d"}, {n, "f"}, {s, "a"}, {s, "k"}, {s, "n"}}, - "ns nn n": {{n, "d"}, {s, "f"}, {n, "h"}, {n, ""}, {n, ""}}, - "ns nn s": {{n, "d"}, {s, "f"}, {n, "h"}, {n, ""}, {s, ""}}, - "ns ns n": {{n, "d"}, {s, "f"}, {n, "h"}, {s, "b"}, {n, "g"}}, - "ns ns s": {{n, "d"}, {s, "f"}, {n, "h"}, {s, "b"}, {s, "g"}}, - "ns sn n": {{n, "d"}, {s, "f"}, {s, "a"}, {n, "e"}, {n, "h"}}, - - "ns ss ns ss": { - {n, "d"}, {s, "f"}, - {s, "a"}, {s, "k"}, - {n, "n"}, {s, "o"}, - {s, "p"}, {s, "e"}, - }, - - "ns ss ns sn": { - {n, "d"}, {s, "f"}, - {s, "a"}, {s, "k"}, - {n, "n"}, {s, "o"}, - {s, "p"}, {n, "e"}, - }, - - "nn ns ns ss nn": { - {n, "d"}, {n, "f"}, - {n, "h"}, {s, "b"}, - {n, "g"}, {s, "j"}, - {s, "c"}, {s, "i"}, - {n, "m"}, {n, ""}, - }, -} diff --git a/plumbing/difftree/internal/merkletrie/iter_test.go b/plumbing/difftree/internal/merkletrie/iter_test.go deleted file mode 100644 index 65116e1..0000000 --- a/plumbing/difftree/internal/merkletrie/iter_test.go +++ /dev/null @@ -1,176 +0,0 @@ -package merkletrie - -import . "gopkg.in/check.v1" - -type IterSuite struct{} - -var _ = Suite(&IterSuite{}) - -// we don't care about hashes for iterating the tree, so -// use this hash for every object -var hash = []byte{} - -// leafs have no children, use this empty list. -var empty = []*node{} - -// test a defined as an operation to run on an iterator and the key of -// the node expected to be returned by the operation. Use "" as the -// expected key for when there are no more objects in the tree. -type test struct { - operation int // next or step - expected string // key of the expected node, "" for nil node -} - -// test.operation -const ( - next = iota - step -) - -// goes over a list of tests, calling each operation on the iter and -// checking that the obtained result is equal to the expected result -func runTests(c *C, description string, iter *Iter, list []test) { - var obtained Noder - var ok bool - var comment CommentInterface - - for i, t := range list { - comment = Commentf("description %q, operation #%d", - description, i+1) - - switch t.operation { - case next: - obtained, ok = iter.Next() - case step: - obtained, ok = iter.Step() - default: - c.Fatalf("invalid operation %d", t.operation) - } - - if t.expected == "" { - c.Assert(ok, Equals, false, comment) - c.Assert(obtained, IsNil, comment) - } else { - c.Assert(ok, Equals, true, comment) - c.Assert(obtained.Key(), Equals, t.expected, comment) - } - } -} - -// a simple tree consisting on just a leaf -func (s *IterSuite) TestLeaf(c *C) { - for description, tests := range runs0 { - runTests(c, description, iterLeaf(), tests) - } -} - -// root -// | -// a -func (s *IterSuite) TestOneChild(c *C) { - for description, tests := range runs1 { - runTests(c, description, iter1(), tests) - } -} - -// root -// / \ -// a b -func (s *IterSuite) Test2HorizontalSorted(c *C) { - for description, tests := range runs2Horizontal { - runTests(c, description, iter2HorizontalSorted(), tests) - } -} - -// root -// / \ -// b a -func (s *IterSuite) Test2HorizontalReverse(c *C) { - for description, tests := range runs2Horizontal { - runTests(c, description, iter2HorizontalReverse(), tests) - } -} - -// root -// | -// a -// | -// b -func (s *IterSuite) Test2VerticalSorted(c *C) { - for description, tests := range runs2VerticalSorted { - runTests(c, description, iter2VerticalSorted(), tests) - } -} - -// root -// | -// b -// | -// a -func (s *IterSuite) Test2VerticalReverse(c *C) { - for description, tests := range runs2VerticalReverse { - runTests(c, description, iter2VerticalReverse(), tests) - } -} - -// root -// /|\ -// c a b -func (s *IterSuite) Test3Horizontal(c *C) { - for description, tests := range runs3Horizontal { - runTests(c, description, iter3Horizontal(), tests) - } -} - -// root -// | -// b -// | -// c -// | -// a -func (s *IterSuite) Test3Vertical(c *C) { - for description, tests := range runs3Vertical { - runTests(c, description, iter3Vertical(), tests) - } -} - -// root -// / \ -// c a -// | -// b -func (s *IterSuite) Test3Mix1(c *C) { - for description, tests := range runs3Mix1 { - runTests(c, description, iter3Mix1(), tests) - } -} - -// root -// / \ -// b a -// | -// c -func (s *IterSuite) Test3Mix2(c *C) { - for description, tests := range runs3Mix2 { - runTests(c, description, iter3Mix2(), tests) - } -} - -// root -// / | \ -// / | ---- -// f d h -------- -// /\ / \ | -// e a j b g -// | / \ | -// l n k icm -// | -// o -// | -// p -func (s *IterSuite) TestCrazy(c *C) { - for description, tests := range runsCrazy { - runTests(c, description, iterCrazy(), tests) - } -} diff --git a/plumbing/difftree/internal/merkletrie/node.go b/plumbing/difftree/internal/merkletrie/node.go deleted file mode 100644 index 99be5b8..0000000 --- a/plumbing/difftree/internal/merkletrie/node.go +++ /dev/null @@ -1,65 +0,0 @@ -package merkletrie - -import ( - "sort" - "strings" -) - -// A node is a Noder implementation for testing purposes: It is easier -// to create test trees using nodes than using real git tree objects. -type node struct { - hash []byte - key string - children []*node -} - -// newNode returns a new Node with the given hash, key and children -// (children can be specified in any order). -func newNode(hash []byte, key string, children []*node) *node { - sort.Sort(reverseAlphabeticallyByKey(children)) - - return &node{ - hash: hash, - key: key, - children: children, - } -} - -// Hash returns the hash of the node. -func (n *node) Hash() []byte { - return n.hash -} - -// Key returns the key of the node. -func (n *node) Key() string { - return n.key -} - -// NumChildren returns the number of children. -func (n *node) NumChildren() int { - return len(n.children) -} - -// Children returns the node's children in reverse key alphabetical -// order. -func (n *node) Children() []Noder { - ret := make([]Noder, n.NumChildren()) - for i := range n.children { - ret[i] = n.children[i] - } - return ret -} - -type reverseAlphabeticallyByKey []*node - -func (a reverseAlphabeticallyByKey) Len() int { - return len(a) -} - -func (a reverseAlphabeticallyByKey) Swap(i, j int) { - a[i], a[j] = a[j], a[i] -} - -func (a reverseAlphabeticallyByKey) Less(i, j int) bool { - return strings.Compare(a[i].key, a[j].key) > 0 -} diff --git a/plumbing/difftree/internal/merkletrie/node_test.go b/plumbing/difftree/internal/merkletrie/node_test.go deleted file mode 100644 index 1622952..0000000 --- a/plumbing/difftree/internal/merkletrie/node_test.go +++ /dev/null @@ -1,68 +0,0 @@ -package merkletrie - -import ( - "testing" - - . "gopkg.in/check.v1" -) - -func Test(t *testing.T) { TestingT(t) } - -type NodeSuite struct{} - -var _ = Suite(&NodeSuite{}) - -func (s *NodeSuite) TestHash(c *C) { - n := newNode([]byte("the_hash"), "the_key", []*node{}) - - expected := []byte("the_hash") - c.Assert(expected, DeepEquals, n.Hash()) -} - -func (s *NodeSuite) TestKey(c *C) { - n := newNode([]byte("the_hash"), "the_key", []*node{}) - - expected := "the_key" - c.Assert(expected, Equals, n.Key()) -} - -func (s *NodeSuite) TestNoChildren(c *C) { - n := newNode([]byte{}, "", []*node{}) - - expectedNumChildren := 0 - c.Assert(n.NumChildren(), Equals, expectedNumChildren) - - expectedChildren := []Noder{} - c.Assert(n.Children(), DeepEquals, expectedChildren) -} - -func (s *NodeSuite) TestOneChild(c *C) { - child := newNode([]byte("child"), "child", []*node{}) - parent := newNode([]byte("parent"), "parent", []*node{child}) - - expectedNumChildren := 1 - c.Assert(parent.NumChildren(), Equals, expectedNumChildren) - - expectedChildren := []Noder{Noder(child)} - c.Assert(parent.Children(), DeepEquals, expectedChildren) -} - -func (s *NodeSuite) TestManyChildren(c *C) { - child0 := newNode([]byte("child0"), "child0", []*node{}) - child1 := newNode([]byte("child1"), "child1", []*node{}) - child2 := newNode([]byte("child2"), "child2", []*node{}) - child3 := newNode([]byte("child3"), "child3", []*node{}) - // children are added unsorted. - parent := newNode([]byte("parent"), "parent", []*node{child1, child3, child0, child2}) - - expectedNumChildren := 4 - c.Assert(parent.NumChildren(), Equals, expectedNumChildren) - - expectedChildren := []Noder{ // sorted alphabetically by key - Noder(child3), - Noder(child2), - Noder(child1), - Noder(child0), - } - c.Assert(parent.Children(), DeepEquals, expectedChildren) -} diff --git a/plumbing/difftree/internal/merkletrie/noder.go b/plumbing/difftree/internal/merkletrie/noder.go deleted file mode 100644 index 3566657..0000000 --- a/plumbing/difftree/internal/merkletrie/noder.go +++ /dev/null @@ -1,20 +0,0 @@ -package merkletrie - -// The Noder interface is implemented by the elements of a Merkle Trie. -type Noder interface { - // Hash returns the hash of the element. - Hash() []byte - // Key returns the key of the element. - Key() string - // Children returns the children of the element, sorted - // in reverse key alphabetical order. - Children() []Noder - // NumChildren returns the number of children this element has. - // - // This method is an optimization: the number of children is easily - // calculated as the length of the value returned by the Children - // method (above); yet, some implementations will be able to - // implement NumChildren in O(1) while Children is usually more - // complex. - NumChildren() int -} diff --git a/utils/merkletrie/doc.go b/utils/merkletrie/doc.go new file mode 100644 index 0000000..28ece3e --- /dev/null +++ b/utils/merkletrie/doc.go @@ -0,0 +1,32 @@ +/* +Package merkletrie provides support for n-ary trees that are at the same +time Merkle trees and Radix trees (tries), and provides an efficient +tree comparison algorithm for them. + +Git trees are Radix n-ary trees in virtue of the names of their +tree entries. At the same time, git trees are Merkle trees thanks to +their hashes. + +When comparing git trees, the simple approach of alphabetically sorting +their elements and comparing the resulting lists is too slow as it +depends linearly on the number of files in the trees: When a directory +has lots of files but none of them has been modified, this approach is +very expensive. We can do better by prunning whole directories that +have not change, just by looking at their hashes. This package provides +the tools to do exactly that. + +This package defines Merkle tries as nodes that should have: + +- a hash: the Merkle part of the Merkle trie + +- a key: the Radix part of the Merkle trie + +The Merkle hash condition is not enforced by this package though. This +means that the hash of a node doesn't have to take into account the hashes of +their children, which is good for testing purposes. + +Nodes in the Merkle trie are abstracted by the Noder interface. The +intended use is that git trees implements this interface, either +directly or using a simple wrapper. +*/ +package merkletrie diff --git a/utils/merkletrie/internal/fsnoder/dir.go b/utils/merkletrie/internal/fsnoder/dir.go new file mode 100644 index 0000000..1811de9 --- /dev/null +++ b/utils/merkletrie/internal/fsnoder/dir.go @@ -0,0 +1,140 @@ +package fsnoder + +import ( + "bytes" + "fmt" + "hash/fnv" + "sort" + "strings" + + "gopkg.in/src-d/go-git.v4/utils/merkletrie/noder" +) + +// Dir values implement directory-like noders. +type dir struct { + name string // relative + children []noder.Noder // sorted by name + hash []byte // memoized +} + +type byName []noder.Noder + +func (a byName) Len() int { return len(a) } +func (a byName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a byName) Less(i, j int) bool { + return strings.Compare(a[i].Name(), a[j].Name()) < 0 +} + +// copies the children slice, so nobody can modify the order of its +// elements from the outside. +func newDir(name string, children []noder.Noder) (*dir, error) { + cloned := make([]noder.Noder, len(children)) + _ = copy(cloned, children) + sort.Sort(byName(cloned)) + + if hasChildrenWithNoName(cloned) { + return nil, fmt.Errorf("non-root inner nodes cannot have empty names") + } + + if hasDuplicatedNames(cloned) { + return nil, fmt.Errorf("children cannot have duplicated names") + } + + return &dir{ + name: name, + children: cloned, + }, nil +} + +func hasChildrenWithNoName(children []noder.Noder) bool { + for _, c := range children { + if c.Name() == "" { + return true + } + } + + return false +} + +func hasDuplicatedNames(children []noder.Noder) bool { + if len(children) < 2 { + return false + } + + for i := 1; i < len(children); i++ { + if children[i].Name() == children[i-1].Name() { + return true + } + } + + return false +} + +func (d *dir) Hash() []byte { + if d.hash == nil { + d.calculateHash() + } + + return d.hash +} + +// hash is calculated as the hash of "dir " plus the concatenation, for +// each child, of its name, a space and its hash. Children are sorted +// alphabetically before calculating the hash, so the result is unique. +func (d *dir) calculateHash() { + h := fnv.New64a() + h.Write([]byte("dir ")) + for _, c := range d.children { + h.Write([]byte(c.Name())) + h.Write([]byte(" ")) + h.Write(c.Hash()) + } + d.hash = h.Sum([]byte{}) +} + +func (d *dir) Name() string { + return d.name +} + +func (d *dir) IsDir() bool { + return true +} + +// returns a copy so nobody can alter the order of its elements from the +// outside. +func (d *dir) Children() ([]noder.Noder, error) { + clon := make([]noder.Noder, len(d.children)) + _ = copy(clon, d.children) + return clon, nil +} + +func (d *dir) NumChildren() (int, error) { + return len(d.children), nil +} + +const ( + dirStartMark = '(' + dirEndMark = ')' + dirElementSep = ' ' +) + +// The string generated by this method is unique for each tree, as the +// children of each node are sorted alphabetically by name when +// generating the string. +func (d *dir) String() string { + var buf bytes.Buffer + + buf.WriteString(d.name) + buf.WriteRune(dirStartMark) + + for i, c := range d.children { + if i != 0 { + buf.WriteRune(dirElementSep) + } + buf.WriteString(c.String()) + } + + buf.WriteRune(dirEndMark) + + return buf.String() +} diff --git a/utils/merkletrie/internal/fsnoder/dir_test.go b/utils/merkletrie/internal/fsnoder/dir_test.go new file mode 100644 index 0000000..df5aacc --- /dev/null +++ b/utils/merkletrie/internal/fsnoder/dir_test.go @@ -0,0 +1,364 @@ +package fsnoder + +import ( + "reflect" + "sort" + + "gopkg.in/src-d/go-git.v4/utils/merkletrie/noder" + + . "gopkg.in/check.v1" +) + +type DirSuite struct{} + +var _ = Suite(&DirSuite{}) + +func (s *DirSuite) TestIsDir(c *C) { + noName, err := newDir("", nil) + c.Assert(err, IsNil) + c.Assert(noName.IsDir(), Equals, true) + + empty, err := newDir("empty", nil) + c.Assert(err, IsNil) + c.Assert(empty.IsDir(), Equals, true) + + root, err := newDir("foo", []noder.Noder{empty}) + c.Assert(err, IsNil) + c.Assert(root.IsDir(), Equals, true) +} + +func assertChildren(c *C, n noder.Noder, expected []noder.Noder) { + numChildren, err := n.NumChildren() + c.Assert(err, IsNil) + c.Assert(numChildren, Equals, len(expected)) + + children, err := n.Children() + c.Assert(err, IsNil) + c.Assert(children, sortedSliceEquals, expected) +} + +type sortedSliceEqualsChecker struct { + *CheckerInfo +} + +var sortedSliceEquals Checker = &sortedSliceEqualsChecker{ + &CheckerInfo{ + Name: "sortedSliceEquals", + Params: []string{"obtained", "expected"}, + }, +} + +func (checker *sortedSliceEqualsChecker) Check( + params []interface{}, names []string) (result bool, error string) { + a, ok := params[0].([]noder.Noder) + if !ok { + return false, "first parameter must be a []noder.Noder" + } + b, ok := params[1].([]noder.Noder) + if !ok { + return false, "second parameter must be a []noder.Noder" + } + sort.Sort(byName(a)) + sort.Sort(byName(b)) + + return reflect.DeepEqual(a, b), "" +} + +func (s *DirSuite) TestNewDirectoryNoNameAndEmpty(c *C) { + root, err := newDir("", nil) + c.Assert(err, IsNil) + + c.Assert(root.Hash(), DeepEquals, + []byte{0xca, 0x40, 0xf8, 0x67, 0x57, 0x8c, 0x32, 0x1c}) + c.Assert(root.Name(), Equals, "") + assertChildren(c, root, noder.NoChildren) + c.Assert(root.String(), Equals, "()") +} + +func (s *DirSuite) TestNewDirectoryEmpty(c *C) { + root, err := newDir("root", nil) + c.Assert(err, IsNil) + + c.Assert(root.Hash(), DeepEquals, + []byte{0xca, 0x40, 0xf8, 0x67, 0x57, 0x8c, 0x32, 0x1c}) + c.Assert(root.Name(), Equals, "root") + assertChildren(c, root, noder.NoChildren) + c.Assert(root.String(), Equals, "root()") +} + +func (s *DirSuite) TestEmptyDirsHaveSameHash(c *C) { + d1, err := newDir("foo", nil) + c.Assert(err, IsNil) + + d2, err := newDir("bar", nil) + c.Assert(err, IsNil) + + c.Assert(d1.Hash(), DeepEquals, d2.Hash()) +} + +func (s *DirSuite) TestNewDirWithEmptyDir(c *C) { + empty, err := newDir("empty", nil) + c.Assert(err, IsNil) + + root, err := newDir("", []noder.Noder{empty}) + c.Assert(err, IsNil) + + c.Assert(root.Hash(), DeepEquals, + []byte{0x39, 0x25, 0xa8, 0x99, 0x16, 0x47, 0x6a, 0x75}) + c.Assert(root.Name(), Equals, "") + assertChildren(c, root, []noder.Noder{empty}) + c.Assert(root.String(), Equals, "(empty())") +} + +func (s *DirSuite) TestNewDirWithOneEmptyFile(c *C) { + empty, err := newFile("name", "") + c.Assert(err, IsNil) + + root, err := newDir("", []noder.Noder{empty}) + c.Assert(err, IsNil) + c.Assert(root.Hash(), DeepEquals, + []byte{0xd, 0x4e, 0x23, 0x1d, 0xf5, 0x2e, 0xfa, 0xc2}) + c.Assert(root.Name(), Equals, "") + assertChildren(c, root, []noder.Noder{empty}) + c.Assert(root.String(), Equals, "(name<>)") +} + +func (s *DirSuite) TestNewDirWithOneFile(c *C) { + a, err := newFile("a", "1") + c.Assert(err, IsNil) + + root, err := newDir("", []noder.Noder{a}) + c.Assert(err, IsNil) + c.Assert(root.Hash(), DeepEquals, + []byte{0x96, 0xab, 0x29, 0x54, 0x2, 0x9e, 0x89, 0x28}) + c.Assert(root.Name(), Equals, "") + assertChildren(c, root, []noder.Noder{a}) + c.Assert(root.String(), Equals, "(a<1>)") +} + +func (s *DirSuite) TestDirsWithSameFileHaveSameHash(c *C) { + f1, err := newFile("a", "1") + c.Assert(err, IsNil) + r1, err := newDir("", []noder.Noder{f1}) + c.Assert(err, IsNil) + + f2, err := newFile("a", "1") + c.Assert(err, IsNil) + r2, err := newDir("", []noder.Noder{f2}) + c.Assert(err, IsNil) + + c.Assert(r1.Hash(), DeepEquals, r2.Hash()) +} + +func (s *DirSuite) TestDirsWithDifferentFileContentHaveDifferentHash(c *C) { + f1, err := newFile("a", "1") + c.Assert(err, IsNil) + r1, err := newDir("", []noder.Noder{f1}) + c.Assert(err, IsNil) + + f2, err := newFile("a", "2") + c.Assert(err, IsNil) + r2, err := newDir("", []noder.Noder{f2}) + c.Assert(err, IsNil) + + c.Assert(r1.Hash(), Not(DeepEquals), r2.Hash()) +} + +func (s *DirSuite) TestDirsWithDifferentFileNameHaveDifferentHash(c *C) { + f1, err := newFile("a", "1") + c.Assert(err, IsNil) + r1, err := newDir("", []noder.Noder{f1}) + c.Assert(err, IsNil) + + f2, err := newFile("b", "1") + c.Assert(err, IsNil) + r2, err := newDir("", []noder.Noder{f2}) + c.Assert(err, IsNil) + + c.Assert(r1.Hash(), Not(DeepEquals), r2.Hash()) +} + +func (s *DirSuite) TestDirsWithDifferentFileHaveDifferentHash(c *C) { + f1, err := newFile("a", "1") + c.Assert(err, IsNil) + r1, err := newDir("", []noder.Noder{f1}) + c.Assert(err, IsNil) + + f2, err := newFile("b", "2") + c.Assert(err, IsNil) + r2, err := newDir("", []noder.Noder{f2}) + c.Assert(err, IsNil) + + c.Assert(r1.Hash(), Not(DeepEquals), r2.Hash()) +} + +func (s *DirSuite) TestDirWithEmptyDirHasDifferentHashThanEmptyDir(c *C) { + f, err := newFile("a", "") + c.Assert(err, IsNil) + r1, err := newDir("", []noder.Noder{f}) + c.Assert(err, IsNil) + + d, err := newDir("a", nil) + c.Assert(err, IsNil) + r2, err := newDir("", []noder.Noder{d}) + c.Assert(err, IsNil) + + c.Assert(r1.Hash(), Not(DeepEquals), r2.Hash()) +} + +func (s *DirSuite) TestNewDirWithTwoFilesSameContent(c *C) { + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + b1, err := newFile("b", "1") + c.Assert(err, IsNil) + + root, err := newDir("", []noder.Noder{a1, b1}) + c.Assert(err, IsNil) + + c.Assert(root.Hash(), DeepEquals, + []byte{0xc7, 0xc4, 0xbf, 0x70, 0x33, 0xb9, 0x57, 0xdb}) + c.Assert(root.Name(), Equals, "") + assertChildren(c, root, []noder.Noder{b1, a1}) + c.Assert(root.String(), Equals, "(a<1> b<1>)") +} + +func (s *DirSuite) TestNewDirWithTwoFilesDifferentContent(c *C) { + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + b2, err := newFile("b", "2") + c.Assert(err, IsNil) + + root, err := newDir("", []noder.Noder{a1, b2}) + c.Assert(err, IsNil) + + c.Assert(root.Hash(), DeepEquals, + []byte{0x94, 0x8a, 0x9d, 0x8f, 0x6d, 0x98, 0x34, 0x55}) + c.Assert(root.Name(), Equals, "") + assertChildren(c, root, []noder.Noder{b2, a1}) +} + +func (s *DirSuite) TestCrazy(c *C) { + // "" + // | + // ------------------------- + // | | | | | + // a1 B c1 d2 E + // | | + // ------------- E + // | | | | | + // A B X c1 E + // | | + // a1 e1 + e1, err := newFile("e", "1") + c.Assert(err, IsNil) + E, err := newDir("e", []noder.Noder{e1}) + c.Assert(err, IsNil) + E, err = newDir("e", []noder.Noder{E}) + c.Assert(err, IsNil) + E, err = newDir("e", []noder.Noder{E}) + c.Assert(err, IsNil) + + A, err := newDir("a", nil) + c.Assert(err, IsNil) + B, err := newDir("b", nil) + c.Assert(err, IsNil) + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + X, err := newDir("x", []noder.Noder{a1}) + c.Assert(err, IsNil) + c1, err := newFile("c", "1") + c.Assert(err, IsNil) + B, err = newDir("b", []noder.Noder{c1, B, X, A}) + c.Assert(err, IsNil) + + a1, err = newFile("a", "1") + c.Assert(err, IsNil) + c1, err = newFile("c", "1") + c.Assert(err, IsNil) + d2, err := newFile("d", "2") + c.Assert(err, IsNil) + + root, err := newDir("", []noder.Noder{a1, d2, E, B, c1}) + c.Assert(err, IsNil) + + c.Assert(root.Hash(), DeepEquals, + []byte{0xc3, 0x72, 0x9d, 0xf1, 0xcc, 0xec, 0x6d, 0xbb}) + c.Assert(root.Name(), Equals, "") + assertChildren(c, root, []noder.Noder{E, c1, B, a1, d2}) + c.Assert(root.String(), Equals, + "(a<1> b(a() b() c<1> x(a<1>)) c<1> d<2> e(e(e(e<1>))))") +} + +func (s *DirSuite) TestDirCannotHaveDirWithNoName(c *C) { + noName, err := newDir("", nil) + c.Assert(err, IsNil) + + _, err = newDir("", []noder.Noder{noName}) + c.Assert(err, Not(IsNil)) +} + +func (s *DirSuite) TestDirCannotHaveDuplicatedFiles(c *C) { + f1, err := newFile("a", "1") + c.Assert(err, IsNil) + + f2, err := newFile("a", "1") + c.Assert(err, IsNil) + + _, err = newDir("", []noder.Noder{f1, f2}) + c.Assert(err, Not(IsNil)) +} + +func (s *DirSuite) TestDirCannotHaveDuplicatedFileNames(c *C) { + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + + a2, err := newFile("a", "2") + c.Assert(err, IsNil) + + _, err = newDir("", []noder.Noder{a1, a2}) + c.Assert(err, Not(IsNil)) +} + +func (s *DirSuite) TestDirCannotHaveDuplicatedDirNames(c *C) { + d1, err := newDir("a", nil) + c.Assert(err, IsNil) + + d2, err := newDir("a", nil) + c.Assert(err, IsNil) + + _, err = newDir("", []noder.Noder{d1, d2}) + c.Assert(err, Not(IsNil)) +} + +func (s *DirSuite) TestDirCannotHaveDirAndFileWithSameName(c *C) { + f, err := newFile("a", "") + c.Assert(err, IsNil) + + d, err := newDir("a", nil) + c.Assert(err, IsNil) + + _, err = newDir("", []noder.Noder{f, d}) + c.Assert(err, Not(IsNil)) +} + +func (s *DirSuite) TestUnsortedString(c *C) { + b, err := newDir("b", nil) + c.Assert(err, IsNil) + + z, err := newDir("z", nil) + c.Assert(err, IsNil) + + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + + c2, err := newFile("c", "2") + c.Assert(err, IsNil) + + d3, err := newFile("d", "3") + c.Assert(err, IsNil) + + d, err := newDir("d", []noder.Noder{c2, z, d3, a1, b}) + c.Assert(err, IsNil) + + c.Assert(d.String(), Equals, "d(a<1> b() c<2> d<3> z())") +} diff --git a/utils/merkletrie/internal/fsnoder/doc.go b/utils/merkletrie/internal/fsnoder/doc.go new file mode 100644 index 0000000..86cae7c --- /dev/null +++ b/utils/merkletrie/internal/fsnoder/doc.go @@ -0,0 +1,50 @@ +/* +Package fsnoder allows to create merkletrie noders that resemble file +systems, from human readable string descriptions. Its intended use is +generating noders in tests in a readable way. + +For example: + + root, _ = New("(a<1> b<2>, B(c<3> d()))") + +will create a noder as follows: + + root - "root" is an unnamed dir containing "a", "b" and "B". + / | \ - "a" is a file containing the string "1". + / | \ - "b" is a file containing the string "2". + a b B - "B" is a directory containing "c" and "d". + / \ - "c" is a file containing the string "3". + c d - "D" is an empty directory. + +Files are expressed as: + +- a single letter for the name of the file. + +- a single number, between angle brackets, for the contents of the file. + +Directories are expressed as: + +- a single letter for the name of the directory. + +- its elements between parents, separated with spaces, in any order. + +- (optionally) the root directory can be unnamed, by skiping its name. + +Examples: + +- D(a<1> b<2>) : two files, "a" and "b", having "1" and "2" as their +respective contents, inside a directory called "D". + +- A() : An empty directory called "A". + +- A(b<>) : An directory called "A", with an empty file inside called "b": + +- (b(c<1> d(e<2>)) f<>) : an unamed directory containing: + + ├── b --> directory + │   ├── c --> file containing "1" + │   └── d --> directory + │   └── e --> file containing "2" + └── f --> empty file +*/ +package fsnoder diff --git a/utils/merkletrie/internal/fsnoder/file.go b/utils/merkletrie/internal/fsnoder/file.go new file mode 100644 index 0000000..c975a60 --- /dev/null +++ b/utils/merkletrie/internal/fsnoder/file.go @@ -0,0 +1,72 @@ +package fsnoder + +import ( + "bytes" + "fmt" + "hash/fnv" + + "gopkg.in/src-d/go-git.v4/utils/merkletrie/noder" +) + +// file values represent file-like noders in a merkle trie. +type file struct { + name string // relative + contents string + hash []byte // memoized +} + +// newFile returns a noder representing a file with the given contents. +func newFile(name, contents string) (*file, error) { + if name == "" { + return nil, fmt.Errorf("files cannot have empty names") + } + + return &file{ + name: name, + contents: contents, + }, nil +} + +// The hash of a file is just its contents. +// Empty files will have the fnv64 basis offset as its hash. +func (f *file) Hash() []byte { + if f.hash == nil { + h := fnv.New64a() + h.Write([]byte(f.contents)) // it nevers returns an error. + f.hash = h.Sum(nil) + } + + return f.hash +} + +func (f *file) Name() string { + return f.name +} + +func (f *file) IsDir() bool { + return false +} + +func (f *file) Children() ([]noder.Noder, error) { + return noder.NoChildren, nil +} + +func (f *file) NumChildren() (int, error) { + return 0, nil +} + +const ( + fileStartMark = '<' + fileEndMark = '>' +) + +// String returns a string formated as: name. +func (f *file) String() string { + var buf bytes.Buffer + buf.WriteString(f.name) + buf.WriteRune(fileStartMark) + buf.WriteString(f.contents) + buf.WriteRune(fileEndMark) + + return buf.String() +} diff --git a/utils/merkletrie/internal/fsnoder/file_test.go b/utils/merkletrie/internal/fsnoder/file_test.go new file mode 100644 index 0000000..730ae7c --- /dev/null +++ b/utils/merkletrie/internal/fsnoder/file_test.go @@ -0,0 +1,67 @@ +package fsnoder + +import ( + "testing" + + "gopkg.in/src-d/go-git.v4/utils/merkletrie/noder" + + . "gopkg.in/check.v1" +) + +func Test(t *testing.T) { TestingT(t) } + +type FileSuite struct{} + +var _ = Suite(&FileSuite{}) + +var ( + HashOfEmptyFile = []byte{0xcb, 0xf2, 0x9c, 0xe4, 0x84, 0x22, 0x23, 0x25} // fnv64 basis offset + HashOfContents = []byte{0xee, 0x7e, 0xf3, 0xd0, 0xc2, 0xb5, 0xef, 0x83} // hash of "contents" +) + +func (s *FileSuite) TestNewFileEmpty(c *C) { + f, err := newFile("name", "") + c.Assert(err, IsNil) + + c.Assert(f.Hash(), DeepEquals, HashOfEmptyFile) + c.Assert(f.Name(), Equals, "name") + c.Assert(f.IsDir(), Equals, false) + assertChildren(c, f, noder.NoChildren) + c.Assert(f.String(), Equals, "name<>") +} + +func (s *FileSuite) TestNewFileWithContents(c *C) { + f, err := newFile("name", "contents") + c.Assert(err, IsNil) + + c.Assert(f.Hash(), DeepEquals, HashOfContents) + c.Assert(f.Name(), Equals, "name") + c.Assert(f.IsDir(), Equals, false) + assertChildren(c, f, noder.NoChildren) + c.Assert(f.String(), Equals, "name") +} + +func (s *FileSuite) TestNewfileErrorEmptyName(c *C) { + _, err := newFile("", "contents") + c.Assert(err, Not(IsNil)) +} + +func (s *FileSuite) TestDifferentContentsHaveDifferentHash(c *C) { + f1, err := newFile("name", "contents") + c.Assert(err, IsNil) + + f2, err := newFile("name", "foo") + c.Assert(err, IsNil) + + c.Assert(f1.Hash(), Not(DeepEquals), f2.Hash()) +} + +func (s *FileSuite) TestSameContentsHaveSameHash(c *C) { + f1, err := newFile("name1", "contents") + c.Assert(err, IsNil) + + f2, err := newFile("name2", "contents") + c.Assert(err, IsNil) + + c.Assert(f1.Hash(), DeepEquals, f2.Hash()) +} diff --git a/utils/merkletrie/internal/fsnoder/new.go b/utils/merkletrie/internal/fsnoder/new.go new file mode 100644 index 0000000..3ea4733 --- /dev/null +++ b/utils/merkletrie/internal/fsnoder/new.go @@ -0,0 +1,192 @@ +package fsnoder + +import ( + "bytes" + "fmt" + "io" + + "gopkg.in/src-d/go-git.v4/utils/merkletrie/noder" +) + +// New function creates a full merkle trie from the string description of +// a filesystem tree. See examples of the string format in the package +// description. +func New(s string) (noder.Noder, error) { + return decodeDir([]byte(s), root) +} + +const ( + root = true + nonRoot = false +) + +// Expected data: a fsnoder description, for example: A(foo bar qux ...). +// When isRoot is true, unnamed dirs are supported, for example: (foo +// bar qux ...) +func decodeDir(data []byte, isRoot bool) (*dir, error) { + data = bytes.TrimSpace(data) + if len(data) == 0 { + return nil, io.EOF + } + + // get the name of the dir (a single letter) and remove it from the + // data. In case the there is no name and isRoot is true, just use + // "" as the name. + var name string + if data[0] == dirStartMark { + if isRoot { + name = "" + } else { + return nil, fmt.Errorf("inner unnamed dirs not allowed: %s", data) + } + } else { + name = string(data[0]) + data = data[1:] + } + + // check that data is enclosed in parents and it is big enough and + // remove them. + if len(data) < 2 { + return nil, fmt.Errorf("malformed data: too short") + } + if data[0] != dirStartMark { + return nil, fmt.Errorf("malformed data: first %q not found", + dirStartMark) + } + if data[len(data)-1] != dirEndMark { + return nil, fmt.Errorf("malformed data: last %q not found", + dirEndMark) + } + data = data[1 : len(data)-1] // remove initial '(' and last ')' + + children, err := decodeChildren(data) + if err != nil { + return nil, err + } + + return newDir(name, children) +} + +func isNumber(b byte) bool { + return '0' <= b && b <= '9' +} + +func isLetter(b byte) bool { + return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z') +} + +func decodeChildren(data []byte) ([]noder.Noder, error) { + data = bytes.TrimSpace(data) + if len(data) == 0 { + return nil, nil + } + + chunks := split(data) + ret := make([]noder.Noder, len(chunks)) + var err error + for i, c := range chunks { + ret[i], err = decodeChild(c) + if err != nil { + return nil, fmt.Errorf("malformed element %d (%s): %s", i, c, err) + } + } + + return ret, nil +} + +// returns the description of the elements of a dir. It is just looking +// for spaces if they are not part of inner dirs. +func split(data []byte) [][]byte { + chunks := [][]byte{} + + start := 0 + dirDepth := 0 + for i, b := range data { + switch b { + case dirStartMark: + dirDepth++ + case dirEndMark: + dirDepth-- + case dirElementSep: + if dirDepth == 0 { + chunks = append(chunks, data[start:i+1]) + start = i + 1 + } + } + } + chunks = append(chunks, data[start:]) + + return chunks +} + +// A child can be a file or a dir. +func decodeChild(data []byte) (noder.Noder, error) { + clean := bytes.TrimSpace(data) + if len(data) < 3 { + return nil, fmt.Errorf("element too short: %s", clean) + } + + switch clean[1] { + case fileStartMark: + return decodeFile(clean) + case dirStartMark: + return decodeDir(clean, nonRoot) + default: + if clean[0] == dirStartMark { + return nil, fmt.Errorf("non-root unnamed dir are not allowed: %s", + clean) + } + return nil, fmt.Errorf("malformed dir element: %s", clean) + } +} + +func decodeFile(data []byte) (noder.Noder, error) { + if len(data) == 3 { + return decodeEmptyFile(data) + } + + if len(data) != 4 { + return nil, fmt.Errorf("length is not 4") + } + if !isLetter(data[0]) { + return nil, fmt.Errorf("name must be a letter") + } + if data[1] != '<' { + return nil, fmt.Errorf("wrong file start character") + } + if !isNumber(data[2]) { + return nil, fmt.Errorf("contents must be a number") + } + if data[3] != '>' { + return nil, fmt.Errorf("wrong file end character") + } + + name := string(data[0]) + contents := string(data[2]) + + return newFile(name, contents) +} + +func decodeEmptyFile(data []byte) (noder.Noder, error) { + if len(data) != 3 { + return nil, fmt.Errorf("length is not 3: %s", data) + } + if !isLetter(data[0]) { + return nil, fmt.Errorf("name must be a letter: %s", data) + } + if data[1] != '<' { + return nil, fmt.Errorf("wrong file start character: %s", data) + } + if data[2] != '>' { + return nil, fmt.Errorf("wrong file end character: %s", data) + } + + name := string(data[0]) + + return newFile(name, "") +} + +// HashEqual returns if a and b have the same hash. +func HashEqual(a, b noder.Hasher) bool { + return bytes.Equal(a.Hash(), b.Hash()) +} diff --git a/utils/merkletrie/internal/fsnoder/new_test.go b/utils/merkletrie/internal/fsnoder/new_test.go new file mode 100644 index 0000000..e473705 --- /dev/null +++ b/utils/merkletrie/internal/fsnoder/new_test.go @@ -0,0 +1,337 @@ +package fsnoder + +import ( + "gopkg.in/src-d/go-git.v4/utils/merkletrie/noder" + + . "gopkg.in/check.v1" +) + +type FSNoderSuite struct{} + +var _ = Suite(&FSNoderSuite{}) + +func check(c *C, input string, expected *dir) { + obtained, err := New(input) + c.Assert(err, IsNil, Commentf("input = %s", input)) + + comment := Commentf("\n input = %s\n"+ + "expected = %s\nobtained = %s", + input, expected, obtained) + c.Assert(obtained.Hash(), DeepEquals, expected.Hash(), comment) +} + +func (s *FSNoderSuite) TestNoDataFails(c *C) { + _, err := New("") + c.Assert(err, Not(IsNil)) + + _, err = New(" ") // SPC + TAB + c.Assert(err, Not(IsNil)) +} + +func (s *FSNoderSuite) TestUnnamedRootFailsIfNotRoot(c *C) { + _, err := decodeDir([]byte("()"), false) + c.Assert(err, Not(IsNil)) +} + +func (s *FSNoderSuite) TestUnnamedInnerFails(c *C) { + _, err := New("(())") + c.Assert(err, Not(IsNil)) + _, err = New("((a<>))") + c.Assert(err, Not(IsNil)) +} + +func (s *FSNoderSuite) TestMalformedInnerName(c *C) { + _, err := New("(ab<>)") + c.Assert(err, Not(IsNil)) +} + +func (s *FSNoderSuite) TestMalformedFile(c *C) { + _, err := New("(a<12>)") + c.Assert(err, Not(IsNil)) + _, err = New("(4<>)") + c.Assert(err, Not(IsNil)) + _, err = New("(4<1>)") + c.Assert(err, Not(IsNil)) + _, err = New("(4?1>)") + c.Assert(err, Not(IsNil)) + _, err = New("(4)") + c.Assert(err, Not(IsNil)) + _, err = New("(4")) + c.Assert(err, Not(IsNil)) + _, err = decodeFile([]byte("a")) + c.Assert(err, Not(IsNil)) + _, err = decodeFile([]byte("a<1?")) + c.Assert(err, Not(IsNil)) + + _, err = decodeEmptyFile([]byte("a<1>")) + c.Assert(err, Not(IsNil)) + _, err = decodeEmptyFile([]byte("a?>")) + c.Assert(err, Not(IsNil)) + _, err = decodeEmptyFile([]byte("1<>")) + c.Assert(err, Not(IsNil)) + _, err = decodeEmptyFile([]byte("a") + c.Assert(err, Not(IsNil)) + _, err = New("a<>") + c.Assert(err, Not(IsNil)) +} + +func (s *FSNoderSuite) TestUnnamedEmptyRoot(c *C) { + input := "()" + + expected, err := newDir("", nil) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestNamedEmptyRoot(c *C) { + input := "a()" + + expected, err := newDir("a", nil) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestEmptyFile(c *C) { + input := "(a<>)" + + a1, err := newFile("a", "") + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{a1}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestNonEmptyFile(c *C) { + input := "(a<1>)" + + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{a1}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestTwoFilesSameContents(c *C) { + input := "(b<1> a<1>)" + + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + b1, err := newFile("b", "1") + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{a1, b1}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestTwoFilesDifferentContents(c *C) { + input := "(b<2> a<1>)" + + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + b2, err := newFile("b", "2") + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{a1, b2}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestManyFiles(c *C) { + input := "(e<1> b<2> a<1> c<1> d<3> f<4>)" + + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + b2, err := newFile("b", "2") + c.Assert(err, IsNil) + c1, err := newFile("c", "1") + c.Assert(err, IsNil) + d3, err := newFile("d", "3") + c.Assert(err, IsNil) + e1, err := newFile("e", "1") + c.Assert(err, IsNil) + f4, err := newFile("f", "4") + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{e1, b2, a1, c1, d3, f4}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestEmptyDir(c *C) { + input := "(A())" + + A, err := newDir("A", nil) + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{A}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestDirWithEmtpyFile(c *C) { + input := "(A(a<>))" + + a, err := newFile("a", "") + c.Assert(err, IsNil) + A, err := newDir("A", []noder.Noder{a}) + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{A}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestDirWithEmtpyFileSameName(c *C) { + input := "(A(A<>))" + + f, err := newFile("A", "") + c.Assert(err, IsNil) + A, err := newDir("A", []noder.Noder{f}) + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{A}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestDirWithFile(c *C) { + input := "(A(a<1>))" + + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + A, err := newDir("A", []noder.Noder{a1}) + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{A}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestDirWithEmptyDirSameName(c *C) { + input := "(A(A()))" + + A2, err := newDir("A", nil) + c.Assert(err, IsNil) + A1, err := newDir("A", []noder.Noder{A2}) + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{A1}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestDirWithEmptyDir(c *C) { + input := "(A(B()))" + + B, err := newDir("B", nil) + c.Assert(err, IsNil) + A, err := newDir("A", []noder.Noder{B}) + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{A}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestDirWithTwoFiles(c *C) { + input := "(A(a<1> b<2>))" + + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + b2, err := newFile("b", "2") + c.Assert(err, IsNil) + A, err := newDir("A", []noder.Noder{b2, a1}) + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{A}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestCrazy(c *C) { + // "" + // | + // ------------------------- + // | | | | | + // a1 B c1 d2 E + // | | + // ------------- E + // | | | | | + // A B X c1 E + // | | + // a1 e1 + input := "(d<2> b(c<1> b() a() x(a<1>)) a<1> c<1> e(e(e(e<1>))))" + + e1, err := newFile("e", "1") + c.Assert(err, IsNil) + E, err := newDir("e", []noder.Noder{e1}) + c.Assert(err, IsNil) + E, err = newDir("e", []noder.Noder{E}) + c.Assert(err, IsNil) + E, err = newDir("e", []noder.Noder{E}) + c.Assert(err, IsNil) + + A, err := newDir("a", nil) + c.Assert(err, IsNil) + B, err := newDir("b", nil) + c.Assert(err, IsNil) + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + X, err := newDir("x", []noder.Noder{a1}) + c.Assert(err, IsNil) + c1, err := newFile("c", "1") + c.Assert(err, IsNil) + B, err = newDir("b", []noder.Noder{c1, B, X, A}) + c.Assert(err, IsNil) + + a1, err = newFile("a", "1") + c.Assert(err, IsNil) + c1, err = newFile("c", "1") + c.Assert(err, IsNil) + d2, err := newFile("d", "2") + c.Assert(err, IsNil) + + expected, err := newDir("", []noder.Noder{a1, d2, E, B, c1}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestHashEqual(c *C) { + input1 := "(A(a<1> b<2>))" + input2 := "(A(a<1> b<2>))" + input3 := "(A(a<> b<2>))" + + t1, err := New(input1) + c.Assert(err, IsNil) + t2, err := New(input2) + c.Assert(err, IsNil) + t3, err := New(input3) + c.Assert(err, IsNil) + + c.Assert(HashEqual(t1, t2), Equals, true) + c.Assert(HashEqual(t2, t1), Equals, true) + + c.Assert(HashEqual(t2, t3), Equals, false) + c.Assert(HashEqual(t3, t2), Equals, false) + + c.Assert(HashEqual(t3, t1), Equals, false) + c.Assert(HashEqual(t1, t3), Equals, false) +} diff --git a/utils/merkletrie/noder/noder.go b/utils/merkletrie/noder/noder.go new file mode 100644 index 0000000..d6b3de4 --- /dev/null +++ b/utils/merkletrie/noder/noder.go @@ -0,0 +1,59 @@ +// Package noder provide an interface for defining nodes in a +// merkletrie, their hashes and their paths (a noders and its +// ancestors). +// +// The hasher interface is easy to implement naively by elements that +// already have a hash, like git blobs and trees. More sophisticated +// implementations can implement the Equal function in exotic ways +// though: for instance, comparing the modification time of directories +// in a filesystem. +package noder + +import "fmt" + +// Hasher interface is implemented by types that can tell you +// their hash. +type Hasher interface { + Hash() []byte +} + +// Equal functions take two hashers and return if they are equal. +// +// These functions are expected to be faster than reflect.Equal or +// reflect.DeepEqual because they can compare just the hash of the +// objects, instead of their contents, so they are expected to be O(1). +type Equal func(a, b Hasher) bool + +// The Noder interface is implemented by the elements of a Merkle Trie. +// +// There are two types of elements in a Merkle Trie: +// +// - file-like nodes: they cannot have children. +// +// - directory-like nodes: they can have 0 or more children and their +// hash is calculated by combining their children hashes. +type Noder interface { + Hasher + fmt.Stringer // for testing purposes + // Name returns the name of an element (relative, not its full + // path). + Name() string + // IsDir returns true if the element is a directory-like node or + // false if it is a file-like node. + IsDir() bool + // Children returns the children of the element. Note that empty + // directory-like noders and file-like noders will both return + // NoChildren. + Children() ([]Noder, error) + // NumChildren returns the number of children this element has. + // + // This method is an optimization: the number of children is easily + // calculated as the length of the value returned by the Children + // method (above); yet, some implementations will be able to + // implement NumChildren in O(1) while Children is usually more + // complex. + NumChildren() (int, error) +} + +// NoChildren represents the children of a noder without children. +var NoChildren = []Noder{} diff --git a/utils/merkletrie/noder/noder_test.go b/utils/merkletrie/noder/noder_test.go new file mode 100644 index 0000000..14bef4a --- /dev/null +++ b/utils/merkletrie/noder/noder_test.go @@ -0,0 +1,94 @@ +package noder + +import . "gopkg.in/check.v1" + +type NoderSuite struct{} + +var _ = Suite(&NoderSuite{}) + +type noderMock struct { + name string + hash []byte + isDir bool + children []Noder +} + +func (n noderMock) String() string { return n.Name() } +func (n noderMock) Hash() []byte { return n.hash } +func (n noderMock) Name() string { return n.name } +func (n noderMock) IsDir() bool { return n.isDir } +func (n noderMock) Children() ([]Noder, error) { return n.children, nil } +func (n noderMock) NumChildren() (int, error) { return len(n.children), nil } + +// Returns a sequence with the noders 3, 2, and 1 from the +// following diagram: +// +// 3 +// | +// 2 +// | +// 1 +// / \ +// c1 c2 +// +// This is also the path of "1". +func nodersFixture() []Noder { + n1 := &noderMock{ + name: "1", + hash: []byte{0x00, 0x01, 0x02}, + isDir: true, + children: childrenFixture(), + } + n2 := &noderMock{name: "2"} + n3 := &noderMock{name: "3"} + return []Noder{n3, n2, n1} +} + +// Returns a collection of 2 noders: c1, c2. +func childrenFixture() []Noder { + c1 := &noderMock{name: "c1"} + c2 := &noderMock{name: "c2"} + return []Noder{c1, c2} +} + +// Returns the same as nodersFixture but sorted by name, this is: "1", +// "2" and then "3". +func sortedNodersFixture() []Noder { + n1 := &noderMock{ + name: "1", + hash: []byte{0x00, 0x01, 0x02}, + isDir: true, + children: childrenFixture(), + } + n2 := &noderMock{name: "2"} + n3 := &noderMock{name: "3"} + return []Noder{n1, n2, n3} // the same as nodersFixture but sorted by name +} + +// returns nodersFixture as the path of "1". +func pathFixture() Path { + return Path(nodersFixture()) +} + +func (s *NoderSuite) TestString(c *C) { + c.Assert(pathFixture().String(), Equals, "3/2/1") +} + +func (s *NoderSuite) TestLast(c *C) { + c.Assert(pathFixture().Last().Name(), Equals, "1") +} + +func (s *NoderSuite) TestPathImplementsNoder(c *C) { + p := pathFixture() + c.Assert(p.Name(), Equals, "1") + c.Assert(p.Hash(), DeepEquals, []byte{0x00, 0x01, 0x02}) + c.Assert(p.IsDir(), Equals, true) + + children, err := p.Children() + c.Assert(err, IsNil) + c.Assert(children, DeepEquals, childrenFixture()) + + numChildren, err := p.NumChildren() + c.Assert(err, IsNil) + c.Assert(numChildren, Equals, 2) +} diff --git a/utils/merkletrie/noder/path.go b/utils/merkletrie/noder/path.go new file mode 100644 index 0000000..c992f7e --- /dev/null +++ b/utils/merkletrie/noder/path.go @@ -0,0 +1,59 @@ +package noder + +import "bytes" + +// Path values represent a noder and its ancestors. The root goes first +// and the actual final noder the path is refering to will be the last. +// +// A path implements the Noder interface, redirecting all the interface +// calls to its final noder. +// +// Paths build from an empty Noder slice are not valid paths and should +// not be used. +type Path []Noder + +// String returns the full path of the final noder as a string, using +// "/" as the separator. +func (p Path) String() string { + var buf bytes.Buffer + sep := "" + for _, e := range p { + _, _ = buf.WriteString(sep) + sep = "/" + _, _ = buf.WriteString(e.Name()) + } + + return buf.String() +} + +// Last returns the final noder in the path. +func (p Path) Last() Noder { + return p[len(p)-1] +} + +// Hash returns the hash of the final noder of the path. +func (p Path) Hash() []byte { + return p.Last().Hash() +} + +// Name returns the name of the final noder of the path. +func (p Path) Name() string { + return p.Last().Name() +} + +// IsDir returns if the final noder of the path is a directory-like +// noder. +func (p Path) IsDir() bool { + return p.Last().IsDir() +} + +// Children returns the children of the final noder in the path. +func (p Path) Children() ([]Noder, error) { + return p.Last().Children() +} + +// NumChildren returns the number of children the final noder of the +// path has. +func (p Path) NumChildren() (int, error) { + return p.Last().NumChildren() +} -- cgit