From e80d7b7267ba9f2057f259be331c4de927a60ecb Mon Sep 17 00:00:00 2001 From: Alberto Cortés Date: Mon, 30 Jan 2017 21:34:33 +0100 Subject: delete old noder, create a new one in utils (#241) --- utils/merkletrie/doc.go | 32 +++ utils/merkletrie/internal/fsnoder/dir.go | 140 ++++++++++ utils/merkletrie/internal/fsnoder/dir_test.go | 364 +++++++++++++++++++++++++ utils/merkletrie/internal/fsnoder/doc.go | 50 ++++ utils/merkletrie/internal/fsnoder/file.go | 72 +++++ utils/merkletrie/internal/fsnoder/file_test.go | 67 +++++ utils/merkletrie/internal/fsnoder/new.go | 192 +++++++++++++ utils/merkletrie/internal/fsnoder/new_test.go | 337 +++++++++++++++++++++++ utils/merkletrie/noder/noder.go | 59 ++++ utils/merkletrie/noder/noder_test.go | 94 +++++++ utils/merkletrie/noder/path.go | 59 ++++ 11 files changed, 1466 insertions(+) create mode 100644 utils/merkletrie/doc.go create mode 100644 utils/merkletrie/internal/fsnoder/dir.go create mode 100644 utils/merkletrie/internal/fsnoder/dir_test.go create mode 100644 utils/merkletrie/internal/fsnoder/doc.go create mode 100644 utils/merkletrie/internal/fsnoder/file.go create mode 100644 utils/merkletrie/internal/fsnoder/file_test.go create mode 100644 utils/merkletrie/internal/fsnoder/new.go create mode 100644 utils/merkletrie/internal/fsnoder/new_test.go create mode 100644 utils/merkletrie/noder/noder.go create mode 100644 utils/merkletrie/noder/noder_test.go create mode 100644 utils/merkletrie/noder/path.go (limited to 'utils') diff --git a/utils/merkletrie/doc.go b/utils/merkletrie/doc.go new file mode 100644 index 0000000..28ece3e --- /dev/null +++ b/utils/merkletrie/doc.go @@ -0,0 +1,32 @@ +/* +Package merkletrie provides support for n-ary trees that are at the same +time Merkle trees and Radix trees (tries), and provides an efficient +tree comparison algorithm for them. + +Git trees are Radix n-ary trees in virtue of the names of their +tree entries. At the same time, git trees are Merkle trees thanks to +their hashes. + +When comparing git trees, the simple approach of alphabetically sorting +their elements and comparing the resulting lists is too slow as it +depends linearly on the number of files in the trees: When a directory +has lots of files but none of them has been modified, this approach is +very expensive. We can do better by prunning whole directories that +have not change, just by looking at their hashes. This package provides +the tools to do exactly that. + +This package defines Merkle tries as nodes that should have: + +- a hash: the Merkle part of the Merkle trie + +- a key: the Radix part of the Merkle trie + +The Merkle hash condition is not enforced by this package though. This +means that the hash of a node doesn't have to take into account the hashes of +their children, which is good for testing purposes. + +Nodes in the Merkle trie are abstracted by the Noder interface. The +intended use is that git trees implements this interface, either +directly or using a simple wrapper. +*/ +package merkletrie diff --git a/utils/merkletrie/internal/fsnoder/dir.go b/utils/merkletrie/internal/fsnoder/dir.go new file mode 100644 index 0000000..1811de9 --- /dev/null +++ b/utils/merkletrie/internal/fsnoder/dir.go @@ -0,0 +1,140 @@ +package fsnoder + +import ( + "bytes" + "fmt" + "hash/fnv" + "sort" + "strings" + + "gopkg.in/src-d/go-git.v4/utils/merkletrie/noder" +) + +// Dir values implement directory-like noders. +type dir struct { + name string // relative + children []noder.Noder // sorted by name + hash []byte // memoized +} + +type byName []noder.Noder + +func (a byName) Len() int { return len(a) } +func (a byName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a byName) Less(i, j int) bool { + return strings.Compare(a[i].Name(), a[j].Name()) < 0 +} + +// copies the children slice, so nobody can modify the order of its +// elements from the outside. +func newDir(name string, children []noder.Noder) (*dir, error) { + cloned := make([]noder.Noder, len(children)) + _ = copy(cloned, children) + sort.Sort(byName(cloned)) + + if hasChildrenWithNoName(cloned) { + return nil, fmt.Errorf("non-root inner nodes cannot have empty names") + } + + if hasDuplicatedNames(cloned) { + return nil, fmt.Errorf("children cannot have duplicated names") + } + + return &dir{ + name: name, + children: cloned, + }, nil +} + +func hasChildrenWithNoName(children []noder.Noder) bool { + for _, c := range children { + if c.Name() == "" { + return true + } + } + + return false +} + +func hasDuplicatedNames(children []noder.Noder) bool { + if len(children) < 2 { + return false + } + + for i := 1; i < len(children); i++ { + if children[i].Name() == children[i-1].Name() { + return true + } + } + + return false +} + +func (d *dir) Hash() []byte { + if d.hash == nil { + d.calculateHash() + } + + return d.hash +} + +// hash is calculated as the hash of "dir " plus the concatenation, for +// each child, of its name, a space and its hash. Children are sorted +// alphabetically before calculating the hash, so the result is unique. +func (d *dir) calculateHash() { + h := fnv.New64a() + h.Write([]byte("dir ")) + for _, c := range d.children { + h.Write([]byte(c.Name())) + h.Write([]byte(" ")) + h.Write(c.Hash()) + } + d.hash = h.Sum([]byte{}) +} + +func (d *dir) Name() string { + return d.name +} + +func (d *dir) IsDir() bool { + return true +} + +// returns a copy so nobody can alter the order of its elements from the +// outside. +func (d *dir) Children() ([]noder.Noder, error) { + clon := make([]noder.Noder, len(d.children)) + _ = copy(clon, d.children) + return clon, nil +} + +func (d *dir) NumChildren() (int, error) { + return len(d.children), nil +} + +const ( + dirStartMark = '(' + dirEndMark = ')' + dirElementSep = ' ' +) + +// The string generated by this method is unique for each tree, as the +// children of each node are sorted alphabetically by name when +// generating the string. +func (d *dir) String() string { + var buf bytes.Buffer + + buf.WriteString(d.name) + buf.WriteRune(dirStartMark) + + for i, c := range d.children { + if i != 0 { + buf.WriteRune(dirElementSep) + } + buf.WriteString(c.String()) + } + + buf.WriteRune(dirEndMark) + + return buf.String() +} diff --git a/utils/merkletrie/internal/fsnoder/dir_test.go b/utils/merkletrie/internal/fsnoder/dir_test.go new file mode 100644 index 0000000..df5aacc --- /dev/null +++ b/utils/merkletrie/internal/fsnoder/dir_test.go @@ -0,0 +1,364 @@ +package fsnoder + +import ( + "reflect" + "sort" + + "gopkg.in/src-d/go-git.v4/utils/merkletrie/noder" + + . "gopkg.in/check.v1" +) + +type DirSuite struct{} + +var _ = Suite(&DirSuite{}) + +func (s *DirSuite) TestIsDir(c *C) { + noName, err := newDir("", nil) + c.Assert(err, IsNil) + c.Assert(noName.IsDir(), Equals, true) + + empty, err := newDir("empty", nil) + c.Assert(err, IsNil) + c.Assert(empty.IsDir(), Equals, true) + + root, err := newDir("foo", []noder.Noder{empty}) + c.Assert(err, IsNil) + c.Assert(root.IsDir(), Equals, true) +} + +func assertChildren(c *C, n noder.Noder, expected []noder.Noder) { + numChildren, err := n.NumChildren() + c.Assert(err, IsNil) + c.Assert(numChildren, Equals, len(expected)) + + children, err := n.Children() + c.Assert(err, IsNil) + c.Assert(children, sortedSliceEquals, expected) +} + +type sortedSliceEqualsChecker struct { + *CheckerInfo +} + +var sortedSliceEquals Checker = &sortedSliceEqualsChecker{ + &CheckerInfo{ + Name: "sortedSliceEquals", + Params: []string{"obtained", "expected"}, + }, +} + +func (checker *sortedSliceEqualsChecker) Check( + params []interface{}, names []string) (result bool, error string) { + a, ok := params[0].([]noder.Noder) + if !ok { + return false, "first parameter must be a []noder.Noder" + } + b, ok := params[1].([]noder.Noder) + if !ok { + return false, "second parameter must be a []noder.Noder" + } + sort.Sort(byName(a)) + sort.Sort(byName(b)) + + return reflect.DeepEqual(a, b), "" +} + +func (s *DirSuite) TestNewDirectoryNoNameAndEmpty(c *C) { + root, err := newDir("", nil) + c.Assert(err, IsNil) + + c.Assert(root.Hash(), DeepEquals, + []byte{0xca, 0x40, 0xf8, 0x67, 0x57, 0x8c, 0x32, 0x1c}) + c.Assert(root.Name(), Equals, "") + assertChildren(c, root, noder.NoChildren) + c.Assert(root.String(), Equals, "()") +} + +func (s *DirSuite) TestNewDirectoryEmpty(c *C) { + root, err := newDir("root", nil) + c.Assert(err, IsNil) + + c.Assert(root.Hash(), DeepEquals, + []byte{0xca, 0x40, 0xf8, 0x67, 0x57, 0x8c, 0x32, 0x1c}) + c.Assert(root.Name(), Equals, "root") + assertChildren(c, root, noder.NoChildren) + c.Assert(root.String(), Equals, "root()") +} + +func (s *DirSuite) TestEmptyDirsHaveSameHash(c *C) { + d1, err := newDir("foo", nil) + c.Assert(err, IsNil) + + d2, err := newDir("bar", nil) + c.Assert(err, IsNil) + + c.Assert(d1.Hash(), DeepEquals, d2.Hash()) +} + +func (s *DirSuite) TestNewDirWithEmptyDir(c *C) { + empty, err := newDir("empty", nil) + c.Assert(err, IsNil) + + root, err := newDir("", []noder.Noder{empty}) + c.Assert(err, IsNil) + + c.Assert(root.Hash(), DeepEquals, + []byte{0x39, 0x25, 0xa8, 0x99, 0x16, 0x47, 0x6a, 0x75}) + c.Assert(root.Name(), Equals, "") + assertChildren(c, root, []noder.Noder{empty}) + c.Assert(root.String(), Equals, "(empty())") +} + +func (s *DirSuite) TestNewDirWithOneEmptyFile(c *C) { + empty, err := newFile("name", "") + c.Assert(err, IsNil) + + root, err := newDir("", []noder.Noder{empty}) + c.Assert(err, IsNil) + c.Assert(root.Hash(), DeepEquals, + []byte{0xd, 0x4e, 0x23, 0x1d, 0xf5, 0x2e, 0xfa, 0xc2}) + c.Assert(root.Name(), Equals, "") + assertChildren(c, root, []noder.Noder{empty}) + c.Assert(root.String(), Equals, "(name<>)") +} + +func (s *DirSuite) TestNewDirWithOneFile(c *C) { + a, err := newFile("a", "1") + c.Assert(err, IsNil) + + root, err := newDir("", []noder.Noder{a}) + c.Assert(err, IsNil) + c.Assert(root.Hash(), DeepEquals, + []byte{0x96, 0xab, 0x29, 0x54, 0x2, 0x9e, 0x89, 0x28}) + c.Assert(root.Name(), Equals, "") + assertChildren(c, root, []noder.Noder{a}) + c.Assert(root.String(), Equals, "(a<1>)") +} + +func (s *DirSuite) TestDirsWithSameFileHaveSameHash(c *C) { + f1, err := newFile("a", "1") + c.Assert(err, IsNil) + r1, err := newDir("", []noder.Noder{f1}) + c.Assert(err, IsNil) + + f2, err := newFile("a", "1") + c.Assert(err, IsNil) + r2, err := newDir("", []noder.Noder{f2}) + c.Assert(err, IsNil) + + c.Assert(r1.Hash(), DeepEquals, r2.Hash()) +} + +func (s *DirSuite) TestDirsWithDifferentFileContentHaveDifferentHash(c *C) { + f1, err := newFile("a", "1") + c.Assert(err, IsNil) + r1, err := newDir("", []noder.Noder{f1}) + c.Assert(err, IsNil) + + f2, err := newFile("a", "2") + c.Assert(err, IsNil) + r2, err := newDir("", []noder.Noder{f2}) + c.Assert(err, IsNil) + + c.Assert(r1.Hash(), Not(DeepEquals), r2.Hash()) +} + +func (s *DirSuite) TestDirsWithDifferentFileNameHaveDifferentHash(c *C) { + f1, err := newFile("a", "1") + c.Assert(err, IsNil) + r1, err := newDir("", []noder.Noder{f1}) + c.Assert(err, IsNil) + + f2, err := newFile("b", "1") + c.Assert(err, IsNil) + r2, err := newDir("", []noder.Noder{f2}) + c.Assert(err, IsNil) + + c.Assert(r1.Hash(), Not(DeepEquals), r2.Hash()) +} + +func (s *DirSuite) TestDirsWithDifferentFileHaveDifferentHash(c *C) { + f1, err := newFile("a", "1") + c.Assert(err, IsNil) + r1, err := newDir("", []noder.Noder{f1}) + c.Assert(err, IsNil) + + f2, err := newFile("b", "2") + c.Assert(err, IsNil) + r2, err := newDir("", []noder.Noder{f2}) + c.Assert(err, IsNil) + + c.Assert(r1.Hash(), Not(DeepEquals), r2.Hash()) +} + +func (s *DirSuite) TestDirWithEmptyDirHasDifferentHashThanEmptyDir(c *C) { + f, err := newFile("a", "") + c.Assert(err, IsNil) + r1, err := newDir("", []noder.Noder{f}) + c.Assert(err, IsNil) + + d, err := newDir("a", nil) + c.Assert(err, IsNil) + r2, err := newDir("", []noder.Noder{d}) + c.Assert(err, IsNil) + + c.Assert(r1.Hash(), Not(DeepEquals), r2.Hash()) +} + +func (s *DirSuite) TestNewDirWithTwoFilesSameContent(c *C) { + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + b1, err := newFile("b", "1") + c.Assert(err, IsNil) + + root, err := newDir("", []noder.Noder{a1, b1}) + c.Assert(err, IsNil) + + c.Assert(root.Hash(), DeepEquals, + []byte{0xc7, 0xc4, 0xbf, 0x70, 0x33, 0xb9, 0x57, 0xdb}) + c.Assert(root.Name(), Equals, "") + assertChildren(c, root, []noder.Noder{b1, a1}) + c.Assert(root.String(), Equals, "(a<1> b<1>)") +} + +func (s *DirSuite) TestNewDirWithTwoFilesDifferentContent(c *C) { + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + b2, err := newFile("b", "2") + c.Assert(err, IsNil) + + root, err := newDir("", []noder.Noder{a1, b2}) + c.Assert(err, IsNil) + + c.Assert(root.Hash(), DeepEquals, + []byte{0x94, 0x8a, 0x9d, 0x8f, 0x6d, 0x98, 0x34, 0x55}) + c.Assert(root.Name(), Equals, "") + assertChildren(c, root, []noder.Noder{b2, a1}) +} + +func (s *DirSuite) TestCrazy(c *C) { + // "" + // | + // ------------------------- + // | | | | | + // a1 B c1 d2 E + // | | + // ------------- E + // | | | | | + // A B X c1 E + // | | + // a1 e1 + e1, err := newFile("e", "1") + c.Assert(err, IsNil) + E, err := newDir("e", []noder.Noder{e1}) + c.Assert(err, IsNil) + E, err = newDir("e", []noder.Noder{E}) + c.Assert(err, IsNil) + E, err = newDir("e", []noder.Noder{E}) + c.Assert(err, IsNil) + + A, err := newDir("a", nil) + c.Assert(err, IsNil) + B, err := newDir("b", nil) + c.Assert(err, IsNil) + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + X, err := newDir("x", []noder.Noder{a1}) + c.Assert(err, IsNil) + c1, err := newFile("c", "1") + c.Assert(err, IsNil) + B, err = newDir("b", []noder.Noder{c1, B, X, A}) + c.Assert(err, IsNil) + + a1, err = newFile("a", "1") + c.Assert(err, IsNil) + c1, err = newFile("c", "1") + c.Assert(err, IsNil) + d2, err := newFile("d", "2") + c.Assert(err, IsNil) + + root, err := newDir("", []noder.Noder{a1, d2, E, B, c1}) + c.Assert(err, IsNil) + + c.Assert(root.Hash(), DeepEquals, + []byte{0xc3, 0x72, 0x9d, 0xf1, 0xcc, 0xec, 0x6d, 0xbb}) + c.Assert(root.Name(), Equals, "") + assertChildren(c, root, []noder.Noder{E, c1, B, a1, d2}) + c.Assert(root.String(), Equals, + "(a<1> b(a() b() c<1> x(a<1>)) c<1> d<2> e(e(e(e<1>))))") +} + +func (s *DirSuite) TestDirCannotHaveDirWithNoName(c *C) { + noName, err := newDir("", nil) + c.Assert(err, IsNil) + + _, err = newDir("", []noder.Noder{noName}) + c.Assert(err, Not(IsNil)) +} + +func (s *DirSuite) TestDirCannotHaveDuplicatedFiles(c *C) { + f1, err := newFile("a", "1") + c.Assert(err, IsNil) + + f2, err := newFile("a", "1") + c.Assert(err, IsNil) + + _, err = newDir("", []noder.Noder{f1, f2}) + c.Assert(err, Not(IsNil)) +} + +func (s *DirSuite) TestDirCannotHaveDuplicatedFileNames(c *C) { + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + + a2, err := newFile("a", "2") + c.Assert(err, IsNil) + + _, err = newDir("", []noder.Noder{a1, a2}) + c.Assert(err, Not(IsNil)) +} + +func (s *DirSuite) TestDirCannotHaveDuplicatedDirNames(c *C) { + d1, err := newDir("a", nil) + c.Assert(err, IsNil) + + d2, err := newDir("a", nil) + c.Assert(err, IsNil) + + _, err = newDir("", []noder.Noder{d1, d2}) + c.Assert(err, Not(IsNil)) +} + +func (s *DirSuite) TestDirCannotHaveDirAndFileWithSameName(c *C) { + f, err := newFile("a", "") + c.Assert(err, IsNil) + + d, err := newDir("a", nil) + c.Assert(err, IsNil) + + _, err = newDir("", []noder.Noder{f, d}) + c.Assert(err, Not(IsNil)) +} + +func (s *DirSuite) TestUnsortedString(c *C) { + b, err := newDir("b", nil) + c.Assert(err, IsNil) + + z, err := newDir("z", nil) + c.Assert(err, IsNil) + + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + + c2, err := newFile("c", "2") + c.Assert(err, IsNil) + + d3, err := newFile("d", "3") + c.Assert(err, IsNil) + + d, err := newDir("d", []noder.Noder{c2, z, d3, a1, b}) + c.Assert(err, IsNil) + + c.Assert(d.String(), Equals, "d(a<1> b() c<2> d<3> z())") +} diff --git a/utils/merkletrie/internal/fsnoder/doc.go b/utils/merkletrie/internal/fsnoder/doc.go new file mode 100644 index 0000000..86cae7c --- /dev/null +++ b/utils/merkletrie/internal/fsnoder/doc.go @@ -0,0 +1,50 @@ +/* +Package fsnoder allows to create merkletrie noders that resemble file +systems, from human readable string descriptions. Its intended use is +generating noders in tests in a readable way. + +For example: + + root, _ = New("(a<1> b<2>, B(c<3> d()))") + +will create a noder as follows: + + root - "root" is an unnamed dir containing "a", "b" and "B". + / | \ - "a" is a file containing the string "1". + / | \ - "b" is a file containing the string "2". + a b B - "B" is a directory containing "c" and "d". + / \ - "c" is a file containing the string "3". + c d - "D" is an empty directory. + +Files are expressed as: + +- a single letter for the name of the file. + +- a single number, between angle brackets, for the contents of the file. + +Directories are expressed as: + +- a single letter for the name of the directory. + +- its elements between parents, separated with spaces, in any order. + +- (optionally) the root directory can be unnamed, by skiping its name. + +Examples: + +- D(a<1> b<2>) : two files, "a" and "b", having "1" and "2" as their +respective contents, inside a directory called "D". + +- A() : An empty directory called "A". + +- A(b<>) : An directory called "A", with an empty file inside called "b": + +- (b(c<1> d(e<2>)) f<>) : an unamed directory containing: + + ├── b --> directory + │   ├── c --> file containing "1" + │   └── d --> directory + │   └── e --> file containing "2" + └── f --> empty file +*/ +package fsnoder diff --git a/utils/merkletrie/internal/fsnoder/file.go b/utils/merkletrie/internal/fsnoder/file.go new file mode 100644 index 0000000..c975a60 --- /dev/null +++ b/utils/merkletrie/internal/fsnoder/file.go @@ -0,0 +1,72 @@ +package fsnoder + +import ( + "bytes" + "fmt" + "hash/fnv" + + "gopkg.in/src-d/go-git.v4/utils/merkletrie/noder" +) + +// file values represent file-like noders in a merkle trie. +type file struct { + name string // relative + contents string + hash []byte // memoized +} + +// newFile returns a noder representing a file with the given contents. +func newFile(name, contents string) (*file, error) { + if name == "" { + return nil, fmt.Errorf("files cannot have empty names") + } + + return &file{ + name: name, + contents: contents, + }, nil +} + +// The hash of a file is just its contents. +// Empty files will have the fnv64 basis offset as its hash. +func (f *file) Hash() []byte { + if f.hash == nil { + h := fnv.New64a() + h.Write([]byte(f.contents)) // it nevers returns an error. + f.hash = h.Sum(nil) + } + + return f.hash +} + +func (f *file) Name() string { + return f.name +} + +func (f *file) IsDir() bool { + return false +} + +func (f *file) Children() ([]noder.Noder, error) { + return noder.NoChildren, nil +} + +func (f *file) NumChildren() (int, error) { + return 0, nil +} + +const ( + fileStartMark = '<' + fileEndMark = '>' +) + +// String returns a string formated as: name. +func (f *file) String() string { + var buf bytes.Buffer + buf.WriteString(f.name) + buf.WriteRune(fileStartMark) + buf.WriteString(f.contents) + buf.WriteRune(fileEndMark) + + return buf.String() +} diff --git a/utils/merkletrie/internal/fsnoder/file_test.go b/utils/merkletrie/internal/fsnoder/file_test.go new file mode 100644 index 0000000..730ae7c --- /dev/null +++ b/utils/merkletrie/internal/fsnoder/file_test.go @@ -0,0 +1,67 @@ +package fsnoder + +import ( + "testing" + + "gopkg.in/src-d/go-git.v4/utils/merkletrie/noder" + + . "gopkg.in/check.v1" +) + +func Test(t *testing.T) { TestingT(t) } + +type FileSuite struct{} + +var _ = Suite(&FileSuite{}) + +var ( + HashOfEmptyFile = []byte{0xcb, 0xf2, 0x9c, 0xe4, 0x84, 0x22, 0x23, 0x25} // fnv64 basis offset + HashOfContents = []byte{0xee, 0x7e, 0xf3, 0xd0, 0xc2, 0xb5, 0xef, 0x83} // hash of "contents" +) + +func (s *FileSuite) TestNewFileEmpty(c *C) { + f, err := newFile("name", "") + c.Assert(err, IsNil) + + c.Assert(f.Hash(), DeepEquals, HashOfEmptyFile) + c.Assert(f.Name(), Equals, "name") + c.Assert(f.IsDir(), Equals, false) + assertChildren(c, f, noder.NoChildren) + c.Assert(f.String(), Equals, "name<>") +} + +func (s *FileSuite) TestNewFileWithContents(c *C) { + f, err := newFile("name", "contents") + c.Assert(err, IsNil) + + c.Assert(f.Hash(), DeepEquals, HashOfContents) + c.Assert(f.Name(), Equals, "name") + c.Assert(f.IsDir(), Equals, false) + assertChildren(c, f, noder.NoChildren) + c.Assert(f.String(), Equals, "name") +} + +func (s *FileSuite) TestNewfileErrorEmptyName(c *C) { + _, err := newFile("", "contents") + c.Assert(err, Not(IsNil)) +} + +func (s *FileSuite) TestDifferentContentsHaveDifferentHash(c *C) { + f1, err := newFile("name", "contents") + c.Assert(err, IsNil) + + f2, err := newFile("name", "foo") + c.Assert(err, IsNil) + + c.Assert(f1.Hash(), Not(DeepEquals), f2.Hash()) +} + +func (s *FileSuite) TestSameContentsHaveSameHash(c *C) { + f1, err := newFile("name1", "contents") + c.Assert(err, IsNil) + + f2, err := newFile("name2", "contents") + c.Assert(err, IsNil) + + c.Assert(f1.Hash(), DeepEquals, f2.Hash()) +} diff --git a/utils/merkletrie/internal/fsnoder/new.go b/utils/merkletrie/internal/fsnoder/new.go new file mode 100644 index 0000000..3ea4733 --- /dev/null +++ b/utils/merkletrie/internal/fsnoder/new.go @@ -0,0 +1,192 @@ +package fsnoder + +import ( + "bytes" + "fmt" + "io" + + "gopkg.in/src-d/go-git.v4/utils/merkletrie/noder" +) + +// New function creates a full merkle trie from the string description of +// a filesystem tree. See examples of the string format in the package +// description. +func New(s string) (noder.Noder, error) { + return decodeDir([]byte(s), root) +} + +const ( + root = true + nonRoot = false +) + +// Expected data: a fsnoder description, for example: A(foo bar qux ...). +// When isRoot is true, unnamed dirs are supported, for example: (foo +// bar qux ...) +func decodeDir(data []byte, isRoot bool) (*dir, error) { + data = bytes.TrimSpace(data) + if len(data) == 0 { + return nil, io.EOF + } + + // get the name of the dir (a single letter) and remove it from the + // data. In case the there is no name and isRoot is true, just use + // "" as the name. + var name string + if data[0] == dirStartMark { + if isRoot { + name = "" + } else { + return nil, fmt.Errorf("inner unnamed dirs not allowed: %s", data) + } + } else { + name = string(data[0]) + data = data[1:] + } + + // check that data is enclosed in parents and it is big enough and + // remove them. + if len(data) < 2 { + return nil, fmt.Errorf("malformed data: too short") + } + if data[0] != dirStartMark { + return nil, fmt.Errorf("malformed data: first %q not found", + dirStartMark) + } + if data[len(data)-1] != dirEndMark { + return nil, fmt.Errorf("malformed data: last %q not found", + dirEndMark) + } + data = data[1 : len(data)-1] // remove initial '(' and last ')' + + children, err := decodeChildren(data) + if err != nil { + return nil, err + } + + return newDir(name, children) +} + +func isNumber(b byte) bool { + return '0' <= b && b <= '9' +} + +func isLetter(b byte) bool { + return ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z') +} + +func decodeChildren(data []byte) ([]noder.Noder, error) { + data = bytes.TrimSpace(data) + if len(data) == 0 { + return nil, nil + } + + chunks := split(data) + ret := make([]noder.Noder, len(chunks)) + var err error + for i, c := range chunks { + ret[i], err = decodeChild(c) + if err != nil { + return nil, fmt.Errorf("malformed element %d (%s): %s", i, c, err) + } + } + + return ret, nil +} + +// returns the description of the elements of a dir. It is just looking +// for spaces if they are not part of inner dirs. +func split(data []byte) [][]byte { + chunks := [][]byte{} + + start := 0 + dirDepth := 0 + for i, b := range data { + switch b { + case dirStartMark: + dirDepth++ + case dirEndMark: + dirDepth-- + case dirElementSep: + if dirDepth == 0 { + chunks = append(chunks, data[start:i+1]) + start = i + 1 + } + } + } + chunks = append(chunks, data[start:]) + + return chunks +} + +// A child can be a file or a dir. +func decodeChild(data []byte) (noder.Noder, error) { + clean := bytes.TrimSpace(data) + if len(data) < 3 { + return nil, fmt.Errorf("element too short: %s", clean) + } + + switch clean[1] { + case fileStartMark: + return decodeFile(clean) + case dirStartMark: + return decodeDir(clean, nonRoot) + default: + if clean[0] == dirStartMark { + return nil, fmt.Errorf("non-root unnamed dir are not allowed: %s", + clean) + } + return nil, fmt.Errorf("malformed dir element: %s", clean) + } +} + +func decodeFile(data []byte) (noder.Noder, error) { + if len(data) == 3 { + return decodeEmptyFile(data) + } + + if len(data) != 4 { + return nil, fmt.Errorf("length is not 4") + } + if !isLetter(data[0]) { + return nil, fmt.Errorf("name must be a letter") + } + if data[1] != '<' { + return nil, fmt.Errorf("wrong file start character") + } + if !isNumber(data[2]) { + return nil, fmt.Errorf("contents must be a number") + } + if data[3] != '>' { + return nil, fmt.Errorf("wrong file end character") + } + + name := string(data[0]) + contents := string(data[2]) + + return newFile(name, contents) +} + +func decodeEmptyFile(data []byte) (noder.Noder, error) { + if len(data) != 3 { + return nil, fmt.Errorf("length is not 3: %s", data) + } + if !isLetter(data[0]) { + return nil, fmt.Errorf("name must be a letter: %s", data) + } + if data[1] != '<' { + return nil, fmt.Errorf("wrong file start character: %s", data) + } + if data[2] != '>' { + return nil, fmt.Errorf("wrong file end character: %s", data) + } + + name := string(data[0]) + + return newFile(name, "") +} + +// HashEqual returns if a and b have the same hash. +func HashEqual(a, b noder.Hasher) bool { + return bytes.Equal(a.Hash(), b.Hash()) +} diff --git a/utils/merkletrie/internal/fsnoder/new_test.go b/utils/merkletrie/internal/fsnoder/new_test.go new file mode 100644 index 0000000..e473705 --- /dev/null +++ b/utils/merkletrie/internal/fsnoder/new_test.go @@ -0,0 +1,337 @@ +package fsnoder + +import ( + "gopkg.in/src-d/go-git.v4/utils/merkletrie/noder" + + . "gopkg.in/check.v1" +) + +type FSNoderSuite struct{} + +var _ = Suite(&FSNoderSuite{}) + +func check(c *C, input string, expected *dir) { + obtained, err := New(input) + c.Assert(err, IsNil, Commentf("input = %s", input)) + + comment := Commentf("\n input = %s\n"+ + "expected = %s\nobtained = %s", + input, expected, obtained) + c.Assert(obtained.Hash(), DeepEquals, expected.Hash(), comment) +} + +func (s *FSNoderSuite) TestNoDataFails(c *C) { + _, err := New("") + c.Assert(err, Not(IsNil)) + + _, err = New(" ") // SPC + TAB + c.Assert(err, Not(IsNil)) +} + +func (s *FSNoderSuite) TestUnnamedRootFailsIfNotRoot(c *C) { + _, err := decodeDir([]byte("()"), false) + c.Assert(err, Not(IsNil)) +} + +func (s *FSNoderSuite) TestUnnamedInnerFails(c *C) { + _, err := New("(())") + c.Assert(err, Not(IsNil)) + _, err = New("((a<>))") + c.Assert(err, Not(IsNil)) +} + +func (s *FSNoderSuite) TestMalformedInnerName(c *C) { + _, err := New("(ab<>)") + c.Assert(err, Not(IsNil)) +} + +func (s *FSNoderSuite) TestMalformedFile(c *C) { + _, err := New("(a<12>)") + c.Assert(err, Not(IsNil)) + _, err = New("(4<>)") + c.Assert(err, Not(IsNil)) + _, err = New("(4<1>)") + c.Assert(err, Not(IsNil)) + _, err = New("(4?1>)") + c.Assert(err, Not(IsNil)) + _, err = New("(4)") + c.Assert(err, Not(IsNil)) + _, err = New("(4")) + c.Assert(err, Not(IsNil)) + _, err = decodeFile([]byte("a")) + c.Assert(err, Not(IsNil)) + _, err = decodeFile([]byte("a<1?")) + c.Assert(err, Not(IsNil)) + + _, err = decodeEmptyFile([]byte("a<1>")) + c.Assert(err, Not(IsNil)) + _, err = decodeEmptyFile([]byte("a?>")) + c.Assert(err, Not(IsNil)) + _, err = decodeEmptyFile([]byte("1<>")) + c.Assert(err, Not(IsNil)) + _, err = decodeEmptyFile([]byte("a") + c.Assert(err, Not(IsNil)) + _, err = New("a<>") + c.Assert(err, Not(IsNil)) +} + +func (s *FSNoderSuite) TestUnnamedEmptyRoot(c *C) { + input := "()" + + expected, err := newDir("", nil) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestNamedEmptyRoot(c *C) { + input := "a()" + + expected, err := newDir("a", nil) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestEmptyFile(c *C) { + input := "(a<>)" + + a1, err := newFile("a", "") + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{a1}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestNonEmptyFile(c *C) { + input := "(a<1>)" + + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{a1}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestTwoFilesSameContents(c *C) { + input := "(b<1> a<1>)" + + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + b1, err := newFile("b", "1") + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{a1, b1}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestTwoFilesDifferentContents(c *C) { + input := "(b<2> a<1>)" + + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + b2, err := newFile("b", "2") + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{a1, b2}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestManyFiles(c *C) { + input := "(e<1> b<2> a<1> c<1> d<3> f<4>)" + + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + b2, err := newFile("b", "2") + c.Assert(err, IsNil) + c1, err := newFile("c", "1") + c.Assert(err, IsNil) + d3, err := newFile("d", "3") + c.Assert(err, IsNil) + e1, err := newFile("e", "1") + c.Assert(err, IsNil) + f4, err := newFile("f", "4") + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{e1, b2, a1, c1, d3, f4}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestEmptyDir(c *C) { + input := "(A())" + + A, err := newDir("A", nil) + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{A}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestDirWithEmtpyFile(c *C) { + input := "(A(a<>))" + + a, err := newFile("a", "") + c.Assert(err, IsNil) + A, err := newDir("A", []noder.Noder{a}) + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{A}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestDirWithEmtpyFileSameName(c *C) { + input := "(A(A<>))" + + f, err := newFile("A", "") + c.Assert(err, IsNil) + A, err := newDir("A", []noder.Noder{f}) + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{A}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestDirWithFile(c *C) { + input := "(A(a<1>))" + + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + A, err := newDir("A", []noder.Noder{a1}) + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{A}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestDirWithEmptyDirSameName(c *C) { + input := "(A(A()))" + + A2, err := newDir("A", nil) + c.Assert(err, IsNil) + A1, err := newDir("A", []noder.Noder{A2}) + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{A1}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestDirWithEmptyDir(c *C) { + input := "(A(B()))" + + B, err := newDir("B", nil) + c.Assert(err, IsNil) + A, err := newDir("A", []noder.Noder{B}) + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{A}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestDirWithTwoFiles(c *C) { + input := "(A(a<1> b<2>))" + + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + b2, err := newFile("b", "2") + c.Assert(err, IsNil) + A, err := newDir("A", []noder.Noder{b2, a1}) + c.Assert(err, IsNil) + expected, err := newDir("", []noder.Noder{A}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestCrazy(c *C) { + // "" + // | + // ------------------------- + // | | | | | + // a1 B c1 d2 E + // | | + // ------------- E + // | | | | | + // A B X c1 E + // | | + // a1 e1 + input := "(d<2> b(c<1> b() a() x(a<1>)) a<1> c<1> e(e(e(e<1>))))" + + e1, err := newFile("e", "1") + c.Assert(err, IsNil) + E, err := newDir("e", []noder.Noder{e1}) + c.Assert(err, IsNil) + E, err = newDir("e", []noder.Noder{E}) + c.Assert(err, IsNil) + E, err = newDir("e", []noder.Noder{E}) + c.Assert(err, IsNil) + + A, err := newDir("a", nil) + c.Assert(err, IsNil) + B, err := newDir("b", nil) + c.Assert(err, IsNil) + a1, err := newFile("a", "1") + c.Assert(err, IsNil) + X, err := newDir("x", []noder.Noder{a1}) + c.Assert(err, IsNil) + c1, err := newFile("c", "1") + c.Assert(err, IsNil) + B, err = newDir("b", []noder.Noder{c1, B, X, A}) + c.Assert(err, IsNil) + + a1, err = newFile("a", "1") + c.Assert(err, IsNil) + c1, err = newFile("c", "1") + c.Assert(err, IsNil) + d2, err := newFile("d", "2") + c.Assert(err, IsNil) + + expected, err := newDir("", []noder.Noder{a1, d2, E, B, c1}) + c.Assert(err, IsNil) + + check(c, input, expected) +} + +func (s *FSNoderSuite) TestHashEqual(c *C) { + input1 := "(A(a<1> b<2>))" + input2 := "(A(a<1> b<2>))" + input3 := "(A(a<> b<2>))" + + t1, err := New(input1) + c.Assert(err, IsNil) + t2, err := New(input2) + c.Assert(err, IsNil) + t3, err := New(input3) + c.Assert(err, IsNil) + + c.Assert(HashEqual(t1, t2), Equals, true) + c.Assert(HashEqual(t2, t1), Equals, true) + + c.Assert(HashEqual(t2, t3), Equals, false) + c.Assert(HashEqual(t3, t2), Equals, false) + + c.Assert(HashEqual(t3, t1), Equals, false) + c.Assert(HashEqual(t1, t3), Equals, false) +} diff --git a/utils/merkletrie/noder/noder.go b/utils/merkletrie/noder/noder.go new file mode 100644 index 0000000..d6b3de4 --- /dev/null +++ b/utils/merkletrie/noder/noder.go @@ -0,0 +1,59 @@ +// Package noder provide an interface for defining nodes in a +// merkletrie, their hashes and their paths (a noders and its +// ancestors). +// +// The hasher interface is easy to implement naively by elements that +// already have a hash, like git blobs and trees. More sophisticated +// implementations can implement the Equal function in exotic ways +// though: for instance, comparing the modification time of directories +// in a filesystem. +package noder + +import "fmt" + +// Hasher interface is implemented by types that can tell you +// their hash. +type Hasher interface { + Hash() []byte +} + +// Equal functions take two hashers and return if they are equal. +// +// These functions are expected to be faster than reflect.Equal or +// reflect.DeepEqual because they can compare just the hash of the +// objects, instead of their contents, so they are expected to be O(1). +type Equal func(a, b Hasher) bool + +// The Noder interface is implemented by the elements of a Merkle Trie. +// +// There are two types of elements in a Merkle Trie: +// +// - file-like nodes: they cannot have children. +// +// - directory-like nodes: they can have 0 or more children and their +// hash is calculated by combining their children hashes. +type Noder interface { + Hasher + fmt.Stringer // for testing purposes + // Name returns the name of an element (relative, not its full + // path). + Name() string + // IsDir returns true if the element is a directory-like node or + // false if it is a file-like node. + IsDir() bool + // Children returns the children of the element. Note that empty + // directory-like noders and file-like noders will both return + // NoChildren. + Children() ([]Noder, error) + // NumChildren returns the number of children this element has. + // + // This method is an optimization: the number of children is easily + // calculated as the length of the value returned by the Children + // method (above); yet, some implementations will be able to + // implement NumChildren in O(1) while Children is usually more + // complex. + NumChildren() (int, error) +} + +// NoChildren represents the children of a noder without children. +var NoChildren = []Noder{} diff --git a/utils/merkletrie/noder/noder_test.go b/utils/merkletrie/noder/noder_test.go new file mode 100644 index 0000000..14bef4a --- /dev/null +++ b/utils/merkletrie/noder/noder_test.go @@ -0,0 +1,94 @@ +package noder + +import . "gopkg.in/check.v1" + +type NoderSuite struct{} + +var _ = Suite(&NoderSuite{}) + +type noderMock struct { + name string + hash []byte + isDir bool + children []Noder +} + +func (n noderMock) String() string { return n.Name() } +func (n noderMock) Hash() []byte { return n.hash } +func (n noderMock) Name() string { return n.name } +func (n noderMock) IsDir() bool { return n.isDir } +func (n noderMock) Children() ([]Noder, error) { return n.children, nil } +func (n noderMock) NumChildren() (int, error) { return len(n.children), nil } + +// Returns a sequence with the noders 3, 2, and 1 from the +// following diagram: +// +// 3 +// | +// 2 +// | +// 1 +// / \ +// c1 c2 +// +// This is also the path of "1". +func nodersFixture() []Noder { + n1 := &noderMock{ + name: "1", + hash: []byte{0x00, 0x01, 0x02}, + isDir: true, + children: childrenFixture(), + } + n2 := &noderMock{name: "2"} + n3 := &noderMock{name: "3"} + return []Noder{n3, n2, n1} +} + +// Returns a collection of 2 noders: c1, c2. +func childrenFixture() []Noder { + c1 := &noderMock{name: "c1"} + c2 := &noderMock{name: "c2"} + return []Noder{c1, c2} +} + +// Returns the same as nodersFixture but sorted by name, this is: "1", +// "2" and then "3". +func sortedNodersFixture() []Noder { + n1 := &noderMock{ + name: "1", + hash: []byte{0x00, 0x01, 0x02}, + isDir: true, + children: childrenFixture(), + } + n2 := &noderMock{name: "2"} + n3 := &noderMock{name: "3"} + return []Noder{n1, n2, n3} // the same as nodersFixture but sorted by name +} + +// returns nodersFixture as the path of "1". +func pathFixture() Path { + return Path(nodersFixture()) +} + +func (s *NoderSuite) TestString(c *C) { + c.Assert(pathFixture().String(), Equals, "3/2/1") +} + +func (s *NoderSuite) TestLast(c *C) { + c.Assert(pathFixture().Last().Name(), Equals, "1") +} + +func (s *NoderSuite) TestPathImplementsNoder(c *C) { + p := pathFixture() + c.Assert(p.Name(), Equals, "1") + c.Assert(p.Hash(), DeepEquals, []byte{0x00, 0x01, 0x02}) + c.Assert(p.IsDir(), Equals, true) + + children, err := p.Children() + c.Assert(err, IsNil) + c.Assert(children, DeepEquals, childrenFixture()) + + numChildren, err := p.NumChildren() + c.Assert(err, IsNil) + c.Assert(numChildren, Equals, 2) +} diff --git a/utils/merkletrie/noder/path.go b/utils/merkletrie/noder/path.go new file mode 100644 index 0000000..c992f7e --- /dev/null +++ b/utils/merkletrie/noder/path.go @@ -0,0 +1,59 @@ +package noder + +import "bytes" + +// Path values represent a noder and its ancestors. The root goes first +// and the actual final noder the path is refering to will be the last. +// +// A path implements the Noder interface, redirecting all the interface +// calls to its final noder. +// +// Paths build from an empty Noder slice are not valid paths and should +// not be used. +type Path []Noder + +// String returns the full path of the final noder as a string, using +// "/" as the separator. +func (p Path) String() string { + var buf bytes.Buffer + sep := "" + for _, e := range p { + _, _ = buf.WriteString(sep) + sep = "/" + _, _ = buf.WriteString(e.Name()) + } + + return buf.String() +} + +// Last returns the final noder in the path. +func (p Path) Last() Noder { + return p[len(p)-1] +} + +// Hash returns the hash of the final noder of the path. +func (p Path) Hash() []byte { + return p.Last().Hash() +} + +// Name returns the name of the final noder of the path. +func (p Path) Name() string { + return p.Last().Name() +} + +// IsDir returns if the final noder of the path is a directory-like +// noder. +func (p Path) IsDir() bool { + return p.Last().IsDir() +} + +// Children returns the children of the final noder in the path. +func (p Path) Children() ([]Noder, error) { + return p.Last().Children() +} + +// NumChildren returns the number of children the final noder of the +// path has. +func (p Path) NumChildren() (int, error) { + return p.Last().NumChildren() +} -- cgit