From 6a00b3004a9bc9f86a041b59dc27e6b24695fc13 Mon Sep 17 00:00:00 2001 From: Máximo Cuadros Date: Sun, 9 Apr 2017 10:33:09 +0200 Subject: merkletrie: node support for index file --- utils/merkletrie/index/node.go | 113 +++++++++++++++++++++++++++++++++++ utils/merkletrie/index/node_test.go | 116 ++++++++++++++++++++++++++++++++++++ 2 files changed, 229 insertions(+) create mode 100644 utils/merkletrie/index/node.go create mode 100644 utils/merkletrie/index/node_test.go (limited to 'utils/merkletrie/index') diff --git a/utils/merkletrie/index/node.go b/utils/merkletrie/index/node.go new file mode 100644 index 0000000..7972f7f --- /dev/null +++ b/utils/merkletrie/index/node.go @@ -0,0 +1,113 @@ +package index + +import ( + "bytes" + "path/filepath" + + "strings" + + "gopkg.in/src-d/go-git.v4/plumbing/format/index" + "gopkg.in/src-d/go-git.v4/utils/merkletrie/noder" +) + +func IsEquals(a, b noder.Hasher) bool { + pathA := a.(noder.Path) + pathB := b.(noder.Path) + if pathA[len(pathA)-1].IsDir() || pathB[len(pathB)-1].IsDir() { + return false + } + + return bytes.Equal(a.Hash(), b.Hash()) +} + +type Node struct { + index *index.Index + parent string + name string + entry index.Entry + isDir bool +} + +func NewRootNode(idx *index.Index) (*Node, error) { + return &Node{index: idx, isDir: true}, nil +} + +func (n *Node) String() string { + return n.fullpath() +} + +func (n *Node) Hash() []byte { + if n.IsDir() { + return nil + } + + return append(n.entry.Hash[:], n.entry.Mode.Bytes()...) +} + +func (n *Node) Name() string { + return n.name +} + +func (n *Node) IsDir() bool { + return n.isDir +} + +func (n *Node) Children() ([]noder.Noder, error) { + path := n.fullpath() + dirs := make(map[string]bool) + + var c []noder.Noder + for _, e := range n.index.Entries { + if e.Name == path { + continue + } + + prefix := path + if prefix != "" { + prefix += "/" + } + + if !strings.HasPrefix(e.Name, prefix) { + continue + } + + name := e.Name[len(path):] + if len(name) != 0 && name[0] == '/' { + name = name[1:] + } + + parts := strings.Split(name, "/") + if len(parts) > 1 { + dirs[parts[0]] = true + continue + } + + c = append(c, &Node{ + index: n.index, + parent: path, + name: name, + entry: e, + }) + } + + for dir := range dirs { + c = append(c, &Node{ + index: n.index, + parent: path, + name: dir, + isDir: true, + }) + + } + + return c, nil +} + +func (n *Node) NumChildren() (int, error) { + files, err := n.Children() + return len(files), err +} + +func (n *Node) fullpath() string { + return filepath.Join(n.parent, n.name) +} diff --git a/utils/merkletrie/index/node_test.go b/utils/merkletrie/index/node_test.go new file mode 100644 index 0000000..0ee0884 --- /dev/null +++ b/utils/merkletrie/index/node_test.go @@ -0,0 +1,116 @@ +package index + +import ( + "testing" + + . "gopkg.in/check.v1" + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/index" + "gopkg.in/src-d/go-git.v4/utils/merkletrie" +) + +func Test(t *testing.T) { TestingT(t) } + +type NoderSuite struct{} + +var _ = Suite(&NoderSuite{}) + +func (s *NoderSuite) TestDiff(c *C) { + indexA := &index.Index{ + Entries: []index.Entry{ + {Name: "foo", Hash: plumbing.NewHash("8ab686eafeb1f44702738c8b0f24f2567c36da6d")}, + {Name: "bar/foo", Hash: plumbing.NewHash("8ab686eafeb1f44702738c8b0f24f2567c36da6d")}, + {Name: "bar/qux", Hash: plumbing.NewHash("8ab686eafeb1f44702738c8b0f24f2567c36da6d")}, + {Name: "bar/baz/foo", Hash: plumbing.NewHash("8ab686eafeb1f44702738c8b0f24f2567c36da6d")}, + }, + } + + indexB := &index.Index{ + Entries: []index.Entry{ + {Name: "foo", Hash: plumbing.NewHash("8ab686eafeb1f44702738c8b0f24f2567c36da6d")}, + {Name: "bar/foo", Hash: plumbing.NewHash("8ab686eafeb1f44702738c8b0f24f2567c36da6d")}, + {Name: "bar/qux", Hash: plumbing.NewHash("8ab686eafeb1f44702738c8b0f24f2567c36da6d")}, + {Name: "bar/baz/foo", Hash: plumbing.NewHash("8ab686eafeb1f44702738c8b0f24f2567c36da6d")}, + }, + } + + nodeA, err := NewRootNode(indexA) + c.Assert(err, IsNil) + nodeB, err := NewRootNode(indexB) + c.Assert(err, IsNil) + + ch, err := merkletrie.DiffTree(nodeA, nodeB, IsEquals) + c.Assert(err, IsNil) + c.Assert(ch, HasLen, 0) +} + +func (s *NoderSuite) TestDiffChange(c *C) { + indexA := &index.Index{ + Entries: []index.Entry{ + {Name: "bar/baz/bar", Hash: plumbing.NewHash("8ab686eafeb1f44702738c8b0f24f2567c36da6d")}, + }, + } + + indexB := &index.Index{ + Entries: []index.Entry{ + {Name: "bar/baz/foo", Hash: plumbing.NewHash("8ab686eafeb1f44702738c8b0f24f2567c36da6d")}, + }, + } + + nodeA, err := NewRootNode(indexA) + c.Assert(err, IsNil) + nodeB, err := NewRootNode(indexB) + c.Assert(err, IsNil) + + ch, err := merkletrie.DiffTree(nodeA, nodeB, IsEquals) + c.Assert(err, IsNil) + c.Assert(ch, HasLen, 2) +} + +func (s *NoderSuite) TestDiffDir(c *C) { + indexA := &index.Index{ + Entries: []index.Entry{ + {Name: "foo", Hash: plumbing.NewHash("8ab686eafeb1f44702738c8b0f24f2567c36da6d")}, + }, + } + + indexB := &index.Index{ + Entries: []index.Entry{ + {Name: "foo/bar", Hash: plumbing.NewHash("8ab686eafeb1f44702738c8b0f24f2567c36da6d")}, + }, + } + + nodeA, err := NewRootNode(indexA) + c.Assert(err, IsNil) + nodeB, err := NewRootNode(indexB) + c.Assert(err, IsNil) + + ch, err := merkletrie.DiffTree(nodeA, nodeB, IsEquals) + c.Assert(err, IsNil) + c.Assert(ch, HasLen, 2) +} + +func (s *NoderSuite) TestDiffSameRoot(c *C) { + indexA := &index.Index{ + Entries: []index.Entry{ + {Name: "foo.go", Hash: plumbing.NewHash("aab686eafeb1f44702738c8b0f24f2567c36da6d")}, + {Name: "foo/bar", Hash: plumbing.NewHash("8ab686eafeb1f44702738c8b0f24f2567c36da6d")}, + }, + } + + indexB := &index.Index{ + Entries: []index.Entry{ + {Name: "foo/bar", Hash: plumbing.NewHash("8ab686eafeb1f44702738c8b0f24f2567c36da6d")}, + {Name: "foo.go", Hash: plumbing.NewHash("8ab686eafeb1f44702738c8b0f24f2567c36da6d")}, + }, + } + + nodeA, err := NewRootNode(indexA) + c.Assert(err, IsNil) + nodeB, err := NewRootNode(indexB) + c.Assert(err, IsNil) + + ch, err := merkletrie.DiffTree(nodeA, nodeB, IsEquals) + c.Assert(err, IsNil) + c.Assert(ch, HasLen, 1) +} -- cgit From e14ee7a2645b486d72f52a0c62714b3049077554 Mon Sep 17 00:00:00 2001 From: Máximo Cuadros Date: Tue, 11 Apr 2017 23:20:06 +0200 Subject: merkletrie: filesystem and index speedup and documentation --- utils/merkletrie/index/node.go | 141 +++++++++++++++--------------------- utils/merkletrie/index/node_test.go | 40 ++++------ 2 files changed, 73 insertions(+), 108 deletions(-) (limited to 'utils/merkletrie/index') diff --git a/utils/merkletrie/index/node.go b/utils/merkletrie/index/node.go index 7972f7f..2c72f6d 100644 --- a/utils/merkletrie/index/node.go +++ b/utils/merkletrie/index/node.go @@ -1,113 +1,86 @@ package index import ( - "bytes" "path/filepath" - "strings" "gopkg.in/src-d/go-git.v4/plumbing/format/index" "gopkg.in/src-d/go-git.v4/utils/merkletrie/noder" ) -func IsEquals(a, b noder.Hasher) bool { - pathA := a.(noder.Path) - pathB := b.(noder.Path) - if pathA[len(pathA)-1].IsDir() || pathB[len(pathB)-1].IsDir() { - return false - } - - return bytes.Equal(a.Hash(), b.Hash()) -} - -type Node struct { - index *index.Index - parent string - name string - entry index.Entry - isDir bool -} - -func NewRootNode(idx *index.Index) (*Node, error) { - return &Node{index: idx, isDir: true}, nil +// The node represents a index.Entry or a directory inferred from the path +// of all entries. It implements the interface noder.Noder of merkletrie +// package. +// +// This implementation implements a "standard" hash method being able to be +// compared with any other noder.Noder implementation inside of go-git +type node struct { + path string + entry index.Entry + children []noder.Noder + isDir bool } -func (n *Node) String() string { - return n.fullpath() -} - -func (n *Node) Hash() []byte { - if n.IsDir() { - return nil - } - - return append(n.entry.Hash[:], n.entry.Mode.Bytes()...) -} - -func (n *Node) Name() string { - return n.name -} +// NewRootNode returns the root node of a computed tree from a index.Index, +func NewRootNode(idx *index.Index) noder.Noder { + const rootNode = "" -func (n *Node) IsDir() bool { - return n.isDir -} + m := map[string]*node{rootNode: {isDir: true}} -func (n *Node) Children() ([]noder.Noder, error) { - path := n.fullpath() - dirs := make(map[string]bool) + for _, e := range idx.Entries { + parts := strings.Split(e.Name, string(filepath.Separator)) - var c []noder.Noder - for _, e := range n.index.Entries { - if e.Name == path { - continue - } + var path string + for _, part := range parts { + parent := path + path = filepath.Join(path, part) - prefix := path - if prefix != "" { - prefix += "/" - } + if _, ok := m[path]; ok { + continue + } - if !strings.HasPrefix(e.Name, prefix) { - continue - } + n := &node{path: path} + if path == e.Name { + n.entry = e + } else { + n.isDir = true + } - name := e.Name[len(path):] - if len(name) != 0 && name[0] == '/' { - name = name[1:] + m[n.path] = n + m[parent].children = append(m[parent].children, n) } + } - parts := strings.Split(name, "/") - if len(parts) > 1 { - dirs[parts[0]] = true - continue - } + return m[rootNode] +} - c = append(c, &Node{ - index: n.index, - parent: path, - name: name, - entry: e, - }) - } +func (n *node) String() string { + return n.path +} - for dir := range dirs { - c = append(c, &Node{ - index: n.index, - parent: path, - name: dir, - isDir: true, - }) +// Hash the hash of a filesystem is a 24-byte slice, is the result of +// concatenating the computed plumbing.Hash of the file as a Blob and its +// plumbing.FileMode; that way the difftree algorithm will detect changes in the +// contents of files and also in their mode. +// +// If the node is computed and not based on a index.Entry the hash is equals +// to a 24-bytes slices of zero values. +func (n *node) Hash() []byte { + return append(n.entry.Hash[:], n.entry.Mode.Bytes()...) +} - } +func (n *node) Name() string { + return filepath.Base(n.path) +} - return c, nil +func (n *node) IsDir() bool { + return n.isDir } -func (n *Node) NumChildren() (int, error) { - files, err := n.Children() - return len(files), err +func (n *node) Children() ([]noder.Noder, error) { + return n.children, nil } -func (n *Node) fullpath() string { - return filepath.Join(n.parent, n.name) +func (n *node) NumChildren() (int, error) { + return len(n.children), nil } diff --git a/utils/merkletrie/index/node_test.go b/utils/merkletrie/index/node_test.go index 0ee0884..48aa35f 100644 --- a/utils/merkletrie/index/node_test.go +++ b/utils/merkletrie/index/node_test.go @@ -1,12 +1,14 @@ package index import ( + "bytes" "testing" . "gopkg.in/check.v1" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/index" "gopkg.in/src-d/go-git.v4/utils/merkletrie" + "gopkg.in/src-d/go-git.v4/utils/merkletrie/noder" ) func Test(t *testing.T) { TestingT(t) } @@ -34,12 +36,7 @@ func (s *NoderSuite) TestDiff(c *C) { }, } - nodeA, err := NewRootNode(indexA) - c.Assert(err, IsNil) - nodeB, err := NewRootNode(indexB) - c.Assert(err, IsNil) - - ch, err := merkletrie.DiffTree(nodeA, nodeB, IsEquals) + ch, err := merkletrie.DiffTree(NewRootNode(indexA), NewRootNode(indexB), isEquals) c.Assert(err, IsNil) c.Assert(ch, HasLen, 0) } @@ -57,12 +54,7 @@ func (s *NoderSuite) TestDiffChange(c *C) { }, } - nodeA, err := NewRootNode(indexA) - c.Assert(err, IsNil) - nodeB, err := NewRootNode(indexB) - c.Assert(err, IsNil) - - ch, err := merkletrie.DiffTree(nodeA, nodeB, IsEquals) + ch, err := merkletrie.DiffTree(NewRootNode(indexA), NewRootNode(indexB), isEquals) c.Assert(err, IsNil) c.Assert(ch, HasLen, 2) } @@ -80,12 +72,7 @@ func (s *NoderSuite) TestDiffDir(c *C) { }, } - nodeA, err := NewRootNode(indexA) - c.Assert(err, IsNil) - nodeB, err := NewRootNode(indexB) - c.Assert(err, IsNil) - - ch, err := merkletrie.DiffTree(nodeA, nodeB, IsEquals) + ch, err := merkletrie.DiffTree(NewRootNode(indexA), NewRootNode(indexB), isEquals) c.Assert(err, IsNil) c.Assert(ch, HasLen, 2) } @@ -105,12 +92,17 @@ func (s *NoderSuite) TestDiffSameRoot(c *C) { }, } - nodeA, err := NewRootNode(indexA) - c.Assert(err, IsNil) - nodeB, err := NewRootNode(indexB) - c.Assert(err, IsNil) - - ch, err := merkletrie.DiffTree(nodeA, nodeB, IsEquals) + ch, err := merkletrie.DiffTree(NewRootNode(indexA), NewRootNode(indexB), isEquals) c.Assert(err, IsNil) c.Assert(ch, HasLen, 1) } + +var empty = make([]byte, 24) + +func isEquals(a, b noder.Hasher) bool { + if bytes.Equal(a.Hash(), empty) || bytes.Equal(b.Hash(), empty) { + return false + } + + return bytes.Equal(a.Hash(), b.Hash()) +} -- cgit