aboutsummaryrefslogtreecommitdiffstats
path: root/utils
diff options
context:
space:
mode:
authorMáximo Cuadros <mcuadros@gmail.com>2017-04-11 23:20:06 +0200
committerMáximo Cuadros <mcuadros@gmail.com>2017-04-11 23:20:06 +0200
commite14ee7a2645b486d72f52a0c62714b3049077554 (patch)
treeb3acc4ebfedd7f70289f47bd520a7e2d0514e4d3 /utils
parent7a428a915ce2b7bb0f4fc6dcee77932ebacfabbf (diff)
downloadgo-git-e14ee7a2645b486d72f52a0c62714b3049077554.tar.gz
merkletrie: filesystem and index speedup and documentation
Diffstat (limited to 'utils')
-rw-r--r--utils/merkletrie/filesystem/node.go162
-rw-r--r--utils/merkletrie/filesystem/node_test.go47
-rw-r--r--utils/merkletrie/index/node.go141
-rw-r--r--utils/merkletrie/index/node_test.go40
4 files changed, 179 insertions, 211 deletions
diff --git a/utils/merkletrie/filesystem/node.go b/utils/merkletrie/filesystem/node.go
index 847d71e..6c09d29 100644
--- a/utils/merkletrie/filesystem/node.go
+++ b/utils/merkletrie/filesystem/node.go
@@ -1,7 +1,6 @@
package filesystem
import (
- "bytes"
"io"
"os"
"path/filepath"
@@ -16,113 +15,130 @@ var ignore = map[string]bool{
".git": true,
}
-func IsEquals(a, b noder.Hasher) bool {
- pathA := a.(noder.Path)
- pathB := b.(noder.Path)
- if pathA[len(pathA)-1].IsDir() || pathB[len(pathB)-1].IsDir() {
- return false
- }
+// The node represents a file or a directory in a billy.Filesystem. It
+// implements the interface noder.Noder of merkletrie package.
+//
+// This implementation implements a "standard" hash method being able to be
+// compared with any other noder.Noder implementation inside of go-git.
+type node struct {
+ fs billy.Filesystem
+
+ path string
+ hash []byte
+ children []noder.Noder
+ isDir bool
+}
+
+// NewRootNode returns the root node based on a given billy.Filesystem
+func NewRootNode(fs billy.Filesystem) noder.Noder {
+ return &node{fs: fs, isDir: true}
+}
+
+// Hash the hash of a filesystem is the result of concatenating the computed
+// plumbing.Hash of the file as a Blob and its plumbing.FileMode; that way the
+// difftree algorithm will detect changes in the contents of files and also in
+// their mode.
+//
+// The hash of a directory is always a 24-bytes slice of zero values
+func (n *node) Hash() []byte {
+ return n.hash
+}
- return bytes.Equal(a.Hash(), b.Hash())
+func (n *node) Name() string {
+ return filepath.Base(n.path)
}
-type Node struct {
- parent string
- name string
- isDir bool
- info billy.FileInfo
- fs billy.Filesystem
+func (n *node) IsDir() bool {
+ return n.isDir
}
-func NewRootNode(fs billy.Filesystem) (*Node, error) {
- info, err := fs.Stat("/")
- if err != nil && !os.IsNotExist(err) {
+func (n *node) Children() ([]noder.Noder, error) {
+ if err := n.calculateChildren(); err != nil {
return nil, err
}
- return &Node{fs: fs, info: info, isDir: true, name: ""}, nil
+ return n.children, nil
}
-func (n *Node) String() string {
- return filepath.Join(n.parent, n.name)
+func (n *node) NumChildren() (int, error) {
+ if err := n.calculateChildren(); err != nil {
+ return -1, err
+ }
+
+ return len(n.children), nil
}
-func (n *Node) Hash() []byte {
- if n.IsDir() {
+func (n *node) calculateChildren() error {
+ if len(n.children) != 0 {
return nil
}
- f, err := n.fs.Open(n.fullpath())
+ files, err := n.fs.ReadDir(n.path)
if err != nil {
- panic(err)
- }
+ if os.IsNotExist(err) {
+ return nil
+ }
- h := plumbing.NewHasher(plumbing.BlobObject, n.info.Size())
- if _, err := io.Copy(h, f); err != nil {
- panic(err)
+ return nil
}
- hash := h.Sum()
- mode, err := filemode.NewFromOSFileMode(n.info.Mode())
- if err != nil {
- panic(err)
- }
+ for _, file := range files {
+ if _, ok := ignore[file.Name()]; ok {
+ continue
+ }
- return append(hash[:], mode.Bytes()...)
-}
+ c, err := n.newChildNode(file)
+ if err != nil {
+ return err
+ }
-func (n *Node) Name() string {
- return n.name
-}
+ n.children = append(n.children, c)
+ }
-func (n *Node) IsDir() bool {
- return n.isDir
+ return nil
}
-func (n *Node) Children() ([]noder.Noder, error) {
- files, err := n.readDir()
-
+func (n *node) newChildNode(file billy.FileInfo) (*node, error) {
+ path := filepath.Join(n.path, file.Name())
+ hash, err := n.calculateHash(path, file)
if err != nil {
return nil, err
}
- path := n.fullpath()
- var c []noder.Noder
- for _, file := range files {
- if _, ok := ignore[file.Name()]; ok {
- continue
- }
+ return &node{
+ fs: n.fs,
+ path: path,
+ hash: hash,
+ isDir: file.IsDir(),
+ }, nil
+}
- c = append(c, &Node{
- fs: n.fs,
- parent: path,
- info: file,
- name: file.Name(),
- isDir: file.IsDir(),
- })
+func (n *node) calculateHash(path string, file billy.FileInfo) ([]byte, error) {
+ if file.IsDir() {
+ return make([]byte, 24), nil
}
- return c, nil
-}
-
-func (n *Node) NumChildren() (int, error) {
- files, err := n.readDir()
- return len(files), err
-}
+ f, err := n.fs.Open(path)
+ if err != nil {
+ return nil, err
+ }
-func (n *Node) fullpath() string {
- return filepath.Join(n.parent, n.name)
-}
+ defer f.Close()
-func (n *Node) readDir() ([]billy.FileInfo, error) {
- if !n.IsDir() {
- return nil, nil
+ h := plumbing.NewHasher(plumbing.BlobObject, file.Size())
+ if _, err := io.Copy(h, f); err != nil {
+ return nil, err
}
- l, err := n.fs.ReadDir(n.fullpath())
- if err != nil && os.IsNotExist(err) {
- return l, nil
+ mode, err := filemode.NewFromOSFileMode(file.Mode())
+ if err != nil {
+ return nil, err
}
- return l, err
+ hash := h.Sum()
+ return append(hash[:], mode.Bytes()...), nil
+}
+
+func (n *node) String() string {
+ return n.path
}
diff --git a/utils/merkletrie/filesystem/node_test.go b/utils/merkletrie/filesystem/node_test.go
index 291af6b..b7c124d 100644
--- a/utils/merkletrie/filesystem/node_test.go
+++ b/utils/merkletrie/filesystem/node_test.go
@@ -1,6 +1,7 @@
package filesystem
import (
+ "bytes"
"io"
"os"
"testing"
@@ -9,6 +10,7 @@ import (
"gopkg.in/src-d/go-billy.v2"
"gopkg.in/src-d/go-billy.v2/memfs"
"gopkg.in/src-d/go-git.v4/utils/merkletrie"
+ "gopkg.in/src-d/go-git.v4/utils/merkletrie/noder"
)
func Test(t *testing.T) { TestingT(t) }
@@ -28,12 +30,7 @@ func (s *NoderSuite) TestDiff(c *C) {
WriteFile(fsB, "qux/bar", []byte("foo"), 0644)
WriteFile(fsB, "qux/qux", []byte("foo"), 0644)
- nodeA, err := NewRootNode(fsA)
- c.Assert(err, IsNil)
- nodeB, err := NewRootNode(fsB)
- c.Assert(err, IsNil)
-
- ch, err := merkletrie.DiffTree(nodeA, nodeB, IsEquals)
+ ch, err := merkletrie.DiffTree(NewRootNode(fsA), NewRootNode(fsB), IsEquals)
c.Assert(err, IsNil)
c.Assert(ch, HasLen, 0)
}
@@ -49,12 +46,7 @@ func (s *NoderSuite) TestDiffChangeContent(c *C) {
WriteFile(fsB, "qux/bar", []byte("bar"), 0644)
WriteFile(fsB, "qux/qux", []byte("foo"), 0644)
- nodeA, err := NewRootNode(fsA)
- c.Assert(err, IsNil)
- nodeB, err := NewRootNode(fsB)
- c.Assert(err, IsNil)
-
- ch, err := merkletrie.DiffTree(nodeA, nodeB, IsEquals)
+ ch, err := merkletrie.DiffTree(NewRootNode(fsA), NewRootNode(fsB), IsEquals)
c.Assert(err, IsNil)
c.Assert(ch, HasLen, 1)
}
@@ -66,12 +58,7 @@ func (s *NoderSuite) TestDiffChangeMissing(c *C) {
fsB := memfs.New()
WriteFile(fsB, "bar", []byte("bar"), 0644)
- nodeA, err := NewRootNode(fsA)
- c.Assert(err, IsNil)
- nodeB, err := NewRootNode(fsB)
- c.Assert(err, IsNil)
-
- ch, err := merkletrie.DiffTree(nodeA, nodeB, IsEquals)
+ ch, err := merkletrie.DiffTree(NewRootNode(fsA), NewRootNode(fsB), IsEquals)
c.Assert(err, IsNil)
c.Assert(ch, HasLen, 2)
}
@@ -83,12 +70,7 @@ func (s *NoderSuite) TestDiffChangeMode(c *C) {
fsB := memfs.New()
WriteFile(fsB, "foo", []byte("foo"), 0755)
- nodeA, err := NewRootNode(fsA)
- c.Assert(err, IsNil)
- nodeB, err := NewRootNode(fsB)
- c.Assert(err, IsNil)
-
- ch, err := merkletrie.DiffTree(nodeA, nodeB, IsEquals)
+ ch, err := merkletrie.DiffTree(NewRootNode(fsA), NewRootNode(fsB), IsEquals)
c.Assert(err, IsNil)
c.Assert(ch, HasLen, 1)
}
@@ -100,12 +82,7 @@ func (s *NoderSuite) TestDiffChangeModeNotRelevant(c *C) {
fsB := memfs.New()
WriteFile(fsB, "foo", []byte("foo"), 0655)
- nodeA, err := NewRootNode(fsA)
- c.Assert(err, IsNil)
- nodeB, err := NewRootNode(fsB)
- c.Assert(err, IsNil)
-
- ch, err := merkletrie.DiffTree(nodeA, nodeB, IsEquals)
+ ch, err := merkletrie.DiffTree(NewRootNode(fsA), NewRootNode(fsB), IsEquals)
c.Assert(err, IsNil)
c.Assert(ch, HasLen, 0)
}
@@ -125,3 +102,13 @@ func WriteFile(fs billy.Filesystem, filename string, data []byte, perm os.FileMo
}
return err
}
+
+var empty = make([]byte, 24)
+
+func IsEquals(a, b noder.Hasher) bool {
+ if bytes.Equal(a.Hash(), empty) || bytes.Equal(b.Hash(), empty) {
+ return false
+ }
+
+ return bytes.Equal(a.Hash(), b.Hash())
+}
diff --git a/utils/merkletrie/index/node.go b/utils/merkletrie/index/node.go
index 7972f7f..2c72f6d 100644
--- a/utils/merkletrie/index/node.go
+++ b/utils/merkletrie/index/node.go
@@ -1,113 +1,86 @@
package index
import (
- "bytes"
"path/filepath"
-
"strings"
"gopkg.in/src-d/go-git.v4/plumbing/format/index"
"gopkg.in/src-d/go-git.v4/utils/merkletrie/noder"
)
-func IsEquals(a, b noder.Hasher) bool {
- pathA := a.(noder.Path)
- pathB := b.(noder.Path)
- if pathA[len(pathA)-1].IsDir() || pathB[len(pathB)-1].IsDir() {
- return false
- }
-
- return bytes.Equal(a.Hash(), b.Hash())
-}
-
-type Node struct {
- index *index.Index
- parent string
- name string
- entry index.Entry
- isDir bool
-}
-
-func NewRootNode(idx *index.Index) (*Node, error) {
- return &Node{index: idx, isDir: true}, nil
+// The node represents a index.Entry or a directory inferred from the path
+// of all entries. It implements the interface noder.Noder of merkletrie
+// package.
+//
+// This implementation implements a "standard" hash method being able to be
+// compared with any other noder.Noder implementation inside of go-git
+type node struct {
+ path string
+ entry index.Entry
+ children []noder.Noder
+ isDir bool
}
-func (n *Node) String() string {
- return n.fullpath()
-}
-
-func (n *Node) Hash() []byte {
- if n.IsDir() {
- return nil
- }
-
- return append(n.entry.Hash[:], n.entry.Mode.Bytes()...)
-}
-
-func (n *Node) Name() string {
- return n.name
-}
+// NewRootNode returns the root node of a computed tree from a index.Index,
+func NewRootNode(idx *index.Index) noder.Noder {
+ const rootNode = ""
-func (n *Node) IsDir() bool {
- return n.isDir
-}
+ m := map[string]*node{rootNode: {isDir: true}}
-func (n *Node) Children() ([]noder.Noder, error) {
- path := n.fullpath()
- dirs := make(map[string]bool)
+ for _, e := range idx.Entries {
+ parts := strings.Split(e.Name, string(filepath.Separator))
- var c []noder.Noder
- for _, e := range n.index.Entries {
- if e.Name == path {
- continue
- }
+ var path string
+ for _, part := range parts {
+ parent := path
+ path = filepath.Join(path, part)
- prefix := path
- if prefix != "" {
- prefix += "/"
- }
+ if _, ok := m[path]; ok {
+ continue
+ }
- if !strings.HasPrefix(e.Name, prefix) {
- continue
- }
+ n := &node{path: path}
+ if path == e.Name {
+ n.entry = e
+ } else {
+ n.isDir = true
+ }
- name := e.Name[len(path):]
- if len(name) != 0 && name[0] == '/' {
- name = name[1:]
+ m[n.path] = n
+ m[parent].children = append(m[parent].children, n)
}
+ }
- parts := strings.Split(name, "/")
- if len(parts) > 1 {
- dirs[parts[0]] = true
- continue
- }
+ return m[rootNode]
+}
- c = append(c, &Node{
- index: n.index,
- parent: path,
- name: name,
- entry: e,
- })
- }
+func (n *node) String() string {
+ return n.path
+}
- for dir := range dirs {
- c = append(c, &Node{
- index: n.index,
- parent: path,
- name: dir,
- isDir: true,
- })
+// Hash the hash of a filesystem is a 24-byte slice, is the result of
+// concatenating the computed plumbing.Hash of the file as a Blob and its
+// plumbing.FileMode; that way the difftree algorithm will detect changes in the
+// contents of files and also in their mode.
+//
+// If the node is computed and not based on a index.Entry the hash is equals
+// to a 24-bytes slices of zero values.
+func (n *node) Hash() []byte {
+ return append(n.entry.Hash[:], n.entry.Mode.Bytes()...)
+}
- }
+func (n *node) Name() string {
+ return filepath.Base(n.path)
+}
- return c, nil
+func (n *node) IsDir() bool {
+ return n.isDir
}
-func (n *Node) NumChildren() (int, error) {
- files, err := n.Children()
- return len(files), err
+func (n *node) Children() ([]noder.Noder, error) {
+ return n.children, nil
}
-func (n *Node) fullpath() string {
- return filepath.Join(n.parent, n.name)
+func (n *node) NumChildren() (int, error) {
+ return len(n.children), nil
}
diff --git a/utils/merkletrie/index/node_test.go b/utils/merkletrie/index/node_test.go
index 0ee0884..48aa35f 100644
--- a/utils/merkletrie/index/node_test.go
+++ b/utils/merkletrie/index/node_test.go
@@ -1,12 +1,14 @@
package index
import (
+ "bytes"
"testing"
. "gopkg.in/check.v1"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/format/index"
"gopkg.in/src-d/go-git.v4/utils/merkletrie"
+ "gopkg.in/src-d/go-git.v4/utils/merkletrie/noder"
)
func Test(t *testing.T) { TestingT(t) }
@@ -34,12 +36,7 @@ func (s *NoderSuite) TestDiff(c *C) {
},
}
- nodeA, err := NewRootNode(indexA)
- c.Assert(err, IsNil)
- nodeB, err := NewRootNode(indexB)
- c.Assert(err, IsNil)
-
- ch, err := merkletrie.DiffTree(nodeA, nodeB, IsEquals)
+ ch, err := merkletrie.DiffTree(NewRootNode(indexA), NewRootNode(indexB), isEquals)
c.Assert(err, IsNil)
c.Assert(ch, HasLen, 0)
}
@@ -57,12 +54,7 @@ func (s *NoderSuite) TestDiffChange(c *C) {
},
}
- nodeA, err := NewRootNode(indexA)
- c.Assert(err, IsNil)
- nodeB, err := NewRootNode(indexB)
- c.Assert(err, IsNil)
-
- ch, err := merkletrie.DiffTree(nodeA, nodeB, IsEquals)
+ ch, err := merkletrie.DiffTree(NewRootNode(indexA), NewRootNode(indexB), isEquals)
c.Assert(err, IsNil)
c.Assert(ch, HasLen, 2)
}
@@ -80,12 +72,7 @@ func (s *NoderSuite) TestDiffDir(c *C) {
},
}
- nodeA, err := NewRootNode(indexA)
- c.Assert(err, IsNil)
- nodeB, err := NewRootNode(indexB)
- c.Assert(err, IsNil)
-
- ch, err := merkletrie.DiffTree(nodeA, nodeB, IsEquals)
+ ch, err := merkletrie.DiffTree(NewRootNode(indexA), NewRootNode(indexB), isEquals)
c.Assert(err, IsNil)
c.Assert(ch, HasLen, 2)
}
@@ -105,12 +92,17 @@ func (s *NoderSuite) TestDiffSameRoot(c *C) {
},
}
- nodeA, err := NewRootNode(indexA)
- c.Assert(err, IsNil)
- nodeB, err := NewRootNode(indexB)
- c.Assert(err, IsNil)
-
- ch, err := merkletrie.DiffTree(nodeA, nodeB, IsEquals)
+ ch, err := merkletrie.DiffTree(NewRootNode(indexA), NewRootNode(indexB), isEquals)
c.Assert(err, IsNil)
c.Assert(ch, HasLen, 1)
}
+
+var empty = make([]byte, 24)
+
+func isEquals(a, b noder.Hasher) bool {
+ if bytes.Equal(a.Hash(), empty) || bytes.Equal(b.Hash(), empty) {
+ return false
+ }
+
+ return bytes.Equal(a.Hash(), b.Hash())
+}