From 0b8b8da617d5a077f282e57d0300dc106a604236 Mon Sep 17 00:00:00 2001 From: Alberto Cortés Date: Tue, 21 Feb 2017 10:24:10 +0100 Subject: difftree for git.Trees (#273) Last PR to fix #82: This PR modifies the difftree package itself. The old version extracted the files in both trees and compare them by hand. The new version turn the trees into merkletrie.Noders and call the merkletrie.Difftree function on them. How to review this PR: treenoder.go: defines the treeNoder type that wraps a git.Tree and implements merkletrie.Noder. change.go: defines the type of the output of a difftree operation. The type is the same as before, but I have moved it into its own file to keep the package organized. The old package defines the Action type too (insert, delete, modify), now, we reuse merkletrie.Action and it is no longer a field, but a method. change_adaptor.go: defines functions to turn merkletrie.Changes into difftree.Changes. difftree.go: before this patch this file holds all the logic to do a difftree, now it just turns the git.Trees into treeNoders, call merkletrie.difftree on them, and turns the resulting merkletrie.Changes into difftree.Changes. The only interesting piece of code here is that noders don't have the concept of mode (file permissions). The treenoder type codifies git.Tree modes into the merkletrie.Noder hash, so changes in the mode of a file are detected as modifications, just as the original git diff-tree command does. --- plumbing/difftree/treenoder.go | 141 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 plumbing/difftree/treenoder.go (limited to 'plumbing/difftree/treenoder.go') diff --git a/plumbing/difftree/treenoder.go b/plumbing/difftree/treenoder.go new file mode 100644 index 0000000..c0ed948 --- /dev/null +++ b/plumbing/difftree/treenoder.go @@ -0,0 +1,141 @@ +package difftree + +// A treenoder is a helper type that wraps git trees into merkletrie +// noders. +// +// As a merkletrie noder doesn't understand the concept of modes (e.g. +// file permissions), the treenoder includes the mode of the git tree in +// the hash, so changes in the modes will be detected as modifications +// to the file contents by the merkletrie difftree algorithm. This is +// consistent with how the "git diff-tree" command works. +import ( + "encoding/binary" + "io" + "os" + + "srcd.works/go-git.v4/plumbing" + "srcd.works/go-git.v4/plumbing/object" + "srcd.works/go-git.v4/utils/merkletrie/noder" +) + +type treeNoder struct { + parent *object.Tree // the root node is its own parent + name string // empty string for the root node + mode os.FileMode + hash plumbing.Hash + children []noder.Noder // memoized +} + +func newTreeNoder(t *object.Tree) *treeNoder { + if t == nil { + return &treeNoder{} + } + + return &treeNoder{ + parent: t, + name: "", + mode: os.ModeDir, + hash: t.Hash, + } +} + +func (t *treeNoder) isRoot() bool { + return t.name == "" +} + +func (t *treeNoder) String() string { + return "treeNoder <" + t.name + ">" +} + +func (t *treeNoder) Hash() []byte { + return append(t.hash[:], modeToBytes(t.mode)...) +} + +// mode in little endian (endianess is an arbitrary decission). +func modeToBytes(m os.FileMode) []byte { + ret := make([]byte, 4) + binary.LittleEndian.PutUint32(ret, uint32(m)) + return ret +} + +func (t *treeNoder) Name() string { + return t.name +} + +func (t *treeNoder) IsDir() bool { + return t.mode.IsDir() +} + +// Children will return the children of a treenoder as treenoders, +// building them from the children of the wrapped git tree. +func (t *treeNoder) Children() ([]noder.Noder, error) { + if !t.mode.IsDir() { + return noder.NoChildren, nil + } + + // children are memoized for efficiency + if t.children != nil { + return t.children, nil + } + + // the parent of the returned children will be ourself as a tree if + // we are a not the root treenoder. The root is special as it + // is is own parent. + parent := t.parent + if !t.isRoot() { + var err error + if parent, err = t.parent.Tree(t.name); err != nil { + return nil, err + } + } + + return transformChildren(parent) +} + +// Returns the children of a tree as treenoders. +// Efficiency is key here. +func transformChildren(t *object.Tree) ([]noder.Noder, error) { + var err error + var e object.TreeEntry + + // there will be more tree entries than children in the tree, + // due to submodules and empty directories, but I think it is still + // worth it to pre-allocate the whole array now, even if sometimes + // is bigger than needed. + ret := make([]noder.Noder, 0, len(t.Entries)) + + walker := object.NewTreeWalker(t, false) // don't recurse + // don't defer walker.Close() for efficiency reasons. + for { + _, e, err = walker.Next() + if err == io.EOF { + break + } + if err != nil { + walker.Close() + return nil, err + } + + ret = append(ret, &treeNoder{ + parent: t, + name: e.Name, + mode: e.Mode, + hash: e.Hash, + }) + } + walker.Close() + + return ret, nil +} + +// len(t.tree.Entries) != the number of elements walked by treewalker +// for some reason because of empty directories, submodules, etc, so we +// have to walk here. +func (t *treeNoder) NumChildren() (int, error) { + children, err := t.Children() + if err != nil { + return 0, err + } + + return len(children), nil +} -- cgit