diff options
-rw-r--r-- | plumbing/difftree/change.go | 121 | ||||
-rw-r--r-- | plumbing/difftree/change_adaptor.go | 61 | ||||
-rw-r--r-- | plumbing/difftree/change_adaptor_test.go | 413 | ||||
-rw-r--r-- | plumbing/difftree/change_test.go | 300 | ||||
-rw-r--r-- | plumbing/difftree/treenoder.go | 141 | ||||
-rw-r--r-- | utils/merkletrie/change.go | 149 | ||||
-rw-r--r-- | utils/merkletrie/change_test.go | 76 | ||||
-rw-r--r-- | utils/merkletrie/difftree.go | 404 | ||||
-rw-r--r-- | utils/merkletrie/difftree_test.go | 429 | ||||
-rw-r--r-- | utils/merkletrie/doubleiter.go | 187 |
10 files changed, 2281 insertions, 0 deletions
diff --git a/plumbing/difftree/change.go b/plumbing/difftree/change.go new file mode 100644 index 0000000..c636577 --- /dev/null +++ b/plumbing/difftree/change.go @@ -0,0 +1,121 @@ +package difftree + +import ( + "bytes" + "fmt" + "strings" + + "srcd.works/go-git.v4/plumbing/object" + "srcd.works/go-git.v4/utils/merkletrie" +) + +// Change values represent a detected change between two git trees. For +// modifications, From is the original status of the node and To is its +// final status. For insertions, From is the zero value and for +// deletions To is the zero value. +type Change struct { + From ChangeEntry + To ChangeEntry +} + +var empty = ChangeEntry{} + +// Action returns the kind of action represented by the change, an +// insertion, a deletion or a modification. +func (c *Change) Action() (merkletrie.Action, error) { + if c.From == empty && c.To == empty { + return merkletrie.Action(0), + fmt.Errorf("malformed change: empty from and to") + } + if c.From == empty { + return merkletrie.Insert, nil + } + if c.To == empty { + return merkletrie.Delete, nil + } + + return merkletrie.Modify, nil +} + +// Files return the files before and after a change. +// For insertions from will be nil. For deletions to will be nil. +func (c *Change) Files() (from, to *object.File, err error) { + action, err := c.Action() + if err != nil { + return + } + + if action == merkletrie.Insert || action == merkletrie.Modify { + to, err = c.To.Tree.TreeEntryFile(&c.To.TreeEntry) + if err != nil { + return + } + } + + if action == merkletrie.Delete || action == merkletrie.Modify { + from, err = c.From.Tree.TreeEntryFile(&c.From.TreeEntry) + if err != nil { + return + } + } + + return +} + +func (c *Change) String() string { + action, err := c.Action() + if err != nil { + return fmt.Sprintf("malformed change") + } + + return fmt.Sprintf("<Action: %s, Path: %s>", action, c.name()) +} + +func (c *Change) name() string { + if c.From != empty { + return c.From.Name + } + + return c.To.Name +} + +// ChangeEntry values represent a node that has suffered a change. +type ChangeEntry struct { + // Full path of the node using "/" as separator. + Name string + // Parent tree of the node that has changed. + Tree *object.Tree + // The entry of the node. + TreeEntry object.TreeEntry +} + +// Changes represents a collection of changes between two git trees. +// Implements sort.Interface lexicographically over the path of the +// changed files. +type Changes []*Change + +func (c Changes) Len() int { + return len(c) +} + +func (c Changes) Swap(i, j int) { + c[i], c[j] = c[j], c[i] +} + +func (c Changes) Less(i, j int) bool { + return strings.Compare(c[i].name(), c[j].name()) < 0 +} + +func (c Changes) String() string { + var buffer bytes.Buffer + buffer.WriteString("[") + comma := "" + for _, v := range c { + buffer.WriteString(comma) + buffer.WriteString(v.String()) + comma = ", " + } + buffer.WriteString("]") + + return buffer.String() +} diff --git a/plumbing/difftree/change_adaptor.go b/plumbing/difftree/change_adaptor.go new file mode 100644 index 0000000..edc20b3 --- /dev/null +++ b/plumbing/difftree/change_adaptor.go @@ -0,0 +1,61 @@ +package difftree + +// The folowing functions transform changes types form the merkletrie +// package to changes types from this package. + +import ( + "fmt" + + "srcd.works/go-git.v4/plumbing/object" + "srcd.works/go-git.v4/utils/merkletrie" + "srcd.works/go-git.v4/utils/merkletrie/noder" +) + +func newChange(c merkletrie.Change) (*Change, error) { + ret := &Change{} + + var err error + if ret.From, err = newChangeEntry(c.From); err != nil { + return nil, fmt.Errorf("From field: ", err) + } + + if ret.To, err = newChangeEntry(c.To); err != nil { + return nil, fmt.Errorf("To field: ", err) + } + + return ret, nil +} + +func newChangeEntry(p noder.Path) (ChangeEntry, error) { + if p == nil { + return empty, nil + } + + asTreeNoder, ok := p.Last().(*treeNoder) + if !ok { + return ChangeEntry{}, fmt.Errorf("cannot transform non-TreeNoders") + } + + return ChangeEntry{ + Name: p.String(), + Tree: asTreeNoder.parent, + TreeEntry: object.TreeEntry{ + Name: asTreeNoder.name, + Mode: asTreeNoder.mode, + Hash: asTreeNoder.hash, + }, + }, nil +} + +func newChanges(src merkletrie.Changes) (Changes, error) { + ret := make(Changes, len(src)) + var err error + for i, e := range src { + ret[i], err = newChange(e) + if err != nil { + return nil, fmt.Errorf("change #%d: %s", err) + } + } + + return ret, nil +} diff --git a/plumbing/difftree/change_adaptor_test.go b/plumbing/difftree/change_adaptor_test.go new file mode 100644 index 0000000..2ce2c54 --- /dev/null +++ b/plumbing/difftree/change_adaptor_test.go @@ -0,0 +1,413 @@ +package difftree + +import ( + "os" + "sort" + + "srcd.works/go-git.v4/plumbing" + "srcd.works/go-git.v4/plumbing/object" + "srcd.works/go-git.v4/plumbing/storer" + "srcd.works/go-git.v4/storage/filesystem" + "srcd.works/go-git.v4/utils/merkletrie" + "srcd.works/go-git.v4/utils/merkletrie/noder" + + "github.com/src-d/go-git-fixtures" + . "gopkg.in/check.v1" +) + +type ChangeAdaptorSuite struct { + fixtures.Suite + Storer storer.EncodedObjectStorer + Fixture *fixtures.Fixture +} + +func (s *ChangeAdaptorSuite) SetUpSuite(c *C) { + s.Suite.SetUpSuite(c) + s.Fixture = fixtures.Basic().One() + sto, err := filesystem.NewStorage(s.Fixture.DotGit()) + c.Assert(err, IsNil) + s.Storer = sto +} + +func (s *ChangeAdaptorSuite) tree(c *C, h plumbing.Hash) *object.Tree { + t, err := object.GetTree(s.Storer, h) + c.Assert(err, IsNil) + return t +} + +var _ = Suite(&ChangeAdaptorSuite{}) + +// utility function to build Noders from a tree and an tree entry. +func newNoder(t *object.Tree, e object.TreeEntry) noder.Noder { + return &treeNoder{ + parent: t, + name: e.Name, + mode: e.Mode, + hash: e.Hash, + } +} + +// utility function to build Paths +func newPath(nn ...noder.Noder) noder.Path { return noder.Path(nn) } + +func (s *ChangeAdaptorSuite) TestTreeNoderHashHasMode(c *C) { + hash := plumbing.NewHash("aaaa") + mode := object.FileMode + + treeNoder := &treeNoder{ + hash: hash, + mode: mode, + } + + expected := []byte{ + 0xaa, 0xaa, 0x00, 0x00, // original hash is aaaa and 16 zeros + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0xa4, 0x81, 0x00, 0x00, // object.FileMode in little endian + } + + c.Assert(treeNoder.Hash(), DeepEquals, expected) +} + +func (s *ChangeAdaptorSuite) TestNewChangeInsert(c *C) { + tree := &object.Tree{} + entry := object.TreeEntry{ + Name: "name", + Mode: os.FileMode(42), + Hash: plumbing.NewHash("aaaaa"), + } + path := newPath(newNoder(tree, entry)) + + expectedTo, err := newChangeEntry(path) + c.Assert(err, IsNil) + + src := merkletrie.Change{ + From: nil, + To: path, + } + + obtained, err := newChange(src) + c.Assert(err, IsNil) + action, err := obtained.Action() + c.Assert(err, IsNil) + c.Assert(action, Equals, merkletrie.Insert) + c.Assert(obtained.From, Equals, ChangeEntry{}) + c.Assert(obtained.To, Equals, expectedTo) +} + +func (s *ChangeAdaptorSuite) TestNewChangeDelete(c *C) { + tree := &object.Tree{} + entry := object.TreeEntry{ + Name: "name", + Mode: os.FileMode(42), + Hash: plumbing.NewHash("aaaaa"), + } + path := newPath(newNoder(tree, entry)) + + expectedFrom, err := newChangeEntry(path) + c.Assert(err, IsNil) + + src := merkletrie.Change{ + From: path, + To: nil, + } + + obtained, err := newChange(src) + c.Assert(err, IsNil) + action, err := obtained.Action() + c.Assert(err, IsNil) + c.Assert(action, Equals, merkletrie.Delete) + c.Assert(obtained.From, Equals, expectedFrom) + c.Assert(obtained.To, Equals, ChangeEntry{}) +} + +func (s *ChangeAdaptorSuite) TestNewChangeModify(c *C) { + treeA := &object.Tree{} + entryA := object.TreeEntry{ + Name: "name", + Mode: os.FileMode(42), + Hash: plumbing.NewHash("aaaaa"), + } + pathA := newPath(newNoder(treeA, entryA)) + expectedFrom, err := newChangeEntry(pathA) + c.Assert(err, IsNil) + + treeB := &object.Tree{} + entryB := object.TreeEntry{ + Name: "name", + Mode: os.FileMode(42), + Hash: plumbing.NewHash("bbbb"), + } + pathB := newPath(newNoder(treeB, entryB)) + expectedTo, err := newChangeEntry(pathB) + c.Assert(err, IsNil) + + src := merkletrie.Change{ + From: pathA, + To: pathB, + } + + obtained, err := newChange(src) + c.Assert(err, IsNil) + action, err := obtained.Action() + c.Assert(err, IsNil) + c.Assert(action, Equals, merkletrie.Modify) + c.Assert(obtained.From, Equals, expectedFrom) + c.Assert(obtained.To, Equals, expectedTo) +} + +func (s *ChangeAdaptorSuite) TestEmptyChangeFails(c *C) { + change := &Change{ + From: empty, + To: empty, + } + _, err := change.Action() + c.Assert(err, ErrorMatches, "malformed change.*") + + _, _, err = change.Files() + c.Assert(err, ErrorMatches, "malformed change.*") + + str := change.String() + c.Assert(str, Equals, "malformed change") +} + +type noderMock struct{ noder.Noder } + +func (s *ChangeAdaptorSuite) TestNewChangeFailsWithChangesFromOtherNoders(c *C) { + src := merkletrie.Change{ + From: newPath(noderMock{}), + To: nil, + } + _, err := newChange(src) + c.Assert(err, Not(IsNil)) + + src = merkletrie.Change{ + From: nil, + To: newPath(noderMock{}), + } + _, err = newChange(src) + c.Assert(err, Not(IsNil)) +} + +func (s *ChangeAdaptorSuite) TestChangeStringFrom(c *C) { + expected := "<Action: Delete, Path: foo>" + change := Change{} + change.From.Name = "foo" + + obtained := change.String() + c.Assert(obtained, Equals, expected) +} + +func (s *ChangeAdaptorSuite) TestChangeStringTo(c *C) { + expected := "<Action: Insert, Path: foo>" + change := Change{} + change.To.Name = "foo" + + obtained := change.String() + c.Assert(obtained, Equals, expected) +} + +func (s *ChangeAdaptorSuite) TestChangeFilesInsert(c *C) { + tree := s.tree(c, plumbing.NewHash("a8d315b2b1c615d43042c3a62402b8a54288cf5c")) + + change := Change{} + change.To.Name = "json/long.json" + change.To.Tree = tree + change.To.TreeEntry.Hash = plumbing.NewHash("49c6bb89b17060d7b4deacb7b338fcc6ea2352a9") + + from, to, err := change.Files() + c.Assert(err, IsNil) + c.Assert(from, IsNil) + c.Assert(to.ID(), Equals, change.To.TreeEntry.Hash) +} + +func (s *ChangeAdaptorSuite) TestChangeFilesInsertNotFound(c *C) { + tree := s.tree(c, plumbing.NewHash("a8d315b2b1c615d43042c3a62402b8a54288cf5c")) + + change := Change{} + change.To.Name = "json/long.json" + change.To.Tree = tree + // there is no object for this hash + change.To.TreeEntry.Hash = plumbing.NewHash("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") + + _, _, err := change.Files() + c.Assert(err, Not(IsNil)) +} + +func (s *ChangeAdaptorSuite) TestChangeFilesDelete(c *C) { + tree := s.tree(c, plumbing.NewHash("a8d315b2b1c615d43042c3a62402b8a54288cf5c")) + + change := Change{} + change.From.Name = "json/long.json" + change.From.Tree = tree + change.From.TreeEntry.Hash = plumbing.NewHash("49c6bb89b17060d7b4deacb7b338fcc6ea2352a9") + + from, to, err := change.Files() + c.Assert(err, IsNil) + c.Assert(to, IsNil) + c.Assert(from.ID(), Equals, change.From.TreeEntry.Hash) +} + +func (s *ChangeAdaptorSuite) TestChangeFilesDeleteNotFound(c *C) { + tree := s.tree(c, plumbing.NewHash("a8d315b2b1c615d43042c3a62402b8a54288cf5c")) + + change := Change{} + change.From.Name = "json/long.json" + change.From.Tree = tree + // there is no object for this hash + change.From.TreeEntry.Hash = plumbing.NewHash("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") + + _, _, err := change.Files() + c.Assert(err, Not(IsNil)) +} + +func (s *ChangeAdaptorSuite) TestChangeFilesModify(c *C) { + tree := s.tree(c, plumbing.NewHash("a8d315b2b1c615d43042c3a62402b8a54288cf5c")) + + change := Change{} + change.To.Name = "json/long.json" + change.To.Tree = tree + change.To.TreeEntry.Hash = plumbing.NewHash("49c6bb89b17060d7b4deacb7b338fcc6ea2352a9") + change.From.Name = "json/long.json" + change.From.Tree = tree + change.From.TreeEntry.Hash = plumbing.NewHash("9a48f23120e880dfbe41f7c9b7b708e9ee62a492") + + from, to, err := change.Files() + c.Assert(err, IsNil) + c.Assert(to.ID(), Equals, change.To.TreeEntry.Hash) + c.Assert(from.ID(), Equals, change.From.TreeEntry.Hash) +} + +func (s *ChangeAdaptorSuite) TestChangeEntryFailsWithOtherNoders(c *C) { + path := noder.Path{noderMock{}} + _, err := newChangeEntry(path) + c.Assert(err, Not(IsNil)) +} + +func (s *ChangeAdaptorSuite) TestChangeEntryFromNilIsZero(c *C) { + obtained, err := newChangeEntry(nil) + c.Assert(err, IsNil) + c.Assert(obtained, Equals, ChangeEntry{}) +} + +func (s *ChangeAdaptorSuite) TestChangeEntryFromSortPath(c *C) { + tree := &object.Tree{} + entry := object.TreeEntry{ + Name: "name", + Mode: os.FileMode(42), + Hash: plumbing.NewHash("aaaaa"), + } + path := newPath(newNoder(tree, entry)) + + obtained, err := newChangeEntry(path) + c.Assert(err, IsNil) + + c.Assert(obtained.Name, Equals, entry.Name) + c.Assert(obtained.Tree, Equals, tree) + c.Assert(obtained.TreeEntry, DeepEquals, entry) +} + +func (s *ChangeAdaptorSuite) TestChangeEntryFromLongPath(c *C) { + treeA := &object.Tree{} + entryA := object.TreeEntry{ + Name: "nameA", + Mode: os.FileMode(42), + Hash: plumbing.NewHash("aaaa"), + } + + treeB := &object.Tree{} + entryB := object.TreeEntry{ + Name: "nameB", + Mode: os.FileMode(24), + Hash: plumbing.NewHash("bbbb"), + } + + path := newPath( + newNoder(treeA, entryA), + newNoder(treeB, entryB), + ) + + obtained, err := newChangeEntry(path) + c.Assert(err, IsNil) + + c.Assert(obtained.Name, Equals, entryA.Name+"/"+entryB.Name) + c.Assert(obtained.Tree, Equals, treeB) + c.Assert(obtained.TreeEntry, Equals, entryB) +} + +func (s *ChangeAdaptorSuite) TestNewChangesEmpty(c *C) { + expected := "[]" + changes, err := newChanges(nil) + c.Assert(err, IsNil) + obtained := changes.String() + c.Assert(obtained, Equals, expected) + + expected = "[]" + changes, err = newChanges(merkletrie.Changes{}) + c.Assert(err, IsNil) + obtained = changes.String() + c.Assert(obtained, Equals, expected) +} + +func (s *ChangeAdaptorSuite) TestNewChanges(c *C) { + treeA := &object.Tree{} + entryA := object.TreeEntry{Name: "nameA"} + pathA := newPath(newNoder(treeA, entryA)) + changeA := merkletrie.Change{ + From: nil, + To: pathA, + } + + treeB := &object.Tree{} + entryB := object.TreeEntry{Name: "nameB"} + pathB := newPath(newNoder(treeB, entryB)) + changeB := merkletrie.Change{ + From: pathB, + To: nil, + } + src := merkletrie.Changes{changeA, changeB} + + changes, err := newChanges(src) + c.Assert(err, IsNil) + c.Assert(len(changes), Equals, 2) + action, err := changes[0].Action() + c.Assert(err, IsNil) + c.Assert(action, Equals, merkletrie.Insert) + c.Assert(changes[0].To.Name, Equals, "nameA") + action, err = changes[1].Action() + c.Assert(err, IsNil) + c.Assert(action, Equals, merkletrie.Delete) + c.Assert(changes[1].From.Name, Equals, "nameB") +} + +func (s *ChangeAdaptorSuite) TestNewChangesFailsWithOtherNoders(c *C) { + change := merkletrie.Change{ + From: nil, + To: newPath(noderMock{}), + } + src := merkletrie.Changes{change} + + _, err := newChanges(src) + c.Assert(err, Not(IsNil)) +} + +func (s *ChangeAdaptorSuite) TestSortChanges(c *C) { + c1 := &Change{} + c1.To.Name = "1" + + c2 := &Change{} + c2.From.Name = "2" + c2.To.Name = "2" + + c3 := &Change{} + c3.From.Name = "3" + + changes := Changes{c3, c1, c2} + sort.Sort(changes) + + c.Assert(changes[0].String(), Equals, "<Action: Insert, Path: 1>") + c.Assert(changes[1].String(), Equals, "<Action: Modify, Path: 2>") + c.Assert(changes[2].String(), Equals, "<Action: Delete, Path: 3>") +} diff --git a/plumbing/difftree/change_test.go b/plumbing/difftree/change_test.go new file mode 100644 index 0000000..cda4ff9 --- /dev/null +++ b/plumbing/difftree/change_test.go @@ -0,0 +1,300 @@ +package difftree + +import ( + "os" + "sort" + + "srcd.works/go-git.v4/plumbing" + "srcd.works/go-git.v4/plumbing/object" + "srcd.works/go-git.v4/plumbing/storer" + "srcd.works/go-git.v4/storage/filesystem" + "srcd.works/go-git.v4/utils/merkletrie" + + fixtures "github.com/src-d/go-git-fixtures" + . "gopkg.in/check.v1" +) + +type ChangeSuite struct { + fixtures.Suite + Storer storer.EncodedObjectStorer + Fixture *fixtures.Fixture +} + +func (s *ChangeSuite) SetUpSuite(c *C) { + s.Suite.SetUpSuite(c) + s.Fixture = fixtures.ByURL("https://github.com/src-d/go-git.git"). + ByTag(".git").One() + sto, err := filesystem.NewStorage(s.Fixture.DotGit()) + c.Assert(err, IsNil) + s.Storer = sto +} + +func (s *ChangeSuite) tree(c *C, h plumbing.Hash) *object.Tree { + t, err := object.GetTree(s.Storer, h) + c.Assert(err, IsNil) + return t +} + +var _ = Suite(&ChangeSuite{}) + +func (s *ChangeSuite) TestInsert(c *C) { + // Commit a5078b19f08f63e7948abd0a5e2fb7d319d3a565 of the go-git + // fixture inserted "examples/clone/main.go". + // + // On that commit, the "examples/clone" tree is + // 6efca3ff41cab651332f9ebc0c96bb26be809615 + // + // and the "examples/colone/main.go" is + // f95dc8f7923add1a8b9f72ecb1e8db1402de601a + + path := "examples/clone/main.go" + name := "main.go" + mode := os.FileMode(100644) + blob := plumbing.NewHash("f95dc8f7923add1a8b9f72ecb1e8db1402de601a") + tree := plumbing.NewHash("6efca3ff41cab651332f9ebc0c96bb26be809615") + + change := &Change{ + From: empty, + To: ChangeEntry{ + Name: path, + Tree: s.tree(c, tree), + TreeEntry: object.TreeEntry{ + Name: name, + Mode: mode, + Hash: blob, + }, + }, + } + + action, err := change.Action() + c.Assert(err, IsNil) + c.Assert(action, Equals, merkletrie.Insert) + + from, to, err := change.Files() + c.Assert(err, IsNil) + c.Assert(from, IsNil) + c.Assert(to.Name, Equals, name) + c.Assert(to.Blob.Hash, Equals, blob) + + str := change.String() + c.Assert(str, Equals, "<Action: Insert, Path: examples/clone/main.go>") +} + +func (s *ChangeSuite) TestDelete(c *C) { + // Commit f6011d65d57c2a866e231fc21a39cb618f86f9ea of the go-git + // fixture deleted "utils/difftree/difftree.go". + // + // The parent of that commit is + // 9b4a386db3d98a4362516a00ef3d04d4698c9bcd. + // + // On that parent commit, the "utils/difftree" tree is + // f3d11566401ce4b0808aab9dd6fad3d5abf1481a. + // + // and the "utils/difftree/difftree.go" is + // e2cb9a5719daf634d45a063112b4044ee81da13ea. + + path := "utils/difftree/difftree.go" + name := "difftree.go" + mode := os.FileMode(100644) + blob := plumbing.NewHash("e2cb9a5719daf634d45a063112b4044ee81da13e") + tree := plumbing.NewHash("f3d11566401ce4b0808aab9dd6fad3d5abf1481a") + + change := &Change{ + From: ChangeEntry{ + Name: path, + Tree: s.tree(c, tree), + TreeEntry: object.TreeEntry{ + Name: name, + Mode: mode, + Hash: blob, + }, + }, + To: empty, + } + + action, err := change.Action() + c.Assert(err, IsNil) + c.Assert(action, Equals, merkletrie.Delete) + + from, to, err := change.Files() + c.Assert(err, IsNil) + c.Assert(to, IsNil) + c.Assert(from.Name, Equals, name) + c.Assert(from.Blob.Hash, Equals, blob) + + str := change.String() + c.Assert(str, Equals, "<Action: Delete, Path: utils/difftree/difftree.go>") +} + +func (s *ChangeSuite) TestModify(c *C) { + // Commit 7beaad711378a4daafccc2c04bc46d36df2a0fd1 of the go-git + // fixture modified "examples/latest/latest.go". + // the "examples/latest" tree is + // b1f01b730b855c82431918cb338ad47ed558999b. + // and "examples/latest/latest.go" is blob + // 05f583ace3a9a078d8150905a53a4d82567f125f. + // + // The parent of that commit is + // 337148ef6d751477796922ac127b416b8478fcc4. + // the "examples/latest" tree is + // 8b0af31d2544acb5c4f3816a602f11418cbd126e. + // and "examples/latest/latest.go" is blob + // de927fad935d172929aacf20e71f3bf0b91dd6f9. + + path := "utils/difftree/difftree.go" + name := "difftree.go" + mode := os.FileMode(100644) + fromBlob := plumbing.NewHash("05f583ace3a9a078d8150905a53a4d82567f125f") + fromTree := plumbing.NewHash("b1f01b730b855c82431918cb338ad47ed558999b") + toBlob := plumbing.NewHash("de927fad935d172929aacf20e71f3bf0b91dd6f9") + toTree := plumbing.NewHash("8b0af31d2544acb5c4f3816a602f11418cbd126e") + + change := &Change{ + From: ChangeEntry{ + Name: path, + Tree: s.tree(c, fromTree), + TreeEntry: object.TreeEntry{ + Name: name, + Mode: mode, + Hash: fromBlob, + }, + }, + To: ChangeEntry{ + Name: path, + Tree: s.tree(c, toTree), + TreeEntry: object.TreeEntry{ + Name: name, + Mode: mode, + Hash: toBlob, + }, + }, + } + + action, err := change.Action() + c.Assert(err, IsNil) + c.Assert(action, Equals, merkletrie.Modify) + + from, to, err := change.Files() + c.Assert(err, IsNil) + + c.Assert(from.Name, Equals, name) + c.Assert(from.Blob.Hash, Equals, fromBlob) + c.Assert(to.Name, Equals, name) + c.Assert(to.Blob.Hash, Equals, toBlob) + + str := change.String() + c.Assert(str, Equals, "<Action: Modify, Path: utils/difftree/difftree.go>") +} + +func (s *ChangeSuite) TestEmptyChangeFails(c *C) { + change := &Change{} + + _, err := change.Action() + c.Assert(err, ErrorMatches, "malformed.*") + + _, _, err = change.Files() + c.Assert(err, ErrorMatches, "malformed.*") + + str := change.String() + c.Assert(str, Equals, "malformed change") +} + +func (s *ChangeSuite) TestErrorsFindingChildsAreDetected(c *C) { + // Commit 7beaad711378a4daafccc2c04bc46d36df2a0fd1 of the go-git + // fixture modified "examples/latest/latest.go". + // the "examples/latest" tree is + // b1f01b730b855c82431918cb338ad47ed558999b. + // and "examples/latest/latest.go" is blob + // 05f583ace3a9a078d8150905a53a4d82567f125f. + // + // The parent of that commit is + // 337148ef6d751477796922ac127b416b8478fcc4. + // the "examples/latest" tree is + // 8b0af31d2544acb5c4f3816a602f11418cbd126e. + // and "examples/latest/latest.go" is blob + // de927fad935d172929aacf20e71f3bf0b91dd6f9. + + path := "utils/difftree/difftree.go" + name := "difftree.go" + mode := os.FileMode(100644) + fromBlob := plumbing.NewHash("aaaa") // does not exists + fromTree := plumbing.NewHash("b1f01b730b855c82431918cb338ad47ed558999b") + toBlob := plumbing.NewHash("bbbb") // does not exists + toTree := plumbing.NewHash("8b0af31d2544acb5c4f3816a602f11418cbd126e") + + change := &Change{ + From: ChangeEntry{ + Name: path, + Tree: s.tree(c, fromTree), + TreeEntry: object.TreeEntry{ + Name: name, + Mode: mode, + Hash: fromBlob, + }, + }, + To: ChangeEntry{}, + } + + _, _, err := change.Files() + c.Assert(err, ErrorMatches, "object not found") + + change = &Change{ + From: empty, + To: ChangeEntry{ + Name: path, + Tree: s.tree(c, toTree), + TreeEntry: object.TreeEntry{ + Name: name, + Mode: mode, + Hash: toBlob, + }, + }, + } + + _, _, err = change.Files() + c.Assert(err, ErrorMatches, "object not found") +} + +func (s *ChangeSuite) TestChangesString(c *C) { + expected := "[]" + changes := Changes{} + obtained := changes.String() + c.Assert(obtained, Equals, expected) + + expected = "[<Action: Modify, Path: bla>]" + changes = make([]*Change, 1) + changes[0] = &Change{} + changes[0].From.Name = "bla" + changes[0].To.Name = "bla" + + obtained = changes.String() + c.Assert(obtained, Equals, expected) + + expected = "[<Action: Modify, Path: bla>, <Action: Delete, Path: foo/bar>]" + changes = make([]*Change, 2) + changes[0] = &Change{} + changes[0].From.Name = "bla" + changes[0].To.Name = "bla" + changes[1] = &Change{} + changes[1].From.Name = "foo/bar" + obtained = changes.String() + c.Assert(obtained, Equals, expected) +} + +func (s *ChangeSuite) TestChangesSort(c *C) { + changes := make(Changes, 3) + changes[0] = &Change{} + changes[0].From.Name = "z" + changes[0].To.Name = "z" + changes[1] = &Change{} + changes[1].From.Name = "b/b" + changes[2] = &Change{} + changes[2].To.Name = "b/a" + + expected := "[<Action: Insert, Path: b/a>, " + + "<Action: Delete, Path: b/b>, " + + "<Action: Modify, Path: z>]" + + sort.Sort(changes) + c.Assert(changes.String(), Equals, expected) +} diff --git a/plumbing/difftree/treenoder.go b/plumbing/difftree/treenoder.go new file mode 100644 index 0000000..c0ed948 --- /dev/null +++ b/plumbing/difftree/treenoder.go @@ -0,0 +1,141 @@ +package difftree + +// A treenoder is a helper type that wraps git trees into merkletrie +// noders. +// +// As a merkletrie noder doesn't understand the concept of modes (e.g. +// file permissions), the treenoder includes the mode of the git tree in +// the hash, so changes in the modes will be detected as modifications +// to the file contents by the merkletrie difftree algorithm. This is +// consistent with how the "git diff-tree" command works. +import ( + "encoding/binary" + "io" + "os" + + "srcd.works/go-git.v4/plumbing" + "srcd.works/go-git.v4/plumbing/object" + "srcd.works/go-git.v4/utils/merkletrie/noder" +) + +type treeNoder struct { + parent *object.Tree // the root node is its own parent + name string // empty string for the root node + mode os.FileMode + hash plumbing.Hash + children []noder.Noder // memoized +} + +func newTreeNoder(t *object.Tree) *treeNoder { + if t == nil { + return &treeNoder{} + } + + return &treeNoder{ + parent: t, + name: "", + mode: os.ModeDir, + hash: t.Hash, + } +} + +func (t *treeNoder) isRoot() bool { + return t.name == "" +} + +func (t *treeNoder) String() string { + return "treeNoder <" + t.name + ">" +} + +func (t *treeNoder) Hash() []byte { + return append(t.hash[:], modeToBytes(t.mode)...) +} + +// mode in little endian (endianess is an arbitrary decission). +func modeToBytes(m os.FileMode) []byte { + ret := make([]byte, 4) + binary.LittleEndian.PutUint32(ret, uint32(m)) + return ret +} + +func (t *treeNoder) Name() string { + return t.name +} + +func (t *treeNoder) IsDir() bool { + return t.mode.IsDir() +} + +// Children will return the children of a treenoder as treenoders, +// building them from the children of the wrapped git tree. +func (t *treeNoder) Children() ([]noder.Noder, error) { + if !t.mode.IsDir() { + return noder.NoChildren, nil + } + + // children are memoized for efficiency + if t.children != nil { + return t.children, nil + } + + // the parent of the returned children will be ourself as a tree if + // we are a not the root treenoder. The root is special as it + // is is own parent. + parent := t.parent + if !t.isRoot() { + var err error + if parent, err = t.parent.Tree(t.name); err != nil { + return nil, err + } + } + + return transformChildren(parent) +} + +// Returns the children of a tree as treenoders. +// Efficiency is key here. +func transformChildren(t *object.Tree) ([]noder.Noder, error) { + var err error + var e object.TreeEntry + + // there will be more tree entries than children in the tree, + // due to submodules and empty directories, but I think it is still + // worth it to pre-allocate the whole array now, even if sometimes + // is bigger than needed. + ret := make([]noder.Noder, 0, len(t.Entries)) + + walker := object.NewTreeWalker(t, false) // don't recurse + // don't defer walker.Close() for efficiency reasons. + for { + _, e, err = walker.Next() + if err == io.EOF { + break + } + if err != nil { + walker.Close() + return nil, err + } + + ret = append(ret, &treeNoder{ + parent: t, + name: e.Name, + mode: e.Mode, + hash: e.Hash, + }) + } + walker.Close() + + return ret, nil +} + +// len(t.tree.Entries) != the number of elements walked by treewalker +// for some reason because of empty directories, submodules, etc, so we +// have to walk here. +func (t *treeNoder) NumChildren() (int, error) { + children, err := t.Children() + if err != nil { + return 0, err + } + + return len(children), nil +} diff --git a/utils/merkletrie/change.go b/utils/merkletrie/change.go new file mode 100644 index 0000000..e6c99f6 --- /dev/null +++ b/utils/merkletrie/change.go @@ -0,0 +1,149 @@ +package merkletrie + +import ( + "fmt" + "io" + + "srcd.works/go-git.v4/utils/merkletrie/noder" +) + +// Action values represent the kind of things a Change can represent: +// insertion, deletions or modifications of files. +type Action int + +// The set of possible actions in a change. +const ( + _ Action = iota + Insert + Delete + Modify +) + +// String returns the action as a human readable text. +func (a Action) String() string { + switch a { + case Insert: + return "Insert" + case Delete: + return "Delete" + case Modify: + return "Modify" + default: + panic(fmt.Sprintf("unsupported action: %d", a)) + } +} + +// A Change value represent how a noder has change between to merkletries. +type Change struct { + // The noder before the change or nil if it was inserted. + From noder.Path + // The noder after the change or nil if it was deleted. + To noder.Path +} + +// Action is convenience method that returns what Action c represents. +func (c *Change) Action() (Action, error) { + if c.From == nil && c.To == nil { + return Action(0), fmt.Errorf("malformed change: nil from and to") + } + if c.From == nil { + return Insert, nil + } + if c.To == nil { + return Delete, nil + } + + return Modify, nil +} + +// NewInsert returns a new Change representing the insertion of n. +func NewInsert(n noder.Path) Change { return Change{To: n} } + +// NewDelete returns a new Change representing the deletion of n. +func NewDelete(n noder.Path) Change { return Change{From: n} } + +// NewModify returns a new Change representing that a has been modified and +// it is now b. +func NewModify(a, b noder.Path) Change { + return Change{ + From: a, + To: b, + } +} + +// String returns a single change in human readable form, using the +// format: '<' + action + space + path + '>'. The contents of the file +// before or after the change are not included in this format. +// +// Example: inserting a file at the path a/b/c.txt will return "<Insert +// a/b/c.txt>". +func (c Change) String() string { + action, err := c.Action() + if err != nil { + panic(err) + } + + var path string + if action == Delete { + path = c.From.String() + } else { + path = c.To.String() + } + + return fmt.Sprintf("<%s %s>", action, path) +} + +// Changes is a list of changes between to merkletries. +type Changes []Change + +// NewChanges returns an empty list of changes. +func NewChanges() Changes { + return Changes{} +} + +// Add adds the change c to the list of changes. +func (l *Changes) Add(c Change) { + *l = append(*l, c) +} + +// AddRecursiveInsert adds the required changes to insert all the +// file-like noders found in root, recursively. +func (l *Changes) AddRecursiveInsert(root noder.Path) error { + return l.addRecursive(root, NewInsert) +} + +// AddRecursiveDelete adds the required changes to delete all the +// file-like noders found in root, recursively. +func (l *Changes) AddRecursiveDelete(root noder.Path) error { + return l.addRecursive(root, NewDelete) +} + +type noderToChangeFn func(noder.Path) Change // NewInsert or NewDelete + +func (l *Changes) addRecursive(root noder.Path, ctor noderToChangeFn) error { + if !root.IsDir() { + l.Add(ctor(root)) + return nil + } + + i, err := NewIterFromPath(root) + if err != nil { + return err + } + + var current noder.Path + for { + if current, err = i.Step(); err != nil { + if err == io.EOF { + break + } + return err + } + if current.IsDir() { + continue + } + l.Add(ctor(current)) + } + + return nil +} diff --git a/utils/merkletrie/change_test.go b/utils/merkletrie/change_test.go new file mode 100644 index 0000000..4f908ce --- /dev/null +++ b/utils/merkletrie/change_test.go @@ -0,0 +1,76 @@ +package merkletrie_test + +import ( + "srcd.works/go-git.v4/utils/merkletrie" + "srcd.works/go-git.v4/utils/merkletrie/internal/fsnoder" + "srcd.works/go-git.v4/utils/merkletrie/noder" + + . "gopkg.in/check.v1" +) + +type ChangeSuite struct{} + +var _ = Suite(&ChangeSuite{}) + +func (s *ChangeSuite) TestActionString(c *C) { + action := merkletrie.Insert + c.Assert(action.String(), Equals, "Insert") + + action = merkletrie.Delete + c.Assert(action.String(), Equals, "Delete") + + action = merkletrie.Modify + c.Assert(action.String(), Equals, "Modify") +} + +func (s *ChangeSuite) TestUnsupportedAction(c *C) { + a := merkletrie.Action(42) + c.Assert(a.String, PanicMatches, "unsupported action.*") +} + +func (s ChangeSuite) TestNewInsert(c *C) { + tree, err := fsnoder.New("(a(b(z<>)))") + c.Assert(err, IsNil) + path := find(c, tree, "z") + change := merkletrie.NewInsert(path) + c.Assert(change.String(), Equals, "<Insert a/b/z>") + + shortPath := noder.Path([]noder.Noder{path.Last()}) + change = merkletrie.NewInsert(shortPath) + c.Assert(change.String(), Equals, "<Insert z>") +} + +func (s ChangeSuite) TestNewDelete(c *C) { + tree, err := fsnoder.New("(a(b(z<>)))") + c.Assert(err, IsNil) + path := find(c, tree, "z") + change := merkletrie.NewDelete(path) + c.Assert(change.String(), Equals, "<Delete a/b/z>") + + shortPath := noder.Path([]noder.Noder{path.Last()}) + change = merkletrie.NewDelete(shortPath) + c.Assert(change.String(), Equals, "<Delete z>") +} + +func (s ChangeSuite) TestNewModify(c *C) { + tree1, err := fsnoder.New("(a(b(z<>)))") + c.Assert(err, IsNil) + path1 := find(c, tree1, "z") + + tree2, err := fsnoder.New("(a(b(z<1>)))") + c.Assert(err, IsNil) + path2 := find(c, tree2, "z") + + change := merkletrie.NewModify(path1, path2) + c.Assert(change.String(), Equals, "<Modify a/b/z>") + + shortPath1 := noder.Path([]noder.Noder{path1.Last()}) + shortPath2 := noder.Path([]noder.Noder{path2.Last()}) + change = merkletrie.NewModify(shortPath1, shortPath2) + c.Assert(change.String(), Equals, "<Modify z>") +} + +func (s ChangeSuite) TestMalformedChange(c *C) { + change := merkletrie.Change{} + c.Assert(change.String, PanicMatches, "malformed change.*") +} diff --git a/utils/merkletrie/difftree.go b/utils/merkletrie/difftree.go new file mode 100644 index 0000000..1a626cd --- /dev/null +++ b/utils/merkletrie/difftree.go @@ -0,0 +1,404 @@ +package merkletrie + +// The focus of this difftree implementation is to save time by +// skipping whole directories if their hash is the same in both +// trees. +// +// The diff algorithm implemented here is based on the doubleiter +// type defined in this same package; we will iterate over both +// trees at the same time, while comparing the current noders in +// each iterator. Depending on how they differ we will output the +// corresponding chages and move the iterators further over both +// trees. +// +// The table bellow show all the possible comparison results, along +// with what changes should we produce and how to advance the +// iterators. +// +// The table is implemented by the switches in this function, +// diffTwoNodes, diffTwoNodesSameName and diffTwoDirs. +// +// Many Bothans died to bring us this information, make sure you +// understand the table before modifying this code. + +// # Cases +// +// When comparing noders in both trees you will found yourself in +// one of 169 possible cases, but if we ignore moves, we can +// simplify a lot the search space into the following table: +// +// - "-": nothing, no file or directory +// - a<>: an empty file named "a". +// - a<1>: a file named "a", with "1" as its contents. +// - a<2>: a file named "a", with "2" as its contents. +// - a(): an empty dir named "a". +// - a(...): a dir named "a", with some files and/or dirs inside (possibly +// empty). +// - a(;;;): a dir named "a", with some other files and/or dirs inside +// (possibly empty), which different from the ones in "a(...)". +// +// \ to - a<> a<1> a<2> a() a(...) a(;;;) +// from \ +// - 00 01 02 03 04 05 06 +// a<> 10 11 12 13 14 15 16 +// a<1> 20 21 22 23 24 25 26 +// a<2> 30 31 32 33 34 35 36 +// a() 40 41 42 43 44 45 46 +// a(...) 50 51 52 53 54 55 56 +// a(;;;) 60 61 62 63 64 65 66 +// +// Every (from, to) combination in the table is a special case, but +// some of them can be merged into some more general cases, for +// instance 11 and 22 can be merged into the general case: both +// noders are equal. +// +// Here is a full list of all the cases that are similar and how to +// merge them together into more general cases. Each general case +// is labeled wiht an uppercase letter for further reference, and it +// is followed by the pseudocode of the checks you have to perfrom +// on both noders to see if you are in such a case, the actions to +// perform (i.e. what changes to output) and how to advance the +// iterators of each tree to continue the comparison process. +// +// ## A. Impossible: 00 +// +// ## B. Same thing on both sides: 11, 22, 33, 44, 55, 66 +// - check: `SameName() && SameHash()` +// - action: do nothing. +// - advance: `FromNext(); ToNext()` +// +// ### C. To was created: 01, 02, 03, 04, 05, 06 +// - check: `DifferentName() && ToBeforeFrom()` +// - action: inserRecursively(to) +// - advance: `ToNext()` +// +// ### D. From was deleted: 10, 20, 30, 40, 50, 60 +// - check: `DifferentName() && FromBeforeTo()` +// - action: `DeleteRecursively(from)` +// - advance: `FromNext()` +// +// ### E. Empty file to file with contents: 12, 13 +// - check: `SameName() && DifferentHash() && FromIsFile() && +// ToIsFile() && FromIsEmpty()` +// - action: `modifyFile(from, to)` +// - advance: `FromNext()` or `FromStep()` +// +// ### E'. file with contents to empty file: 21, 31 +// - check: `SameName() && DifferentHash() && FromIsFile() && +// ToIsFile() && ToIsEmpty()` +// - action: `modifyFile(from, to)` +// - advance: `FromNext()` or `FromStep()` +// +// ### F. empty file to empty dir with the same name: 14 +// - check: `SameName() && FromIsFile() && FromIsEmpty() && +// ToIsDir() && ToIsEmpty()` +// - action: `DeleteFile(from); InsertEmptyDir(to)` +// - advance: `FromNext(); ToNext()` +// +// ### F'. empty dir to empty file of the same name: 41 +// - check: `SameName() && FromIsDir() && FromIsEmpty && +// ToIsFile() && ToIsEmpty()` +// - action: `DeleteEmptyDir(from); InsertFile(to)` +// - advance: `FromNext(); ToNext()` or step for any of them. +// +// ### G. empty file to non-empty dir of the same name: 15, 16 +// - check: `SameName() && FromIsFile() && ToIsDir() && +// FromIsEmpty() && ToIsNotEmpty()` +// - action: `DeleteFile(from); InsertDirRecursively(to)` +// - advance: `FromNext(); ToNext()` +// +// ### G'. non-empty dir to empty file of the same name: 51, 61 +// - check: `SameName() && FromIsDir() && FromIsNotEmpty() && +// ToIsFile() && FromIsEmpty()` +// - action: `DeleteDirRecursively(from); InsertFile(to)` +// - advance: `FromNext(); ToNext()` +// +// ### H. modify file contents: 23, 32 +// - check: `SameName() && FromIsFile() && ToIsFile() && +// FromIsNotEmpty() && ToIsNotEmpty()` +// - action: `ModifyFile(from, to)` +// - advance: `FromNext(); ToNext()` +// +// ### I. file with contents to empty dir: 24, 34 +// - check: `SameName() && DifferentHash() && FromIsFile() && +// FromIsNotEmpty() && ToIsDir() && ToIsEmpty()` +// - action: `DeleteFile(from); InsertEmptyDir(to)` +// - advance: `FromNext(); ToNext()` +// +// ### I'. empty dir to file with contents: 42, 43 +// - check: `SameName() && DifferentHash() && FromIsDir() && +// FromIsEmpty() && ToIsFile() && ToIsEmpty()` +// - action: `DeleteDir(from); InsertFile(to)` +// - advance: `FromNext(); ToNext()` +// +// ### J. file with contents to dir with contetns: 25, 26, 35, 36 +// - check: `SameName() && DifferentHash() && FromIsFile() && +// FromIsNotEmpty() && ToIsDir() && ToIsNotEmpty()` +// - action: `DeleteFile(from); InsertDirRecursively(to)` +// - advance: `FromNext(); ToNext()` +// +// ### J'. dir with contetns to file with contents: 52, 62, 53, 63 +// - check: `SameName() && DifferentHash() && FromIsDir() && +// FromIsNotEmpty() && ToIsFile() && ToIsNotEmpty()` +// - action: `DeleteDirRecursively(from); InsertFile(to)` +// - advance: `FromNext(); ToNext()` +// +// ### K. empty dir to dir with contents: 45, 46 +// - check: `SameName() && DifferentHash() && FromIsDir() && +// FromIsEmpty() && ToIsDir() && ToIsNotEmpty()` +// - action: `InsertChildrenRecursively(to)` +// - advance: `FromNext(); ToNext()` +// +// ### K'. dir with contents to empty dir: 54, 64 +// - check: `SameName() && DifferentHash() && FromIsDir() && +// FromIsEmpty() && ToIsDir() && ToIsNotEmpty()` +// - action: `DeleteChildrenRecursively(from)` +// - advance: `FromNext(); ToNext()` +// +// ### L. dir with contents to dir with different contents: 56, 65 +// - check: `SameName() && DifferentHash() && FromIsDir() && +// FromIsNotEmpty() && ToIsDir() && ToIsNotEmpty()` +// - action: nothing +// - advance: `FromStep(); ToStep()` +// +// + +// All these cases can be further simplified by a truth table +// reduction process, in which we gather similar checks together to +// make the final code easier to read and understand. +// +// The first 6 columns are the outputs of the checks to perform on +// both noders. I have labeled them 1 to 6, this is what they mean: +// +// 1: SameName() +// 2: SameHash() +// 3: FromIsDir() +// 4: ToIsDir() +// 5: FromIsEmpty() +// 6: ToIsEmpty() +// +// The from and to columns are a fsnoder example of the elements +// that you will find on each tree under the specified comparison +// results (columns 1 to 6). +// +// The type column identifies the case we are into, from the list above. +// +// The type' column identifies the new set of reduced cases, using +// lowercase letters, and they are explained after the table. +// +// The last column is the set of actions and advances for each case. +// +// "---" means impossible except in case of hash collision. +// +// advance meaning: +// - NN: from.Next(); to.Next() +// - SS: from.Step(); to.Step() +// +// 1 2 3 4 5 6 | from | to |type|type'|action ; advance +// ------------+--------+--------+----+------------------------------------ +// 0 0 0 0 0 0 | | | | | if !SameName() { +// . | | | | | if FromBeforeTo() { +// . | | | D | d | delete(from); from.Next() +// . | | | | | } else { +// . | | | C | c | insert(to); to.Next() +// . | | | | | } +// 0 1 1 1 1 1 | | | | | } +// 1 0 0 0 0 0 | a<1> | a<2> | H | e | modify(from, to); NN +// 1 0 0 0 0 1 | a<1> | a<> | E' | e | modify(from, to); NN +// 1 0 0 0 1 0 | a<> | a<1> | E | e | modify(from, to); NN +// 1 0 0 0 1 1 | ---- | ---- | | e | +// 1 0 0 1 0 0 | a<1> | a(...) | J | f | delete(from); insert(to); NN +// 1 0 0 1 0 1 | a<1> | a() | I | f | delete(from); insert(to); NN +// 1 0 0 1 1 0 | a<> | a(...) | G | f | delete(from); insert(to); NN +// 1 0 0 1 1 1 | a<> | a() | F | f | delete(from); insert(to); NN +// 1 0 1 0 0 0 | a(...) | a<1> | J' | f | delete(from); insert(to); NN +// 1 0 1 0 0 1 | a(...) | a<> | G' | f | delete(from); insert(to); NN +// 1 0 1 0 1 0 | a() | a<1> | I' | f | delete(from); insert(to); NN +// 1 0 1 0 1 1 | a() | a<> | F' | f | delete(from); insert(to); NN +// 1 0 1 1 0 0 | a(...) | a(;;;) | L | g | nothing; SS +// 1 0 1 1 0 1 | a(...) | a() | K' | h | deleteChidren(from); NN +// 1 0 1 1 1 0 | a() | a(...) | K | i | insertChildren(to); NN +// 1 0 1 1 1 1 | ---- | ---- | | | +// 1 1 0 0 0 0 | a<1> | a<1> | B | b | nothing; NN +// 1 1 0 0 0 1 | ---- | ---- | | b | +// 1 1 0 0 1 0 | ---- | ---- | | b | +// 1 1 0 0 1 1 | a<> | a<> | B | b | nothing; NN +// 1 1 0 1 0 0 | ---- | ---- | | b | +// 1 1 0 1 0 1 | ---- | ---- | | b | +// 1 1 0 1 1 0 | ---- | ---- | | b | +// 1 1 0 1 1 1 | ---- | ---- | | b | +// 1 1 1 0 0 0 | ---- | ---- | | b | +// 1 1 1 0 0 1 | ---- | ---- | | b | +// 1 1 1 0 1 0 | ---- | ---- | | b | +// 1 1 1 0 1 1 | ---- | ---- | | b | +// 1 1 1 1 0 0 | a(...) | a(...) | B | b | nothing; NN +// 1 1 1 1 0 1 | ---- | ---- | | b | +// 1 1 1 1 1 0 | ---- | ---- | | b | +// 1 1 1 1 1 1 | a() | a() | B | b | nothing; NN +// +// c and d: +// if !SameName() +// d if FromBeforeTo() +// c else +// b: SameName) && sameHash() +// e: SameName() && !sameHash() && BothAreFiles() +// f: SameName() && !sameHash() && FileAndDir() +// g: SameName() && !sameHash() && BothAreDirs() && NoneIsEmpty +// i: SameName() && !sameHash() && BothAreDirs() && FromIsEmpty +// h: else of i + +import ( + "fmt" + "strings" + + "srcd.works/go-git.v4/utils/merkletrie/noder" +) + +// DiffTree calculates the list of changes between two merkletries. It +// uses the provided hashEqual callback to compare noders. +func DiffTree(fromTree, toTree noder.Noder, + hashEqual noder.Equal) (Changes, error) { + ret := NewChanges() + + ii, err := newDoubleIter(fromTree, toTree, hashEqual) + if err != nil { + return nil, err + } + + for { + from := ii.from.current + to := ii.to.current + + switch r := ii.remaining(); r { + case noMoreNoders: + return ret, nil + case onlyFromRemains: + if err = ret.AddRecursiveDelete(from); err != nil { + return nil, err + } + if err = ii.nextFrom(); err != nil { + return nil, err + } + case onlyToRemains: + if err = ret.AddRecursiveInsert(to); err != nil { + return nil, err + } + if err = ii.nextTo(); err != nil { + return nil, err + } + case bothHaveNodes: + if err = diffNodes(&ret, ii); err != nil { + return nil, err + } + default: + panic(fmt.Sprintf("unknown remaining value: %d", r)) + } + } +} + +func diffNodes(changes *Changes, ii *doubleIter) error { + from := ii.from.current + to := ii.to.current + var err error + + // compare their full paths as strings + switch strings.Compare(from.String(), to.String()) { + case -1: + if err = changes.AddRecursiveDelete(from); err != nil { + return err + } + if err = ii.nextFrom(); err != nil { + return err + } + case 1: + if err = changes.AddRecursiveInsert(to); err != nil { + return err + } + if err = ii.nextTo(); err != nil { + return err + } + default: + if err := diffNodesSameName(changes, ii); err != nil { + return err + } + } + + return nil +} + +func diffNodesSameName(changes *Changes, ii *doubleIter) error { + from := ii.from.current + to := ii.to.current + + status, err := ii.compare() + if err != nil { + return err + } + + switch { + case status.sameHash: + // do nothing + if err = ii.nextBoth(); err != nil { + return err + } + case status.bothAreFiles: + changes.Add(NewModify(from, to)) + if err = ii.nextBoth(); err != nil { + return err + } + case status.fileAndDir: + if err = changes.AddRecursiveDelete(from); err != nil { + return err + } + if err = changes.AddRecursiveInsert(to); err != nil { + return err + } + if err = ii.nextBoth(); err != nil { + return err + } + case status.bothAreDirs: + if err = diffDirs(changes, ii); err != nil { + return err + } + default: + return fmt.Errorf("bad status from double iterator") + } + + return nil +} + +func diffDirs(changes *Changes, ii *doubleIter) error { + from := ii.from.current + to := ii.to.current + + status, err := ii.compare() + if err != nil { + return err + } + + switch { + case status.fromIsEmptyDir: + if err = changes.AddRecursiveInsert(to); err != nil { + return err + } + if err = ii.nextBoth(); err != nil { + return err + } + case status.toIsEmptyDir: + if err = changes.AddRecursiveDelete(from); err != nil { + return err + } + if err = ii.nextBoth(); err != nil { + return err + } + case !status.fromIsEmptyDir && !status.toIsEmptyDir: + // do nothing + if err = ii.stepBoth(); err != nil { + return err + } + default: + return fmt.Errorf("both dirs are empty but has different hash") + } + + return nil +} diff --git a/utils/merkletrie/difftree_test.go b/utils/merkletrie/difftree_test.go new file mode 100644 index 0000000..6a193af --- /dev/null +++ b/utils/merkletrie/difftree_test.go @@ -0,0 +1,429 @@ +package merkletrie_test + +import ( + "bytes" + "fmt" + "reflect" + "sort" + "strings" + "testing" + "unicode" + + "srcd.works/go-git.v4/utils/merkletrie" + "srcd.works/go-git.v4/utils/merkletrie/internal/fsnoder" + + . "gopkg.in/check.v1" +) + +func Test(t *testing.T) { TestingT(t) } + +type DiffTreeSuite struct{} + +var _ = Suite(&DiffTreeSuite{}) + +type diffTreeTest struct { + from string + to string + expected string +} + +func (t diffTreeTest) innerRun(c *C, context string, reverse bool) { + comment := Commentf("\n%s", context) + if reverse { + comment = Commentf("%s [REVERSED]", comment.CheckCommentString()) + } + + a, err := fsnoder.New(t.from) + c.Assert(err, IsNil, comment) + comment = Commentf("%s\n\t from = %s", comment.CheckCommentString(), a) + + b, err := fsnoder.New(t.to) + c.Assert(err, IsNil, comment) + comment = Commentf("%s\n\t to = %s", comment.CheckCommentString(), b) + + expected, err := newChangesFromString(t.expected) + c.Assert(err, IsNil, comment) + + if reverse { + a, b = b, a + expected = expected.reverse() + } + comment = Commentf("%s\n\texpected = %s", comment.CheckCommentString(), expected) + + results, err := merkletrie.DiffTree(a, b, fsnoder.HashEqual) + c.Assert(err, IsNil, comment) + + obtained, err := newChanges(results) + c.Assert(err, IsNil, comment) + + comment = Commentf("%s\n\tobtained = %s", comment.CheckCommentString(), obtained) + + c.Assert(obtained, changesEquals, expected, comment) +} + +func (t diffTreeTest) run(c *C, context string) { + t.innerRun(c, context, false) + t.innerRun(c, context, true) +} + +type change struct { + merkletrie.Action + path string +} + +func (c change) String() string { + return fmt.Sprintf("<%s %s>", c.Action, c.path) +} + +func (c change) reverse() change { + ret := change{ + path: c.path, + } + + switch c.Action { + case merkletrie.Insert: + ret.Action = merkletrie.Delete + case merkletrie.Delete: + ret.Action = merkletrie.Insert + case merkletrie.Modify: + ret.Action = merkletrie.Modify + default: + panic(fmt.Sprintf("unknown action type: %d", c.Action)) + } + + return ret +} + +type changes []change + +func newChanges(original merkletrie.Changes) (changes, error) { + ret := make(changes, len(original)) + for i, c := range original { + action, err := c.Action() + if err != nil { + return nil, err + } + switch action { + case merkletrie.Insert: + ret[i] = change{ + Action: merkletrie.Insert, + path: c.To.String(), + } + case merkletrie.Delete: + ret[i] = change{ + Action: merkletrie.Delete, + path: c.From.String(), + } + case merkletrie.Modify: + ret[i] = change{ + Action: merkletrie.Modify, + path: c.From.String(), + } + default: + panic(fmt.Sprintf("unsupported action %d", c.Action)) + } + } + + return ret, nil +} + +func newChangesFromString(s string) (changes, error) { + ret := make([]change, 0) + + s = strings.TrimSpace(s) + s = removeDuplicatedSpace(s) + s = turnSpaceIntoLiteralSpace(s) + + if s == "" { + return ret, nil + } + + for _, chunk := range strings.Split(s, " ") { + change := change{ + path: string(chunk[1:]), + } + + switch chunk[0] { + case '+': + change.Action = merkletrie.Insert + case '-': + change.Action = merkletrie.Delete + case '*': + change.Action = merkletrie.Modify + default: + panic(fmt.Sprintf("unsupported action descriptor %q", chunk[0])) + } + + ret = append(ret, change) + } + + return ret, nil +} + +func removeDuplicatedSpace(s string) string { + var buf bytes.Buffer + + var lastWasSpace, currentIsSpace bool + for _, r := range s { + currentIsSpace = unicode.IsSpace(r) + + if lastWasSpace && currentIsSpace { + continue + } + lastWasSpace = currentIsSpace + + buf.WriteRune(r) + } + + return buf.String() +} + +func turnSpaceIntoLiteralSpace(s string) string { + return strings.Map( + func(r rune) rune { + if unicode.IsSpace(r) { + return ' ' + } + return r + }, s) +} + +func (cc changes) Len() int { return len(cc) } +func (cc changes) Swap(i, j int) { cc[i], cc[j] = cc[j], cc[i] } +func (cc changes) Less(i, j int) bool { return strings.Compare(cc[i].String(), cc[j].String()) < 0 } + +func (cc changes) equals(other changes) bool { + sort.Sort(cc) + sort.Sort(other) + return reflect.DeepEqual(cc, other) +} + +func (cc changes) String() string { + var buf bytes.Buffer + fmt.Fprintf(&buf, "len(%d) [", len(cc)) + sep := "" + for _, c := range cc { + fmt.Fprintf(&buf, "%s%s", sep, c) + sep = ", " + } + buf.WriteByte(']') + return buf.String() +} + +func (cc changes) reverse() changes { + ret := make(changes, len(cc)) + for i, c := range cc { + ret[i] = c.reverse() + } + + return ret +} + +type changesEqualsChecker struct { + *CheckerInfo +} + +var changesEquals Checker = &changesEqualsChecker{ + &CheckerInfo{Name: "changesEquals", Params: []string{"obtained", "expected"}}, +} + +func (checker *changesEqualsChecker) Check(params []interface{}, names []string) (result bool, error string) { + a, ok := params[0].(changes) + if !ok { + return false, "first parameter must be a changes" + } + b, ok := params[1].(changes) + if !ok { + return false, "second parameter must be a changes" + } + + return a.equals(b), "" +} + +func do(c *C, list []diffTreeTest) { + for i, t := range list { + t.run(c, fmt.Sprintf("test #%d:", i)) + } +} + +func (s *DiffTreeSuite) TestEmptyVsEmpty(c *C) { + do(c, []diffTreeTest{ + {"()", "()", ""}, + {"A()", "A()", ""}, + {"A()", "()", ""}, + {"A()", "B()", ""}, + }) +} + +func (s *DiffTreeSuite) TestBasicCases(c *C) { + do(c, []diffTreeTest{ + {"()", "()", ""}, + {"()", "(a<>)", "+a"}, + {"()", "(a<1>)", "+a"}, + {"()", "(a())", ""}, + {"()", "(a(b()))", ""}, + {"()", "(a(b<>))", "+a/b"}, + {"()", "(a(b<1>))", "+a/b"}, + {"(a<>)", "(a<>)", ""}, + {"(a<>)", "(a<1>)", "*a"}, + {"(a<>)", "(a())", "-a"}, + {"(a<>)", "(a(b()))", "-a"}, + {"(a<>)", "(a(b<>))", "-a +a/b"}, + {"(a<>)", "(a(b<1>))", "-a +a/b"}, + {"(a<>)", "(c())", "-a"}, + {"(a<>)", "(c(b()))", "-a"}, + {"(a<>)", "(c(b<>))", "-a +c/b"}, + {"(a<>)", "(c(b<1>))", "-a +c/b"}, + {"(a<>)", "(c(a()))", "-a"}, + {"(a<>)", "(c(a<>))", "-a +c/a"}, + {"(a<>)", "(c(a<1>))", "-a +c/a"}, + {"(a<1>)", "(a<1>)", ""}, + {"(a<1>)", "(a<2>)", "*a"}, + {"(a<1>)", "(b<1>)", "-a +b"}, + {"(a<1>)", "(b<2>)", "-a +b"}, + {"(a<1>)", "(a())", "-a"}, + {"(a<1>)", "(a(b()))", "-a"}, + {"(a<1>)", "(a(b<>))", "-a +a/b"}, + {"(a<1>)", "(a(b<1>))", "-a +a/b"}, + {"(a<1>)", "(a(b<2>))", "-a +a/b"}, + {"(a<1>)", "(c())", "-a"}, + {"(a<1>)", "(c(b()))", "-a"}, + {"(a<1>)", "(c(b<>))", "-a +c/b"}, + {"(a<1>)", "(c(b<1>))", "-a +c/b"}, + {"(a<1>)", "(c(b<2>))", "-a +c/b"}, + {"(a<1>)", "(c())", "-a"}, + {"(a<1>)", "(c(a()))", "-a"}, + {"(a<1>)", "(c(a<>))", "-a +c/a"}, + {"(a<1>)", "(c(a<1>))", "-a +c/a"}, + {"(a<1>)", "(c(a<2>))", "-a +c/a"}, + {"(a())", "(a())", ""}, + {"(a())", "(b())", ""}, + {"(a())", "(a(b()))", ""}, + {"(a())", "(b(a()))", ""}, + {"(a())", "(a(b<>))", "+a/b"}, + {"(a())", "(a(b<1>))", "+a/b"}, + {"(a())", "(b(a<>))", "+b/a"}, + {"(a())", "(b(a<1>))", "+b/a"}, + }) +} + +func (s *DiffTreeSuite) TestHorizontals(c *C) { + do(c, []diffTreeTest{ + {"()", "(a<> b<>)", "+a +b"}, + {"()", "(a<> b<1>)", "+a +b"}, + {"()", "(a<> b())", "+a"}, + {"()", "(a() b<>)", "+b"}, + {"()", "(a<1> b<>)", "+a +b"}, + {"()", "(a<1> b<1>)", "+a +b"}, + {"()", "(a<1> b<2>)", "+a +b"}, + {"()", "(a<1> b())", "+a"}, + {"()", "(a() b<1>)", "+b"}, + {"()", "(a() b())", ""}, + {"()", "(a<> b<> c<> d<>)", "+a +b +c +d"}, + {"()", "(a<> b<1> c() d<> e<2> f())", "+a +b +d +e"}, + }) +} + +func (s *DiffTreeSuite) TestVerticals(c *C) { + do(c, []diffTreeTest{ + {"()", "(z<>)", "+z"}, + {"()", "(a(z<>))", "+a/z"}, + {"()", "(a(b(z<>)))", "+a/b/z"}, + {"()", "(a(b(c(z<>))))", "+a/b/c/z"}, + {"()", "(a(b(c(d(z<>)))))", "+a/b/c/d/z"}, + {"()", "(a(b(c(d(z<1>)))))", "+a/b/c/d/z"}, + }) +} + +func (s *DiffTreeSuite) TestSingleInserts(c *C) { + do(c, []diffTreeTest{ + {"()", "(z<>)", "+z"}, + {"(a())", "(a(z<>))", "+a/z"}, + {"(a())", "(a(b(z<>)))", "+a/b/z"}, + {"(a(b(c())))", "(a(b(c(z<>))))", "+a/b/c/z"}, + {"(a<> b<> c<>)", "(a<> b<> c<> z<>)", "+z"}, + {"(a(b<> c<> d<>))", "(a(b<> c<> d<> z<>))", "+a/z"}, + {"(a(b(c<> d<> e<>)))", "(a(b(c<> d<> e<> z<>)))", "+a/b/z"}, + {"(a(b<>) f<>)", "(a(b<>) f<> z<>)", "+z"}, + {"(a(b<>) f<>)", "(a(b<> z<>) f<>)", "+a/z"}, + }) +} + +func (s *DiffTreeSuite) TestDebug(c *C) { + do(c, []diffTreeTest{ + {"(a(b<>) f<>)", "(a(b<> z<>) f<>)", "+a/z"}, + }) +} + +// root +// / | \ +// / | ---- +// f d h -------- +// /\ / \ | +// e a j b/ g +// | / \ | +// l n k icm +// | +// o +// | +// p/ +func (s *DiffTreeSuite) TestCrazy(c *C) { + crazy := "(f(e(l<1>) a(n(o(p())) k<1>)) d<1> h(j(i<1> c<2> m<>) b() g<>))" + do(c, []diffTreeTest{ + { + crazy, + "()", + "-d -f/e/l -f/a/k -h/j/i -h/j/c -h/j/m -h/g", + }, { + crazy, + crazy, + "", + }, { + crazy, + "(d<1>)", + "-f/e/l -f/a/k -h/j/i -h/j/c -h/j/m -h/g", + }, { + crazy, + "(d<1> h(b() g<>))", + "-f/e/l -f/a/k -h/j/i -h/j/c -h/j/m", + }, { + crazy, + "(d<1> f(e(l()) a()) h(b() g<>))", + "-f/e/l -f/a/k -h/j/i -h/j/c -h/j/m", + }, { + crazy, + "(d<1> f(e(l<1>) a()) h(b() g<>))", + "-f/a/k -h/j/i -h/j/c -h/j/m", + }, { + crazy, + "(d<2> f(e(l<2>) a(s(t<1>))) h(b() g<> r<> j(i<> c<3> m<>)))", + "+f/a/s/t +h/r -f/a/k *d *f/e/l *h/j/c *h/j/i", + }, { + crazy, + "(f(e(l<2>) a(n(o(p<1>)) k<>)) h(j(i<1> c<2> m<>) b() g<>))", + "*f/e/l +f/a/n/o/p *f/a/k -d", + }, { + crazy, + "(f(e(l<1>) a(n(o(p(r<1>))) k<1>)) d<1> h(j(i<1> c<2> b() m<>) g<1>))", + "+f/a/n/o/p/r *h/g", + }, + }) +} + +func (s *DiffTreeSuite) TestSameNames(c *C) { + do(c, []diffTreeTest{ + { + "(a(a(a<>)))", + "(a(a(a<1>)))", + "*a/a/a", + }, { + "(a(b(a<>)))", + "(a(b(a<>)) b(a<>))", + "+b/a", + }, { + "(a(b(a<>)))", + "(a(b()) b(a<>))", + "-a/b/a +b/a", + }, + }) +} diff --git a/utils/merkletrie/doubleiter.go b/utils/merkletrie/doubleiter.go new file mode 100644 index 0000000..6f5e538 --- /dev/null +++ b/utils/merkletrie/doubleiter.go @@ -0,0 +1,187 @@ +package merkletrie + +import ( + "fmt" + "io" + + "srcd.works/go-git.v4/utils/merkletrie/noder" +) + +// A doubleIter is a convenience type to keep track of the current +// noders in two merkletries that are going to be iterated in parallel. +// It has methods for: +// +// - iterating over the merkletries, both at the same time or +// individually: nextFrom, nextTo, nextBoth, stepBoth +// +// - checking if there are noders left in one or both of them with the +// remaining method and its associated returned type. +// +// - comparing the current noders of both merkletries in several ways, +// with the compare method and its associated returned type. +type doubleIter struct { + from struct { + iter *Iter + current noder.Path // nil if no more nodes + } + to struct { + iter *Iter + current noder.Path // nil if no more nodes + } + hashEqual noder.Equal +} + +// NewdoubleIter returns a new doubleIter for the merkletries "from" and +// "to". The hashEqual callback function will be used by the doubleIter +// to compare the hash of the noders in the merkletries. The doubleIter +// will be initialized to the first elements in each merkletrie if any. +func newDoubleIter(from, to noder.Noder, hashEqual noder.Equal) ( + *doubleIter, error) { + var ii doubleIter + var err error + + if ii.from.iter, err = NewIter(from); err != nil { + return nil, fmt.Errorf("from: %s", err) + } + if ii.from.current, err = ii.from.iter.Next(); turnEOFIntoNil(err) != nil { + return nil, fmt.Errorf("from: %s", err) + } + + if ii.to.iter, err = NewIter(to); err != nil { + return nil, fmt.Errorf("to: %s", err) + } + if ii.to.current, err = ii.to.iter.Next(); turnEOFIntoNil(err) != nil { + return nil, fmt.Errorf("to: %s", err) + } + + ii.hashEqual = hashEqual + + return &ii, nil +} + +func turnEOFIntoNil(e error) error { + if e != nil && e != io.EOF { + return e + } + return nil +} + +// NextBoth makes d advance to the next noder in both merkletries. If +// any of them is a directory, it skips its contents. +func (d *doubleIter) nextBoth() error { + if err := d.nextFrom(); err != nil { + return err + } + if err := d.nextTo(); err != nil { + return err + } + + return nil +} + +// NextFrom makes d advance to the next noder in the "from" merkletrie, +// skipping its contents if it is a directory. +func (d *doubleIter) nextFrom() (err error) { + d.from.current, err = d.from.iter.Next() + return turnEOFIntoNil(err) +} + +// NextTo makes d advance to the next noder in the "to" merkletrie, +// skipping its contents if it is a directory. +func (d *doubleIter) nextTo() (err error) { + d.to.current, err = d.to.iter.Next() + return turnEOFIntoNil(err) +} + +// StepBoth makes d advance to the next noder in both merkletries, +// getting deeper into directories if that is the case. +func (d *doubleIter) stepBoth() (err error) { + if d.from.current, err = d.from.iter.Step(); turnEOFIntoNil(err) != nil { + return err + } + if d.to.current, err = d.to.iter.Step(); turnEOFIntoNil(err) != nil { + return err + } + return nil +} + +// Remaining returns if there are no more noders in the tree, if both +// have noders or if one of them doesn't. +func (d *doubleIter) remaining() remaining { + if d.from.current == nil && d.to.current == nil { + return noMoreNoders + } + + if d.from.current == nil && d.to.current != nil { + return onlyToRemains + } + + if d.from.current != nil && d.to.current == nil { + return onlyFromRemains + } + + return bothHaveNodes +} + +// Remaining values tells you whether both trees still have noders, or +// only one of them or none of them. +type remaining int + +const ( + noMoreNoders remaining = iota + onlyToRemains + onlyFromRemains + bothHaveNodes +) + +// Compare returns the comparison between the current elements in the +// merkletries. +func (d *doubleIter) compare() (s comparison, err error) { + s.sameHash = d.hashEqual(d.from.current, d.to.current) + + fromIsDir := d.from.current.IsDir() + toIsDir := d.to.current.IsDir() + + s.bothAreDirs = fromIsDir && toIsDir + s.bothAreFiles = !fromIsDir && !toIsDir + s.fileAndDir = !s.bothAreDirs && !s.bothAreFiles + + fromNumChildren, err := d.from.current.NumChildren() + if err != nil { + return comparison{}, fmt.Errorf("from: %s", err) + } + + toNumChildren, err := d.to.current.NumChildren() + if err != nil { + return comparison{}, fmt.Errorf("to: %s", err) + } + + s.fromIsEmptyDir = fromIsDir && fromNumChildren == 0 + s.toIsEmptyDir = toIsDir && toNumChildren == 0 + + return +} + +// Answers to a lot of questions you can ask about how to noders are +// equal or different. +type comparison struct { + // the following are only valid if both nodes have the same name + // (i.e. nameComparison == 0) + + // Do both nodes have the same hash? + sameHash bool + // Are both nodes files? + bothAreFiles bool + + // the following are only valid if any of the noders are dirs, + // this is, if !bothAreFiles + + // Is one a file and the other a dir? + fileAndDir bool + // Are both nodes dirs? + bothAreDirs bool + // Is the from node an empty dir? + fromIsEmptyDir bool + // Is the to Node an empty dir? + toIsEmptyDir bool +} |