From 1fdfe887278bf1e6039215fa5f48e5b497c20bee Mon Sep 17 00:00:00 2001 From: Alberto Cortés Date: Mon, 13 Feb 2017 10:28:39 +0100 Subject: add difftree for noders (#262) difftree for noders --- utils/merkletrie/difftree.go | 404 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 404 insertions(+) create mode 100644 utils/merkletrie/difftree.go (limited to 'utils/merkletrie/difftree.go') diff --git a/utils/merkletrie/difftree.go b/utils/merkletrie/difftree.go new file mode 100644 index 0000000..1a626cd --- /dev/null +++ b/utils/merkletrie/difftree.go @@ -0,0 +1,404 @@ +package merkletrie + +// The focus of this difftree implementation is to save time by +// skipping whole directories if their hash is the same in both +// trees. +// +// The diff algorithm implemented here is based on the doubleiter +// type defined in this same package; we will iterate over both +// trees at the same time, while comparing the current noders in +// each iterator. Depending on how they differ we will output the +// corresponding chages and move the iterators further over both +// trees. +// +// The table bellow show all the possible comparison results, along +// with what changes should we produce and how to advance the +// iterators. +// +// The table is implemented by the switches in this function, +// diffTwoNodes, diffTwoNodesSameName and diffTwoDirs. +// +// Many Bothans died to bring us this information, make sure you +// understand the table before modifying this code. + +// # Cases +// +// When comparing noders in both trees you will found yourself in +// one of 169 possible cases, but if we ignore moves, we can +// simplify a lot the search space into the following table: +// +// - "-": nothing, no file or directory +// - a<>: an empty file named "a". +// - a<1>: a file named "a", with "1" as its contents. +// - a<2>: a file named "a", with "2" as its contents. +// - a(): an empty dir named "a". +// - a(...): a dir named "a", with some files and/or dirs inside (possibly +// empty). +// - a(;;;): a dir named "a", with some other files and/or dirs inside +// (possibly empty), which different from the ones in "a(...)". +// +// \ to - a<> a<1> a<2> a() a(...) a(;;;) +// from \ +// - 00 01 02 03 04 05 06 +// a<> 10 11 12 13 14 15 16 +// a<1> 20 21 22 23 24 25 26 +// a<2> 30 31 32 33 34 35 36 +// a() 40 41 42 43 44 45 46 +// a(...) 50 51 52 53 54 55 56 +// a(;;;) 60 61 62 63 64 65 66 +// +// Every (from, to) combination in the table is a special case, but +// some of them can be merged into some more general cases, for +// instance 11 and 22 can be merged into the general case: both +// noders are equal. +// +// Here is a full list of all the cases that are similar and how to +// merge them together into more general cases. Each general case +// is labeled wiht an uppercase letter for further reference, and it +// is followed by the pseudocode of the checks you have to perfrom +// on both noders to see if you are in such a case, the actions to +// perform (i.e. what changes to output) and how to advance the +// iterators of each tree to continue the comparison process. +// +// ## A. Impossible: 00 +// +// ## B. Same thing on both sides: 11, 22, 33, 44, 55, 66 +// - check: `SameName() && SameHash()` +// - action: do nothing. +// - advance: `FromNext(); ToNext()` +// +// ### C. To was created: 01, 02, 03, 04, 05, 06 +// - check: `DifferentName() && ToBeforeFrom()` +// - action: inserRecursively(to) +// - advance: `ToNext()` +// +// ### D. From was deleted: 10, 20, 30, 40, 50, 60 +// - check: `DifferentName() && FromBeforeTo()` +// - action: `DeleteRecursively(from)` +// - advance: `FromNext()` +// +// ### E. Empty file to file with contents: 12, 13 +// - check: `SameName() && DifferentHash() && FromIsFile() && +// ToIsFile() && FromIsEmpty()` +// - action: `modifyFile(from, to)` +// - advance: `FromNext()` or `FromStep()` +// +// ### E'. file with contents to empty file: 21, 31 +// - check: `SameName() && DifferentHash() && FromIsFile() && +// ToIsFile() && ToIsEmpty()` +// - action: `modifyFile(from, to)` +// - advance: `FromNext()` or `FromStep()` +// +// ### F. empty file to empty dir with the same name: 14 +// - check: `SameName() && FromIsFile() && FromIsEmpty() && +// ToIsDir() && ToIsEmpty()` +// - action: `DeleteFile(from); InsertEmptyDir(to)` +// - advance: `FromNext(); ToNext()` +// +// ### F'. empty dir to empty file of the same name: 41 +// - check: `SameName() && FromIsDir() && FromIsEmpty && +// ToIsFile() && ToIsEmpty()` +// - action: `DeleteEmptyDir(from); InsertFile(to)` +// - advance: `FromNext(); ToNext()` or step for any of them. +// +// ### G. empty file to non-empty dir of the same name: 15, 16 +// - check: `SameName() && FromIsFile() && ToIsDir() && +// FromIsEmpty() && ToIsNotEmpty()` +// - action: `DeleteFile(from); InsertDirRecursively(to)` +// - advance: `FromNext(); ToNext()` +// +// ### G'. non-empty dir to empty file of the same name: 51, 61 +// - check: `SameName() && FromIsDir() && FromIsNotEmpty() && +// ToIsFile() && FromIsEmpty()` +// - action: `DeleteDirRecursively(from); InsertFile(to)` +// - advance: `FromNext(); ToNext()` +// +// ### H. modify file contents: 23, 32 +// - check: `SameName() && FromIsFile() && ToIsFile() && +// FromIsNotEmpty() && ToIsNotEmpty()` +// - action: `ModifyFile(from, to)` +// - advance: `FromNext(); ToNext()` +// +// ### I. file with contents to empty dir: 24, 34 +// - check: `SameName() && DifferentHash() && FromIsFile() && +// FromIsNotEmpty() && ToIsDir() && ToIsEmpty()` +// - action: `DeleteFile(from); InsertEmptyDir(to)` +// - advance: `FromNext(); ToNext()` +// +// ### I'. empty dir to file with contents: 42, 43 +// - check: `SameName() && DifferentHash() && FromIsDir() && +// FromIsEmpty() && ToIsFile() && ToIsEmpty()` +// - action: `DeleteDir(from); InsertFile(to)` +// - advance: `FromNext(); ToNext()` +// +// ### J. file with contents to dir with contetns: 25, 26, 35, 36 +// - check: `SameName() && DifferentHash() && FromIsFile() && +// FromIsNotEmpty() && ToIsDir() && ToIsNotEmpty()` +// - action: `DeleteFile(from); InsertDirRecursively(to)` +// - advance: `FromNext(); ToNext()` +// +// ### J'. dir with contetns to file with contents: 52, 62, 53, 63 +// - check: `SameName() && DifferentHash() && FromIsDir() && +// FromIsNotEmpty() && ToIsFile() && ToIsNotEmpty()` +// - action: `DeleteDirRecursively(from); InsertFile(to)` +// - advance: `FromNext(); ToNext()` +// +// ### K. empty dir to dir with contents: 45, 46 +// - check: `SameName() && DifferentHash() && FromIsDir() && +// FromIsEmpty() && ToIsDir() && ToIsNotEmpty()` +// - action: `InsertChildrenRecursively(to)` +// - advance: `FromNext(); ToNext()` +// +// ### K'. dir with contents to empty dir: 54, 64 +// - check: `SameName() && DifferentHash() && FromIsDir() && +// FromIsEmpty() && ToIsDir() && ToIsNotEmpty()` +// - action: `DeleteChildrenRecursively(from)` +// - advance: `FromNext(); ToNext()` +// +// ### L. dir with contents to dir with different contents: 56, 65 +// - check: `SameName() && DifferentHash() && FromIsDir() && +// FromIsNotEmpty() && ToIsDir() && ToIsNotEmpty()` +// - action: nothing +// - advance: `FromStep(); ToStep()` +// +// + +// All these cases can be further simplified by a truth table +// reduction process, in which we gather similar checks together to +// make the final code easier to read and understand. +// +// The first 6 columns are the outputs of the checks to perform on +// both noders. I have labeled them 1 to 6, this is what they mean: +// +// 1: SameName() +// 2: SameHash() +// 3: FromIsDir() +// 4: ToIsDir() +// 5: FromIsEmpty() +// 6: ToIsEmpty() +// +// The from and to columns are a fsnoder example of the elements +// that you will find on each tree under the specified comparison +// results (columns 1 to 6). +// +// The type column identifies the case we are into, from the list above. +// +// The type' column identifies the new set of reduced cases, using +// lowercase letters, and they are explained after the table. +// +// The last column is the set of actions and advances for each case. +// +// "---" means impossible except in case of hash collision. +// +// advance meaning: +// - NN: from.Next(); to.Next() +// - SS: from.Step(); to.Step() +// +// 1 2 3 4 5 6 | from | to |type|type'|action ; advance +// ------------+--------+--------+----+------------------------------------ +// 0 0 0 0 0 0 | | | | | if !SameName() { +// . | | | | | if FromBeforeTo() { +// . | | | D | d | delete(from); from.Next() +// . | | | | | } else { +// . | | | C | c | insert(to); to.Next() +// . | | | | | } +// 0 1 1 1 1 1 | | | | | } +// 1 0 0 0 0 0 | a<1> | a<2> | H | e | modify(from, to); NN +// 1 0 0 0 0 1 | a<1> | a<> | E' | e | modify(from, to); NN +// 1 0 0 0 1 0 | a<> | a<1> | E | e | modify(from, to); NN +// 1 0 0 0 1 1 | ---- | ---- | | e | +// 1 0 0 1 0 0 | a<1> | a(...) | J | f | delete(from); insert(to); NN +// 1 0 0 1 0 1 | a<1> | a() | I | f | delete(from); insert(to); NN +// 1 0 0 1 1 0 | a<> | a(...) | G | f | delete(from); insert(to); NN +// 1 0 0 1 1 1 | a<> | a() | F | f | delete(from); insert(to); NN +// 1 0 1 0 0 0 | a(...) | a<1> | J' | f | delete(from); insert(to); NN +// 1 0 1 0 0 1 | a(...) | a<> | G' | f | delete(from); insert(to); NN +// 1 0 1 0 1 0 | a() | a<1> | I' | f | delete(from); insert(to); NN +// 1 0 1 0 1 1 | a() | a<> | F' | f | delete(from); insert(to); NN +// 1 0 1 1 0 0 | a(...) | a(;;;) | L | g | nothing; SS +// 1 0 1 1 0 1 | a(...) | a() | K' | h | deleteChidren(from); NN +// 1 0 1 1 1 0 | a() | a(...) | K | i | insertChildren(to); NN +// 1 0 1 1 1 1 | ---- | ---- | | | +// 1 1 0 0 0 0 | a<1> | a<1> | B | b | nothing; NN +// 1 1 0 0 0 1 | ---- | ---- | | b | +// 1 1 0 0 1 0 | ---- | ---- | | b | +// 1 1 0 0 1 1 | a<> | a<> | B | b | nothing; NN +// 1 1 0 1 0 0 | ---- | ---- | | b | +// 1 1 0 1 0 1 | ---- | ---- | | b | +// 1 1 0 1 1 0 | ---- | ---- | | b | +// 1 1 0 1 1 1 | ---- | ---- | | b | +// 1 1 1 0 0 0 | ---- | ---- | | b | +// 1 1 1 0 0 1 | ---- | ---- | | b | +// 1 1 1 0 1 0 | ---- | ---- | | b | +// 1 1 1 0 1 1 | ---- | ---- | | b | +// 1 1 1 1 0 0 | a(...) | a(...) | B | b | nothing; NN +// 1 1 1 1 0 1 | ---- | ---- | | b | +// 1 1 1 1 1 0 | ---- | ---- | | b | +// 1 1 1 1 1 1 | a() | a() | B | b | nothing; NN +// +// c and d: +// if !SameName() +// d if FromBeforeTo() +// c else +// b: SameName) && sameHash() +// e: SameName() && !sameHash() && BothAreFiles() +// f: SameName() && !sameHash() && FileAndDir() +// g: SameName() && !sameHash() && BothAreDirs() && NoneIsEmpty +// i: SameName() && !sameHash() && BothAreDirs() && FromIsEmpty +// h: else of i + +import ( + "fmt" + "strings" + + "srcd.works/go-git.v4/utils/merkletrie/noder" +) + +// DiffTree calculates the list of changes between two merkletries. It +// uses the provided hashEqual callback to compare noders. +func DiffTree(fromTree, toTree noder.Noder, + hashEqual noder.Equal) (Changes, error) { + ret := NewChanges() + + ii, err := newDoubleIter(fromTree, toTree, hashEqual) + if err != nil { + return nil, err + } + + for { + from := ii.from.current + to := ii.to.current + + switch r := ii.remaining(); r { + case noMoreNoders: + return ret, nil + case onlyFromRemains: + if err = ret.AddRecursiveDelete(from); err != nil { + return nil, err + } + if err = ii.nextFrom(); err != nil { + return nil, err + } + case onlyToRemains: + if err = ret.AddRecursiveInsert(to); err != nil { + return nil, err + } + if err = ii.nextTo(); err != nil { + return nil, err + } + case bothHaveNodes: + if err = diffNodes(&ret, ii); err != nil { + return nil, err + } + default: + panic(fmt.Sprintf("unknown remaining value: %d", r)) + } + } +} + +func diffNodes(changes *Changes, ii *doubleIter) error { + from := ii.from.current + to := ii.to.current + var err error + + // compare their full paths as strings + switch strings.Compare(from.String(), to.String()) { + case -1: + if err = changes.AddRecursiveDelete(from); err != nil { + return err + } + if err = ii.nextFrom(); err != nil { + return err + } + case 1: + if err = changes.AddRecursiveInsert(to); err != nil { + return err + } + if err = ii.nextTo(); err != nil { + return err + } + default: + if err := diffNodesSameName(changes, ii); err != nil { + return err + } + } + + return nil +} + +func diffNodesSameName(changes *Changes, ii *doubleIter) error { + from := ii.from.current + to := ii.to.current + + status, err := ii.compare() + if err != nil { + return err + } + + switch { + case status.sameHash: + // do nothing + if err = ii.nextBoth(); err != nil { + return err + } + case status.bothAreFiles: + changes.Add(NewModify(from, to)) + if err = ii.nextBoth(); err != nil { + return err + } + case status.fileAndDir: + if err = changes.AddRecursiveDelete(from); err != nil { + return err + } + if err = changes.AddRecursiveInsert(to); err != nil { + return err + } + if err = ii.nextBoth(); err != nil { + return err + } + case status.bothAreDirs: + if err = diffDirs(changes, ii); err != nil { + return err + } + default: + return fmt.Errorf("bad status from double iterator") + } + + return nil +} + +func diffDirs(changes *Changes, ii *doubleIter) error { + from := ii.from.current + to := ii.to.current + + status, err := ii.compare() + if err != nil { + return err + } + + switch { + case status.fromIsEmptyDir: + if err = changes.AddRecursiveInsert(to); err != nil { + return err + } + if err = ii.nextBoth(); err != nil { + return err + } + case status.toIsEmptyDir: + if err = changes.AddRecursiveDelete(from); err != nil { + return err + } + if err = ii.nextBoth(); err != nil { + return err + } + case !status.fromIsEmptyDir && !status.toIsEmptyDir: + // do nothing + if err = ii.stepBoth(); err != nil { + return err + } + default: + return fmt.Errorf("both dirs are empty but has different hash") + } + + return nil +} -- cgit