5 files changed, 3065 insertions, 0 deletions
diff --git a/XDiff.py b/XDiff.py
new file mode 100644
index 0000000..ad0e93c
--- /dev/null
+++ b/XDiff.py
@@ -0,0 +1,1996 @@
+#!/usr/bin/env python
+"""
+java XDiff [-o|-g] [-p percent] [-e encoding] xml_file1 xml_file2 diff_result
+Options:
+  The default setting is "-o -p 0.3 -e UTF8"
+  -o    The optimal mode, to get the minimum editing distance.
+  -g    The greedy mode, to find a difference quickly.
+  -p    The maximum change percentage allowed.
+        Default value: 1.0 for -o mode; 0.3 for -g mode.
+  -e    The encoding of the output file.
+        Default value: UTF8.
+"""
+# Copyright (c) 2001 - 2005
+#     Yuan Wang. All rights reserved.
+
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. Redistributions in any form must be accompanied by information on
+# how to obtain complete source code for the X-Diff software and any
+# accompanying software that uses the X-Diff software.  The source code
+# must either be included in the distribution or be available for no
+# more than the cost of distribution plus a nominal fee, and must be
+# freely redistributable under reasonable conditions.  For an executable
+# file, complete source code means the source code for all modules it
+# contains.  It does not include source code for modules or files that
+# typically accompany the major components of the operating system on
+# which the executable file runs.
+
+# THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
+# ARE DISCLAIMED.  IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
+# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+
+#import java.io.BufferedReader
+#import java.io.FileReader
+#import java.io.FileOutputStream
+#import java.io.OutputStreamWriter
+#import java.io.IOException
+#import java.util.Random
+#import java.util.Vector
+import sys, time, codecs
+import XTree, XLut
+from XParser import XParser
+
+# <code>XDiff</code> computes the difference of two input XML documents.
+
+_CIRCUIT_SIZE = 2048
+_MATRIX_SIZE = 1024
+_ATTRIBUTE_SIZE = 1024
+_TEXT_SIZE = 1024
+
+
+class XDiff:
+    _oFlag = False
+    _NO_MATCH_THRESHOLD = 0.3
+    _sampleCount = 3
+    _DEBUG = False
+    _encoding = "UTF8"
+
+#    self._xtree1, self._xtree2
+#    private XLut    _xlut
+#    private _leastCostMatrix[][], self._pathMatrix[][], self._circuit[]
+#
+#    private _attrList1[], _attrList2[], _textList1[], _textList2[]
+#    private boolean _attrMatch[], _textMatch1[], _textMatch2[]
+#    private long    _attrHash[], _textHash[]
+#    private String    _attrTag[]
+#
+#    private self._matchp[]
+#    private boolean    self._needNewLine
+
+
+
+    # Constructor
+    # @param    input1        input file #1
+    # @param    input2        input file #2
+    # @param    output        output file
+
+    def __init__(self, input1, input2, output):
+        # Parse input files
+        parser = XParser()
+        t0 = time.time()
+        self._xtree1 = parser.parse(input1)
+        t1 = time.time()
+        parser = XParser()
+        self._xtree2 = parser.parse(input2)
+        t2 = time.time()
+
+        # check both root nodes.
+        root1 = self._xtree1.getRoot()
+        root2 = self._xtree2.getRoot()
+        if (self._xtree1.getHashValue(root1) == self._xtree2.getHashValue(root2)):
+            print "No difference!"
+            print "Execution time: " + (t2 - t0) + " ms"
+            print "Parsing " + input1 + ": " + (t1 - t0) + " ms"
+            print "Parsing " + input2 + ": " + (t2 - t1) + " ms"
+        else:
+            self._xlut =  XLut.XLut()
+            self._matchp =  int[2]
+
+            if (self._xtree1.getTag(root1).compareTo(self._xtree2.getTag(root2)) != 0):
+                print "The root is changed!"
+                self._matchp[0] = XTree.NO_MATCH
+                self._xtree1.addMatching(root1, self._matchp)
+                self._xtree2.addMatching(root2, self._matchp)
+            else:
+                # initialize data structures.
+                self._attrList1    =  []
+                self._attrList2    =  []
+                self._attrMatch    =  []
+                self._attrHash    =  []
+                self._attrTag    =  []
+
+                self._textList1    =  []
+                self._textList2    =  []
+                self._textMatch1    =  []
+                self._textMatch2    =  []
+                self._textHash    =  []
+
+                self._leastCostMatrix =  []
+                self._pathMatrix     =  []
+                self._circuit     =  []
+
+                for i in range(_MATRIX_SIZE):
+                    self._leastCostMatrix[i] =  int[_MATRIX_SIZE]
+                    self._pathMatrix[i] =  int[_MATRIX_SIZE]
+
+                self._matchp[0] = XTree.CHANGE
+                self._matchp[1] = root2
+                self._xtree1.addMatching(root1, self._matchp)
+                self._matchp[1] = root1
+                self._xtree2.addMatching(root2, self._matchp)
+                self.xdiff(root1, root2, False)
+
+            t3 = time.time()
+            self.writeDiff(input1, output)
+            t4 = time.time()
+
+            print "Difference detected!"
+            print "Execution time: " + (t4 - t0) + " ms"
+            print "Parsing " + input1 + ": " + (t1 - t0) + " ms"
+            print "Parsing " + input2 + ": " + (t2 - t1) + " ms"
+            print "Diffing: " + (t3 - t2) + " ms"
+            print "Writing result: " + (t4 - t3) + " ms"
+
+
+    # Diff two element lists
+    # This is the official one that records matching top-down
+    # @param    pid1        parent id #1
+    # @param    pid2        parent id #2
+    # @param    matchFlag    indicates if distance computation needed
+    def xdiff(self, pid1, pid2, matchFlag):
+        # diff attributes.
+        attrCount1 = 0
+        attrCount2 = 0
+        attr1 = self._xtree1.getFirstAttribute(pid1)
+        while (attr1 != XTree.NULL_NODE):
+            self._attrList1[attrCount1] = attr1
+            attrCount1 += 1
+            attr1 = self._xtree1.getNextAttribute(attr1)
+        attr2 = self._xtree2.getFirstAttribute(pid2)
+        while (attr2 != XTree.NULL_NODE):
+            self._attrList2[attrCount2] = attr2
+            attrCount2 += 1
+            attr2 = self._xtree2.getNextAttribute(attr2)
+
+        if (attrCount1 > 0):
+            if (attrCount2 > 0):
+                self.diffAttributes(attrCount1, attrCount2)
+            else:
+                self._matchp[0] = XTree.NO_MATCH
+                for i in range(attrCount1):
+                    self._xtree1.addMatching(self._attrList1[i],
+                                self._matchp)
+        elif (attrCount2 > 0):    # attrCount1 == 0
+            self._matchp[0] = XTree.NO_MATCH
+            for i in range(attrCount2):
+                self._xtree2.addMatching(self._attrList2[i], self._matchp)
+
+        # Match element nodes.
+        count1 = self._xtree1.getChildrenCount(pid1) - attrCount1
+        count2 = self._xtree2.getChildrenCount(pid2) - attrCount2
+
+        if (count1 == 0):
+            self._matchp[0] = XTree.NO_MATCH
+            node2 = self._xtree2.getFirstChild(pid2)
+            self._xtree2.addMatching(node2, self._matchp)
+            for i in range(1,count2):
+                node2 = self._xtree2.getNextSibling(node2)
+                self._xtree2.addMatching(node2, self._matchp)
+        elif (count2 == 0):
+            self._matchp[0] = XTree.NO_MATCH
+            node1 = self._xtree1.getFirstChild(pid1)
+            self._xtree1.addMatching(node1, self._matchp)
+            for i in range(1, count1):
+                node1 = self._xtree1.getNextSibling(node1)
+                self._xtree1.addMatching(node1, self._matchp)
+        elif ((count1 == 1) and (count2 == 1)):
+            node1 = self._xtree1.getFirstChild(pid1)
+            node2 = self._xtree2.getFirstChild(pid2)
+
+            if (self._xtree1.getHashValue(node1) == self._xtree2.getHashValue(node2)):
+                return
+
+            isE1 = self._xtree1.isElement(node1)
+            isE2 = self._xtree2.isElement(node2)
+
+            if (isE1 and isE2):
+                tag1 = self._xtree1.getTag(node1)
+                tag2 = self._xtree2.getTag(node2)
+                if (tag1.compareTo(tag2) == 0):
+                    self._matchp[0] = XTree.CHANGE
+                    self._matchp[1] = node2
+                    self._xtree1.addMatching(node1, self._matchp)
+                    self._matchp[1] = node1
+                    self._xtree2.addMatching(node2, self._matchp)
+
+                    self.xdiff(node1, node2, matchFlag)
+                else:
+                    self._matchp[0] = XTree.NO_MATCH
+                    self._xtree1.addMatching(node1, self._matchp)
+                    self._xtree2.addMatching(node2, self._matchp)
+            elif (not isE1 and not isE2):
+                self._matchp[0] = XTree.CHANGE
+                self._matchp[1] = node2
+                self._xtree1.addMatching(node1, self._matchp)
+                self._matchp[1] = node1
+                self._xtree2.addMatching(node2, self._matchp)
+            else:
+                self._matchp[0] = XTree.NO_MATCH
+                self._xtree1.addMatching(node1, self._matchp)
+                self._xtree2.addMatching(node2, self._matchp)
+        else:
+            elements1 =  int[count1]
+            elements2 =  int[count2]
+            elementCount1 = 0
+            textCount1 = 0
+            elementCount2 = 0
+            textCount2 = 0
+
+            child1 = self._xtree1.getFirstChild(pid1)
+            if (self._xtree1.isElement(child1)):
+                elements1[elementCount1] = child1
+                elementCount1 += 1
+            else:
+                self._textList1[textCount1] = child1
+                textCount1 += 1
+            for i in range(1,count1):
+                child1 = self._xtree1.getNextSibling(child1)
+                if (self._xtree1.isElement(child1)):
+                    elements1[elementCount1] = child1
+                    elementCount1 += 1
+                else:
+                    self._textList1[textCount1] = child1
+                    textCount1 += 1
+
+            child2 = self._xtree2.getFirstChild(pid2)
+            if (self._xtree2.isElement(child2)):
+                elements2[elementCount2] = child2
+                elementCount2 += 1
+            else:
+                self._textList2[textCount2] = child2
+                textCount2 += 1
+            for i in range(1,count2):
+                child2 = self._xtree2.getNextSibling(child2)
+                if (self._xtree2.isElement(child2)):
+                    elements2[elementCount2] = child2
+                    elementCount2 += 1
+                else:
+                    self._textList2[textCount2] = child2
+                    textCount2 += 1
+
+            # Match text nodes.
+            if (textCount1 > 0):
+                if (textCount2 > 0):
+                    self.diffText(textCount1, textCount2)
+                else:
+                    self._matchp[0] = XTree.NO_MATCH
+                    for i in range(textCount1):
+                        self._xtree1.addMatching(self._textList1[i], self._matchp)
+            elif (textCount2 > 0):
+                self._matchp[0] = XTree.NO_MATCH
+                for i in (textCount2):
+                    self._xtree2.addMatching(self._textList2[i],
+                                self._matchp)
+
+            matched1 =  []
+            matched2 =  []
+            mcount = self._matchFilter(elements1, elementCount1,
+                              elements2, elementCount2,
+                              matched1, matched2)
+
+            if ((elementCount1 == mcount) and (elementCount2 == mcount)):
+                return
+
+            if (elementCount1 == mcount):
+                self._matchp[0] = XTree.NO_MATCH
+                for i in range(elementCount2):
+                    if (not matched2[i]):
+                        self._xtree2.addMatching(elements2[i], self._matchp)
+                return
+            if (elementCount2 == mcount):
+                self._matchp[0] = XTree.NO_MATCH
+                for i in range(elementCount1):
+                    if (not matched1[i]):
+                        self._xtree1.addMatching(elements1[i], self._matchp)
+                return
+
+            # Write the list of unmatched nodes.
+            ucount1 = elementCount1 - mcount
+            ucount2 = elementCount2 - mcount
+            unmatched1 =  int[ucount1]
+            unmatched2 =  int[ucount2]
+            muc1 = 0
+            muc2 = 0
+            start = 0
+
+            while ((muc1 < ucount1) and (muc2 < ucount2)):
+                while (start < elementCount1) and matched1[start]:
+                    start += 1
+                startTag = self._xtree1.getTag(elements1[start])
+                uele1 = 0
+                uele2 = 0
+                muc1 += 1
+                unmatched1[uele1] = elements1[start]
+                uele1 += 1
+                matched1[start] = True
+                start += 1
+
+                i = start
+                while (i < elementCount1) and (muc1 < ucount1):
+                    if (not matched1[i] and startTag.equals(self._xtree1.getTag(elements1[i]))):
+                        matched1[i] = True
+                        muc1 += 1
+                        unmatched1[uele1] = elements1[i]
+                        uele1 += 1
+                    i += 1
+
+                i = 0
+                while (i < elementCount2) and (muc2 < ucount2):
+                    if (not matched2[i] and startTag.equals(self._xtree2.getTag(elements2[i]))):
+                        matched2[i] = True
+                        muc2 += 1
+                        unmatched2[uele2] = elements2[i]
+                        uele2 += 1
+                    i += 1
+
+                if (uele2 == 0):
+                    self._matchp[0] = XTree.NO_MATCH
+                    for i in range(uele1):
+                        self._xtree1.addMatching(unmatched1[i], self._matchp)
+                else:
+                    if ((uele1 == 1) and (uele2 == 1)):
+                        self._matchp[0] = XTree.CHANGE
+                        self._matchp[1] = unmatched2[0]
+                        self._xtree1.addMatching(unmatched1[0], self._matchp)
+                        self._matchp[1] = unmatched1[0]
+                        self._xtree2.addMatching(unmatched2[0], self._matchp)
+                        self.xdiff(unmatched1[0],
+                              unmatched2[0],
+                              matchFlag)
+                    # To find minimal-cost matching between those unmatched.
+                    elif (uele1 >= uele2):
+                        if ((uele2 <= self._sampleCount) or not self._gFlag):
+                            self.matchListO(unmatched1, unmatched2, uele1, uele2, True, matchFlag)
+                        else:
+                            self.matchList(unmatched1, unmatched2, uele1, uele2, True, matchFlag)
+                    else:
+                        if ((uele1 <= self._sampleCount) or not self._gFlag):
+                            self.matchListO(unmatched2, unmatched1, uele2, uele1, False, matchFlag)
+                        else:
+                            self.matchList(unmatched2, unmatched1, uele2, uele1, False, matchFlag)
+
+            if (muc1 < ucount1):
+                self._matchp[0] = XTree.NO_MATCH
+                for i in range(start,elementCount1):
+                    if (not matched1[i]):
+                        self._xtree1.addMatching(elements1[i], self._matchp)
+            elif (muc2 < ucount2):
+                self._matchp[0] = XTree.NO_MATCH
+                for i in range(elementCount2):
+                    if (not matched2[i]):
+                        self._xtree2.addMatching(elements2[i], self._matchp)
+
+
+    # Diff and match two lists of attributes
+    # @param    attrCount1    number of attributes in the 1st list
+    # @param    attrCount2    number of attributes in the 2nd list
+    def diffAttributes(self, attrCount1,  attrCount2):
+        if ((attrCount1 == 1) and (attrCount2 == 1)):
+            ah1 = self._xtree1.getHashValue(self._attrList1[0])
+            ah2 = self._xtree2.getHashValue(self._attrList2[0])
+            if (ah1 == ah2):
+                return
+
+            tag1 = self._xtree1.getTag(self._attrList1[0])
+            tag2 = self._xtree2.getTag(self._attrList2[0])
+            if (tag1.compareTo(tag2) == 0):
+                self._matchp[0] = XTree.CHANGE
+                self._matchp[1] = self._attrList2[0]
+                self._xtree1.addMatching(self._attrList1[0], self._matchp)
+
+                self._matchp[1] = self._attrList1[0]
+                self._xtree2.addMatching(self._attrList2[0], self._matchp)
+
+                tid1 = self._xtree1.getFirstChild(self._attrList1[0])
+                tid2 = self._xtree2.getFirstChild(self._attrList2[0])
+                self._matchp[1] = tid2
+                self._xtree1.addMatching(tid1, self._matchp)
+
+                self._matchp[1] = tid1
+                self._xtree2.addMatching(tid2, self._matchp)
+
+                return
+            else:
+                self._matchp[0] = XTree.NO_MATCH
+                self._xtree1.addMatching(self._attrList1[0], self._matchp)
+                self._xtree2.addMatching(self._attrList2[0], self._matchp)
+                return
+
+        for i in range(attrCount2):
+            self._attrHash[i] = self._xtree2.getHashValue(self._attrList2[i])
+            self._attrTag[i] = self._xtree2.getTag(self._attrList2[i])
+            self._attrMatch[i] = False
+
+        matchCount = 0
+        for i in range(attrCount1):
+            attr1 = self._attrList1[i]
+            ah1 = self._xtree1.getHashValue(attr1)
+            tag1 = self._xtree1.getTag(attr1)
+
+            found = False
+            for j in range(attrCount2):
+                attr2 = self._attrList2[j]
+                if (self._attrMatch[j]):
+                    continue
+                elif (ah1 == self._attrHash[j]):
+                    self._attrMatch[j] = True
+                    matchCount += 1
+                    found = True
+                    break
+                elif (tag1.compareTo(self._attrTag[j]) == 0):
+                    self._attrMatch[j] = True
+                    matchCount += 1
+
+                    self._matchp[0] = XTree.CHANGE
+                    self._matchp[1] = attr2
+                    self._xtree1.addMatching(attr1, self._matchp)
+
+                    self._matchp[1] = attr1
+                    self._xtree2.addMatching(attr2, self._matchp)
+
+                    tid1 = self._xtree1.getFirstChild(attr1)
+                    tid2 = self._xtree2.getFirstChild(attr2)
+                    self._matchp[1] = tid2
+                    self._xtree1.addMatching(tid1, self._matchp)
+
+                    self._matchp[1] = tid1
+                    self._xtree2.addMatching(tid2, self._matchp)
+
+                    found = True
+                    break
+
+            if (not found):
+                self._matchp[0] = XTree.NO_MATCH
+                self._xtree1.addMatching(attr1, self._matchp)
+
+        if (matchCount != attrCount2):
+            self._matchp[0] = XTree.NO_MATCH
+            for i in range(attrCount2):
+                if (not self._attrMatch[i]):
+                    self._xtree2.addMatching(self._attrList2[i],
+                                self._matchp)
+
+    # Diff and match two lists of text nodes.
+    # XXX This is just a hack that treats text nodes as unordered, to
+    # be consistent with the entire algorithm.
+    # @param    textCount1    number of text nodes in the 1st list
+    # @param    textCount2    number of text nodes in the 2nd list
+
+    def diffText(self, textCount1, textCount2):
+        for i in range(textCount1):
+            self._textMatch1[i] = False
+        for i in range(textCount2):
+            self._textMatch2[i] = False
+            self._textHash[i] = self._xtree2.getHashValue(self._textList2[i])
+
+        mcount = 0
+        for i in range(textCount1):
+            hash1 = self._xtree1.getHashValue(self._textList1[i])
+            for j in range(textCount2):
+                if (not self._textMatch2[j] and (hash1 == self._textHash[j])):
+                    self._textMatch1[i] = True
+                    self._textMatch2[j] = True
+                    mcount += 1
+                    break
+
+            if (mcount == textCount2):
+                break
+
+        if ((mcount < textCount1) and (textCount1 <= textCount2)):
+            self._matchp[0] = XTree.CHANGE
+            i = 0
+            j = 0
+            while (i < textCount1) and (mcount < textCount1):
+                if (self._textMatch1[i]):
+                    continue
+                while self._textMatch2[j]:
+                    j += 1
+                self._matchp[1] = self._textList2[j]
+                self._xtree1.addMatching(self._textList1[i], self._matchp)
+                self._textMatch1[i] = True
+                self._matchp[1] = self._textList1[i]
+                self._xtree2.addMatching(self._textList2[j], self._matchp)
+                self._textMatch2[j] = True
+                mcount += 1
+                i += 1
+        elif ((mcount < textCount2) and (textCount2 < textCount1)):
+            self._matchp[0] = XTree.CHANGE
+            i = 0
+            j = 0
+            while (i < textCount2) and (mcount < textCount2):
+                if (self._textMatch2[i]):
+                    continue
+                while (self._textMatch1[j]):
+                    j += 1
+                self._matchp[1] = self._textList1[j]
+                self._xtree2.addMatching(self._textList2[i], self._matchp)
+                self._textMatch2[i] = True
+                self._matchp[1] = self._textList2[i]
+                self._xtree1.addMatching(self._textList1[j], self._matchp)
+                self._textMatch1[j] = True
+                mcount += 1
+                i += 1
+
+        self._matchp[0] = XTree.NO_MATCH
+        if (mcount < textCount1):
+            for i in range(textCount1):
+                if (not self._textMatch1[i]):
+                    self._xtree1.addMatching(self._textList1[i],
+                                self._matchp)
+        elif (mcount < textCount2):
+            for i in range(textCount2):
+                if (not self._textMatch2[i]):
+                    self._xtree2.addMatching(self._textList2[i],
+                                self._matchp)
+
+
+    # Filter out matched nodepairs.
+    # @param    elements1    node list #1
+    # @param    elements2    node list #2
+    # @param    matched1    match list #1
+    # @param    matched2    match list #2
+    # @return    how many matched pairs found
+
+    def _matchFilter(self, elements1, count1, elements2, count2, matched1, matched2):
+        value1 =  int[count1]
+        value2 =  int[count2]
+
+        for i in range(count1):
+            value1[i] = self._xtree1.getHashValue(elements1[i])
+            matched1[i] = False
+        for i in range(count2):
+            value2[i] = self._xtree2.getHashValue(elements2[i])
+            matched2[i] = False
+
+        mcount = 0
+        for i in range(count2):
+            for j in range (count1):
+                if (not matched1[j] and not matched2[i] and (value1[j] == value2[i])):
+                    matched1[j] = True
+                    matched2[i] = True
+                    mcount += 1
+                    break
+
+        return mcount
+
+
+    # Find minimal cost matching between two node lists
+    # Record the matching info back to the trees
+    # Using the original algorithm
+    # @param    nodes1        node list #1
+    # @param    nodes2        node list #2
+    # @param    count1        # of nodes in node list #1
+    # @param    count2        # of nodes in node list #2
+    # @param    treeOrder    True for original, False for inverse
+    # @param    matchFlag    indicates if distance computation needed
+
+    def matchListO(self, nodes1, nodes2, count1, count2, treeOrder, matchFlag):
+        distance =  []
+        matching1 =  []
+        matching2 =  []
+
+        # insert cost.
+        distance[count1] =  int[count2+1]
+        for i in range(count2):
+            if treeOrder:
+                distance[count1][i] = self._xtree2.getDecendentsCount(nodes2[i])
+            else:
+                distance[count1][i] = self._xtree1.getDecendentsCount(nodes2[i]) + 1
+
+        for i in range(count1):
+            distance[i] =  int[count2+1]
+            if treeOrder:
+                deleteCost = self._xtree1.getDecendentsCount(nodes1[i])
+            else:
+                deleteCost = self._xtree2.getDecendentsCount(nodes1[i]) + 1
+            for j in range(count2):
+                dist = 0
+                if (matchFlag):
+                    if treeOrder:
+                        dist = self._xlut.get(nodes1[i], nodes2[j])
+                    else:
+                        dist = self._xlut.get(nodes2[j], nodes1[i])
+                else:
+                    if treeOrder:
+                        dist = distance(nodes1[i], nodes2[j], True, XTree.NO_CONNECTION)
+                    else:
+                        dist = distance(nodes2[j], nodes1[i], True, XTree.NO_CONNECTION)
+                    # the default mode.
+                    if (not self._oFlag and (dist > 1) and (dist >= self._NO_MATCH_THRESHOLD * (deleteCost + distance[count1][j]))):
+                        dist = XTree.NO_CONNECTION
+                    if (dist < XTree.NO_CONNECTION):
+                        if (treeOrder):
+                            self._xlut.add(nodes1[i],
+                                  nodes2[j],
+                                  dist)
+                        else:
+                            self._xlut.add(nodes2[j],
+                                  nodes1[i],
+                                  dist)
+                distance[i][j] = dist
+            # delete cost.
+            distance[i][count2] = deleteCost
+
+        # compute the minimal cost matching.
+        self.findMatching(count1, count2, distance, matching1, matching2)
+
+        for i in range(count1):
+            if (matching1[i] == XTree.NO_MATCH):
+                self._matchp[0] = XTree.NO_MATCH
+            else:
+                self._matchp[0] = XTree.CHANGE
+                self._matchp[1] = nodes2[matching1[i]]
+            if (treeOrder):
+                self._xtree1.addMatching(nodes1[i], self._matchp)
+            else:
+                self._xtree2.addMatching(nodes1[i], self._matchp)
+
+        for i in range(count2):
+            if (matching2[i] == XTree.NO_MATCH):
+                self._matchp[0] = XTree.NO_MATCH
+            else:
+                self._matchp[0] = XTree.CHANGE
+                self._matchp[1] = nodes1[matching2[i]]
+            if (treeOrder):
+                self._xtree2.addMatching(nodes2[i], self._matchp)
+            else:
+                self._xtree1.addMatching(nodes2[i], self._matchp)
+
+        for i in range(count1):
+            if (matching1[i] != XTree.NO_MATCH):
+                todo1 = nodes1[i]
+                todo2 = nodes2[matching1[i]]
+                if (treeOrder):
+                    if (self._xtree1.isElement(todo1) and self._xtree2.isElement(todo2)):
+                        self.xdiff(todo1, todo2, True)
+                else:
+                    if (self._xtree1.isElement(todo2) and self._xtree2.isElement(todo1)):
+                        self.xdiff(todo2, todo1, True)
+
+
+    # Find minimal cost matching between two node lists
+    # Record the matching info back to the trees
+    # Do sampling.
+    # @param    nodes1        node list #1
+    # @param    nodes2        node list #2
+    # @param    count1        # of nodes in node list #1
+    # @param    count2        # of nodes in node list #2
+    # @param    treeOrder    True for original, False for inverse
+    # @param    matchFlag    indicates if distance computation needed
+
+    def matchList(self, nodes1, nodes2, count1, count2, treeOrder, matchFlag):
+        matching1 =  []
+        matching2 =  []
+        for i in range(count1):
+            matching1[i] = XTree.NO_MATCH
+        for i in range(count2):
+            matching2[i] = XTree.NO_MATCH
+
+        if (matchFlag):
+            for i in range(count1):
+                for j in range(count2):
+                    if treeOrder:
+                        d = self._xlut.get(nodes1[i], nodes2[j])
+                    else: 
+                        d = self._xlut.get(nodes2[j], nodes1[i])
+                    if (d != XTree.NO_CONNECTION):
+                        matching1[i] = j
+                        matching2[j] = i
+                        break
+        else:
+            r =  Random(time.time()) # FIXME
+            scount1 = 0
+            scount2 = 0
+            matchingThreshold = 0
+            i = 0
+            while (i < self._sampleCount) and (scount2 < count2):
+                snode = r.nextInt(count2 - scount2) + scount2
+                dist = XTree.NO_CONNECTION
+                bestmatch = XTree.NO_MATCH
+                for j in range(scount1,count1):
+                    if treeOrder:
+                        d = self.distance(nodes1[j], nodes2[snode], False, dist)
+                    else:
+                        d = self.distance(nodes2[snode], nodes1[j], False, dist)
+                    if (d < dist):
+                        dist = d
+                        bestmatch = j
+                        if (d == 1):
+                            break
+                scount2 += 1
+
+                if treeOrder:
+                    deleteCost = self._xtree2.getDecendentsCount(nodes2[snode]) + 1
+                else:
+                    deleteCost = self._xtree1.getDecendentsCount(nodes2[snode]) + 1
+                if ((dist > 1) and (dist > (self._NO_MATCH_THRESHOLD * deleteCost))):
+                    tmp = nodes2[snode]
+                    nodes2[snode] = nodes2[scount2]
+                    nodes2[scount2] = tmp
+                else:
+                    tmp = nodes1[bestmatch]
+                    nodes1[bestmatch] = nodes1[scount1]
+                    nodes1[scount1] = tmp
+                    tmp = nodes2[snode]
+                    nodes2[snode] = nodes2[scount2]
+                    nodes2[scount2] = tmp
+
+                    if (treeOrder):
+                        self._xlut.add(nodes1[scount1], nodes2[scount2], dist)
+                    else:
+                        self._xlut.add(nodes2[scount2], nodes1[scount1], dist)
+                    matching1[scount1] = scount2
+                    matching2[scount2] = scount1
+
+                    i += 1
+                    scount1 += 1
+                    if (matchingThreshold < dist):
+                        matchingThreshold = dist
+
+            while scount2 < count2:
+                dist = XTree.NO_CONNECTION
+                bestmatch = XTree.NO_MATCH
+                for i in range(scount1,count1):
+                    if treeOrder:
+                        d = self.distance(nodes1[i], nodes2[scount2], False, dist)
+                    else:
+                        d = self.distance(nodes2[scount2], nodes1[i], False, dist)
+                    if (d <= matchingThreshold):
+                        dist = d
+                        bestmatch = i
+                        break
+                    elif (d < dist):
+                        dist = d
+                        bestmatch = i
+
+                if (bestmatch != XTree.NO_MATCH):
+                    tmp = nodes1[bestmatch]
+                    nodes1[bestmatch] = nodes1[scount1]
+                    nodes1[scount1] = tmp
+
+                    if (treeOrder):
+                        self._xlut.add(nodes1[scount1], nodes2[scount2], dist)
+                    else:
+                        self._xlut.add(nodes2[scount2], nodes1[scount1], dist)
+                    matching1[scount1] = scount2
+                    matching2[scount2] = scount1
+                    scount1 += 1
+                scount2 += 1
+
+        # Record matching
+        for i in range(count1):
+            if (matching1[i] == XTree.NO_MATCH):
+                self._matchp[0] = XTree.NO_MATCH
+            else:
+                self._matchp[0] = XTree.CHANGE
+                self._matchp[1] = nodes2[matching1[i]]
+            if (treeOrder):
+                self._xtree1.addMatching(nodes1[i], self._matchp)
+            else:
+                self._xtree2.addMatching(nodes1[i], self._matchp)
+
+        for i in range(count2):
+            if (matching2[i] == XTree.NO_MATCH):
+                self._matchp[0] = XTree.NO_MATCH
+            else:
+                self._matchp[0] = XTree.CHANGE
+                self._matchp[1] = nodes1[matching2[i]]
+            if (treeOrder):
+                self._xtree2.addMatching(nodes2[i], self._matchp)
+            else:
+                self._xtree1.addMatching(nodes2[i], self._matchp)
+
+        for i in range(count1):
+            if (matching1[i] != XTree.NO_MATCH):
+                todo1 = nodes1[i]
+                todo2 = nodes2[matching1[i]]
+                if (treeOrder):
+                    if (self._xtree1.isElement(todo1) and self._xtree2.isElement(todo2)):
+                        self.xdiff(todo1, todo2, True)
+                else:
+                    if (self._xtree1.isElement(todo2) and self._xtree2.isElement(todo1)):
+                        self.xdiff(todo2, todo1, True)
+
+
+    # Compute (minimal-editing) distance between two nodes.
+    # @param    eid1        element id #1
+    # @param    eid2        element id #2
+    # @param    toRecord    whether or not to keep the result
+    # @param    threshold    No need to return a distance higher
+    #                than this threshold
+    # @return    the distance
+
+    def distance(self, eid1, eid2, toRecord, threshold):
+        isE1 = self._xtree1.isElement(eid1)
+        isE2 = self._xtree2.isElement(eid2)
+        if (isE1 and isE2):
+            if (self._xtree1.getTag(eid1).compareTo(self._xtree2.getTag(eid2)) != 0):
+                return XTree.NO_CONNECTION
+            else:
+                dist = self._xdiff(eid1, eid2, threshold)
+                if (toRecord and (dist < XTree.NO_CONNECTION)):
+                    self._xlut.add(eid1, eid2, dist)
+                return dist
+        elif (not isE1 and not isE2):
+            return 1
+        else:
+            return XTree.NO_CONNECTION
+
+
+    # To compute the editing distance between two nodes
+    # @param    pid1        parent id #1
+    # @param    pid2        parent id #2
+    # @param    threshold    No need to return a distance higher
+    #                  than this threshold
+    # @return    the distance
+
+    def _xdiff(self, pid1, pid2, threshold):
+        dist = 0
+
+        # diff attributes.
+        attrCount1 = 0
+        attrCount2 = 0
+        attr1 = self._xtree1.getFirstAttribute(pid1)
+        while (attr1 != XTree.NULL_NODE):
+            self._attrList1[attrCount1] = attr1
+            attrCount1 += 1
+            attr1 = self._xtree1.getNextAttribute(attr1)
+        attr2 = self._xtree2.getFirstAttribute(pid2)
+        while (attr2 != XTree.NULL_NODE):
+            self._attrList2[attrCount2] = attr2
+            attrCount2 += 1
+            attr2 = self._xtree2.getNextAttribute(attr2)
+
+        if (attrCount1 == 0):
+            dist = attrCount2 * 2
+        elif (attrCount2 == 0):
+            dist = attrCount1 * 2
+        else:
+            dist = self._diffAttributes(attrCount1, attrCount2)
+        if (self._gFlag and (dist >= threshold)):
+            return XTree.NO_CONNECTION
+
+        # Match second level nodes first.
+        count1 = self._xtree1.getChildrenCount(pid1) - attrCount1
+        count2 = self._xtree2.getChildrenCount(pid2) - attrCount2
+
+        if (count1 == 0):
+            node2 = self._xtree2.getFirstChild(pid2)
+            while (node2 != XTree.NULL_NODE):
+                dist += self._xtree2.getDecendentsCount(node2) + 1
+                if (self._gFlag and (dist >= threshold)):
+                    return XTree.NO_CONNECTION
+                node2 = self._xtree2.getNextSibling(node2)
+        elif (count2 == 0):
+            node1 = self._xtree1.getFirstChild(pid1)
+            while (node1 != XTree.NULL_NODE):
+                dist += self._xtree1.getDecendentsCount(node1) + 1
+                if (self._gFlag and (dist >= threshold)):
+                    return XTree.NO_CONNECTION
+                node1 = self._xtree1.getNextSibling(node1)
+        elif ((count1 == 1) and (count2 == 1)):
+            node1 = self._xtree1.getFirstChild(pid1)
+            node2 = self._xtree2.getFirstChild(pid2)
+
+            if (self._xtree1.getHashValue(node1) == self._xtree2.getHashValue(node2)):
+                return dist
+
+            isE1 = self._xtree1.isElement(node1)
+            isE2 = self._xtree2.isElement(node2)
+
+            if (isE1 and isE2):
+                tag1 = self._xtree1.getTag(node1)
+                tag2 = self._xtree2.getTag(node2)
+                if (tag1.compareTo(tag2) == 0):
+                    dist += self._xdiff(node1, node2, threshold - dist)
+                else:
+                    dist += self._xtree1.getDecendentsCount(node1) + self._xtree2.getDecendentsCount(node2) + 2
+            elif (not isE1 and not isE2):
+                dist += 1
+            else:
+                dist += self._xtree1.getDecendentsCount(node1) + self._xtree2.getDecendentsCount(node2) + 2
+        else:
+            elements1 =  int[count1]
+            elements2 =  int[count2]
+            elementCount1 = 0
+            textCount1 = 0
+            elementCount2 = 0
+            textCount2 = 0
+
+            child1 = self._xtree1.getFirstChild(pid1)
+            if (self._xtree1.isElement(child1)):
+                elements1[elementCount1] = child1
+                elementCount1 += 1
+            else:
+                self._textList1[textCount1] = child1
+                textCount1 += 1
+            for i in range(1,count1):
+                child1 = self._xtree1.getNextSibling(child1)
+                if (self._xtree1.isElement(child1)):
+                    elements1[elementCount1] = child1
+                    elementCount1 += 1
+                else:
+                    self._textList1[textCount1] = child1
+                    textCount1 += 1
+
+            child2 = self._xtree2.getFirstChild(pid2)
+            if (self._xtree2.isElement(child2)):
+                elements2[elementCount2] = child2
+                elementCount2 += 1
+            else:
+                self._textList2[textCount2] = child2
+                textCount2 += 1
+            for i in range(1,count2):
+                child2 = self._xtree2.getNextSibling(child2)
+                if (self._xtree2.isElement(child2)):
+                    elements2[elementCount2] = child2
+                    elementCount2 += 1
+                else:
+                    self._textList2[textCount2] = child2
+                    textCount2 += 1
+
+            # Match text nodes.
+            if (textCount1 == 0):
+                dist += textCount2
+            elif (textCount2 == 0):
+                dist += textCount1
+            else:
+                dist += self._diffText(textCount1, textCount2)
+
+            if (self._gFlag and (dist >= threshold)):
+                return XTree.NO_CONNECTION
+
+            matched1 =  []
+            matched2 =  []
+            mcount = self._matchFilter(elements1, elementCount1,
+                              elements2, elementCount2,
+                              matched1, matched2)
+
+            if ((elementCount1 == mcount) and (elementCount2 == mcount)):
+                return dist
+            if (elementCount1 == mcount):
+                for i in range(elementCount2):
+                    if (not matched2[i]):
+                        dist += self._xtree2.getDecendentsCount(elements2[i]) + 1
+                        if (self._gFlag and (dist >= threshold)):
+                            return XTree.NO_CONNECTION
+                return dist
+            if (elementCount2 == mcount):
+                for i in range(elementCount1):
+                    if (not matched1[i]):
+                        dist += self._xtree1.getDecendentsCount(elements1[i]) + 1
+                        if (self._gFlag and (dist >= threshold)):
+                            return XTree.NO_CONNECTION
+                return dist
+
+            # Write the list of unmatched nodes.
+            ucount1 = elementCount1 - mcount
+            ucount2 = elementCount2 - mcount
+            unmatched1 =  []
+            unmatched2 =  []
+            muc1 = 0
+            muc2 = 0
+            start = 0
+
+            while ((muc1 < ucount1) and (muc2 < ucount2)):
+                while (start < elementCount1) and matched1[start]:
+                    start += 1
+                startTag = self._xtree1.getTag(elements1[start])
+                uele1 = 0
+                uele2 = 0
+                muc1 += 1
+                unmatched1[uele1] = elements1[start]
+                uele1 += 1
+                matched1[start] = True
+                start += 1
+
+                i = start
+                while (i < elementCount1) and (muc1 < ucount1):
+                    if (not matched1[i] and startTag.equals(self._xtree1.getTag(elements1[i]))):
+                        matched1[i] = True
+                        muc1 += 1
+                        unmatched1[uele1] = elements1[i]
+                        uele1 += 1
+                    i += 1
+
+                i = 0
+                while (i < elementCount2) and (muc2 < ucount2):
+                    if (not matched2[i] and startTag.equals(self._xtree2.getTag(elements2[i]))):
+                        matched2[i] = True
+                        muc2 += 1
+                        unmatched2[uele2] = elements2[i]
+                        uele2 += 1
+                    i += 1
+
+                if (uele2 == 0):
+                    for i in range(uele1):
+                        dist += self._xtree1.getDecendentsCount(unmatched1[i])
+                else:
+#                    if ((uele1 == 1) and (uele2 == 1)):
+#                        dist += self._xdiff(unmatched1[0],
+#                               unmatched2[0],
+#                               threshold-dist)
+#                elif (uele1 >= uele2):
+                # To find minimal-cost matching between those unmatched.
+                    if (uele1 >= uele2):
+                        if ((uele2 <= self._sampleCount) or not self._gFlag):
+                            dist += self._matchListO(unmatched1, unmatched2, uele1, uele2, True)
+                        else:
+                            dist += self._matchList(unmatched1, unmatched2, uele1, uele2, True, threshold - dist)
+                    else:
+                        if ((uele1 <= self._sampleCount) or not self._gFlag):
+                            dist += self._matchListO(unmatched2, unmatched1, uele2, uele1, False)
+                        else:
+                            dist += self._matchList(unmatched2, unmatched1, uele2, uele1, False, threshold - dist)
+
+                if (self._gFlag and (dist >= threshold)):
+                    return XTree.NO_CONNECTION
+
+            if (muc1 < ucount1):
+                for i in range (start,elementCount1):
+                    if (not matched1[i]):
+                        dist += self._xtree1.getDecendentsCount(elements1[i])
+            elif (muc2 < ucount2):
+                for i in range(elementCount2):
+                    if (not matched2[i]):
+                        dist += self._xtree2.getDecendentsCount(elements2[i])
+
+        if (not self._gFlag or (dist < threshold)):
+            return dist
+        else:
+            return XTree.NO_CONNECTION
+
+
+    # Diff two lists of attributes
+    # @param    attrCount1    number of attributes in the 1st list
+    # @param    attrCount2    number of attributes in the 2nd list
+    # @return    the distance
+
+    def _diffAttributes(self, attrCount1, attrCount2):
+        if ((attrCount1 == 1) and (attrCount2 == 1)):
+            ah1 = self._xtree1.getHashValue(self._attrList1[0])
+            ah2 = self._xtree2.getHashValue(self._attrList2[0])
+            if (ah1 == ah2):
+                return 0
+
+            tag1 = self._xtree1.getTag(self._attrList1[0])
+            tag2 = self._xtree2.getTag(self._attrList2[0])
+            if (tag1.compareTo(tag2) == 0):
+                return 1
+            else:
+                return 2
+
+        dist = 0
+        for i in range(attrCount2):
+            self._attrHash[i] = self._xtree2.getHashValue(self._attrList2[i])
+            self._attrTag[i] = self._xtree2.getTag(self._attrList2[i])
+            self._attrMatch[i] = False
+
+        matchCount = 0
+        for i in range(attrCount1):
+            ah1 = self._xtree1.getHashValue(self._attrList1[i])
+            tag1 = self._xtree1.getTag(self._attrList1[i])
+            found = False
+
+            for j in range(attrCount2):
+                if (self._attrMatch[j]):
+                    continue
+                elif (ah1 == self._attrHash[j]):
+                    self._attrMatch[j] = True
+                    found = True
+                    matchCount += 1
+                    break
+                elif (tag1.compareTo(self._attrTag[j]) == 0):
+                    self._attrMatch[j] = True
+                    dist += 1
+                    found = True
+                    matchCount += 1
+                    break
+
+            if (not found):
+                dist += 2
+
+        dist += (attrCount2 - matchCount) * 2
+        return dist
+
+
+    # Diff and match two lists of text nodes.
+    # XXX This is just a hack that treats text nodes as unordered, to
+    # be consistent with the entire algorithm.
+    # @param    textCount1    number of text nodes in the 1st list
+    # @param    textCount2    number of text nodes in the 2nd list
+    # @return the "distance" between these two lists.
+
+    def _diffText(self, textCount1, textCount2):
+        for i in range(textCount2):
+            self._textMatch2[i] = False
+            self._textHash[i] = self._xtree2.getHashValue(self._textList2[i])
+
+        mcount = 0
+        for i in range(textCount1):
+            hash1 = self._xtree1.getHashValue(self._textList1[i])
+            for j in range(textCount2):
+                if (not self._textMatch2[j] and (hash1 == self._textHash[j])):
+                    self._textMatch2[j] = True
+                    mcount += 1
+                    break
+
+            if (mcount == textCount2):
+                break
+
+        if (textCount1 >= textCount2):
+            return textCount1 - mcount
+        else:
+            return textCount2 - mcount
+
+
+    # Find minimal cost matching between two node lists
+    # Using the original algorithm
+    # @param    nodes1        node list #1
+    # @param    nodes2        node list #2
+    # @param    count1        # of nodes in node list #1
+    # @param    count2        # of nodes in node list #2
+    # @param    treeOrder    True for original, False for inverse
+
+    def _matchListO(self, nodes1, nodes2, count1, count2, treeOrder):
+        distance =  []
+        matching1 =  []
+        matching2 =  []
+
+        # insert cost.
+        distance[count1] =  int[count2+1]
+        for i in range(count2):
+            if treeOrder:
+                distance[count1][i] = self._xtree2.getDecendentsCount(nodes2[i]) + 1
+            else:
+                distance[count1][i] = self._xtree1.getDecendentsCount(nodes2[i]) + 1
+
+        for i in range(count1):
+            distance[i] =  int[count2+1]
+            if treeOrder:
+                deleteCost = self._xtree1.getDecendentsCount(nodes1[i]) + 1
+            else:
+                deleteCost = self._xtree2.getDecendentsCount(nodes1[i]) + 1
+            for j in range(count2):
+                if treeOrder:
+                    dist = distance(nodes1[i], nodes2[j], True, XTree.NO_CONNECTION)
+                else:
+                    dist = distance(nodes2[j], nodes1[i], True, XTree.NO_CONNECTION)
+                # the default mode.
+                if (not self._oFlag and (dist > 1) and (dist < XTree.NO_CONNECTION) and \
+                    (dist >= self._NO_MATCH_THRESHOLD * \
+                    (deleteCost + distance[count1][j]))):
+                        dist = XTree.NO_CONNECTION
+
+                if (dist < XTree.NO_CONNECTION):
+                    if (treeOrder):
+                        self._xlut.add(nodes1[i], nodes2[j], dist)
+                    else:
+                        self._xlut.add(nodes2[j], nodes1[i], dist)
+                distance[i][j] = dist
+            # delete cost.
+            distance[i][count2] = deleteCost
+
+        # compute the minimal cost matching.
+        return self.findMatching(count1, count2, distance, matching1,
+                    matching2)
+
+
+    # Find minimal cost matching between two node lists
+    # Do sampling
+    # @param    nodes1        node list #1
+    # @param    nodes2        node list #2
+    # @param    count1        # of nodes in node list #1
+    # @param    count2        # of nodes in node list #2
+    # @param    treeOrder    True for original, False for inverse
+    # @param    threshold    No need to return a distance higher
+    #                  than this threshold
+    def _matchList(self, nodes1, nodes2, count1, count2, treeOrder, threshold):
+        matching1 =  []
+        matching2 =  []
+        for i in range(count1):
+            matching1[i] = XTree.NO_MATCH
+        for i in range(count2):
+            matching2[i] = XTree.NO_MATCH
+
+        distance = 0
+        r =  Random(time.time())
+        scount1 = 0
+        scount2 = 0
+        matchingThreshold = 0
+
+        i = 0
+        while (i < self._sampleCount) and (scount2 < count2):
+            snode = r.nextInt(count2 - scount2) + scount2
+            dist = XTree.NO_CONNECTION
+            bestmatch = XTree.NO_MATCH
+            for j in range(scount1,count1):
+                if treeOrder:
+                    d = distance(nodes1[j], nodes2[snode], False, threshold - distance)
+                else:
+                    d = distance(nodes2[snode], nodes1[j], False, threshold - distance)
+                if (d < dist):
+                    dist = d
+                    bestmatch = j
+                    if (d == 1):
+                        break
+
+            if treeOrder:
+                deleteCost = self._xtree2.getDecendentsCount(nodes2[snode]) + 1
+                deleteCost =  self._xtree1.getDecendentsCount(nodes2[snode]) + 1
+
+            if ((dist > 1) and (dist > (self._NO_MATCH_THRESHOLD * deleteCost))):
+                tmp = nodes2[snode]
+                nodes2[snode] = nodes2[scount2]
+                nodes2[scount2] = tmp
+                distance += deleteCost
+            else:
+                tmp = nodes1[bestmatch]
+                nodes1[bestmatch] = nodes1[scount1]
+                nodes1[scount1] = tmp
+                tmp = nodes2[snode]
+                nodes2[snode] = nodes2[scount2]
+                nodes2[scount2] = tmp
+
+                if (treeOrder):
+                    self._xlut.add(nodes1[scount1], nodes2[scount2], dist)
+                else:
+                    self._xlut.add(nodes2[scount2], nodes1[scount1], dist)
+                matching1[scount1] = scount2
+                matching2[scount2] = scount1
+
+                i += 1
+                scount1 += 1
+                if (matchingThreshold < dist):
+                    matchingThreshold = dist
+                distance += dist
+
+            if (distance >= threshold):
+                return XTree.NO_CONNECTION
+            scount2 += 1
+
+        while (scount2 < count2):
+            if treeOrder:
+                deleteCost = self._xtree2.getDecendentsCount(nodes2[scount2]) + 1
+            else:
+                deleteCost = self._xtree1.getDecendentsCount(nodes2[scount2]) + 1
+            dist = XTree.NO_CONNECTION
+            bestmatch = XTree.NO_MATCH
+            for i in range(scount1,count1):
+                if treeOrder:
+                    d = distance(nodes1[i], nodes2[scount2], False, threshold - distance)
+                else:
+                    d = distance(nodes2[scount2], nodes1[i], False, threshold - distance)
+                if (d <= matchingThreshold):
+                    dist = d
+                    bestmatch = i
+                    break
+                elif ((d == 1) or ( d < (self._NO_MATCH_THRESHOLD * dist))):
+                    dist = d
+                    bestmatch = i
+
+            if (bestmatch == XTree.NO_MATCH):
+                distance += deleteCost
+            else:
+                tmp = nodes1[bestmatch]
+                nodes1[bestmatch] = nodes1[scount1]
+                nodes1[scount1] = tmp
+
+                if (treeOrder):
+                    self._xlut.add(nodes1[scount1], nodes2[scount2], dist)
+                else:
+                    self._xlut.add(nodes2[scount2], nodes1[scount1], dist)
+
+                matching1[scount1] = scount2
+                matching2[scount2] = scount1
+                scount1 += 1
+                distance += dist
+
+            if (distance >= threshold):
+                return XTree.NO_CONNECTION
+            scount2 += 1
+
+        for i in range(count1):
+            if (matching1[i] == XTree.NO_MATCH):
+                if treeOrder:
+                    distance += self._xtree1.getDecendentsCount(nodes1[i]) + 1
+                else:
+                    distance += self._xtree2.getDecendentsCount(nodes1[i]) + 1
+                if (distance >= threshold):
+                    return XTree.NO_CONNECTION
+
+        return distance
+
+
+    # Perform minimal-cost matching between two node lists #1
+    # Trivial part.
+    # @param    count1    length of node list #1
+    # @param    count2    length of node list #2
+    # @param    dist    distance matrix
+    # @param    matching1    matching list (for node list #1)
+    # @param    matching2    matching list (for node list #2)
+    # @return    distance
+    def findMatching(self, count1, count2, dist, matching1, matching2):
+        if (count1 == 1):
+            # count2 == 1
+            if (dist[0][0] < XTree.NO_CONNECTION):
+                matching1[0] = 0
+                matching2[0] = 0
+            else:
+                matching1[0] = XTree.DELETE
+                matching2[0] = XTree.DELETE
+
+            return dist[0][0]
+        elif (count2 == 1):
+            distance = 0
+            mate = 0
+            mindist = XTree.NO_CONNECTION
+            matching2[0] = XTree.DELETE
+
+            for i in range(count1):
+                matching1[i] = XTree.DELETE
+                if (mindist > dist[i][0]):
+                    mindist = dist[i][0]
+                    mate = i
+
+                # Suppose we delete every node on list1.
+                distance += dist[i][1]
+
+            if (mindist < XTree.NO_CONNECTION):
+                matching1[mate] = 0
+                matching2[0] = mate
+                distance += mindist - dist[mate][1]
+            else:
+                # Add the delete cost of the single node
+                # on list2.
+                distance += dist[count1][0]
+
+            return distance
+        elif ((count1 == 2) and (count2 == 2)):
+            distance1 = dist[0][0] + dist[1][1]
+            distance2 = dist[0][1] + dist[1][0]
+            if (distance1 < distance2):
+                if (dist[0][0] < XTree.NO_CONNECTION):
+                    matching1[0] = 0
+                    matching2[0] = 0
+                    distance1 = dist[0][0]
+                else:
+                    matching1[0] = XTree.DELETE
+                    matching2[0] = XTree.DELETE
+                    distance1 = dist[0][2] + dist[2][0]
+
+                if (dist[1][1] < XTree.NO_CONNECTION):
+                    matching1[1] = 1
+                    matching2[1] = 1
+                    distance1 += dist[1][1]
+                else:
+                    matching1[1] = XTree.DELETE
+                    matching2[1] = XTree.DELETE
+                    distance1 += dist[1][2] + dist[2][1]
+
+                return distance1
+            else:
+                if (dist[0][1] < XTree.NO_CONNECTION):
+                    matching1[0] = 1
+                    matching2[1] = 0
+                    distance2 = dist[0][1]
+                else:
+                    matching1[0] = XTree.DELETE
+                    matching2[1] = XTree.DELETE
+                    distance2 = dist[0][2] + dist[2][1]
+
+                if (dist[1][0] < XTree.NO_CONNECTION):
+                    matching1[1] = 0
+                    matching2[0] = 1
+                    distance2 += dist[1][0]
+                else:
+                    matching1[1] = XTree.DELETE
+                    matching2[0] = XTree.DELETE
+                    distance2 += dist[1][2] + dist[2][0]
+
+                return distance2
+        else:
+            return self.optimalMatching(count1, count2, dist,
+                           matching1, matching2)
+
+
+    # Perform minimal-cost matching between two node lists
+    # @param    count1    length of node list #1
+    # @param    count2    length of node list #2
+    # @param    dist    distance matrix
+    # @param    matching1    matching list (for node list #1)
+    # @param    matching2    matching list (for node list #2)
+    # @return    distance
+
+    def optimalMatching(self, count1, count2, dist, matching1, matching2):
+        # Initialize matching.
+        # Initial guess will be pair-matching between two lists.
+        # Others will be insertion or deletion
+        for i in range(count2):
+            matching1[i] = i
+        for i in range(count2, count1):
+            matching1[i] = XTree.DELETE
+
+        # Three artificial nodes: "start", "end" and "delete".
+        count = count1 + count2 + 3
+
+        # Initialize least cost matrix and path matrix.
+        # Both have been initialized at the very beginning.
+
+        # Start algorithm.
+        while (True):
+            # Construct least cost matrix.
+            self.constructLCM(dist, matching1, count1, count2)
+
+            # Initialize path matrix.
+            for i in range(count):
+                for j in range(count):
+                    self._pathMatrix[i][j] = i
+
+            # Search negative cost circuit.
+            clen = self.searchNCC(count)
+            if (clen > 0):
+                # Modify matching.
+                i = 0
+                next = 0
+                while (i < clen - 1):
+                    n1 = self._circuit[next]
+                    next = self._circuit[next+1]
+                    # Node in node list 1.
+                    if ((n1 > 0) and (n1 <= count1)):
+                        nid1 = n1 - 1
+                        nid2 = self._circuit[next] - count1 - 1
+                        if (nid2 == count2):
+                            nid2 = XTree.DELETE
+
+                        matching1[nid1] = nid2
+                    i += 1
+            else: # Stop.
+                break
+
+        distance = 0
+        # Suppose all insertion on list2
+        for i in range(count2):
+            matching2[i] = XTree.INSERT
+            distance += dist[count1][i]
+
+        # update distance by looking at matching pairs.
+        for i in range(count1):
+            mmm = matching1[i]
+            if (mmm == XTree.DELETE):
+                distance += dist[i][count2]
+            else:
+                matching2[mmm] = i
+                distance += dist[i][mmm] - dist[count1][mmm]
+
+        return distance
+
+
+    # Construct a least cost matrix (of the flow network) based on
+    # the cost matrix
+    # @param    costMatrix    cost matrix
+    # @param    matching    matching information
+    # @param    nodeCount1    # of nodes in node list 1
+    # @param    nodeCount2    # of nodes in node list 2
+
+    def constructLCM(self, costMatrix, matching, nodeCount1, nodeCount2):
+        # Three artificial nodes: "start", "end" and "delete".
+        nodeCount = nodeCount1 + nodeCount2 + 3
+
+        # Initialize.
+        for i in range(nodeCount):
+            for j in range(nodeCount):
+                self._leastCostMatrix[i][j] = XTree.NO_CONNECTION
+
+            # self.
+            self._leastCostMatrix[i][i] = 0
+
+        # Between start node and nodes in list 1.
+        # Start -> node1 = Infinity; node1 -> Start = -0.
+        for i in range(nodeCount1):
+            self._leastCostMatrix[i+1][0] = 0
+
+        # Between nodes in list2 and the end node.
+        # Unless matched (later), node2 -> end = 0
+        # end -> node2 = Infinity.
+        for i in range(nodeCount2):
+            self._leastCostMatrix[i+nodeCount1+1][nodeCount-1] = 0
+
+        deleteCount = 0
+
+        # Between nodes in list1 and nodes in list2.
+        # For matched, node1 -> node2 = Infinity
+        # node2 -> node1 = -1 * distance
+        # For unmatched, node1 -> node2 = distance
+        # node2 -> node1 = Infinity
+        for i in range(nodeCount1):
+            node1 = i + 1
+            
+            # According to cost matrix.
+            for j in range(nodeCount2):
+                node2 = j + nodeCount1 + 1
+                self._leastCostMatrix[node1][node2] = costMatrix[i][j]
+
+            # According to matching.
+            if (matching[i] == XTree.DELETE):
+                deleteCount += 1
+
+                # node1 -> Delete = Infinity
+                # Delete -> node1 = -1 * DELETE_COST
+                self._leastCostMatrix[nodeCount-2][node1] = -1 * costMatrix[i][nodeCount2]
+            else:
+                node2 = matching[i] + nodeCount1 + 1
+
+                # Between node1 and node2.
+                self._leastCostMatrix[node1][node2] = XTree.NO_CONNECTION
+                self._leastCostMatrix[node2][node1] = costMatrix[i][matching[i]] * -1
+
+                # Between node1 and delete.
+                self._leastCostMatrix[node1][nodeCount-2] = costMatrix[i][nodeCount2]
+
+                # Between node2 and end.
+                self._leastCostMatrix[node2][nodeCount-1] = XTree.NO_CONNECTION
+                self._leastCostMatrix[nodeCount-1][node2] = costMatrix[nodeCount1][matching[i]]
+
+        # Between the "Delete" and the "End".
+        # If delete all, delete -> end = Infinity; end -> delete = 0.
+        if (deleteCount == nodeCount1):
+            self._leastCostMatrix[nodeCount-1][nodeCount-2] = 0
+        # if no delete, delete -> end = 0; end -> delete = Infinity.
+        elif (deleteCount == 0):
+            self._leastCostMatrix[nodeCount-2][nodeCount-1] = 0
+        # else, both 0
+        else:
+            self._leastCostMatrix[nodeCount-2][nodeCount-1] = 0
+            self._leastCostMatrix[nodeCount-1][nodeCount-2] = 0
+
+
+    # Search for negative cost circuit in the least cost matrix.
+    # @param    nodeCount    node count
+    # @return    the length of the path if found; otherwise 0
+    def searchNCC(self, nodeCount):
+        for k in range(nodeCount):
+            for i in range(nodeCount):
+                if ((i != k) and (self._leastCostMatrix[i][k] != XTree.NO_CONNECTION)):
+                    for j in range(nodeCount):
+                        if ((j != k) and (self._leastCostMatrix[k][j] != XTree.NO_CONNECTION)):
+                            less = self._leastCostMatrix[i][k] + self._leastCostMatrix[k][j]
+                            if (less < self._leastCostMatrix[i][j]):
+                                self._leastCostMatrix[i][j] = less
+                                self._pathMatrix[i][j] = k
+                                
+                                # Found!
+                                if ((i == j) and (less < 0)):
+                                    clen = 0; # the length of the circuit.
+                                    
+                                    # Locate the circuit.
+                                    #circuit.addElement( Integer(i))
+                                    self._circuit[0] = i
+                                    self._circuit[1] = 2
+                                    
+                                    #circuit.addElement( Integer(pathMatrix[i][i]))
+                                    self._circuit[2] = self._pathMatrix[i][i]
+                                    self._circuit[3] = 4
+                                    
+                                    #circuit.addElement( Integer(i))
+                                    self._circuit[4] = i
+                                    self._circuit[5] = -1
+                                    
+                                    clen = 3
+                                    
+                                    finish = False 
+                                    while (not finish):
+                                        finish = True
+                                        cit = 0
+                                        n = 0
+                                        while (cit < clen - 1):
+                                            left = self._circuit[n]
+                                            next = self._circuit[n + 1]
+                                            if next == -1:
+                                                right = -1
+                                            else:
+                                                right = self._circuit[next]
+                                            
+                                            #int middle = pathMatrix[circuit[n-1]][circuit[n]]
+                                            middle = self._pathMatrix[left][right]
+                                            
+                                            if (middle != left):
+                                                #circuit.insert( cit, middle )
+                                                self._circuit[clen * 2] = middle
+                                                self._circuit[clen * 2 + 1] = next
+                                                self._circuit[n + 1] = clen * 2
+                                                clen += 1
+                                                
+                                                finish = False
+                                                break
+                                            n = next
+                                            cit += 1
+                                    
+                                    return clen
+                    
+        return 0
+
+
+    # For testing purpose -- print out matrixes
+    def printMatrix(self, nodeCount):
+        print "Cost Matrix:"
+        for i in range(nodeCount):
+            for j in range(nodeCount):
+                if (self._leastCostMatrix[i][j] < XTree.NO_CONNECTION):
+                    sys.stdout.write(self._leastCostMatrix[i][j] + "\t")
+                else:
+                    sys.stdout.write("\t")
+            print
+
+        print "\nPath Matrix:"
+        for i in range(nodeCount):
+            for j in range(nodeCount - 1):
+                sys.stdout.write(self._pathMatrix[i][j] + "\t")
+            print self._pathMatrix[i][nodeCount-1]
+
+
+    # Write out the diff result -- how doc1 is changed to doc2
+    # @param    input        the first/old xml document
+    # @param    output        output file name
+    # FIXME this is probably completely wrong ... IO is Java-specific!!!
+    def writeDiff(self, input, output):
+        try:
+            out = codecs.open(output, self._encoding)
+            br =  open(input)
+
+            root1 = self._xtree1.getRoot()
+            root2 = self._xtree2.getRoot()
+
+            # XXX <root > is as valid as <root>,
+            # but < root> is NOT!
+            rootTag = "<" + self._xtree1.getTag(root1)
+            line = br.readLine()
+            while (line != None):
+                if (line.indexOf(rootTag) >= 0):
+                    break
+                out.write(line + "\n")
+                line = br.readLine()
+
+            self._xtree1.getMatching(root1, self._matchp)
+            if (self._matchp[0] == XTree.DELETE):
+                self.writeDeleteNode(out, root1)
+                self.writeInsertNode(out, root2)
+            else:
+                self.writeDiffNode(out, root1, root2)
+
+            out.close()
+        except IOError as (errno, strerror):
+            print >>sys.stderr, strerror
+
+
+    # Write an element that has been deleted from the old document.
+    # @param    out    output file writer
+    # @param    node    element id
+
+    def writeDeleteNode(self, out, node):
+        if (self._xtree1.isElement(node)):
+            tag = self._xtree1.getTag(node)
+            out.write("<" + tag)
+
+            # Attributes.
+            attr = self._xtree1.getFirstAttribute(node)
+            while (attr > 0):
+                atag = self._xtree1.getTag(attr)
+                value = self._xtree1.getAttributeValue(attr)
+                out.write(" " + atag + "=\"" + value + "\"")
+                attr = self._xtree1.getNextAttribute(attr)
+
+            # Child nodes.
+            child = self._xtree1.getFirstChild(node)
+
+            if (child < 0):
+                out.write("/><?DELETE " + tag + "?>\n")
+                self._needNewLine = False
+                return
+
+            out.write("><?DELETE " + tag + "?>\n")
+            self._needNewLine = False
+
+            while (child > 0):
+                self.writeMatchNode(out, self._xtree1, child)
+                child = self._xtree1.getNextSibling(child)
+
+            if (self._needNewLine):
+                out.write("\n")
+                self._needNewLine = False
+
+            out.write("</" + tag + ">\n")
+        else:
+            out.write("<?DELETE \"" + self.constructText(self._xtree1, node) +
+                  "\"?>\n")
+            self._needNewLine = False
+
+
+    # Write an element that has been inserted from the  document.
+    # @param    out    output file writer
+    # @param    node    element id
+
+    def writeInsertNode(self, out, node):
+        if (self._xtree2.isElement(node)):
+            tag = self._xtree2.getTag(node)
+            out.write("<" + tag)
+
+            # Attributes.
+            attr = self._xtree2.getFirstAttribute(node)
+            while (attr > 0):
+                atag = self._xtree2.getTag(attr)
+                value = self._xtree2.getAttributeValue(attr)
+                out.write(" " + atag + "=\"" + value + "\"")
+                attr = self._xtree2.getNextAttribute(attr)
+
+            # Child nodes.
+            child = self._xtree2.getFirstChild(node)
+            if (child < 0):
+                out.write("/><?INSERT " + tag + "?>\n")
+                self._needNewLine = False
+                return
+
+            out.write("><?INSERT " + tag + "?>\n")
+            self._needNewLine = False
+
+            while (child > 0):
+                self.writeMatchNode(out, self._xtree2, child)
+                child = self._xtree2.getNextSibling(child)
+
+            if (self._needNewLine):
+                out.write("\n")
+                self._needNewLine = False
+
+            out.write("</" + tag + ">\n")
+        else:
+            out.write(self.constructText(self._xtree2, node) +
+                  "<?INSERT?>\n")
+            self._needNewLine = False
+
+
+    # Write an element that is unchanged or in a deleted node or in
+    # an inserted node.
+    # @param    out    output file writer
+    # @param    xtree    the document tree
+    # @param    node    element id
+
+    def writeMatchNode(self, out, xtree, node):
+        if (xtree.isElement(node)):
+            tag = xtree.getTag(node)
+            if (self._needNewLine):
+                out.write("\n")
+
+            out.write("<" + tag)
+
+            # Attributes.
+            attr = xtree.getFirstAttribute(node)
+            while (attr > 0):
+                atag = xtree.getTag(attr)
+                value = xtree.getAttributeValue(attr)
+                out.write(" " + atag + "=\"" + value + "\"")
+                attr = xtree.getNextAttribute(attr)
+
+            # Child nodes.
+            child = xtree.getFirstChild(node)
+            if (child < 0):
+                out.write("/>\n")
+                self._needNewLine = False
+                return
+
+            out.write(">")
+            self._needNewLine = True
+
+            while (child > 0):
+                self.writeMatchNode(out, xtree, child)
+                child = xtree.getNextSibling(child)
+
+            if (self._needNewLine):
+                out.write("\n")
+                self._needNewLine = False
+
+            out.write("</" + tag + ">\n")
+        else:
+            out.write(self.constructText(xtree, node))
+            self._needNewLine = False
+
+
+    # Write one node in the diff result.
+    # @param    out    output file writer
+    # @param    node1    the node in the first tree
+    # @param    node2    node1's conterpart in the second tree
+
+    def writeDiffNode(self, out, node1, node2):
+        if (self._xtree1.isElement(node1)):
+            tag = self._xtree1.getTag(node1)
+            if (self._needNewLine):
+                out.write("\n")
+            out.write("<" + tag)
+
+            # Attributes.
+            attr1 = self._xtree1.getFirstAttribute(node1)
+            diffff = ""
+            while (attr1 > 0):
+                atag = self._xtree1.getTag(attr1)
+                value = self._xtree1.getAttributeValue(attr1)
+                self._xtree1.getMatching(attr1, self._matchp)
+                if (self._matchp[0] == XTree.MATCH):
+                    out.write(" " + atag + "=\"" +
+                          value + "\"")
+                elif (self._matchp[0] == XTree.DELETE):
+                    out.write(" " + atag + "=\"" +
+                          value + "\"")
+                    diffff += "<?DELETE " + atag + "?>"
+                else:
+                    value2 = self._xtree2.getAttributeValue(self._matchp[1])
+                    out.write(" " + atag + "=\"" +
+                          value2 + "\"")
+                    diffff += "<?UPDATE " + atag + \
+                          " FROM \"" + value + "\"?>"
+
+                attr1 = self._xtree1.getNextAttribute(attr1)
+
+            attr2 = self._xtree2.getFirstAttribute(node2)
+            while (attr2 > 0):
+                self._xtree2.getMatching(attr2, self._matchp)
+                if (self._matchp[0] == XTree.INSERT):
+                    atag = self._xtree2.getTag(attr2)
+                    value = self._xtree2.getAttributeValue(attr2)
+                    out.write(" " + atag + "=\"" +
+                          value + "\"")
+                    diffff += "<?INSERT " + atag + "?>"
+
+                attr2 = self._xtree2.getNextAttribute(attr2)
+
+            # Child nodes.
+            child1 = self._xtree1.getFirstChild(node1)
+            if (child1 < 0):
+                out.write("/>" + diffff + "\n")
+                self._needNewLine = False
+                return
+
+            out.write(">" + diffff)
+            self._needNewLine = True
+
+            while (child1 > 0):
+                self._xtree1.getMatching(child1, self._matchp)
+                if (self._matchp[0] == XTree.MATCH):
+                    self.writeMatchNode(out, self._xtree1, child1)
+                elif (self._matchp[0] == XTree.DELETE):
+                    self.writeDeleteNode(out, child1)
+                else:
+                    self.writeDiffNode(out, child1, self._matchp[1])
+
+                child1 = self._xtree1.getNextSibling(child1)
+
+            child2 = self._xtree2.getFirstChild(node2)
+            while (child2 > 0):
+                self._xtree2.getMatching(child2, self._matchp)
+                if (self._matchp[0] == XTree.INSERT):
+                    self.writeInsertNode(out, child2)
+
+                child2 = self._xtree2.getNextSibling(child2)
+
+            if (self._needNewLine):
+                out.write("\n")
+                self._needNewLine = False
+
+            out.write("</" + tag + ">\n")
+        else:
+            out.write(self.constructText(self._xtree2, node2) +
+                  "<?UPDATE FROM \"" +
+                  self.constructText(self._xtree1, node1) + "\"?>")
+            self._needNewLine = False
+
+
+    # Construct the text node -- to handle the possible CDATA sections.
+
+    def constructText(self, xtree, eid):
+        text = xtree.getText(eid)
+        cdatalist = xtree.getCDATA(eid)
+        if (cdatalist == None):
+            return text
+
+        buf =  StringBuffer()
+        count = cdatalist.size()
+        lastEnd = 0
+
+        for i in range(0,count,2):
+            cdataStart = int(self.cdatalist[i])
+            cdataEnd = int(self.cdatalist[i+1])
+
+            if (cdataStart > lastEnd):
+                buf.append(text.substring(lastEnd, cdataStart))
+            buf.append("<![CDATA[" +
+                   text.substring(cdataStart, cdataEnd) +
+                   "]]>")
+            lastEnd = cdataEnd
+        if (lastEnd < text.length()):
+            buf.append(text.substring(lastEnd))
+
+        return buf.toString()
+
+def readParameters(args, parameters):
+    opid = 0
+    if (args.length < 3):
+        return False
+    # we are not in the object, so how can we get to these values?
+    # FIXME global module variables?
+    elif (args[0].equals("-o")):
+        _oFlag = True
+        opid += 1
+    elif (args[0].equals("-g")):
+        _gFlag = True
+        opid += 1
+
+    if (args[opid].equals("-p")):
+        opid += 1
+        p = 0
+#        try:
+        p = float(args[opid])
+        opid += 1
+# FIXME ... most likely FloatingPointError
+#        except NumberFormatException:
+#            return False
+
+        if ((p <= 0) or (p > 1)):
+            return False
+        XDiff._NO_MATCH_THRESHOLD = p
+
+    if (args[opid].equals("-e")):
+        opid += 1
+        _encoding = args[opid]
+        opid += 1
+
+    if ((args.length - opid) != 3):
+        return False
+    parameters.add(args[opid])
+    opid += 1
+    parameters.add(args[opid])
+    opid += 1
+    parameters.add(args[opid])
+
+    return True
+
+if __name__ == "__main__":
+    parameters =  []
+    if (not readParameters(sys.argv, parameters)):
+        print >>sys.stderr, __doc__
+        return
+
+    mydiff =  XDiff(parameters[0], parameters[1], parameters[2])
+\ No newline at end of file
diff --git a/XHash.py b/XHash.py
new file mode 100644
index 0000000..f0c1554
--- /dev/null
+++ b/XHash.py
@@ -0,0 +1,310 @@
+# Copyright (c) 2001 - 2005
+#     Yuan Wang. All rights reserved.
+
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright 
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the 
+# documentation and/or other materials provided with the distribution.
+# 3. Redistributions in any form must be accompanied by information on
+# how to obtain complete source code for the X-Diff software and any
+# accompanying software that uses the X-Diff software.  The source code
+# must either be included in the distribution or be available for no
+# more than the cost of distribution plus a nominal fee, and must be
+# freely redistributable under reasonable conditions.  For an executable
+# file, complete source code means the source code for all modules it
+# contains.  It does not include source code for modules or files that
+# typically accompany the major components of the operating system on
+# which the executable file runs.
+
+# THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
+# ARE DISCLAIMED.  IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
+# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+
+# <code>XHash</code> is an implementaion of DES
+class XHash:
+    private static final int    _initialPermutation[] =
+         57, 49, 41, 33 , 25, 17, 9, 1, 59, 51, 43, 35, 27, 19, 11, 3,
+         61, 53, 45, 37, 29, 21, 13, 5, 63, 55, 47, 39, 31, 23, 15, 7,
+         56, 48, 40, 32 ,24, 16, 8, 0, 58, 50, 42, 34, 26, 18, 10, 2,
+         60, 52, 44, 36, 28, 20, 12, 4, 62, 54, 46, 38, 30, 22, 14, 6
+    };
+
+    private static final int    _finalPermutation[] =
+        39, 7, 47, 15, 55, 23, 63, 31, 38, 6, 46, 14, 54, 22, 62, 30,
+        37, 5, 45, 13, 53, 21, 61, 29, 36, 4, 44, 12, 52, 20, 60, 28,
+        35, 3, 43, 11, 51, 19, 59, 27, 34, 2, 42, 10, 50, 18, 58, 26,
+        33, 1, 41, 9,  49, 17, 57, 25, 32, 0, 40,  8, 48, 16, 56, 24
+    };
+
+    private static final int    _keyReducePermutation[] =
+        60, 52, 44, 36, 59, 51, 43, 35, 27, 19, 11, 3, 58, 50,
+        42, 34, 26, 18, 10, 2, 57, 49, 41, 33, 25, 17, 9, 1,
+        28, 20, 12, 4, 61, 53, 45, 37, 29, 21, 13, 5, 62, 54,
+        46, 38, 30, 22, 14, 6, 63, 55, 47, 39, 31, 23, 15, 7
+    };
+ 
+    private static final int    _keyCompressPermutation[] =
+        24, 27, 20, 6, 14, 10, 3, 22, 0, 17, 7, 12,
+        8, 23,  11, 5, 16, 26, 1, 9, 19, 25, 4, 15,
+        54, 43 ,36, 29, 49, 40, 48, 30, 52, 44, 37, 33,
+        46, 35, 50, 41, 28, 53, 51, 55, 32, 45, 39, 42
+    };
+
+    private static final int    _keyRot[] =
+        1, 2, 4, 6, 8, 10, 12, 14, 15, 17, 19, 21, 23, 25, 27, 28
+    };
+
+     private static final int    _sBoxP[][] =
+            0x00808200, 0x00000000, 0x00008000, 0x00808202,
+            0x00808002, 0x00008202, 0x00000002, 0x00008000,
+            0x00000200, 0x00808200, 0x00808202, 0x00000200,
+            0x00800202, 0x00808002, 0x00800000, 0x00000002,
+            0x00000202, 0x00800200, 0x00800200, 0x00008200,
+            0x00008200, 0x00808000, 0x00808000, 0x00800202,
+            0x00008002, 0x00800002, 0x00800002, 0x00008002,
+            0x00000000, 0x00000202, 0x00008202, 0x00800000,
+            0x00008000, 0x00808202, 0x00000002, 0x00808000,
+            0x00808200, 0x00800000, 0x00800000, 0x00000200,
+            0x00808002, 0x00008000, 0x00008200, 0x00800002,
+            0x00000200, 0x00000002, 0x00800202, 0x00008202,
+            0x00808202, 0x00008002, 0x00808000, 0x00800202,
+            0x00800002, 0x00000202, 0x00008202, 0x00808200,
+            0x00000202, 0x00800200, 0x00800200, 0x00000000,
+            0x00008002, 0x00008200, 0x00000000, 0x00808002
+        },
+            0x40084010, 0x40004000, 0x00004000, 0x00084010,
+            0x00080000, 0x00000010, 0x40080010, 0x40004010,
+            0x40000010, 0x40084010, 0x40084000, 0x40000000,
+            0x40004000, 0x00080000, 0x00000010, 0x40080010,
+            0x00084000, 0x00080010, 0x40004010, 0x00000000,
+            0x40000000, 0x00004000, 0x00084010, 0x40080000,
+            0x00080010, 0x40000010, 0x00000000, 0x00084000,
+            0x00004010, 0x40084000, 0x40080000, 0x00004010,
+            0x00000000, 0x00084010, 0x40080010, 0x00080000,
+            0x40004010, 0x40080000, 0x40084000, 0x00004000,
+            0x40080000, 0x40004000, 0x00000010, 0x40084010,
+            0x00084010, 0x00000010, 0x00004000, 0x40000000,
+            0x00004010, 0x40084000, 0x00080000, 0x40000010,
+            0x00080010, 0x40004010, 0x40000010, 0x00080010,
+            0x00084000, 0x00000000, 0x40004000, 0x00004010,
+            0x40000000, 0x40080010, 0x40084010, 0x00084000
+        },
+            0x00000104, 0x04010100, 0x00000000, 0x04010004,
+            0x04000100, 0x00000000, 0x00010104, 0x04000100,
+            0x00010004, 0x04000004, 0x04000004, 0x00010000,
+            0x04010104, 0x00010004, 0x04010000, 0x00000104,
+            0x04000000, 0x00000004, 0x04010100, 0x00000100,
+            0x00010100, 0x04010000, 0x04010004, 0x00010104,
+            0x04000104, 0x00010100, 0x00010000, 0x04000104,
+            0x00000004, 0x04010104, 0x00000100, 0x04000000,
+            0x04010100, 0x04000000, 0x00010004, 0x00000104,
+            0x00010000, 0x04010100, 0x04000100, 0x00000000,
+            0x00000100, 0x00010004, 0x04010104, 0x04000100,
+            0x04000004, 0x00000100, 0x00000000, 0x04010004,
+            0x04000104, 0x00010000, 0x04000000, 0x04010104,
+            0x00000004, 0x00010104, 0x00010100, 0x04000004,
+            0x04010000, 0x04000104, 0x00000104, 0x04010000,
+            0x00010104, 0x00000004, 0x04010004, 0x00010100
+        },
+            0x80401000, 0x80001040, 0x80001040, 0x00000040,
+            0x00401040, 0x80400040, 0x80400000, 0x80001000,
+            0x00000000, 0x00401000, 0x00401000, 0x80401040,
+            0x80000040, 0x00000000, 0x00400040, 0x80400000,
+            0x80000000, 0x00001000, 0x00400000, 0x80401000,
+            0x00000040, 0x00400000, 0x80001000, 0x00001040,
+            0x80400040, 0x80000000, 0x00001040, 0x00400040,
+            0x00001000, 0x00401040, 0x80401040, 0x80000040,
+            0x00400040, 0x80400000, 0x00401000, 0x80401040,
+            0x80000040, 0x00000000, 0x00000000, 0x00401000,
+            0x00001040, 0x00400040, 0x80400040, 0x80000000,
+            0x80401000, 0x80001040, 0x80001040, 0x00000040,
+            0x80401040, 0x80000040, 0x80000000, 0x00001000,
+            0x80400000, 0x80001000, 0x00401040, 0x80400040,
+            0x80001000, 0x00001040, 0x00400000, 0x80401000,
+            0x00000040, 0x00400000, 0x00001000, 0x00401040
+        },
+            0x00000080, 0x01040080, 0x01040000, 0x21000080,
+            0x00040000, 0x00000080, 0x20000000, 0x01040000,
+            0x20040080, 0x00040000, 0x01000080, 0x20040080,
+            0x21000080, 0x21040000, 0x00040080, 0x20000000,
+            0x01000000, 0x20040000, 0x20040000, 0x00000000,
+            0x20000080, 0x21040080, 0x21040080, 0x01000080,
+            0x21040000, 0x20000080, 0x00000000, 0x21000000,
+            0x01040080, 0x01000000, 0x21000000, 0x00040080,
+            0x00040000, 0x21000080, 0x00000080, 0x01000000,
+            0x20000000, 0x01040000, 0x21000080, 0x20040080,
+            0x01000080, 0x20000000, 0x21040000, 0x01040080,
+            0x20040080, 0x00000080, 0x01000000, 0x21040000,
+            0x21040080, 0x00040080, 0x21000000, 0x21040080,
+            0x01040000, 0x00000000, 0x20040000, 0x21000000,
+            0x00040080, 0x01000080, 0x20000080, 0x00040000,
+            0x00000000, 0x20040000, 0x01040080, 0x20000080
+        },
+            0x10000008, 0x10200000, 0x00002000, 0x10202008,
+            0x10200000, 0x00000008, 0x10202008, 0x00200000,
+            0x10002000, 0x00202008, 0x00200000, 0x10000008,
+            0x00200008, 0x10002000, 0x10000000, 0x00002008,
+            0x00000000, 0x00200008, 0x10002008, 0x00002000,
+            0x00202000, 0x10002008, 0x00000008, 0x10200008,
+            0x10200008, 0x00000000, 0x00202008, 0x10202000,
+            0x00002008, 0x00202000, 0x10202000, 0x10000000,
+            0x10002000, 0x00000008, 0x10200008, 0x00202000,
+            0x10202008, 0x00200000, 0x00002008, 0x10000008,
+            0x00200000, 0x10002000, 0x10000000, 0x00002008,
+            0x10000008, 0x10202008, 0x00202000, 0x10200000,
+            0x00202008, 0x10202000, 0x00000000, 0x10200008,
+            0x00000008, 0x00002000, 0x10200000, 0x00202008,
+            0x00002000, 0x00200008, 0x10002008, 0x00000000,
+            0x10202000, 0x10000000, 0x00200008, 0x10002008
+        },
+            0x00100000, 0x02100001, 0x02000401, 0x00000000,
+            0x00000400, 0x02000401, 0x00100401, 0x02100400,
+            0x02100401, 0x00100000, 0x00000000, 0x02000001,
+            0x00000001, 0x02000000, 0x02100001, 0x00000401,
+            0x02000400, 0x00100401, 0x00100001, 0x02000400,
+            0x02000001, 0x02100000, 0x02100400, 0x00100001,
+            0x02100000, 0x00000400, 0x00000401, 0x02100401,
+            0x00100400, 0x00000001, 0x02000000, 0x00100400,
+            0x02000000, 0x00100400, 0x00100000, 0x02000401,
+            0x02000401, 0x02100001, 0x02100001, 0x00000001,
+            0x00100001, 0x02000000, 0x02000400, 0x00100000,
+            0x02100400, 0x00000401, 0x00100401, 0x02100400,
+            0x00000401, 0x02000001, 0x02100401, 0x02100000,
+            0x00100400, 0x00000000, 0x00000001, 0x02100401,
+            0x00000000, 0x00100401, 0x02100000, 0x00000400,
+            0x02000001, 0x02000400, 0x00000400, 0x00100001
+        },
+            0x08000820, 0x00000800, 0x00020000, 0x08020820,
+            0x08000000, 0x08000820, 0x00000020, 0x08000000,
+            0x00020020, 0x08020000, 0x08020820, 0x00020800,
+            0x08020800, 0x00020820, 0x00000800, 0x00000020,
+            0x08020000, 0x08000020, 0x08000800, 0x00000820,
+            0x00020800, 0x00020020, 0x08020020, 0x08020800,
+            0x00000820, 0x00000000, 0x00000000, 0x08020020,
+            0x08000020, 0x08000800, 0x00020820, 0x00020000,
+            0x00020820, 0x00020000, 0x08020800, 0x00000800,
+            0x00000020, 0x08020020, 0x00000800, 0x00020820,
+            0x08000800, 0x00000020, 0x08000020, 0x08020000,
+            0x08020020, 0x08000000, 0x00020000, 0x08000820,
+            0x00000000, 0x08020820, 0x00020020, 0x08000020,
+            0x08020000, 0x08000800, 0x08000820, 0x00000000,
+            0x08020820, 0x00020800, 0x00020800, 0x00000820,
+            0x00000820, 0x00020020, 0x08000000, 0x08020800
+    };
+
+    private static long    _keys[];
+    private static char    _word[];
+
+    private static long    _initialKey = 1007360890380L;
+
+    /**
+# Initialization #1.
+    public static void initialize()
+        makeKeys(_initialKey);
+        _word = new char[64];
+
+    /**
+# Initialization #2.
+    public static void initialize(long key)
+        makeKeys(key);
+        _word = new char[64];
+    
+    public static long hash(String word)
+        int    len = word.length();
+        long    value = 0L;
+        for (int start = 0; start < len; start += 64)
+            if (len - start > 64)
+                value += (_hash(word.substring(start, start + 64)) ^ 0xffffffffL);
+            else
+                value += (_hash(word.substring(start)) ^ 0xffffffffL);
+                break;
+        return value;
+
+    /**
+# The actual hash function.
+    private static long _hash(String word)
+        int    len = word.length();
+        for (int i = 0; i < len; i++)
+            _word[i] = word.charAt(i);
+        int    round = len / 8;
+        int    rest = len % 8;
+        if (rest > 0)
+            for (int i = 0; i < 8 - rest; i++)
+                _word[len+i] = 0;
+            round++;
+
+        int    value = 0;
+        for (int i = 0, pos = 0; i < round; i++, pos += 8)
+            long    todo = 0L + (byte)_word[pos];
+            for (int j = 1; j < 8; j++)
+                todo = (todo << 8) + (byte)_word[pos+j];
+            value += des(todo);
+
+        return value;
+
+    private static void makeKeys(long key)
+        long reduced = permutate(key, _keyReducePermutation);
+        int l = (int)(reduced >> 28);
+        int r = (int)(reduced & 0xfffffff);
+        _keys = new long[16];
+        for (int i = 0; i < 16; i++)
+            _keys[i] = permutate(rotate(l, r, _keyRot[i]),
+                        _keyCompressPermutation);
+
+    private static long des(long w)
+        long    x = permutate(w, _initialPermutation);
+        int    l = (int)(x >>> 32);
+        int    r = (int)x;
+        for (int i = 0; i < 16; i++)
+            int    tmp = desFunc(r, _keys[i]) ^ l;
+            l = r;
+            r = tmp;
+        long    y = ((long)r << 32) | ((long)l & 0xffffffffL);
+        return permutate(y, _finalPermutation);
+
+    private static long permutate(long k, int p[])
+        long    s = 0;
+        for (int i = 0; i < p.length; i++)
+            if ((k & (1L << p[i])) != 0)
+                s |= 1L << i;
+
+        return s;
+
+    private static long rotate(int l, int r, int s)
+        return ((long)(((l<<s) & 0xfffffff) | (l>>>(28 - s))) << 28) |
+            ((r<<s) & 0xfffffff) | (r>> (28 - s));
+
+    private static int desFunc(int x, long k)
+        int p = x >>> 27;
+        int q = (p & 3) << 4;
+        int r = x << 5;
+        p |=  r;
+        r = _sBoxP[0][(int)((k >> 42) ^ p) & 0x3f];
+        p >>>= 4;
+        r |= _sBoxP[7][(int)((k >> 0) ^ p) & 0x3f];
+        p >>>= 4;
+        r |= _sBoxP[6][(int)((k >> 6) ^ p) & 0x3f];
+        p >>>= 4;
+        r |= _sBoxP[5][(int)((k >> 12) ^ p) & 0x3f];
+        p >>>= 4;
+        r |= _sBoxP[4][(int)((k >> 18) ^ p) & 0x3f];
+        p >>>= 4;
+        r |= _sBoxP[3][(int)((k >> 24) ^ p) & 0x3f];
+        p >>>= 4;
+        r |= _sBoxP[2][(int)((k >> 30) ^ p) & 0x3f];
+        p >>>= 4;
+        r |= _sBoxP[1][(int)((k >> 36) ^ (p | q)) & 0x3f];
+        return r;
diff --git a/XLut.py b/XLut.py
new file mode 100644
index 0000000..98dc5b6
--- /dev/null
+++ b/XLut.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2001 - 2005
+#     Yuan Wang. All rights reserved.
+
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright 
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the 
+# documentation and/or other materials provided with the distribution.
+# 3. Redistributions in any form must be accompanied by information on
+# how to obtain complete source code for the X-Diff software and any
+# accompanying software that uses the X-Diff software.  The source code
+# must either be included in the distribution or be available for no
+# more than the cost of distribution plus a nominal fee, and must be
+# freely redistributable under reasonable conditions.  For an executable
+# file, complete source code means the source code for all modules it
+# contains.  It does not include source code for modules or files that
+# typically accompany the major components of the operating system on
+# which the executable file runs.
+
+# THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
+# ARE DISCLAIMED.  IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
+# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+import XTree
+
+# <code>XLut</code> is the hash lookup table for node distance.
+class XLut:
+
+# Constructor.
+    def __init__(self):
+        self._xTable = {} 
+
+# Add a node pair and their distance to this table.
+# @param    eid1    element id #1
+# @param    eid2    element id #2
+# @param    dist    distance
+    def add(self, eid1, eid2, dist):
+        key = eid1
+        key = key << 32
+        key += eid2
+
+        self._xTable[key] = int(dist)
+
+# Get the distance of a node pair.
+# @param    eid1    element id #1
+# @param    eid2    element id #2
+# @return    distance or -1 if not found
+    def get(self, eid1, eid2):
+        key = eid1
+        key = key << 32
+        key += eid2
+
+        value = self._xTable[key]
+        if value == None:
+            return XTree.NO_CONNECTION
+        else:
+            return int(value)
diff --git a/XParser.py b/XParser.py
new file mode 100644
index 0000000..dcfedc4
--- /dev/null
+++ b/XParser.py
@@ -0,0 +1,269 @@
+# Copyright (c) 2001 - 2005
+#     Yuan Wang. All rights reserved.
+
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright 
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the 
+# documentation and/or other materials provided with the distribution.
+# 3. Redistributions in any form must be accompanied by information on
+# how to obtain complete source code for the X-Diff software and any
+# accompanying software that uses the X-Diff software.  The source code
+# must either be included in the distribution or be available for no
+# more than the cost of distribution plus a nominal fee, and must be
+# freely redistributable under reasonable conditions.  For an executable
+# file, complete source code means the source code for all modules it
+# contains.  It does not include source code for modules or files that
+# typically accompany the major components of the operating system on
+# which the executable file runs.
+
+# THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
+# ARE DISCLAIMED.  IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
+# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+import xml.sax
+_PARSER_NAME = "org.apache.xerces.parsers.SAXParser";
+
+# FIXME
+# This is interesting
+# http://www.virtuousprogrammer.com/?page_id=183
+# http://docs.python.org/library/xml.sax.reader.html
+# <code>XParser</code> parses an input XML document and constructs an
+# <code>XTree</code>
+# class XParser extends DefaultHandler implements LexicalHandler
+class XParser(xml.sax.handler.ContentHandler):
+    _setValidation = False
+    _setNameSpaces = True
+    _setSchemaSupport = True
+    _setSchemaFullSupport = False
+    _setNameSpacePrefixes = True
+
+    _STACK_SIZE = 100
+
+    private XMLReader    _parser;
+    private XTree        _xtree;
+    private int        _idStack[], _lsidStack[]; // id and left sibling
+    private long        _valueStack[];
+    private int        _stackTop, _currentNodeID;
+    private boolean        _readElement;
+    private StringBuffer    _elementBuffer;
+
+# Constructor.
+    def __init__(self):
+    {
+        XHash.initialize();
+        try
+        {
+            _parser = (XMLReader)Class.forName(_PARSER_NAME).newInstance();
+            _parser.setFeature("http://xml.org/sax/features/validation", _setValidation);
+            _parser.setFeature("http://xml.org/sax/features/namespaces", _setNameSpaces);
+            _parser.setFeature("http://apache.org/xml/features/validation/schema", _setSchemaSupport);
+            _parser.setFeature("http://apache.org/xml/features/validation/schema-full-checking", _setSchemaFullSupport);
+             _parser.setFeature("http://xml.org/sax/features/namespace-prefixes", _setNameSpacePrefixes);
+
+            _parser.setContentHandler(this);
+            _parser.setErrorHandler(this);
+            _parser.setProperty("http://xml.org/sax/properties/lexical-handler", this);
+        }
+        catch (Exception e)
+        {
+            System.err.println(e.getMessage());
+            System.exit(1);
+        }
+
+        _idStack = new int[_STACK_SIZE];
+        _lsidStack = new int[_STACK_SIZE];
+        _valueStack = new long[_STACK_SIZE];
+        _stackTop = 0;
+        _currentNodeID = XTree.NULL_NODE;
+        _elementBuffer = new StringBuffer();
+    }
+
+# Parse an XML document
+# @param    uri    input XML document
+# @return    the created XTree
+    def parse(String uri):
+    {
+        _xtree = new XTree();
+        _idStack[_stackTop] = XTree.NULL_NODE;
+        _lsidStack[_stackTop] = XTree.NULL_NODE;
+
+        try
+        {
+            _parser.parse(uri);
+        }
+        catch (Exception e)
+        {
+            System.err.println(e.getMessage());
+            System.exit(1);
+        }
+
+        return _xtree;
+    }
+
+    // Document handler methods
+
+    public void startElement(String uri, String local, String raw,
+                 Attributes attrs)
+    {
+        // if text is mixed with elements
+        if (_elementBuffer.length() > 0)
+        {
+            String    text = _elementBuffer.toString().trim();
+            if (text.length() > 0)
+            {
+                long    value = XHash.hash(text);
+                int    tid = _xtree.addText(_idStack[_stackTop], _lsidStack[_stackTop], text, value);
+                _lsidStack[_stackTop] = tid;
+                _currentNodeID = tid;
+                _valueStack[_stackTop] += value;
+            }
+        }
+
+        int    eid = _xtree.addElement(_idStack[_stackTop],
+                        _lsidStack[_stackTop], local);
+
+        // Update last sibling info.
+        _lsidStack[_stackTop] = eid;
+
+        // Push
+        _stackTop++;
+        _idStack[_stackTop] = eid;
+        _currentNodeID = eid;
+        _lsidStack[_stackTop] = XTree.NULL_NODE;
+        _valueStack[_stackTop] = XHash.hash(local);
+
+        // Take care of attributes
+        if ((attrs != null) && (attrs.getLength() > 0))
+        {
+            for (int i = 0; i < attrs.getLength(); i++)
+            {
+                String    name = attrs.getQName(i);
+                String    value = attrs.getValue(i);
+                long    namehash = XHash.hash(name);
+                long    valuehash = XHash.hash(value);
+                long    attrhash = namehash * namehash +
+                           valuehash * valuehash;
+                int    aid = _xtree.addAttribute(eid, _lsidStack[_stackTop], name, value, namehash, attrhash);
+
+                _lsidStack[_stackTop] = aid;
+                _currentNodeID = aid + 1;
+                _valueStack[_stackTop] += attrhash * attrhash;
+            }
+        }
+
+        _readElement = True;
+        _elementBuffer = new StringBuffer();
+    }
+
+    def characters(char ch[], int start, int length):
+    {
+        _elementBuffer.append(ch, start, length);
+    }
+
+    def endElement(String uri, String local, String raw):
+    {
+        if (_readElement)
+        {
+            if (_elementBuffer.length() > 0)
+            {
+                String    text = _elementBuffer.toString();
+                long    value = XHash.hash(text);
+                _currentNodeID =
+                    _xtree.addText(_idStack[_stackTop],
+                               _lsidStack[_stackTop],
+                               text, value);
+                _valueStack[_stackTop] += value;
+            }
+            else    // an empty element
+            {
+                _currentNodeID =
+                    _xtree.addText(_idStack[_stackTop],
+                               _lsidStack[_stackTop],
+                               "", 0);
+            }
+            _readElement = False;
+        }
+        else
+        {
+            if (_elementBuffer.length() > 0)
+            {
+                String    text = _elementBuffer.toString().trim();
+                // More text nodes before end of the element.
+                if (text.length() > 0)
+                {
+                    long    value = XHash.hash(text);
+                    _currentNodeID =
+                      _xtree.addText(_idStack[_stackTop],
+                             _lsidStack[_stackTop],
+                             text, value);
+                    _valueStack[_stackTop] += value;
+                }
+            }
+        }
+
+        _elementBuffer = new StringBuffer();
+        _xtree.addHashValue(_idStack[_stackTop],
+                    _valueStack[_stackTop]);
+        _valueStack[_stackTop-1] += _valueStack[_stackTop] *
+                        _valueStack[_stackTop];
+        _lsidStack[_stackTop-1] = _idStack[_stackTop];
+
+        // Pop
+        _stackTop--;
+    }
+
+    // End of document handler methods
+
+    // Lexical handler methods.
+
+    def startCDATA():
+    {
+        // The text node id should be the one next to the current
+        // node id.
+        int    textid = _currentNodeID + 1;
+        String    text = _elementBuffer.toString();
+        _xtree.addCDATA(textid, text.length());
+    }
+
+    def endCDATA():
+    {
+        int    textid = _currentNodeID + 1;
+        String    text = _elementBuffer.toString();
+        _xtree.addCDATA(textid, text.length());
+    }
+
+    // Following functions are not implemented.
+    def comment(char[] ch, int start, int length):
+    {
+    }
+
+    def startDTD(String name, String publicId, String systemId):
+    {
+    }
+
+    def endDTD():
+    {
+    }
+
+    def startEntity(String name):
+    {
+    }
+
+    def endEntity(String name):
+    {
+    }
+
+    // End of lexical handler methods.
+}
diff --git a/XTree.py b/XTree.py
new file mode 100644
index 0000000..eef0af9
--- /dev/null
+++ b/XTree.py
@@ -0,0 +1,423 @@
+# Copyright (c) 2001 - 2005
+#     Yuan Wang. All rights reserved.
+# 
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright 
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the 
+# documentation and/or other materials provided with the distribution.
+# 3. Redistributions in any form must be accompanied by information on
+# how to obtain complete source code for the X-Diff software and any
+# accompanying software that uses the X-Diff software.  The source code
+# must either be included in the distribution or be available for no
+# more than the cost of distribution plus a nominal fee, and must be
+# freely redistributable under reasonable conditions.  For an executable
+# file, complete source code means the source code for all modules it
+# contains.  It does not include source code for modules or files that
+# typically accompany the major components of the operating system on
+# which the executable file runs.
+# 
+# THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
+# ARE DISCLAIMED.  IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
+# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+MATCH = 0
+CHANGE = 1
+NO_MATCH = -1
+INSERT = -1
+DELETE = -1
+NULL_NODE = -1
+NO_CONNECTION = 1048576
+
+_TOP_LEVEL_CAPACITY = 16384
+_BOT_LEVEL_CAPACITY = 4096
+
+
+# <code>XTree</code> provides a DOM-like interface but somehow simplified
+# Ideally, it can be replaced by any other DOM parser output tree structures.
+class XTree:
+#    private     _topCap, _botCap
+#    private     _elementIndex, _tagIndex, self._valueCount
+#    private        self._firstChild[][], self._nextSibling[][]
+#    private     self._childrenCount[][], _valueIndex[][]
+#    private boolean        self._isAttribute[][]
+#    private     self._matching[][]
+#    private long        self._hashValue[][]
+#    private String        _value[][]
+#    private Hashtable    self._tagNames, _cdataTable
+
+    def __init__(self, topcap=None, botcap=None):
+        self._topCap = _TOP_LEVEL_CAPACITY
+        self._botCap = _BOT_LEVEL_CAPACITY
+        if topcap:
+            self._topCap = topcap
+        if botcap:
+            self._botCap = botcap
+        self._initialize()
+
+    # Initialization.
+    def _initialize(self):
+        self._root = 0
+        self._firstChild    = []
+        self._nextSibling    = []
+        self._isAttribute    = []
+        self._valueIndex    = []
+        self._matching    = []
+        self._childrenCount    = []
+        self._hashValue    = []
+        self._value        = []
+
+        self._value[0]    = []
+        self._tagNames    = []
+
+        # This hashtable is used to record CDATA section info.
+        # The key is the text node id, the value is the list of 
+        # (start,end) position pair of each CDATA section.
+        self._cdataTable    = {}
+
+        self._elementIndex    = -1
+        self._tagIndex    = -1
+        self._valueCount    = self._botCap - 1
+
+    # ID Expansion
+    def _expand(self, topid):
+        self._firstChild[topid]    = []
+        self._nextSibling[topid]    = []
+        self._childrenCount[topid]    = []
+        self._matching[topid]    = []
+        self._valueIndex[topid]    = []
+        self._hashValue[topid]    = []
+        self._isAttribute[topid]    = []
+
+        for i in range(self._botCap):
+            self._firstChild[topid][i]    = NULL_NODE
+            self._nextSibling[topid][i]    = NULL_NODE
+            self._childrenCount[topid][i]= 0
+            self._matching[topid][i]    = MATCH
+            self._valueIndex[topid][i]    = -1
+            self._isAttribute[topid][i]    = False
+
+    # Start  -- methods for constructing a tree.
+    # Add a new element to the tree.
+    # @param    pid        parent id
+    # @param    lsid        left-side sibling id
+    # @param    tagName        element name
+    # @return    the element id in the tree.
+    def addElement(self, pid, lsid, tagName):
+        self._elementIndex += 1
+
+        topid = self._elementIndex / self._botCap
+        botid = self._elementIndex % self._botCap
+        if (botid == 0):
+            self._expand(topid)
+
+        # Check if we've already had the tag
+        tagID = self._tagNames[tagName]
+        if (tagID != None):
+            self._valueIndex[topid][botid] = tagID.intValue()
+        else:
+            self._tagIndex += 1
+            tagID = int(self._tagIndex)
+            self._value[0][self._tagIndex] = tagName
+            self._tagNames.append(tagName, tagID)
+            self._valueIndex[topid][botid] = self._tagIndex
+
+        if (pid == NULL_NODE):
+            return self._elementIndex
+
+        ptopid = pid / self._botCap
+        pbotid = pid % self._botCap
+        # parent-child relation or sibling-sibling relation
+        if (lsid == NULL_NODE):
+            self._firstChild[ptopid][pbotid] = self._elementIndex
+        else:
+            self._nextSibling[lsid/self._botCap][lsid%self._botCap] = self._elementIndex
+
+        # update children count
+        self._childrenCount[ptopid][pbotid] += 1
+
+        return self._elementIndex
+
+    # Add a text node.
+    # @param    eid    element id
+    # @param    lsid    the sibling id on the left
+    # @param    text    text value
+    # @param    value    hash value
+    def addText(self, eid, lsid, text, value):
+        self._elementIndex += 1
+        topid = self._elementIndex / self._botCap
+        botid = self._elementIndex % self._botCap
+        if (botid == 0):
+            self._expand(topid)
+
+        etopid = eid / self._botCap
+        ebotid = eid % self._botCap
+        if (lsid == NULL_NODE):
+            self._firstChild[etopid][ebotid] = self._elementIndex
+        else:
+            self._nextSibling[lsid/self._botCap][lsid%self._botCap] = self._elementIndex
+
+        self._childrenCount[etopid][ebotid] += 1
+        self._hashValue[topid][botid] = value
+
+        self._valueCount += 1
+        vtopid = self._valueCount / self._botCap
+        vbotid = self._valueCount % self._botCap
+        if (vbotid == 0):
+            self._value[vtopid] = str[self._botCap]
+
+        self._value[vtopid][vbotid] = text
+        self._valueIndex[topid][botid] = self._valueCount
+
+        return self._elementIndex
+
+    # Add an attribute.
+    # @param    eid    element id
+    # @param    lsid    the sibling id on the left
+    # @param    name    attribute name
+    # @param    value    attribute value
+    # @param    valuehash    hash value of the value
+    # @param    attrhash    hash value of the entire attribute
+    # @return    the element id of the attribute
+    def addAttribute(self, eid, lsid, name, value, valuehash, attrhash):
+        # attribute name first.
+        aid = self.addElement(eid, lsid, name)
+
+        # attribute value second.
+        self.addText(aid, NULL_NODE, value, valuehash)
+
+        # hash value third
+        atopid = aid / self._botCap
+        abotid = aid % self._botCap
+        self._isAttribute[atopid][abotid] = True
+        self._hashValue[atopid][abotid] = attrhash
+
+        return aid
+
+    # Add more information (hash value) to an element node.
+    # @param    eid    element id
+    # @param    value    extra hash value
+    def addHashValue(self, eid, value):
+        self._hashValue[eid/self._botCap][eid%self._botCap] = value
+
+    # Add a CDATA section (either a start or an end) to the CDATA
+    # hashtable, in which each entry should have an even number of
+    # position slots.
+    # @param    eid        The text node id
+    # @param    position    the section tag position
+    def addCDATA(self, eid, position):
+        key = int(eid)
+        value = self._cdataTable[key]
+        if (value == None):
+            elem_list = []
+            elem_list.append(position)
+            self._cdataTable[key] = elem_list
+        else:
+            elem_list = value
+            elem_list.append(position)
+            self._cdataTable[key] = elem_list
+
+    # Add matching information.
+    # @param    eid    element id
+    # @param    match    ?match and matched element id
+    def addMatching(self, eid, match):
+        if (match[0] == NO_MATCH):
+            self._matching[eid/self._botCap][eid%self._botCap] = NO_MATCH
+        elif (match[0] == MATCH):
+            self._matching[eid/self._botCap][eid%self._botCap] = MATCH
+        else:
+            self._matching[eid/self._botCap][eid%self._botCap] = match[1] + 1
+
+    # End  -- methods for constructing a tree.
+
+    # Start -- methods for accessing a tree.
+
+    # Get matching information.
+    # @param    eid    element id
+    # @param    match    ?change and matched element id 
+    def getMatching(self, eid, match):
+        mid = self._matching[eid/self._botCap][eid%self._botCap]
+        if (mid == NO_MATCH):
+            match[0] = NO_MATCH
+        elif (mid == MATCH):
+            match[0] = MATCH
+        else:
+            match[0] = CHANGE
+            match[1] = mid - 1
+
+    # Get the root element id.
+    def getRoot(self):
+        return self._root
+
+    # Get the first child of a node.
+    # @param    eid    element id
+    def getFirstChild(self, eid):
+        cid = self._firstChild[eid/self._botCap][eid%self._botCap]
+        while (cid > self._root):
+            ctopid = cid / self._botCap
+            cbotid = cid % self._botCap
+            if (self._isAttribute[ctopid][cbotid]):
+                cid = self._nextSibling[ctopid][cbotid]
+            else:
+                return cid
+
+        return NULL_NODE
+
+    # Get the next sibling of a node.
+    # @param    eid    element id
+    def getNextSibling(self, eid):
+        return self._nextSibling[eid/self._botCap][eid%self._botCap]
+
+    # Get the first attribute of a node.
+    # @param    eid    element id
+    def getFirstAttribute(self, eid):
+        aid = self._firstChild[eid/self._botCap][eid%self._botCap]
+        if ((aid > self._root) and (self._isAttribute[aid/self._botCap][aid%self._botCap])):
+            return aid
+        else:
+            return NULL_NODE
+
+    # Get the next attribute of a node.
+    # @param    aid    attribute id
+    def getNextAttribute(self, aid):
+        aid1 = self._nextSibling[aid/self._botCap][aid%self._botCap]
+        if ((aid1 > self._root) and (self._isAttribute[aid1/self._botCap][aid1%self._botCap])):
+            return aid1
+        else:
+            return NULL_NODE
+
+    # Get the attribute value.
+    # @param    aid    attribute id
+    def getAttributeValue(self, aid):
+        cid = self._firstChild[aid/self._botCap][aid%self._botCap]
+        index = self._valueIndex[cid/self._botCap][cid%self._botCap]
+        if (index > 0):
+            return self._value[index/self._botCap][index%self._botCap]
+        else:
+            return ""
+
+    # Get the hash value of a node.
+    # @param    eid    element id
+    def getHashValue(self, eid):
+        return self._hashValue[eid/self._botCap][eid%self._botCap]
+
+    # Get the CDATA section position list of a text node.
+    # @param    eid    element id
+    # @return    position list which is a vector or None if no CDATA
+    def getCDATA(self, eid):
+        return self._cdataTable[eid]
+
+    # Get the childern count of a node.
+    # @param    eid    element id
+    def getChildrenCount(self, eid):
+        return self._childrenCount[eid/self._botCap][eid%self._botCap]
+
+    # Get the # of all decendents of a node.
+    # @param    eid    element id
+    def getDecendentsCount(self, eid):
+        topid = eid / self._botCap
+        botid = eid % self._botCap
+        count = self._childrenCount[topid][botid]
+        if (count == 0):
+            return 0
+
+        cid = self._firstChild[topid][botid]
+        while (cid > NULL_NODE):
+            count += self.getDecendentsCount(cid)
+            cid = self._nextSibling[cid/self._botCap][cid%self._botCap]
+
+        return count
+
+    # Get the value index of a node
+    # @param    eid    element id
+    def getValueIndex(self, eid):
+        return self._valueIndex[eid/self._botCap][eid%self._botCap]
+
+    # Get the value of a leaf node
+    # @param    index    value index
+    def getValue(self, index):
+        return self._value[index/self._botCap][index%self._botCap]
+
+    # Get the tag of an element node
+    # @param    eid    element id
+    def getTag(self, eid):
+        index = self._valueIndex[eid/self._botCap][eid%self._botCap]
+        return    self._value[0][index]
+
+    # Get the text value of a leaf node
+    # @param    eid    element id
+    def getText(self, eid):
+        index = self._valueIndex[eid/self._botCap][eid%self._botCap]
+        if (index >= self._botCap):
+            return self._value[index/self._botCap][index%self._botCap]
+        else:
+            return ""
+
+    # Check if a node an element node.
+    # @param    eid    element id
+    def isElement(self, eid):
+        vindex = self._valueIndex[eid/self._botCap][eid%self._botCap]
+        if (vindex < self._botCap):
+            return True
+        else:
+            return False
+
+    # Check if a node is an attribute node.
+    # @param    eid    element id
+    def isAttribute(self, eid):
+        return self._isAttribute[eid/self._botCap][eid%self._botCap]
+
+    # Check if a node an leaf text node.
+    # @param    edi    element id
+    def isLeaf(self, eid):
+        index = self._valueIndex[eid/self._botCap][eid%self._botCap]
+        if (index < self._botCap):
+            return False
+        else:
+            return True
+
+    # End  -- methods for accessing a tree.
+
+    # For testing purpose.
+    def dump(self, eid = None):
+        if eid:
+            topid = eid / self._botCap
+            botid = eid % self._botCap
+            vid = self._valueIndex[topid][botid]
+            vtopid = vid / self._botCap
+            vbotid = vid % self._botCap
+            print eid + "\t" + \
+                       self._firstChild[topid][botid] + "\t" + \
+                       self._nextSibling[topid][botid] + "\t" + \
+                       self._isAttribute[topid][botid] + "\t" + \
+                       self._childrenCount[topid][botid] + "\t" + \
+                       self._hashValue[topid][botid] + "\t" + \
+                       self._matching[topid][botid] + "\t" + \
+                       self._value[vtopid][vbotid]
+        else:
+            print "eid\tfirstC\tnextS\tattr?\tcCount\thash\tmatch\tvalue"
+            for i in range(self._root,self._elementIndex+1):
+                topid = i / self._botCap
+                botid = i % self._botCap
+                vid = self._valueIndex[topid][botid]
+                vtopid = vid / self._botCap
+                vbotid = vid % self._botCap
+                print i + "\t" + \
+                           self._firstChild[topid][botid] + "\t" + \
+                           self._nextSibling[topid][botid] + "\t" + \
+                           self._isAttribute[topid][botid] + "\t" + \
+                           self._childrenCount[topid][botid] + "\t" + \
+                           self._hashValue[topid][botid] + "\t" + \
+                           self._matching[topid][botid] + "\t" + \
+                           self._value[vtopid][vbotid]