From aaab0e1e065a31c25a81d972d0b2e5221cbcba4d Mon Sep 17 00:00:00 2001 From: Matěj Cepl Date: Mon, 24 Oct 2011 15:45:51 +0200 Subject: Syntaically correct, but not working. Huge refactoring of data strctures is probably needed. --- XDiff.py | 105 ++++++++------- XHash.py | 310 ------------------------------------------ XParser.py | 423 ++++++++++++++++++++++++++++------------------------------ XTree.py | 138 +++++++++++-------- diff.xml | 12 ++ new.xml | 7 + old.xml | 7 + test_XDiff.py | 9 ++ 8 files changed, 371 insertions(+), 640 deletions(-) delete mode 100644 XHash.py create mode 100644 diff.xml create mode 100644 new.xml create mode 100644 old.xml create mode 100644 test_XDiff.py diff --git a/XDiff.py b/XDiff.py index ad0e93c..368ef51 100644 --- a/XDiff.py +++ b/XDiff.py @@ -55,6 +55,7 @@ Options: import sys, time, codecs import XTree, XLut from XParser import XParser +import random # XDiff computes the difference of two input XML documents. @@ -66,6 +67,8 @@ _TEXT_SIZE = 1024 class XDiff: _oFlag = False + _gFlag = False + _needNewLine = False _NO_MATCH_THRESHOLD = 0.3 _sampleCount = 3 _DEBUG = False @@ -343,7 +346,7 @@ class XDiff: i = start while (i < elementCount1) and (muc1 < ucount1): - if (not matched1[i] and startTag.equals(self._xtree1.getTag(elements1[i]))): + if (not matched1[i] and (startTag == self._xtree1.getTag(elements1[i]))): matched1[i] = True muc1 += 1 unmatched1[uele1] = elements1[i] @@ -352,7 +355,7 @@ class XDiff: i = 0 while (i < elementCount2) and (muc2 < ucount2): - if (not matched2[i] and startTag.equals(self._xtree2.getTag(elements2[i]))): + if (not matched2[i] and (startTag == self._xtree2.getTag(elements2[i]))): matched2[i] = True muc2 += 1 unmatched2[uele2] = elements2[i] @@ -628,9 +631,9 @@ class XDiff: dist = self._xlut.get(nodes2[j], nodes1[i]) else: if treeOrder: - dist = distance(nodes1[i], nodes2[j], True, XTree.NO_CONNECTION) + dist = self.distance(nodes1[i], nodes2[j], True, XTree.NO_CONNECTION) else: - dist = distance(nodes2[j], nodes1[i], True, XTree.NO_CONNECTION) + dist = self.distance(nodes2[j], nodes1[i], True, XTree.NO_CONNECTION) # the default mode. if (not self._oFlag and (dist > 1) and (dist >= self._NO_MATCH_THRESHOLD * (deleteCost + distance[count1][j]))): dist = XTree.NO_CONNECTION @@ -714,13 +717,13 @@ class XDiff: matching2[j] = i break else: - r = Random(time.time()) # FIXME + r = random.Random(time.time()) scount1 = 0 scount2 = 0 matchingThreshold = 0 i = 0 while (i < self._sampleCount) and (scount2 < count2): - snode = r.nextInt(count2 - scount2) + scount2 + snode = r.randint(0, count2 - scount2) + scount2 dist = XTree.NO_CONNECTION bestmatch = XTree.NO_MATCH for j in range(scount1,count1): @@ -859,7 +862,6 @@ class XDiff: # @param threshold No need to return a distance higher # than this threshold # @return the distance - def _xdiff(self, pid1, pid2, threshold): dist = 0 @@ -1022,7 +1024,7 @@ class XDiff: i = start while (i < elementCount1) and (muc1 < ucount1): - if (not matched1[i] and startTag.equals(self._xtree1.getTag(elements1[i]))): + if (not matched1[i] and (startTag == self._xtree1.getTag(elements1[i]))): matched1[i] = True muc1 += 1 unmatched1[uele1] = elements1[i] @@ -1031,7 +1033,7 @@ class XDiff: i = 0 while (i < elementCount2) and (muc2 < ucount2): - if (not matched2[i] and startTag.equals(self._xtree2.getTag(elements2[i]))): + if (not matched2[i] and (startTag == self._xtree2.getTag(elements2[i]))): matched2[i] = True muc2 += 1 unmatched2[uele2] = elements2[i] @@ -1189,13 +1191,12 @@ class XDiff: deleteCost = self._xtree2.getDecendentsCount(nodes1[i]) + 1 for j in range(count2): if treeOrder: - dist = distance(nodes1[i], nodes2[j], True, XTree.NO_CONNECTION) + dist = self.distance(nodes1[i], nodes2[j], True, XTree.NO_CONNECTION) else: - dist = distance(nodes2[j], nodes1[i], True, XTree.NO_CONNECTION) + dist = self.distance(nodes2[j], nodes1[i], True, XTree.NO_CONNECTION) # the default mode. if (not self._oFlag and (dist > 1) and (dist < XTree.NO_CONNECTION) and \ - (dist >= self._NO_MATCH_THRESHOLD * \ - (deleteCost + distance[count1][j]))): + (dist >= self._NO_MATCH_THRESHOLD * (deleteCost + distance[count1][j]))): dist = XTree.NO_CONNECTION if (dist < XTree.NO_CONNECTION): @@ -1204,6 +1205,7 @@ class XDiff: else: self._xlut.add(nodes2[j], nodes1[i], dist) distance[i][j] = dist + # delete cost. distance[i][count2] = deleteCost @@ -1230,21 +1232,21 @@ class XDiff: matching2[i] = XTree.NO_MATCH distance = 0 - r = Random(time.time()) + r = random.Random(time.time()) scount1 = 0 scount2 = 0 matchingThreshold = 0 i = 0 while (i < self._sampleCount) and (scount2 < count2): - snode = r.nextInt(count2 - scount2) + scount2 + snode = r.randint(0, count2 - scount2) + scount2 dist = XTree.NO_CONNECTION bestmatch = XTree.NO_MATCH for j in range(scount1,count1): if treeOrder: - d = distance(nodes1[j], nodes2[snode], False, threshold - distance) + d = self.distance(nodes1[j], nodes2[snode], False, threshold - distance) else: - d = distance(nodes2[snode], nodes1[j], False, threshold - distance) + d = self.distance(nodes2[snode], nodes1[j], False, threshold - distance) if (d < dist): dist = d bestmatch = j @@ -1468,14 +1470,14 @@ class XDiff: if (clen > 0): # Modify matching. i = 0 - next = 0 + next_circuit = 0 while (i < clen - 1): - n1 = self._circuit[next] - next = self._circuit[next+1] + n1 = self._circuit[next_circuit] + next_circuit = self._circuit[next_circuit+1] # Node in node list 1. if ((n1 > 0) and (n1 <= count1)): nid1 = n1 - 1 - nid2 = self._circuit[next] - count1 - 1 + nid2 = self._circuit[next_circuit] - count1 - 1 if (nid2 == count2): nid2 = XTree.DELETE @@ -1597,7 +1599,7 @@ class XDiff: # Found! if ((i == j) and (less < 0)): - clen = 0; # the length of the circuit. + clen = 0 # the length of the circuit. # Locate the circuit. #circuit.addElement( Integer(i)) @@ -1621,11 +1623,11 @@ class XDiff: n = 0 while (cit < clen - 1): left = self._circuit[n] - next = self._circuit[n + 1] - if next == -1: + next_circ = self._circuit[n + 1] + if next_circ == -1: right = -1 else: - right = self._circuit[next] + right = self._circuit[next_circ] #int middle = pathMatrix[circuit[n-1]][circuit[n]] middle = self._pathMatrix[left][right] @@ -1633,13 +1635,13 @@ class XDiff: if (middle != left): #circuit.insert( cit, middle ) self._circuit[clen * 2] = middle - self._circuit[clen * 2 + 1] = next + self._circuit[clen * 2 + 1] = next_circ self._circuit[n + 1] = clen * 2 clen += 1 finish = False break - n = next + n = next_circ cit += 1 return clen @@ -1669,10 +1671,10 @@ class XDiff: # @param input the first/old xml document # @param output output file name # FIXME this is probably completely wrong ... IO is Java-specific!!! - def writeDiff(self, input, output): + def writeDiff(self, inp, output): try: out = codecs.open(output, self._encoding) - br = open(input) + br = open(inp) root1 = self._xtree1.getRoot() root2 = self._xtree2.getRoot() @@ -1696,8 +1698,7 @@ class XDiff: out.close() except IOError as (errno, strerror): - print >>sys.stderr, strerror - + print >>sys.stderr, "Exception: err no. %d\n%s" % (errno, strerror) # Write an element that has been deleted from the old document. # @param out output file writer @@ -1926,39 +1927,37 @@ class XDiff: if (cdatalist == None): return text - buf = StringBuffer() + buf = "" count = cdatalist.size() lastEnd = 0 for i in range(0,count,2): - cdataStart = int(self.cdatalist[i]) - cdataEnd = int(self.cdatalist[i+1]) + cdataStart = int(cdatalist[i]) + cdataEnd = int(cdatalist[i+1]) if (cdataStart > lastEnd): - buf.append(text.substring(lastEnd, cdataStart)) - buf.append("") + buf += text[lastEnd:cdataStart] + buf += "" lastEnd = cdataEnd - if (lastEnd < text.length()): - buf.append(text.substring(lastEnd)) + if (lastEnd < len(text)): + buf += text[lastEnd:] - return buf.toString() + return str(buf) -def readParameters(args, parameters): +def readParameters(args, params): opid = 0 - if (args.length < 3): + if (len(args) < 3): return False # we are not in the object, so how can we get to these values? # FIXME global module variables? - elif (args[0].equals("-o")): + elif (args[0] == "-o"): _oFlag = True opid += 1 - elif (args[0].equals("-g")): + elif (args[0] == "-g"): _gFlag = True opid += 1 - if (args[opid].equals("-p")): + if (args[opid] == "-p"): opid += 1 p = 0 # try: @@ -1972,18 +1971,18 @@ def readParameters(args, parameters): return False XDiff._NO_MATCH_THRESHOLD = p - if (args[opid].equals("-e")): + if (args[opid] == "-e"): opid += 1 _encoding = args[opid] opid += 1 - if ((args.length - opid) != 3): + if ((len(args) - opid) != 3): return False - parameters.add(args[opid]) + params.append(args[opid]) opid += 1 - parameters.add(args[opid]) + params.append(args[opid]) opid += 1 - parameters.add(args[opid]) + params.append(args[opid]) return True @@ -1991,6 +1990,6 @@ if __name__ == "__main__": parameters = [] if (not readParameters(sys.argv, parameters)): print >>sys.stderr, __doc__ - return + sys.exit(1) - mydiff = XDiff(parameters[0], parameters[1], parameters[2]) \ No newline at end of file + mydiff = XDiff(parameters[0], parameters[1], parameters[2]) diff --git a/XHash.py b/XHash.py deleted file mode 100644 index f0c1554..0000000 --- a/XHash.py +++ /dev/null @@ -1,310 +0,0 @@ -# Copyright (c) 2001 - 2005 -# Yuan Wang. All rights reserved. - -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# 3. Redistributions in any form must be accompanied by information on -# how to obtain complete source code for the X-Diff software and any -# accompanying software that uses the X-Diff software. The source code -# must either be included in the distribution or be available for no -# more than the cost of distribution plus a nominal fee, and must be -# freely redistributable under reasonable conditions. For an executable -# file, complete source code means the source code for all modules it -# contains. It does not include source code for modules or files that -# typically accompany the major components of the operating system on -# which the executable file runs. - -# THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED -# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, -# ARE DISCLAIMED. IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT, -# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. - - -# XHash is an implementaion of DES -class XHash: - private static final int _initialPermutation[] = - 57, 49, 41, 33 , 25, 17, 9, 1, 59, 51, 43, 35, 27, 19, 11, 3, - 61, 53, 45, 37, 29, 21, 13, 5, 63, 55, 47, 39, 31, 23, 15, 7, - 56, 48, 40, 32 ,24, 16, 8, 0, 58, 50, 42, 34, 26, 18, 10, 2, - 60, 52, 44, 36, 28, 20, 12, 4, 62, 54, 46, 38, 30, 22, 14, 6 - }; - - private static final int _finalPermutation[] = - 39, 7, 47, 15, 55, 23, 63, 31, 38, 6, 46, 14, 54, 22, 62, 30, - 37, 5, 45, 13, 53, 21, 61, 29, 36, 4, 44, 12, 52, 20, 60, 28, - 35, 3, 43, 11, 51, 19, 59, 27, 34, 2, 42, 10, 50, 18, 58, 26, - 33, 1, 41, 9, 49, 17, 57, 25, 32, 0, 40, 8, 48, 16, 56, 24 - }; - - private static final int _keyReducePermutation[] = - 60, 52, 44, 36, 59, 51, 43, 35, 27, 19, 11, 3, 58, 50, - 42, 34, 26, 18, 10, 2, 57, 49, 41, 33, 25, 17, 9, 1, - 28, 20, 12, 4, 61, 53, 45, 37, 29, 21, 13, 5, 62, 54, - 46, 38, 30, 22, 14, 6, 63, 55, 47, 39, 31, 23, 15, 7 - }; - - private static final int _keyCompressPermutation[] = - 24, 27, 20, 6, 14, 10, 3, 22, 0, 17, 7, 12, - 8, 23, 11, 5, 16, 26, 1, 9, 19, 25, 4, 15, - 54, 43 ,36, 29, 49, 40, 48, 30, 52, 44, 37, 33, - 46, 35, 50, 41, 28, 53, 51, 55, 32, 45, 39, 42 - }; - - private static final int _keyRot[] = - 1, 2, 4, 6, 8, 10, 12, 14, 15, 17, 19, 21, 23, 25, 27, 28 - }; - - private static final int _sBoxP[][] = - 0x00808200, 0x00000000, 0x00008000, 0x00808202, - 0x00808002, 0x00008202, 0x00000002, 0x00008000, - 0x00000200, 0x00808200, 0x00808202, 0x00000200, - 0x00800202, 0x00808002, 0x00800000, 0x00000002, - 0x00000202, 0x00800200, 0x00800200, 0x00008200, - 0x00008200, 0x00808000, 0x00808000, 0x00800202, - 0x00008002, 0x00800002, 0x00800002, 0x00008002, - 0x00000000, 0x00000202, 0x00008202, 0x00800000, - 0x00008000, 0x00808202, 0x00000002, 0x00808000, - 0x00808200, 0x00800000, 0x00800000, 0x00000200, - 0x00808002, 0x00008000, 0x00008200, 0x00800002, - 0x00000200, 0x00000002, 0x00800202, 0x00008202, - 0x00808202, 0x00008002, 0x00808000, 0x00800202, - 0x00800002, 0x00000202, 0x00008202, 0x00808200, - 0x00000202, 0x00800200, 0x00800200, 0x00000000, - 0x00008002, 0x00008200, 0x00000000, 0x00808002 - }, - 0x40084010, 0x40004000, 0x00004000, 0x00084010, - 0x00080000, 0x00000010, 0x40080010, 0x40004010, - 0x40000010, 0x40084010, 0x40084000, 0x40000000, - 0x40004000, 0x00080000, 0x00000010, 0x40080010, - 0x00084000, 0x00080010, 0x40004010, 0x00000000, - 0x40000000, 0x00004000, 0x00084010, 0x40080000, - 0x00080010, 0x40000010, 0x00000000, 0x00084000, - 0x00004010, 0x40084000, 0x40080000, 0x00004010, - 0x00000000, 0x00084010, 0x40080010, 0x00080000, - 0x40004010, 0x40080000, 0x40084000, 0x00004000, - 0x40080000, 0x40004000, 0x00000010, 0x40084010, - 0x00084010, 0x00000010, 0x00004000, 0x40000000, - 0x00004010, 0x40084000, 0x00080000, 0x40000010, - 0x00080010, 0x40004010, 0x40000010, 0x00080010, - 0x00084000, 0x00000000, 0x40004000, 0x00004010, - 0x40000000, 0x40080010, 0x40084010, 0x00084000 - }, - 0x00000104, 0x04010100, 0x00000000, 0x04010004, - 0x04000100, 0x00000000, 0x00010104, 0x04000100, - 0x00010004, 0x04000004, 0x04000004, 0x00010000, - 0x04010104, 0x00010004, 0x04010000, 0x00000104, - 0x04000000, 0x00000004, 0x04010100, 0x00000100, - 0x00010100, 0x04010000, 0x04010004, 0x00010104, - 0x04000104, 0x00010100, 0x00010000, 0x04000104, - 0x00000004, 0x04010104, 0x00000100, 0x04000000, - 0x04010100, 0x04000000, 0x00010004, 0x00000104, - 0x00010000, 0x04010100, 0x04000100, 0x00000000, - 0x00000100, 0x00010004, 0x04010104, 0x04000100, - 0x04000004, 0x00000100, 0x00000000, 0x04010004, - 0x04000104, 0x00010000, 0x04000000, 0x04010104, - 0x00000004, 0x00010104, 0x00010100, 0x04000004, - 0x04010000, 0x04000104, 0x00000104, 0x04010000, - 0x00010104, 0x00000004, 0x04010004, 0x00010100 - }, - 0x80401000, 0x80001040, 0x80001040, 0x00000040, - 0x00401040, 0x80400040, 0x80400000, 0x80001000, - 0x00000000, 0x00401000, 0x00401000, 0x80401040, - 0x80000040, 0x00000000, 0x00400040, 0x80400000, - 0x80000000, 0x00001000, 0x00400000, 0x80401000, - 0x00000040, 0x00400000, 0x80001000, 0x00001040, - 0x80400040, 0x80000000, 0x00001040, 0x00400040, - 0x00001000, 0x00401040, 0x80401040, 0x80000040, - 0x00400040, 0x80400000, 0x00401000, 0x80401040, - 0x80000040, 0x00000000, 0x00000000, 0x00401000, - 0x00001040, 0x00400040, 0x80400040, 0x80000000, - 0x80401000, 0x80001040, 0x80001040, 0x00000040, - 0x80401040, 0x80000040, 0x80000000, 0x00001000, - 0x80400000, 0x80001000, 0x00401040, 0x80400040, - 0x80001000, 0x00001040, 0x00400000, 0x80401000, - 0x00000040, 0x00400000, 0x00001000, 0x00401040 - }, - 0x00000080, 0x01040080, 0x01040000, 0x21000080, - 0x00040000, 0x00000080, 0x20000000, 0x01040000, - 0x20040080, 0x00040000, 0x01000080, 0x20040080, - 0x21000080, 0x21040000, 0x00040080, 0x20000000, - 0x01000000, 0x20040000, 0x20040000, 0x00000000, - 0x20000080, 0x21040080, 0x21040080, 0x01000080, - 0x21040000, 0x20000080, 0x00000000, 0x21000000, - 0x01040080, 0x01000000, 0x21000000, 0x00040080, - 0x00040000, 0x21000080, 0x00000080, 0x01000000, - 0x20000000, 0x01040000, 0x21000080, 0x20040080, - 0x01000080, 0x20000000, 0x21040000, 0x01040080, - 0x20040080, 0x00000080, 0x01000000, 0x21040000, - 0x21040080, 0x00040080, 0x21000000, 0x21040080, - 0x01040000, 0x00000000, 0x20040000, 0x21000000, - 0x00040080, 0x01000080, 0x20000080, 0x00040000, - 0x00000000, 0x20040000, 0x01040080, 0x20000080 - }, - 0x10000008, 0x10200000, 0x00002000, 0x10202008, - 0x10200000, 0x00000008, 0x10202008, 0x00200000, - 0x10002000, 0x00202008, 0x00200000, 0x10000008, - 0x00200008, 0x10002000, 0x10000000, 0x00002008, - 0x00000000, 0x00200008, 0x10002008, 0x00002000, - 0x00202000, 0x10002008, 0x00000008, 0x10200008, - 0x10200008, 0x00000000, 0x00202008, 0x10202000, - 0x00002008, 0x00202000, 0x10202000, 0x10000000, - 0x10002000, 0x00000008, 0x10200008, 0x00202000, - 0x10202008, 0x00200000, 0x00002008, 0x10000008, - 0x00200000, 0x10002000, 0x10000000, 0x00002008, - 0x10000008, 0x10202008, 0x00202000, 0x10200000, - 0x00202008, 0x10202000, 0x00000000, 0x10200008, - 0x00000008, 0x00002000, 0x10200000, 0x00202008, - 0x00002000, 0x00200008, 0x10002008, 0x00000000, - 0x10202000, 0x10000000, 0x00200008, 0x10002008 - }, - 0x00100000, 0x02100001, 0x02000401, 0x00000000, - 0x00000400, 0x02000401, 0x00100401, 0x02100400, - 0x02100401, 0x00100000, 0x00000000, 0x02000001, - 0x00000001, 0x02000000, 0x02100001, 0x00000401, - 0x02000400, 0x00100401, 0x00100001, 0x02000400, - 0x02000001, 0x02100000, 0x02100400, 0x00100001, - 0x02100000, 0x00000400, 0x00000401, 0x02100401, - 0x00100400, 0x00000001, 0x02000000, 0x00100400, - 0x02000000, 0x00100400, 0x00100000, 0x02000401, - 0x02000401, 0x02100001, 0x02100001, 0x00000001, - 0x00100001, 0x02000000, 0x02000400, 0x00100000, - 0x02100400, 0x00000401, 0x00100401, 0x02100400, - 0x00000401, 0x02000001, 0x02100401, 0x02100000, - 0x00100400, 0x00000000, 0x00000001, 0x02100401, - 0x00000000, 0x00100401, 0x02100000, 0x00000400, - 0x02000001, 0x02000400, 0x00000400, 0x00100001 - }, - 0x08000820, 0x00000800, 0x00020000, 0x08020820, - 0x08000000, 0x08000820, 0x00000020, 0x08000000, - 0x00020020, 0x08020000, 0x08020820, 0x00020800, - 0x08020800, 0x00020820, 0x00000800, 0x00000020, - 0x08020000, 0x08000020, 0x08000800, 0x00000820, - 0x00020800, 0x00020020, 0x08020020, 0x08020800, - 0x00000820, 0x00000000, 0x00000000, 0x08020020, - 0x08000020, 0x08000800, 0x00020820, 0x00020000, - 0x00020820, 0x00020000, 0x08020800, 0x00000800, - 0x00000020, 0x08020020, 0x00000800, 0x00020820, - 0x08000800, 0x00000020, 0x08000020, 0x08020000, - 0x08020020, 0x08000000, 0x00020000, 0x08000820, - 0x00000000, 0x08020820, 0x00020020, 0x08000020, - 0x08020000, 0x08000800, 0x08000820, 0x00000000, - 0x08020820, 0x00020800, 0x00020800, 0x00000820, - 0x00000820, 0x00020020, 0x08000000, 0x08020800 - }; - - private static long _keys[]; - private static char _word[]; - - private static long _initialKey = 1007360890380L; - - /** -# Initialization #1. - public static void initialize() - makeKeys(_initialKey); - _word = new char[64]; - - /** -# Initialization #2. - public static void initialize(long key) - makeKeys(key); - _word = new char[64]; - - public static long hash(String word) - int len = word.length(); - long value = 0L; - for (int start = 0; start < len; start += 64) - if (len - start > 64) - value += (_hash(word.substring(start, start + 64)) ^ 0xffffffffL); - else - value += (_hash(word.substring(start)) ^ 0xffffffffL); - break; - return value; - - /** -# The actual hash function. - private static long _hash(String word) - int len = word.length(); - for (int i = 0; i < len; i++) - _word[i] = word.charAt(i); - int round = len / 8; - int rest = len % 8; - if (rest > 0) - for (int i = 0; i < 8 - rest; i++) - _word[len+i] = 0; - round++; - - int value = 0; - for (int i = 0, pos = 0; i < round; i++, pos += 8) - long todo = 0L + (byte)_word[pos]; - for (int j = 1; j < 8; j++) - todo = (todo << 8) + (byte)_word[pos+j]; - value += des(todo); - - return value; - - private static void makeKeys(long key) - long reduced = permutate(key, _keyReducePermutation); - int l = (int)(reduced >> 28); - int r = (int)(reduced & 0xfffffff); - _keys = new long[16]; - for (int i = 0; i < 16; i++) - _keys[i] = permutate(rotate(l, r, _keyRot[i]), - _keyCompressPermutation); - - private static long des(long w) - long x = permutate(w, _initialPermutation); - int l = (int)(x >>> 32); - int r = (int)x; - for (int i = 0; i < 16; i++) - int tmp = desFunc(r, _keys[i]) ^ l; - l = r; - r = tmp; - long y = ((long)r << 32) | ((long)l & 0xffffffffL); - return permutate(y, _finalPermutation); - - private static long permutate(long k, int p[]) - long s = 0; - for (int i = 0; i < p.length; i++) - if ((k & (1L << p[i])) != 0) - s |= 1L << i; - - return s; - - private static long rotate(int l, int r, int s) - return ((long)(((l<>>(28 - s))) << 28) | - ((r<> (28 - s)); - - private static int desFunc(int x, long k) - int p = x >>> 27; - int q = (p & 3) << 4; - int r = x << 5; - p |= r; - r = _sBoxP[0][(int)((k >> 42) ^ p) & 0x3f]; - p >>>= 4; - r |= _sBoxP[7][(int)((k >> 0) ^ p) & 0x3f]; - p >>>= 4; - r |= _sBoxP[6][(int)((k >> 6) ^ p) & 0x3f]; - p >>>= 4; - r |= _sBoxP[5][(int)((k >> 12) ^ p) & 0x3f]; - p >>>= 4; - r |= _sBoxP[4][(int)((k >> 18) ^ p) & 0x3f]; - p >>>= 4; - r |= _sBoxP[3][(int)((k >> 24) ^ p) & 0x3f]; - p >>>= 4; - r |= _sBoxP[2][(int)((k >> 30) ^ p) & 0x3f]; - p >>>= 4; - r |= _sBoxP[1][(int)((k >> 36) ^ (p | q)) & 0x3f]; - return r; diff --git a/XParser.py b/XParser.py index dcfedc4..018628e 100644 --- a/XParser.py +++ b/XParser.py @@ -26,16 +26,41 @@ # ARE DISCLAIMED. IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT, # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# SERVICES LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING # IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. +import XTree +import sys, hashlib import xml.sax -_PARSER_NAME = "org.apache.xerces.parsers.SAXParser"; +_STACK_SIZE = 100 + +class ErrorHandler: + """Basic interface for SAX error handlers. If you create an object + that implements this interface, then register the object with your + Parser, the parser will call the methods in your object to report + all warnings and errors. There are three levels of errors + available: warnings, (possibly) recoverable errors, and + unrecoverable errors. All methods take a SAXParseException as the + only parameter.""" + + + + def error(self, exception): + "Handle a recoverable error." + sys.stderr.write ("Error: %s\n" % exception) + + def fatalError(self, exception): + "Handle a non-recoverable error." + sys.stderr.write ("Fatal error: %s\n" % exception) + raise xml.sax.SAXParseException + + def warning(self, exception): + "Handle a warning." + sys.stderr.write ("Warning: %s\n" % exception) -# FIXME # This is interesting # http://www.virtuousprogrammer.com/?page_id=183 # http://docs.python.org/library/xml.sax.reader.html @@ -43,227 +68,185 @@ _PARSER_NAME = "org.apache.xerces.parsers.SAXParser"; # XTree # class XParser extends DefaultHandler implements LexicalHandler class XParser(xml.sax.handler.ContentHandler): - _setValidation = False - _setNameSpaces = True - _setSchemaSupport = True - _setSchemaFullSupport = False - _setNameSpacePrefixes = True - - _STACK_SIZE = 100 - - private XMLReader _parser; - private XTree _xtree; - private int _idStack[], _lsidStack[]; // id and left sibling - private long _valueStack[]; - private int _stackTop, _currentNodeID; - private boolean _readElement; - private StringBuffer _elementBuffer; +# private XMLReader self._parser +# private XTree self._xtree +# private int self._idStack[], self._lsidStack[] # id and left sibling +# private long self._valueStack[] +# private int self._stackTop, self._currentNodeID +# private boolean self._readElement +# private StringBuffer self._elementBuffer # Constructor. def __init__(self): - { - XHash.initialize(); - try - { - _parser = (XMLReader)Class.forName(_PARSER_NAME).newInstance(); - _parser.setFeature("http://xml.org/sax/features/validation", _setValidation); - _parser.setFeature("http://xml.org/sax/features/namespaces", _setNameSpaces); - _parser.setFeature("http://apache.org/xml/features/validation/schema", _setSchemaSupport); - _parser.setFeature("http://apache.org/xml/features/validation/schema-full-checking", _setSchemaFullSupport); - _parser.setFeature("http://xml.org/sax/features/namespace-prefixes", _setNameSpacePrefixes); - - _parser.setContentHandler(this); - _parser.setErrorHandler(this); - _parser.setProperty("http://xml.org/sax/properties/lexical-handler", this); - } - catch (Exception e) - { - System.err.println(e.getMessage()); - System.exit(1); - } - - _idStack = new int[_STACK_SIZE]; - _lsidStack = new int[_STACK_SIZE]; - _valueStack = new long[_STACK_SIZE]; - _stackTop = 0; - _currentNodeID = XTree.NULL_NODE; - _elementBuffer = new StringBuffer(); - } + xml.sax.handler.ContentHandler.__init__(self) + self._setValidation = False + self._setNameSpaces = True + self._setSchemaSupport = True + self._setSchemaFullSupport = False + self._setNameSpacePrefixes = True + self._readElement = False + self._xtree = None + +# try: + self._parser = xml.sax.make_parser() + self._parser.setFeature(xml.sax.handler.feature_validation, \ + self._setValidation) + self._parser.setFeature(xml.sax.handler.feature_namespaces, \ + self._setNameSpaces) + self._parser.setFeature(xml.sax.handler.feature_namespace_prefixes, \ + self._setNameSpacePrefixes) + #self._parser.setFeature("http://apache.org/xml/features/validation/schema", \ + # self._setSchemaSupport) + #self._parser.setFeature("http://apache.org/xml/features/validation/schema-full-checking", \ + # self._setSchemaFullSupport) + + self._parser.setContentHandler(self) +# self._parser.setErrorHandler(self) + self._parser.setProperty(xml.sax.handler.property_lexical_handler, self) +# except xml.sax.SAXParseException as (errno, strerror): # swallowing exception FIXME +# print >>sys.stderr, "Exception: err no. %d\n%s" % (errno, strerror) +# sys.exit(1) + + self._idStack = [] + self._lsidStack = [] + self._valueStack = [] + self._stackTop = 0 + self._currentNodeID = XTree.NULL_NODE + self._elementBuffer = "" # Parse an XML document # @param uri input XML document # @return the created XTree - def parse(String uri): - { - _xtree = new XTree(); - _idStack[_stackTop] = XTree.NULL_NODE; - _lsidStack[_stackTop] = XTree.NULL_NODE; - - try - { - _parser.parse(uri); - } - catch (Exception e) - { - System.err.println(e.getMessage()); - System.exit(1); - } - - return _xtree; - } - - // Document handler methods - - public void startElement(String uri, String local, String raw, - Attributes attrs) - { - // if text is mixed with elements - if (_elementBuffer.length() > 0) - { - String text = _elementBuffer.toString().trim(); - if (text.length() > 0) - { - long value = XHash.hash(text); - int tid = _xtree.addText(_idStack[_stackTop], _lsidStack[_stackTop], text, value); - _lsidStack[_stackTop] = tid; - _currentNodeID = tid; - _valueStack[_stackTop] += value; - } - } - - int eid = _xtree.addElement(_idStack[_stackTop], - _lsidStack[_stackTop], local); - - // Update last sibling info. - _lsidStack[_stackTop] = eid; - - // Push - _stackTop++; - _idStack[_stackTop] = eid; - _currentNodeID = eid; - _lsidStack[_stackTop] = XTree.NULL_NODE; - _valueStack[_stackTop] = XHash.hash(local); - - // Take care of attributes - if ((attrs != null) && (attrs.getLength() > 0)) - { - for (int i = 0; i < attrs.getLength(); i++) - { - String name = attrs.getQName(i); - String value = attrs.getValue(i); - long namehash = XHash.hash(name); - long valuehash = XHash.hash(value); - long attrhash = namehash * namehash + - valuehash * valuehash; - int aid = _xtree.addAttribute(eid, _lsidStack[_stackTop], name, value, namehash, attrhash); - - _lsidStack[_stackTop] = aid; - _currentNodeID = aid + 1; - _valueStack[_stackTop] += attrhash * attrhash; - } - } - - _readElement = True; - _elementBuffer = new StringBuffer(); - } - - def characters(char ch[], int start, int length): - { - _elementBuffer.append(ch, start, length); - } - - def endElement(String uri, String local, String raw): - { - if (_readElement) - { - if (_elementBuffer.length() > 0) - { - String text = _elementBuffer.toString(); - long value = XHash.hash(text); - _currentNodeID = - _xtree.addText(_idStack[_stackTop], - _lsidStack[_stackTop], - text, value); - _valueStack[_stackTop] += value; - } - else // an empty element - { - _currentNodeID = - _xtree.addText(_idStack[_stackTop], - _lsidStack[_stackTop], - "", 0); - } - _readElement = False; - } - else - { - if (_elementBuffer.length() > 0) - { - String text = _elementBuffer.toString().trim(); - // More text nodes before end of the element. - if (text.length() > 0) - { - long value = XHash.hash(text); - _currentNodeID = - _xtree.addText(_idStack[_stackTop], - _lsidStack[_stackTop], - text, value); - _valueStack[_stackTop] += value; - } - } - } - - _elementBuffer = new StringBuffer(); - _xtree.addHashValue(_idStack[_stackTop], - _valueStack[_stackTop]); - _valueStack[_stackTop-1] += _valueStack[_stackTop] * - _valueStack[_stackTop]; - _lsidStack[_stackTop-1] = _idStack[_stackTop]; - - // Pop - _stackTop--; - } - - // End of document handler methods - - // Lexical handler methods. - - def startCDATA(): - { - // The text node id should be the one next to the current - // node id. - int textid = _currentNodeID + 1; - String text = _elementBuffer.toString(); - _xtree.addCDATA(textid, text.length()); - } - - def endCDATA(): - { - int textid = _currentNodeID + 1; - String text = _elementBuffer.toString(); - _xtree.addCDATA(textid, text.length()); - } - - // Following functions are not implemented. - def comment(char[] ch, int start, int length): - { - } - - def startDTD(String name, String publicId, String systemId): - { - } - - def endDTD(): - { - } - - def startEntity(String name): - { - } - - def endEntity(String name): - { - } - - // End of lexical handler methods. -} + def parse(self, uri): + self._xtree = XTree.XTree() + self._idStack.append(XTree.NULL_NODE) + self._lsidStack.append(XTree.NULL_NODE) + +# try: + self._parser.parse(uri) +# except xml.sax.SAXParseException as (errno, strerror): +# print >>sys.stderr, "Exception: err no. %d\n%s" % (errno, strerror) +# sys.exit(1) + + return self._xtree + + # Document handler methods + + def startElement(self, local, attrs): + # if text is mixed with elements + if (len(self._elementBuffer) > 0): + text = str(self._elementBuffer).strip() + if (len(text) > 0): + # Original Java has long here, we have str. FIXME + value = hashlib.sha1(text).digest() + tid = self._xtree.addText(self._idStack[self._stackTop], self._lsidStack[self._stackTop], text, value) + self._lsidStack[self._stackTop] = tid + self._currentNodeID = tid + self._valueStack[self._stackTop] += value + + eid = self._xtree.addElement(self._idStack[self._stackTop], + self._lsidStack[self._stackTop], local) + + # Update last sibling info. + self._lsidStack[self._stackTop] = eid + + # Push + self._stackTop += 1 + self._idStack[self._stackTop] = eid + self._currentNodeID = eid + self._lsidStack[self._stackTop] = XTree.NULL_NODE + self._valueStack[self._stackTop] = hashlib.sha1(local).digest() + + # Take care of attributes + if ((attrs != None) and (attrs.getLength() > 0)): + for i in range(attrs.getLength()): + name = attrs.getQName(i) + value = attrs.getValue(i) + namehash = hashlib.sha1(name).digest() + valuehash = hashlib.sha1(value).digest() + attrhash = namehash * namehash + \ + valuehash * valuehash + aid = self._xtree.addAttribute(eid, self._lsidStack[self._stackTop], name, value, namehash, attrhash) + + self._lsidStack[self._stackTop] = aid + self._currentNodeID = aid + 1 + self._valueStack[self._stackTop] += attrhash * attrhash + + self._readElement = True + self._elementBuffer = "" + + def characters(self, ch): + self._elementBuffer += ch + + def endElement(self, name): + if (self._readElement): + if (len(self._elementBuffer) > 0): + text = str(self._elementBuffer) + value = hashlib.sha1(text).digest() + self._currentNodeID = \ + self._xtree.addText(self._idStack[self._stackTop], + self._lsidStack[self._stackTop], + text, value) + self._valueStack[self._stackTop] += value + else: # an empty element + self._currentNodeID = \ + self._xtree.addText(self._idStack[self._stackTop], \ + self._lsidStack[self._stackTop], \ + "", 0) + self._readElement = False + else: + if (len(self._elementBuffer) > 0): + text = str(self._elementBuffer).strip() + # More text nodes before end of the element. + if (len(text) > 0): + value = hashlib.sha1(text).digest() + self._currentNodeID = \ + self._xtree.addText(self._idStack[self._stackTop], \ + self._lsidStack[self._stackTop], \ + text, value) + self._valueStack[self._stackTop] += value + + self._elementBuffer = "" + self._xtree.addHashValue(self._idStack[self._stackTop], + self._valueStack[self._stackTop]) + self._valueStack[self._stackTop-1] += self._valueStack[self._stackTop] * \ + self._valueStack[self._stackTop] + self._lsidStack[self._stackTop-1] = self._idStack[self._stackTop] + + # Pop + self._stackTop -= 1 + + # End of document handler methods + + # Lexical handler methods. + + def startCDATA(self): + # The text node id should be the one next to the current + # node id. + textid = self._currentNodeID + 1 + text = str(self._elementBuffer) + self._xtree.addCDATA(textid, len(text)) + + def endCDATA(self): + textid = self._currentNodeID + 1 + text = str(self._elementBuffer) + self._xtree.addCDATA(textid, len(text)) + + # Following functions are not implemented. + def comment(self, ch): + pass + + def startDTD(self, name, publicId, systemId): + pass + + def endDTD(self): + pass + + def startEntity(self, name): + pass + + def endEntity(self, name): + pass + + # End of lexical handler methods. + diff --git a/XTree.py b/XTree.py index eef0af9..215dbc6 100644 --- a/XTree.py +++ b/XTree.py @@ -56,6 +56,29 @@ class XTree: # private long self._hashValue[][] # private String _value[][] # private Hashtable self._tagNames, _cdataTable + _root = 0 + _firstChild = [] + _nextSibling = [] + _isAttribute = [] + _valueIndex = [] + _matching = [] + _childrenCount = [] + _hashValue = [] + _value = [] + + _value.append([]) + _tagNames = [] + + # This hashtable is used to record CDATA section info. + # The key is the text node id, the value is the list of + # (start,end) position pair of each CDATA section. + _cdataTable = {} + + _elementIndex = -1 + _tagIndex = -1 + _valueCount = 0 + + def __init__(self, topcap=None, botcap=None): self._topCap = _TOP_LEVEL_CAPACITY @@ -69,44 +92,44 @@ class XTree: # Initialization. def _initialize(self): self._root = 0 - self._firstChild = [] - self._nextSibling = [] - self._isAttribute = [] - self._valueIndex = [] - self._matching = [] - self._childrenCount = [] - self._hashValue = [] - self._value = [] - - self._value[0] = [] - self._tagNames = [] + self._firstChild = [] + self._nextSibling = [] + self._isAttribute = [] + self._valueIndex = [] + self._matching = [] + self._childrenCount = [] + self._hashValue = [] + self._value = [] + + self._value.append([]) + self._tagNames = [] # This hashtable is used to record CDATA section info. # The key is the text node id, the value is the list of # (start,end) position pair of each CDATA section. - self._cdataTable = {} + self._cdataTable = {} - self._elementIndex = -1 - self._tagIndex = -1 - self._valueCount = self._botCap - 1 + self._elementIndex = -1 + self._tagIndex = -1 + self._valueCount = self._botCap - 1 # ID Expansion def _expand(self, topid): - self._firstChild[topid] = [] - self._nextSibling[topid] = [] - self._childrenCount[topid] = [] - self._matching[topid] = [] - self._valueIndex[topid] = [] - self._hashValue[topid] = [] - self._isAttribute[topid] = [] + self._firstChild[topid] = [] + self._nextSibling[topid] = [] + self._childrenCount[topid] = [] + self._matching[topid] = [] + self._valueIndex[topid] = [] + self._hashValue[topid] = [] + self._isAttribute[topid] = [] for i in range(self._botCap): - self._firstChild[topid][i] = NULL_NODE - self._nextSibling[topid][i] = NULL_NODE - self._childrenCount[topid][i]= 0 - self._matching[topid][i] = MATCH - self._valueIndex[topid][i] = -1 - self._isAttribute[topid][i] = False + self._firstChild[topid][i] = NULL_NODE + self._nextSibling[topid][i] = NULL_NODE + self._childrenCount[topid][i] = 0 + self._matching[topid][i] = MATCH + self._valueIndex[topid][i] = -1 + self._isAttribute[topid][i] = False # Start -- methods for constructing a tree. # Add a new element to the tree. @@ -142,7 +165,7 @@ class XTree: if (lsid == NULL_NODE): self._firstChild[ptopid][pbotid] = self._elementIndex else: - self._nextSibling[lsid/self._botCap][lsid%self._botCap] = self._elementIndex + self._nextSibling[lsid / self._botCap][lsid % self._botCap] = self._elementIndex # update children count self._childrenCount[ptopid][pbotid] += 1 @@ -166,7 +189,7 @@ class XTree: if (lsid == NULL_NODE): self._firstChild[etopid][ebotid] = self._elementIndex else: - self._nextSibling[lsid/self._botCap][lsid%self._botCap] = self._elementIndex + self._nextSibling[lsid / self._botCap][lsid % self._botCap] = self._elementIndex self._childrenCount[etopid][ebotid] += 1 self._hashValue[topid][botid] = value @@ -209,7 +232,7 @@ class XTree: # @param eid element id # @param value extra hash value def addHashValue(self, eid, value): - self._hashValue[eid/self._botCap][eid%self._botCap] = value + self._hashValue[eid / self._botCap][eid % self._botCap] = value # Add a CDATA section (either a start or an end) to the CDATA # hashtable, in which each entry should have an even number of @@ -233,11 +256,11 @@ class XTree: # @param match ?match and matched element id def addMatching(self, eid, match): if (match[0] == NO_MATCH): - self._matching[eid/self._botCap][eid%self._botCap] = NO_MATCH + self._matching[eid / self._botCap][eid % self._botCap] = NO_MATCH elif (match[0] == MATCH): - self._matching[eid/self._botCap][eid%self._botCap] = MATCH + self._matching[eid / self._botCap][eid % self._botCap] = MATCH else: - self._matching[eid/self._botCap][eid%self._botCap] = match[1] + 1 + self._matching[eid / self._botCap][eid % self._botCap] = match[1] + 1 # End -- methods for constructing a tree. @@ -247,7 +270,7 @@ class XTree: # @param eid element id # @param match ?change and matched element id def getMatching(self, eid, match): - mid = self._matching[eid/self._botCap][eid%self._botCap] + mid = self._matching[eid / self._botCap][eid % self._botCap] if (mid == NO_MATCH): match[0] = NO_MATCH elif (mid == MATCH): @@ -263,7 +286,7 @@ class XTree: # Get the first child of a node. # @param eid element id def getFirstChild(self, eid): - cid = self._firstChild[eid/self._botCap][eid%self._botCap] + cid = self._firstChild[eid / self._botCap][eid % self._botCap] while (cid > self._root): ctopid = cid / self._botCap cbotid = cid % self._botCap @@ -277,13 +300,13 @@ class XTree: # Get the next sibling of a node. # @param eid element id def getNextSibling(self, eid): - return self._nextSibling[eid/self._botCap][eid%self._botCap] + return self._nextSibling[eid / self._botCap][eid % self._botCap] # Get the first attribute of a node. # @param eid element id def getFirstAttribute(self, eid): - aid = self._firstChild[eid/self._botCap][eid%self._botCap] - if ((aid > self._root) and (self._isAttribute[aid/self._botCap][aid%self._botCap])): + aid = self._firstChild[eid / self._botCap][eid % self._botCap] + if ((aid > self._root) and (self._isAttribute[aid / self._botCap][aid % self._botCap])): return aid else: return NULL_NODE @@ -291,8 +314,8 @@ class XTree: # Get the next attribute of a node. # @param aid attribute id def getNextAttribute(self, aid): - aid1 = self._nextSibling[aid/self._botCap][aid%self._botCap] - if ((aid1 > self._root) and (self._isAttribute[aid1/self._botCap][aid1%self._botCap])): + aid1 = self._nextSibling[aid / self._botCap][aid % self._botCap] + if ((aid1 > self._root) and (self._isAttribute[aid1 / self._botCap][aid1 % self._botCap])): return aid1 else: return NULL_NODE @@ -300,17 +323,17 @@ class XTree: # Get the attribute value. # @param aid attribute id def getAttributeValue(self, aid): - cid = self._firstChild[aid/self._botCap][aid%self._botCap] - index = self._valueIndex[cid/self._botCap][cid%self._botCap] + cid = self._firstChild[aid / self._botCap][aid % self._botCap] + index = self._valueIndex[cid / self._botCap][cid % self._botCap] if (index > 0): - return self._value[index/self._botCap][index%self._botCap] + return self._value[index / self._botCap][index % self._botCap] else: return "" # Get the hash value of a node. # @param eid element id def getHashValue(self, eid): - return self._hashValue[eid/self._botCap][eid%self._botCap] + return self._hashValue[eid / self._botCap][eid % self._botCap] # Get the CDATA section position list of a text node. # @param eid element id @@ -321,7 +344,7 @@ class XTree: # Get the childern count of a node. # @param eid element id def getChildrenCount(self, eid): - return self._childrenCount[eid/self._botCap][eid%self._botCap] + return self._childrenCount[eid / self._botCap][eid % self._botCap] # Get the # of all decendents of a node. # @param eid element id @@ -335,39 +358,39 @@ class XTree: cid = self._firstChild[topid][botid] while (cid > NULL_NODE): count += self.getDecendentsCount(cid) - cid = self._nextSibling[cid/self._botCap][cid%self._botCap] + cid = self._nextSibling[cid / self._botCap][cid % self._botCap] return count # Get the value index of a node # @param eid element id def getValueIndex(self, eid): - return self._valueIndex[eid/self._botCap][eid%self._botCap] + return self._valueIndex[eid / self._botCap][eid % self._botCap] # Get the value of a leaf node # @param index value index def getValue(self, index): - return self._value[index/self._botCap][index%self._botCap] + return self._value[index / self._botCap][index % self._botCap] # Get the tag of an element node # @param eid element id def getTag(self, eid): - index = self._valueIndex[eid/self._botCap][eid%self._botCap] + index = self._valueIndex[eid / self._botCap][eid % self._botCap] return self._value[0][index] # Get the text value of a leaf node # @param eid element id def getText(self, eid): - index = self._valueIndex[eid/self._botCap][eid%self._botCap] + index = self._valueIndex[eid / self._botCap][eid % self._botCap] if (index >= self._botCap): - return self._value[index/self._botCap][index%self._botCap] + return self._value[index / self._botCap][index % self._botCap] else: return "" # Check if a node an element node. # @param eid element id def isElement(self, eid): - vindex = self._valueIndex[eid/self._botCap][eid%self._botCap] + vindex = self._valueIndex[eid / self._botCap][eid % self._botCap] if (vindex < self._botCap): return True else: @@ -376,12 +399,12 @@ class XTree: # Check if a node is an attribute node. # @param eid element id def isAttribute(self, eid): - return self._isAttribute[eid/self._botCap][eid%self._botCap] + return self._isAttribute[eid / self._botCap][eid % self._botCap] # Check if a node an leaf text node. # @param edi element id def isLeaf(self, eid): - index = self._valueIndex[eid/self._botCap][eid%self._botCap] + index = self._valueIndex[eid / self._botCap][eid % self._botCap] if (index < self._botCap): return False else: @@ -390,7 +413,7 @@ class XTree: # End -- methods for accessing a tree. # For testing purpose. - def dump(self, eid = None): + def dump(self, eid=None): if eid: topid = eid / self._botCap botid = eid % self._botCap @@ -407,7 +430,7 @@ class XTree: self._value[vtopid][vbotid] else: print "eid\tfirstC\tnextS\tattr?\tcCount\thash\tmatch\tvalue" - for i in range(self._root,self._elementIndex+1): + for i in range(self._root, self._elementIndex + 1): topid = i / self._botCap botid = i % self._botCap vid = self._valueIndex[topid][botid] @@ -421,3 +444,4 @@ class XTree: self._hashValue[topid][botid] + "\t" + \ self._matching[topid][botid] + "\t" + \ self._value[vtopid][vbotid] + diff --git a/diff.xml b/diff.xml new file mode 100644 index 0000000..af4ca81 --- /dev/null +++ b/diff.xml @@ -0,0 +1,12 @@ + +4 + +2 + +Maruška + +Janíček + + +3 + diff --git a/new.xml b/new.xml new file mode 100644 index 0000000..9abd148 --- /dev/null +++ b/new.xml @@ -0,0 +1,7 @@ + + 4 + 3 + + Janíček + + diff --git a/old.xml b/old.xml new file mode 100644 index 0000000..898d8c0 --- /dev/null +++ b/old.xml @@ -0,0 +1,7 @@ + + 1 + 2 + + Maruška + + diff --git a/test_XDiff.py b/test_XDiff.py new file mode 100644 index 0000000..8eb8072 --- /dev/null +++ b/test_XDiff.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import unittest +import XDiff + +class TestXDiff(unittest.TestCase): + def test_XDiff(self): + XDiff.XDiff('old.xml', 'new.xml', 'diff.xml') + self.assertTrue(True, "we have managed to get through XDiff.") \ No newline at end of file -- cgit