aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatěj Cepl <mcepl@redhat.com>2011-10-24 15:45:51 +0200
committerMatěj Cepl <mcepl@redhat.com>2011-10-24 23:26:32 +0200
commitaaab0e1e065a31c25a81d972d0b2e5221cbcba4d (patch)
treeaca6644e995fd1569a07b8c5a97aa3b9f28f8fbb
parent7f7b2e533dbdfaef16f41902e750588f58e2cdaf (diff)
downloadjson_diff-aaab0e1e065a31c25a81d972d0b2e5221cbcba4d.tar.gz
Syntaically correct, but not working.
Huge refactoring of data strctures is probably needed.
-rw-r--r--XDiff.py105
-rw-r--r--XHash.py310
-rw-r--r--XParser.py423
-rw-r--r--XTree.py138
-rw-r--r--diff.xml12
-rw-r--r--new.xml7
-rw-r--r--old.xml7
-rw-r--r--test_XDiff.py9
8 files changed, 371 insertions, 640 deletions
diff --git a/XDiff.py b/XDiff.py
index ad0e93c..368ef51 100644
--- a/XDiff.py
+++ b/XDiff.py
@@ -55,6 +55,7 @@ Options:
import sys, time, codecs
import XTree, XLut
from XParser import XParser
+import random
# <code>XDiff</code> computes the difference of two input XML documents.
@@ -66,6 +67,8 @@ _TEXT_SIZE = 1024
class XDiff:
_oFlag = False
+ _gFlag = False
+ _needNewLine = False
_NO_MATCH_THRESHOLD = 0.3
_sampleCount = 3
_DEBUG = False
@@ -343,7 +346,7 @@ class XDiff:
i = start
while (i < elementCount1) and (muc1 < ucount1):
- if (not matched1[i] and startTag.equals(self._xtree1.getTag(elements1[i]))):
+ if (not matched1[i] and (startTag == self._xtree1.getTag(elements1[i]))):
matched1[i] = True
muc1 += 1
unmatched1[uele1] = elements1[i]
@@ -352,7 +355,7 @@ class XDiff:
i = 0
while (i < elementCount2) and (muc2 < ucount2):
- if (not matched2[i] and startTag.equals(self._xtree2.getTag(elements2[i]))):
+ if (not matched2[i] and (startTag == self._xtree2.getTag(elements2[i]))):
matched2[i] = True
muc2 += 1
unmatched2[uele2] = elements2[i]
@@ -628,9 +631,9 @@ class XDiff:
dist = self._xlut.get(nodes2[j], nodes1[i])
else:
if treeOrder:
- dist = distance(nodes1[i], nodes2[j], True, XTree.NO_CONNECTION)
+ dist = self.distance(nodes1[i], nodes2[j], True, XTree.NO_CONNECTION)
else:
- dist = distance(nodes2[j], nodes1[i], True, XTree.NO_CONNECTION)
+ dist = self.distance(nodes2[j], nodes1[i], True, XTree.NO_CONNECTION)
# the default mode.
if (not self._oFlag and (dist > 1) and (dist >= self._NO_MATCH_THRESHOLD * (deleteCost + distance[count1][j]))):
dist = XTree.NO_CONNECTION
@@ -714,13 +717,13 @@ class XDiff:
matching2[j] = i
break
else:
- r = Random(time.time()) # FIXME
+ r = random.Random(time.time())
scount1 = 0
scount2 = 0
matchingThreshold = 0
i = 0
while (i < self._sampleCount) and (scount2 < count2):
- snode = r.nextInt(count2 - scount2) + scount2
+ snode = r.randint(0, count2 - scount2) + scount2
dist = XTree.NO_CONNECTION
bestmatch = XTree.NO_MATCH
for j in range(scount1,count1):
@@ -859,7 +862,6 @@ class XDiff:
# @param threshold No need to return a distance higher
# than this threshold
# @return the distance
-
def _xdiff(self, pid1, pid2, threshold):
dist = 0
@@ -1022,7 +1024,7 @@ class XDiff:
i = start
while (i < elementCount1) and (muc1 < ucount1):
- if (not matched1[i] and startTag.equals(self._xtree1.getTag(elements1[i]))):
+ if (not matched1[i] and (startTag == self._xtree1.getTag(elements1[i]))):
matched1[i] = True
muc1 += 1
unmatched1[uele1] = elements1[i]
@@ -1031,7 +1033,7 @@ class XDiff:
i = 0
while (i < elementCount2) and (muc2 < ucount2):
- if (not matched2[i] and startTag.equals(self._xtree2.getTag(elements2[i]))):
+ if (not matched2[i] and (startTag == self._xtree2.getTag(elements2[i]))):
matched2[i] = True
muc2 += 1
unmatched2[uele2] = elements2[i]
@@ -1189,13 +1191,12 @@ class XDiff:
deleteCost = self._xtree2.getDecendentsCount(nodes1[i]) + 1
for j in range(count2):
if treeOrder:
- dist = distance(nodes1[i], nodes2[j], True, XTree.NO_CONNECTION)
+ dist = self.distance(nodes1[i], nodes2[j], True, XTree.NO_CONNECTION)
else:
- dist = distance(nodes2[j], nodes1[i], True, XTree.NO_CONNECTION)
+ dist = self.distance(nodes2[j], nodes1[i], True, XTree.NO_CONNECTION)
# the default mode.
if (not self._oFlag and (dist > 1) and (dist < XTree.NO_CONNECTION) and \
- (dist >= self._NO_MATCH_THRESHOLD * \
- (deleteCost + distance[count1][j]))):
+ (dist >= self._NO_MATCH_THRESHOLD * (deleteCost + distance[count1][j]))):
dist = XTree.NO_CONNECTION
if (dist < XTree.NO_CONNECTION):
@@ -1204,6 +1205,7 @@ class XDiff:
else:
self._xlut.add(nodes2[j], nodes1[i], dist)
distance[i][j] = dist
+
# delete cost.
distance[i][count2] = deleteCost
@@ -1230,21 +1232,21 @@ class XDiff:
matching2[i] = XTree.NO_MATCH
distance = 0
- r = Random(time.time())
+ r = random.Random(time.time())
scount1 = 0
scount2 = 0
matchingThreshold = 0
i = 0
while (i < self._sampleCount) and (scount2 < count2):
- snode = r.nextInt(count2 - scount2) + scount2
+ snode = r.randint(0, count2 - scount2) + scount2
dist = XTree.NO_CONNECTION
bestmatch = XTree.NO_MATCH
for j in range(scount1,count1):
if treeOrder:
- d = distance(nodes1[j], nodes2[snode], False, threshold - distance)
+ d = self.distance(nodes1[j], nodes2[snode], False, threshold - distance)
else:
- d = distance(nodes2[snode], nodes1[j], False, threshold - distance)
+ d = self.distance(nodes2[snode], nodes1[j], False, threshold - distance)
if (d < dist):
dist = d
bestmatch = j
@@ -1468,14 +1470,14 @@ class XDiff:
if (clen > 0):
# Modify matching.
i = 0
- next = 0
+ next_circuit = 0
while (i < clen - 1):
- n1 = self._circuit[next]
- next = self._circuit[next+1]
+ n1 = self._circuit[next_circuit]
+ next_circuit = self._circuit[next_circuit+1]
# Node in node list 1.
if ((n1 > 0) and (n1 <= count1)):
nid1 = n1 - 1
- nid2 = self._circuit[next] - count1 - 1
+ nid2 = self._circuit[next_circuit] - count1 - 1
if (nid2 == count2):
nid2 = XTree.DELETE
@@ -1597,7 +1599,7 @@ class XDiff:
# Found!
if ((i == j) and (less < 0)):
- clen = 0; # the length of the circuit.
+ clen = 0 # the length of the circuit.
# Locate the circuit.
#circuit.addElement( Integer(i))
@@ -1621,11 +1623,11 @@ class XDiff:
n = 0
while (cit < clen - 1):
left = self._circuit[n]
- next = self._circuit[n + 1]
- if next == -1:
+ next_circ = self._circuit[n + 1]
+ if next_circ == -1:
right = -1
else:
- right = self._circuit[next]
+ right = self._circuit[next_circ]
#int middle = pathMatrix[circuit[n-1]][circuit[n]]
middle = self._pathMatrix[left][right]
@@ -1633,13 +1635,13 @@ class XDiff:
if (middle != left):
#circuit.insert( cit, middle )
self._circuit[clen * 2] = middle
- self._circuit[clen * 2 + 1] = next
+ self._circuit[clen * 2 + 1] = next_circ
self._circuit[n + 1] = clen * 2
clen += 1
finish = False
break
- n = next
+ n = next_circ
cit += 1
return clen
@@ -1669,10 +1671,10 @@ class XDiff:
# @param input the first/old xml document
# @param output output file name
# FIXME this is probably completely wrong ... IO is Java-specific!!!
- def writeDiff(self, input, output):
+ def writeDiff(self, inp, output):
try:
out = codecs.open(output, self._encoding)
- br = open(input)
+ br = open(inp)
root1 = self._xtree1.getRoot()
root2 = self._xtree2.getRoot()
@@ -1696,8 +1698,7 @@ class XDiff:
out.close()
except IOError as (errno, strerror):
- print >>sys.stderr, strerror
-
+ print >>sys.stderr, "Exception: err no. %d\n%s" % (errno, strerror)
# Write an element that has been deleted from the old document.
# @param out output file writer
@@ -1926,39 +1927,37 @@ class XDiff:
if (cdatalist == None):
return text
- buf = StringBuffer()
+ buf = ""
count = cdatalist.size()
lastEnd = 0
for i in range(0,count,2):
- cdataStart = int(self.cdatalist[i])
- cdataEnd = int(self.cdatalist[i+1])
+ cdataStart = int(cdatalist[i])
+ cdataEnd = int(cdatalist[i+1])
if (cdataStart > lastEnd):
- buf.append(text.substring(lastEnd, cdataStart))
- buf.append("<![CDATA[" +
- text.substring(cdataStart, cdataEnd) +
- "]]>")
+ buf += text[lastEnd:cdataStart]
+ buf += "<![CDATA[" + text[cdataStart:cdataEnd] + "]]>"
lastEnd = cdataEnd
- if (lastEnd < text.length()):
- buf.append(text.substring(lastEnd))
+ if (lastEnd < len(text)):
+ buf += text[lastEnd:]
- return buf.toString()
+ return str(buf)
-def readParameters(args, parameters):
+def readParameters(args, params):
opid = 0
- if (args.length < 3):
+ if (len(args) < 3):
return False
# we are not in the object, so how can we get to these values?
# FIXME global module variables?
- elif (args[0].equals("-o")):
+ elif (args[0] == "-o"):
_oFlag = True
opid += 1
- elif (args[0].equals("-g")):
+ elif (args[0] == "-g"):
_gFlag = True
opid += 1
- if (args[opid].equals("-p")):
+ if (args[opid] == "-p"):
opid += 1
p = 0
# try:
@@ -1972,18 +1971,18 @@ def readParameters(args, parameters):
return False
XDiff._NO_MATCH_THRESHOLD = p
- if (args[opid].equals("-e")):
+ if (args[opid] == "-e"):
opid += 1
_encoding = args[opid]
opid += 1
- if ((args.length - opid) != 3):
+ if ((len(args) - opid) != 3):
return False
- parameters.add(args[opid])
+ params.append(args[opid])
opid += 1
- parameters.add(args[opid])
+ params.append(args[opid])
opid += 1
- parameters.add(args[opid])
+ params.append(args[opid])
return True
@@ -1991,6 +1990,6 @@ if __name__ == "__main__":
parameters = []
if (not readParameters(sys.argv, parameters)):
print >>sys.stderr, __doc__
- return
+ sys.exit(1)
- mydiff = XDiff(parameters[0], parameters[1], parameters[2]) \ No newline at end of file
+ mydiff = XDiff(parameters[0], parameters[1], parameters[2])
diff --git a/XHash.py b/XHash.py
deleted file mode 100644
index f0c1554..0000000
--- a/XHash.py
+++ /dev/null
@@ -1,310 +0,0 @@
-# Copyright (c) 2001 - 2005
-# Yuan Wang. All rights reserved.
-
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# 3. Redistributions in any form must be accompanied by information on
-# how to obtain complete source code for the X-Diff software and any
-# accompanying software that uses the X-Diff software. The source code
-# must either be included in the distribution or be available for no
-# more than the cost of distribution plus a nominal fee, and must be
-# freely redistributable under reasonable conditions. For an executable
-# file, complete source code means the source code for all modules it
-# contains. It does not include source code for modules or files that
-# typically accompany the major components of the operating system on
-# which the executable file runs.
-
-# THIS SOFTWARE IS PROVIDED BY YUAN WANG "AS IS" AND ANY EXPRESS OR IMPLIED
-# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT,
-# ARE DISCLAIMED. IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
-# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
-# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-
-
-# <code>XHash</code> is an implementaion of DES
-class XHash:
- private static final int _initialPermutation[] =
- 57, 49, 41, 33 , 25, 17, 9, 1, 59, 51, 43, 35, 27, 19, 11, 3,
- 61, 53, 45, 37, 29, 21, 13, 5, 63, 55, 47, 39, 31, 23, 15, 7,
- 56, 48, 40, 32 ,24, 16, 8, 0, 58, 50, 42, 34, 26, 18, 10, 2,
- 60, 52, 44, 36, 28, 20, 12, 4, 62, 54, 46, 38, 30, 22, 14, 6
- };
-
- private static final int _finalPermutation[] =
- 39, 7, 47, 15, 55, 23, 63, 31, 38, 6, 46, 14, 54, 22, 62, 30,
- 37, 5, 45, 13, 53, 21, 61, 29, 36, 4, 44, 12, 52, 20, 60, 28,
- 35, 3, 43, 11, 51, 19, 59, 27, 34, 2, 42, 10, 50, 18, 58, 26,
- 33, 1, 41, 9, 49, 17, 57, 25, 32, 0, 40, 8, 48, 16, 56, 24
- };
-
- private static final int _keyReducePermutation[] =
- 60, 52, 44, 36, 59, 51, 43, 35, 27, 19, 11, 3, 58, 50,
- 42, 34, 26, 18, 10, 2, 57, 49, 41, 33, 25, 17, 9, 1,
- 28, 20, 12, 4, 61, 53, 45, 37, 29, 21, 13, 5, 62, 54,
- 46, 38, 30, 22, 14, 6, 63, 55, 47, 39, 31, 23, 15, 7
- };
-
- private static final int _keyCompressPermutation[] =
- 24, 27, 20, 6, 14, 10, 3, 22, 0, 17, 7, 12,
- 8, 23, 11, 5, 16, 26, 1, 9, 19, 25, 4, 15,
- 54, 43 ,36, 29, 49, 40, 48, 30, 52, 44, 37, 33,
- 46, 35, 50, 41, 28, 53, 51, 55, 32, 45, 39, 42
- };
-
- private static final int _keyRot[] =
- 1, 2, 4, 6, 8, 10, 12, 14, 15, 17, 19, 21, 23, 25, 27, 28
- };
-
- private static final int _sBoxP[][] =
- 0x00808200, 0x00000000, 0x00008000, 0x00808202,
- 0x00808002, 0x00008202, 0x00000002, 0x00008000,
- 0x00000200, 0x00808200, 0x00808202, 0x00000200,
- 0x00800202, 0x00808002, 0x00800000, 0x00000002,
- 0x00000202, 0x00800200, 0x00800200, 0x00008200,
- 0x00008200, 0x00808000, 0x00808000, 0x00800202,
- 0x00008002, 0x00800002, 0x00800002, 0x00008002,
- 0x00000000, 0x00000202, 0x00008202, 0x00800000,
- 0x00008000, 0x00808202, 0x00000002, 0x00808000,
- 0x00808200, 0x00800000, 0x00800000, 0x00000200,
- 0x00808002, 0x00008000, 0x00008200, 0x00800002,
- 0x00000200, 0x00000002, 0x00800202, 0x00008202,
- 0x00808202, 0x00008002, 0x00808000, 0x00800202,
- 0x00800002, 0x00000202, 0x00008202, 0x00808200,
- 0x00000202, 0x00800200, 0x00800200, 0x00000000,
- 0x00008002, 0x00008200, 0x00000000, 0x00808002
- },
- 0x40084010, 0x40004000, 0x00004000, 0x00084010,
- 0x00080000, 0x00000010, 0x40080010, 0x40004010,
- 0x40000010, 0x40084010, 0x40084000, 0x40000000,
- 0x40004000, 0x00080000, 0x00000010, 0x40080010,
- 0x00084000, 0x00080010, 0x40004010, 0x00000000,
- 0x40000000, 0x00004000, 0x00084010, 0x40080000,
- 0x00080010, 0x40000010, 0x00000000, 0x00084000,
- 0x00004010, 0x40084000, 0x40080000, 0x00004010,
- 0x00000000, 0x00084010, 0x40080010, 0x00080000,
- 0x40004010, 0x40080000, 0x40084000, 0x00004000,
- 0x40080000, 0x40004000, 0x00000010, 0x40084010,
- 0x00084010, 0x00000010, 0x00004000, 0x40000000,
- 0x00004010, 0x40084000, 0x00080000, 0x40000010,
- 0x00080010, 0x40004010, 0x40000010, 0x00080010,
- 0x00084000, 0x00000000, 0x40004000, 0x00004010,
- 0x40000000, 0x40080010, 0x40084010, 0x00084000
- },
- 0x00000104, 0x04010100, 0x00000000, 0x04010004,
- 0x04000100, 0x00000000, 0x00010104, 0x04000100,
- 0x00010004, 0x04000004, 0x04000004, 0x00010000,
- 0x04010104, 0x00010004, 0x04010000, 0x00000104,
- 0x04000000, 0x00000004, 0x04010100, 0x00000100,
- 0x00010100, 0x04010000, 0x04010004, 0x00010104,
- 0x04000104, 0x00010100, 0x00010000, 0x04000104,
- 0x00000004, 0x04010104, 0x00000100, 0x04000000,
- 0x04010100, 0x04000000, 0x00010004, 0x00000104,
- 0x00010000, 0x04010100, 0x04000100, 0x00000000,
- 0x00000100, 0x00010004, 0x04010104, 0x04000100,
- 0x04000004, 0x00000100, 0x00000000, 0x04010004,
- 0x04000104, 0x00010000, 0x04000000, 0x04010104,
- 0x00000004, 0x00010104, 0x00010100, 0x04000004,
- 0x04010000, 0x04000104, 0x00000104, 0x04010000,
- 0x00010104, 0x00000004, 0x04010004, 0x00010100
- },
- 0x80401000, 0x80001040, 0x80001040, 0x00000040,
- 0x00401040, 0x80400040, 0x80400000, 0x80001000,
- 0x00000000, 0x00401000, 0x00401000, 0x80401040,
- 0x80000040, 0x00000000, 0x00400040, 0x80400000,
- 0x80000000, 0x00001000, 0x00400000, 0x80401000,
- 0x00000040, 0x00400000, 0x80001000, 0x00001040,
- 0x80400040, 0x80000000, 0x00001040, 0x00400040,
- 0x00001000, 0x00401040, 0x80401040, 0x80000040,
- 0x00400040, 0x80400000, 0x00401000, 0x80401040,
- 0x80000040, 0x00000000, 0x00000000, 0x00401000,
- 0x00001040, 0x00400040, 0x80400040, 0x80000000,
- 0x80401000, 0x80001040, 0x80001040, 0x00000040,
- 0x80401040, 0x80000040, 0x80000000, 0x00001000,
- 0x80400000, 0x80001000, 0x00401040, 0x80400040,
- 0x80001000, 0x00001040, 0x00400000, 0x80401000,
- 0x00000040, 0x00400000, 0x00001000, 0x00401040
- },
- 0x00000080, 0x01040080, 0x01040000, 0x21000080,
- 0x00040000, 0x00000080, 0x20000000, 0x01040000,
- 0x20040080, 0x00040000, 0x01000080, 0x20040080,
- 0x21000080, 0x21040000, 0x00040080, 0x20000000,
- 0x01000000, 0x20040000, 0x20040000, 0x00000000,
- 0x20000080, 0x21040080, 0x21040080, 0x01000080,
- 0x21040000, 0x20000080, 0x00000000, 0x21000000,
- 0x01040080, 0x01000000, 0x21000000, 0x00040080,
- 0x00040000, 0x21000080, 0x00000080, 0x01000000,
- 0x20000000, 0x01040000, 0x21000080, 0x20040080,
- 0x01000080, 0x20000000, 0x21040000, 0x01040080,
- 0x20040080, 0x00000080, 0x01000000, 0x21040000,
- 0x21040080, 0x00040080, 0x21000000, 0x21040080,
- 0x01040000, 0x00000000, 0x20040000, 0x21000000,
- 0x00040080, 0x01000080, 0x20000080, 0x00040000,
- 0x00000000, 0x20040000, 0x01040080, 0x20000080
- },
- 0x10000008, 0x10200000, 0x00002000, 0x10202008,
- 0x10200000, 0x00000008, 0x10202008, 0x00200000,
- 0x10002000, 0x00202008, 0x00200000, 0x10000008,
- 0x00200008, 0x10002000, 0x10000000, 0x00002008,
- 0x00000000, 0x00200008, 0x10002008, 0x00002000,
- 0x00202000, 0x10002008, 0x00000008, 0x10200008,
- 0x10200008, 0x00000000, 0x00202008, 0x10202000,
- 0x00002008, 0x00202000, 0x10202000, 0x10000000,
- 0x10002000, 0x00000008, 0x10200008, 0x00202000,
- 0x10202008, 0x00200000, 0x00002008, 0x10000008,
- 0x00200000, 0x10002000, 0x10000000, 0x00002008,
- 0x10000008, 0x10202008, 0x00202000, 0x10200000,
- 0x00202008, 0x10202000, 0x00000000, 0x10200008,
- 0x00000008, 0x00002000, 0x10200000, 0x00202008,
- 0x00002000, 0x00200008, 0x10002008, 0x00000000,
- 0x10202000, 0x10000000, 0x00200008, 0x10002008
- },
- 0x00100000, 0x02100001, 0x02000401, 0x00000000,
- 0x00000400, 0x02000401, 0x00100401, 0x02100400,
- 0x02100401, 0x00100000, 0x00000000, 0x02000001,
- 0x00000001, 0x02000000, 0x02100001, 0x00000401,
- 0x02000400, 0x00100401, 0x00100001, 0x02000400,
- 0x02000001, 0x02100000, 0x02100400, 0x00100001,
- 0x02100000, 0x00000400, 0x00000401, 0x02100401,
- 0x00100400, 0x00000001, 0x02000000, 0x00100400,
- 0x02000000, 0x00100400, 0x00100000, 0x02000401,
- 0x02000401, 0x02100001, 0x02100001, 0x00000001,
- 0x00100001, 0x02000000, 0x02000400, 0x00100000,
- 0x02100400, 0x00000401, 0x00100401, 0x02100400,
- 0x00000401, 0x02000001, 0x02100401, 0x02100000,
- 0x00100400, 0x00000000, 0x00000001, 0x02100401,
- 0x00000000, 0x00100401, 0x02100000, 0x00000400,
- 0x02000001, 0x02000400, 0x00000400, 0x00100001
- },
- 0x08000820, 0x00000800, 0x00020000, 0x08020820,
- 0x08000000, 0x08000820, 0x00000020, 0x08000000,
- 0x00020020, 0x08020000, 0x08020820, 0x00020800,
- 0x08020800, 0x00020820, 0x00000800, 0x00000020,
- 0x08020000, 0x08000020, 0x08000800, 0x00000820,
- 0x00020800, 0x00020020, 0x08020020, 0x08020800,
- 0x00000820, 0x00000000, 0x00000000, 0x08020020,
- 0x08000020, 0x08000800, 0x00020820, 0x00020000,
- 0x00020820, 0x00020000, 0x08020800, 0x00000800,
- 0x00000020, 0x08020020, 0x00000800, 0x00020820,
- 0x08000800, 0x00000020, 0x08000020, 0x08020000,
- 0x08020020, 0x08000000, 0x00020000, 0x08000820,
- 0x00000000, 0x08020820, 0x00020020, 0x08000020,
- 0x08020000, 0x08000800, 0x08000820, 0x00000000,
- 0x08020820, 0x00020800, 0x00020800, 0x00000820,
- 0x00000820, 0x00020020, 0x08000000, 0x08020800
- };
-
- private static long _keys[];
- private static char _word[];
-
- private static long _initialKey = 1007360890380L;
-
- /**
-# Initialization #1.
- public static void initialize()
- makeKeys(_initialKey);
- _word = new char[64];
-
- /**
-# Initialization #2.
- public static void initialize(long key)
- makeKeys(key);
- _word = new char[64];
-
- public static long hash(String word)
- int len = word.length();
- long value = 0L;
- for (int start = 0; start < len; start += 64)
- if (len - start > 64)
- value += (_hash(word.substring(start, start + 64)) ^ 0xffffffffL);
- else
- value += (_hash(word.substring(start)) ^ 0xffffffffL);
- break;
- return value;
-
- /**
-# The actual hash function.
- private static long _hash(String word)
- int len = word.length();
- for (int i = 0; i < len; i++)
- _word[i] = word.charAt(i);
- int round = len / 8;
- int rest = len % 8;
- if (rest > 0)
- for (int i = 0; i < 8 - rest; i++)
- _word[len+i] = 0;
- round++;
-
- int value = 0;
- for (int i = 0, pos = 0; i < round; i++, pos += 8)
- long todo = 0L + (byte)_word[pos];
- for (int j = 1; j < 8; j++)
- todo = (todo << 8) + (byte)_word[pos+j];
- value += des(todo);
-
- return value;
-
- private static void makeKeys(long key)
- long reduced = permutate(key, _keyReducePermutation);
- int l = (int)(reduced >> 28);
- int r = (int)(reduced & 0xfffffff);
- _keys = new long[16];
- for (int i = 0; i < 16; i++)
- _keys[i] = permutate(rotate(l, r, _keyRot[i]),
- _keyCompressPermutation);
-
- private static long des(long w)
- long x = permutate(w, _initialPermutation);
- int l = (int)(x >>> 32);
- int r = (int)x;
- for (int i = 0; i < 16; i++)
- int tmp = desFunc(r, _keys[i]) ^ l;
- l = r;
- r = tmp;
- long y = ((long)r << 32) | ((long)l & 0xffffffffL);
- return permutate(y, _finalPermutation);
-
- private static long permutate(long k, int p[])
- long s = 0;
- for (int i = 0; i < p.length; i++)
- if ((k & (1L << p[i])) != 0)
- s |= 1L << i;
-
- return s;
-
- private static long rotate(int l, int r, int s)
- return ((long)(((l<<s) & 0xfffffff) | (l>>>(28 - s))) << 28) |
- ((r<<s) & 0xfffffff) | (r>> (28 - s));
-
- private static int desFunc(int x, long k)
- int p = x >>> 27;
- int q = (p & 3) << 4;
- int r = x << 5;
- p |= r;
- r = _sBoxP[0][(int)((k >> 42) ^ p) & 0x3f];
- p >>>= 4;
- r |= _sBoxP[7][(int)((k >> 0) ^ p) & 0x3f];
- p >>>= 4;
- r |= _sBoxP[6][(int)((k >> 6) ^ p) & 0x3f];
- p >>>= 4;
- r |= _sBoxP[5][(int)((k >> 12) ^ p) & 0x3f];
- p >>>= 4;
- r |= _sBoxP[4][(int)((k >> 18) ^ p) & 0x3f];
- p >>>= 4;
- r |= _sBoxP[3][(int)((k >> 24) ^ p) & 0x3f];
- p >>>= 4;
- r |= _sBoxP[2][(int)((k >> 30) ^ p) & 0x3f];
- p >>>= 4;
- r |= _sBoxP[1][(int)((k >> 36) ^ (p | q)) & 0x3f];
- return r;
diff --git a/XParser.py b/XParser.py
index dcfedc4..018628e 100644
--- a/XParser.py
+++ b/XParser.py
@@ -26,16 +26,41 @@
# ARE DISCLAIMED. IN NO EVENT SHALL YUAN WANG BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# SERVICES LOSS OF USE, DATA, OR PROFITS OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
+import XTree
+import sys, hashlib
import xml.sax
-_PARSER_NAME = "org.apache.xerces.parsers.SAXParser";
+_STACK_SIZE = 100
+
+class ErrorHandler:
+ """Basic interface for SAX error handlers. If you create an object
+ that implements this interface, then register the object with your
+ Parser, the parser will call the methods in your object to report
+ all warnings and errors. There are three levels of errors
+ available: warnings, (possibly) recoverable errors, and
+ unrecoverable errors. All methods take a SAXParseException as the
+ only parameter."""
+
+
+
+ def error(self, exception):
+ "Handle a recoverable error."
+ sys.stderr.write ("Error: %s\n" % exception)
+
+ def fatalError(self, exception):
+ "Handle a non-recoverable error."
+ sys.stderr.write ("Fatal error: %s\n" % exception)
+ raise xml.sax.SAXParseException
+
+ def warning(self, exception):
+ "Handle a warning."
+ sys.stderr.write ("Warning: %s\n" % exception)
-# FIXME
# This is interesting
# http://www.virtuousprogrammer.com/?page_id=183
# http://docs.python.org/library/xml.sax.reader.html
@@ -43,227 +68,185 @@ _PARSER_NAME = "org.apache.xerces.parsers.SAXParser";
# <code>XTree</code>
# class XParser extends DefaultHandler implements LexicalHandler
class XParser(xml.sax.handler.ContentHandler):
- _setValidation = False
- _setNameSpaces = True
- _setSchemaSupport = True
- _setSchemaFullSupport = False
- _setNameSpacePrefixes = True
-
- _STACK_SIZE = 100
-
- private XMLReader _parser;
- private XTree _xtree;
- private int _idStack[], _lsidStack[]; // id and left sibling
- private long _valueStack[];
- private int _stackTop, _currentNodeID;
- private boolean _readElement;
- private StringBuffer _elementBuffer;
+# private XMLReader self._parser
+# private XTree self._xtree
+# private int self._idStack[], self._lsidStack[] # id and left sibling
+# private long self._valueStack[]
+# private int self._stackTop, self._currentNodeID
+# private boolean self._readElement
+# private StringBuffer self._elementBuffer
# Constructor.
def __init__(self):
- {
- XHash.initialize();
- try
- {
- _parser = (XMLReader)Class.forName(_PARSER_NAME).newInstance();
- _parser.setFeature("http://xml.org/sax/features/validation", _setValidation);
- _parser.setFeature("http://xml.org/sax/features/namespaces", _setNameSpaces);
- _parser.setFeature("http://apache.org/xml/features/validation/schema", _setSchemaSupport);
- _parser.setFeature("http://apache.org/xml/features/validation/schema-full-checking", _setSchemaFullSupport);
- _parser.setFeature("http://xml.org/sax/features/namespace-prefixes", _setNameSpacePrefixes);
-
- _parser.setContentHandler(this);
- _parser.setErrorHandler(this);
- _parser.setProperty("http://xml.org/sax/properties/lexical-handler", this);
- }
- catch (Exception e)
- {
- System.err.println(e.getMessage());
- System.exit(1);
- }
-
- _idStack = new int[_STACK_SIZE];
- _lsidStack = new int[_STACK_SIZE];
- _valueStack = new long[_STACK_SIZE];
- _stackTop = 0;
- _currentNodeID = XTree.NULL_NODE;
- _elementBuffer = new StringBuffer();
- }
+ xml.sax.handler.ContentHandler.__init__(self)
+ self._setValidation = False
+ self._setNameSpaces = True
+ self._setSchemaSupport = True
+ self._setSchemaFullSupport = False
+ self._setNameSpacePrefixes = True
+ self._readElement = False
+ self._xtree = None
+
+# try:
+ self._parser = xml.sax.make_parser()
+ self._parser.setFeature(xml.sax.handler.feature_validation, \
+ self._setValidation)
+ self._parser.setFeature(xml.sax.handler.feature_namespaces, \
+ self._setNameSpaces)
+ self._parser.setFeature(xml.sax.handler.feature_namespace_prefixes, \
+ self._setNameSpacePrefixes)
+ #self._parser.setFeature("http://apache.org/xml/features/validation/schema", \
+ # self._setSchemaSupport)
+ #self._parser.setFeature("http://apache.org/xml/features/validation/schema-full-checking", \
+ # self._setSchemaFullSupport)
+
+ self._parser.setContentHandler(self)
+# self._parser.setErrorHandler(self)
+ self._parser.setProperty(xml.sax.handler.property_lexical_handler, self)
+# except xml.sax.SAXParseException as (errno, strerror): # swallowing exception FIXME
+# print >>sys.stderr, "Exception: err no. %d\n%s" % (errno, strerror)
+# sys.exit(1)
+
+ self._idStack = []
+ self._lsidStack = []
+ self._valueStack = []
+ self._stackTop = 0
+ self._currentNodeID = XTree.NULL_NODE
+ self._elementBuffer = ""
# Parse an XML document
# @param uri input XML document
# @return the created XTree
- def parse(String uri):
- {
- _xtree = new XTree();
- _idStack[_stackTop] = XTree.NULL_NODE;
- _lsidStack[_stackTop] = XTree.NULL_NODE;
-
- try
- {
- _parser.parse(uri);
- }
- catch (Exception e)
- {
- System.err.println(e.getMessage());
- System.exit(1);
- }
-
- return _xtree;
- }
-
- // Document handler methods
-
- public void startElement(String uri, String local, String raw,
- Attributes attrs)
- {
- // if text is mixed with elements
- if (_elementBuffer.length() > 0)
- {
- String text = _elementBuffer.toString().trim();
- if (text.length() > 0)
- {
- long value = XHash.hash(text);
- int tid = _xtree.addText(_idStack[_stackTop], _lsidStack[_stackTop], text, value);
- _lsidStack[_stackTop] = tid;
- _currentNodeID = tid;
- _valueStack[_stackTop] += value;
- }
- }
-
- int eid = _xtree.addElement(_idStack[_stackTop],
- _lsidStack[_stackTop], local);
-
- // Update last sibling info.
- _lsidStack[_stackTop] = eid;
-
- // Push
- _stackTop++;
- _idStack[_stackTop] = eid;
- _currentNodeID = eid;
- _lsidStack[_stackTop] = XTree.NULL_NODE;
- _valueStack[_stackTop] = XHash.hash(local);
-
- // Take care of attributes
- if ((attrs != null) && (attrs.getLength() > 0))
- {
- for (int i = 0; i < attrs.getLength(); i++)
- {
- String name = attrs.getQName(i);
- String value = attrs.getValue(i);
- long namehash = XHash.hash(name);
- long valuehash = XHash.hash(value);
- long attrhash = namehash * namehash +
- valuehash * valuehash;
- int aid = _xtree.addAttribute(eid, _lsidStack[_stackTop], name, value, namehash, attrhash);
-
- _lsidStack[_stackTop] = aid;
- _currentNodeID = aid + 1;
- _valueStack[_stackTop] += attrhash * attrhash;
- }
- }
-
- _readElement = True;
- _elementBuffer = new StringBuffer();
- }
-
- def characters(char ch[], int start, int length):
- {
- _elementBuffer.append(ch, start, length);
- }
-
- def endElement(String uri, String local, String raw):
- {
- if (_readElement)
- {
- if (_elementBuffer.length() > 0)
- {
- String text = _elementBuffer.toString();
- long value = XHash.hash(text);
- _currentNodeID =
- _xtree.addText(_idStack[_stackTop],
- _lsidStack[_stackTop],
- text, value);
- _valueStack[_stackTop] += value;
- }
- else // an empty element
- {
- _currentNodeID =
- _xtree.addText(_idStack[_stackTop],
- _lsidStack[_stackTop],
- "", 0);
- }
- _readElement = False;
- }
- else
- {
- if (_elementBuffer.length() > 0)
- {
- String text = _elementBuffer.toString().trim();
- // More text nodes before end of the element.
- if (text.length() > 0)
- {
- long value = XHash.hash(text);
- _currentNodeID =
- _xtree.addText(_idStack[_stackTop],
- _lsidStack[_stackTop],
- text, value);
- _valueStack[_stackTop] += value;
- }
- }
- }
-
- _elementBuffer = new StringBuffer();
- _xtree.addHashValue(_idStack[_stackTop],
- _valueStack[_stackTop]);
- _valueStack[_stackTop-1] += _valueStack[_stackTop] *
- _valueStack[_stackTop];
- _lsidStack[_stackTop-1] = _idStack[_stackTop];
-
- // Pop
- _stackTop--;
- }
-
- // End of document handler methods
-
- // Lexical handler methods.
-
- def startCDATA():
- {
- // The text node id should be the one next to the current
- // node id.
- int textid = _currentNodeID + 1;
- String text = _elementBuffer.toString();
- _xtree.addCDATA(textid, text.length());
- }
-
- def endCDATA():
- {
- int textid = _currentNodeID + 1;
- String text = _elementBuffer.toString();
- _xtree.addCDATA(textid, text.length());
- }
-
- // Following functions are not implemented.
- def comment(char[] ch, int start, int length):
- {
- }
-
- def startDTD(String name, String publicId, String systemId):
- {
- }
-
- def endDTD():
- {
- }
-
- def startEntity(String name):
- {
- }
-
- def endEntity(String name):
- {
- }
-
- // End of lexical handler methods.
-}
+ def parse(self, uri):
+ self._xtree = XTree.XTree()
+ self._idStack.append(XTree.NULL_NODE)
+ self._lsidStack.append(XTree.NULL_NODE)
+
+# try:
+ self._parser.parse(uri)
+# except xml.sax.SAXParseException as (errno, strerror):
+# print >>sys.stderr, "Exception: err no. %d\n%s" % (errno, strerror)
+# sys.exit(1)
+
+ return self._xtree
+
+ # Document handler methods
+
+ def startElement(self, local, attrs):
+ # if text is mixed with elements
+ if (len(self._elementBuffer) > 0):
+ text = str(self._elementBuffer).strip()
+ if (len(text) > 0):
+ # Original Java has long here, we have str. FIXME
+ value = hashlib.sha1(text).digest()
+ tid = self._xtree.addText(self._idStack[self._stackTop], self._lsidStack[self._stackTop], text, value)
+ self._lsidStack[self._stackTop] = tid
+ self._currentNodeID = tid
+ self._valueStack[self._stackTop] += value
+
+ eid = self._xtree.addElement(self._idStack[self._stackTop],
+ self._lsidStack[self._stackTop], local)
+
+ # Update last sibling info.
+ self._lsidStack[self._stackTop] = eid
+
+ # Push
+ self._stackTop += 1
+ self._idStack[self._stackTop] = eid
+ self._currentNodeID = eid
+ self._lsidStack[self._stackTop] = XTree.NULL_NODE
+ self._valueStack[self._stackTop] = hashlib.sha1(local).digest()
+
+ # Take care of attributes
+ if ((attrs != None) and (attrs.getLength() > 0)):
+ for i in range(attrs.getLength()):
+ name = attrs.getQName(i)
+ value = attrs.getValue(i)
+ namehash = hashlib.sha1(name).digest()
+ valuehash = hashlib.sha1(value).digest()
+ attrhash = namehash * namehash + \
+ valuehash * valuehash
+ aid = self._xtree.addAttribute(eid, self._lsidStack[self._stackTop], name, value, namehash, attrhash)
+
+ self._lsidStack[self._stackTop] = aid
+ self._currentNodeID = aid + 1
+ self._valueStack[self._stackTop] += attrhash * attrhash
+
+ self._readElement = True
+ self._elementBuffer = ""
+
+ def characters(self, ch):
+ self._elementBuffer += ch
+
+ def endElement(self, name):
+ if (self._readElement):
+ if (len(self._elementBuffer) > 0):
+ text = str(self._elementBuffer)
+ value = hashlib.sha1(text).digest()
+ self._currentNodeID = \
+ self._xtree.addText(self._idStack[self._stackTop],
+ self._lsidStack[self._stackTop],
+ text, value)
+ self._valueStack[self._stackTop] += value
+ else: # an empty element
+ self._currentNodeID = \
+ self._xtree.addText(self._idStack[self._stackTop], \
+ self._lsidStack[self._stackTop], \
+ "", 0)
+ self._readElement = False
+ else:
+ if (len(self._elementBuffer) > 0):
+ text = str(self._elementBuffer).strip()
+ # More text nodes before end of the element.
+ if (len(text) > 0):
+ value = hashlib.sha1(text).digest()
+ self._currentNodeID = \
+ self._xtree.addText(self._idStack[self._stackTop], \
+ self._lsidStack[self._stackTop], \
+ text, value)
+ self._valueStack[self._stackTop] += value
+
+ self._elementBuffer = ""
+ self._xtree.addHashValue(self._idStack[self._stackTop],
+ self._valueStack[self._stackTop])
+ self._valueStack[self._stackTop-1] += self._valueStack[self._stackTop] * \
+ self._valueStack[self._stackTop]
+ self._lsidStack[self._stackTop-1] = self._idStack[self._stackTop]
+
+ # Pop
+ self._stackTop -= 1
+
+ # End of document handler methods
+
+ # Lexical handler methods.
+
+ def startCDATA(self):
+ # The text node id should be the one next to the current
+ # node id.
+ textid = self._currentNodeID + 1
+ text = str(self._elementBuffer)
+ self._xtree.addCDATA(textid, len(text))
+
+ def endCDATA(self):
+ textid = self._currentNodeID + 1
+ text = str(self._elementBuffer)
+ self._xtree.addCDATA(textid, len(text))
+
+ # Following functions are not implemented.
+ def comment(self, ch):
+ pass
+
+ def startDTD(self, name, publicId, systemId):
+ pass
+
+ def endDTD(self):
+ pass
+
+ def startEntity(self, name):
+ pass
+
+ def endEntity(self, name):
+ pass
+
+ # End of lexical handler methods.
+
diff --git a/XTree.py b/XTree.py
index eef0af9..215dbc6 100644
--- a/XTree.py
+++ b/XTree.py
@@ -56,6 +56,29 @@ class XTree:
# private long self._hashValue[][]
# private String _value[][]
# private Hashtable self._tagNames, _cdataTable
+ _root = 0
+ _firstChild = []
+ _nextSibling = []
+ _isAttribute = []
+ _valueIndex = []
+ _matching = []
+ _childrenCount = []
+ _hashValue = []
+ _value = []
+
+ _value.append([])
+ _tagNames = []
+
+ # This hashtable is used to record CDATA section info.
+ # The key is the text node id, the value is the list of
+ # (start,end) position pair of each CDATA section.
+ _cdataTable = {}
+
+ _elementIndex = -1
+ _tagIndex = -1
+ _valueCount = 0
+
+
def __init__(self, topcap=None, botcap=None):
self._topCap = _TOP_LEVEL_CAPACITY
@@ -69,44 +92,44 @@ class XTree:
# Initialization.
def _initialize(self):
self._root = 0
- self._firstChild = []
- self._nextSibling = []
- self._isAttribute = []
- self._valueIndex = []
- self._matching = []
- self._childrenCount = []
- self._hashValue = []
- self._value = []
-
- self._value[0] = []
- self._tagNames = []
+ self._firstChild = []
+ self._nextSibling = []
+ self._isAttribute = []
+ self._valueIndex = []
+ self._matching = []
+ self._childrenCount = []
+ self._hashValue = []
+ self._value = []
+
+ self._value.append([])
+ self._tagNames = []
# This hashtable is used to record CDATA section info.
# The key is the text node id, the value is the list of
# (start,end) position pair of each CDATA section.
- self._cdataTable = {}
+ self._cdataTable = {}
- self._elementIndex = -1
- self._tagIndex = -1
- self._valueCount = self._botCap - 1
+ self._elementIndex = -1
+ self._tagIndex = -1
+ self._valueCount = self._botCap - 1
# ID Expansion
def _expand(self, topid):
- self._firstChild[topid] = []
- self._nextSibling[topid] = []
- self._childrenCount[topid] = []
- self._matching[topid] = []
- self._valueIndex[topid] = []
- self._hashValue[topid] = []
- self._isAttribute[topid] = []
+ self._firstChild[topid] = []
+ self._nextSibling[topid] = []
+ self._childrenCount[topid] = []
+ self._matching[topid] = []
+ self._valueIndex[topid] = []
+ self._hashValue[topid] = []
+ self._isAttribute[topid] = []
for i in range(self._botCap):
- self._firstChild[topid][i] = NULL_NODE
- self._nextSibling[topid][i] = NULL_NODE
- self._childrenCount[topid][i]= 0
- self._matching[topid][i] = MATCH
- self._valueIndex[topid][i] = -1
- self._isAttribute[topid][i] = False
+ self._firstChild[topid][i] = NULL_NODE
+ self._nextSibling[topid][i] = NULL_NODE
+ self._childrenCount[topid][i] = 0
+ self._matching[topid][i] = MATCH
+ self._valueIndex[topid][i] = -1
+ self._isAttribute[topid][i] = False
# Start -- methods for constructing a tree.
# Add a new element to the tree.
@@ -142,7 +165,7 @@ class XTree:
if (lsid == NULL_NODE):
self._firstChild[ptopid][pbotid] = self._elementIndex
else:
- self._nextSibling[lsid/self._botCap][lsid%self._botCap] = self._elementIndex
+ self._nextSibling[lsid / self._botCap][lsid % self._botCap] = self._elementIndex
# update children count
self._childrenCount[ptopid][pbotid] += 1
@@ -166,7 +189,7 @@ class XTree:
if (lsid == NULL_NODE):
self._firstChild[etopid][ebotid] = self._elementIndex
else:
- self._nextSibling[lsid/self._botCap][lsid%self._botCap] = self._elementIndex
+ self._nextSibling[lsid / self._botCap][lsid % self._botCap] = self._elementIndex
self._childrenCount[etopid][ebotid] += 1
self._hashValue[topid][botid] = value
@@ -209,7 +232,7 @@ class XTree:
# @param eid element id
# @param value extra hash value
def addHashValue(self, eid, value):
- self._hashValue[eid/self._botCap][eid%self._botCap] = value
+ self._hashValue[eid / self._botCap][eid % self._botCap] = value
# Add a CDATA section (either a start or an end) to the CDATA
# hashtable, in which each entry should have an even number of
@@ -233,11 +256,11 @@ class XTree:
# @param match ?match and matched element id
def addMatching(self, eid, match):
if (match[0] == NO_MATCH):
- self._matching[eid/self._botCap][eid%self._botCap] = NO_MATCH
+ self._matching[eid / self._botCap][eid % self._botCap] = NO_MATCH
elif (match[0] == MATCH):
- self._matching[eid/self._botCap][eid%self._botCap] = MATCH
+ self._matching[eid / self._botCap][eid % self._botCap] = MATCH
else:
- self._matching[eid/self._botCap][eid%self._botCap] = match[1] + 1
+ self._matching[eid / self._botCap][eid % self._botCap] = match[1] + 1
# End -- methods for constructing a tree.
@@ -247,7 +270,7 @@ class XTree:
# @param eid element id
# @param match ?change and matched element id
def getMatching(self, eid, match):
- mid = self._matching[eid/self._botCap][eid%self._botCap]
+ mid = self._matching[eid / self._botCap][eid % self._botCap]
if (mid == NO_MATCH):
match[0] = NO_MATCH
elif (mid == MATCH):
@@ -263,7 +286,7 @@ class XTree:
# Get the first child of a node.
# @param eid element id
def getFirstChild(self, eid):
- cid = self._firstChild[eid/self._botCap][eid%self._botCap]
+ cid = self._firstChild[eid / self._botCap][eid % self._botCap]
while (cid > self._root):
ctopid = cid / self._botCap
cbotid = cid % self._botCap
@@ -277,13 +300,13 @@ class XTree:
# Get the next sibling of a node.
# @param eid element id
def getNextSibling(self, eid):
- return self._nextSibling[eid/self._botCap][eid%self._botCap]
+ return self._nextSibling[eid / self._botCap][eid % self._botCap]
# Get the first attribute of a node.
# @param eid element id
def getFirstAttribute(self, eid):
- aid = self._firstChild[eid/self._botCap][eid%self._botCap]
- if ((aid > self._root) and (self._isAttribute[aid/self._botCap][aid%self._botCap])):
+ aid = self._firstChild[eid / self._botCap][eid % self._botCap]
+ if ((aid > self._root) and (self._isAttribute[aid / self._botCap][aid % self._botCap])):
return aid
else:
return NULL_NODE
@@ -291,8 +314,8 @@ class XTree:
# Get the next attribute of a node.
# @param aid attribute id
def getNextAttribute(self, aid):
- aid1 = self._nextSibling[aid/self._botCap][aid%self._botCap]
- if ((aid1 > self._root) and (self._isAttribute[aid1/self._botCap][aid1%self._botCap])):
+ aid1 = self._nextSibling[aid / self._botCap][aid % self._botCap]
+ if ((aid1 > self._root) and (self._isAttribute[aid1 / self._botCap][aid1 % self._botCap])):
return aid1
else:
return NULL_NODE
@@ -300,17 +323,17 @@ class XTree:
# Get the attribute value.
# @param aid attribute id
def getAttributeValue(self, aid):
- cid = self._firstChild[aid/self._botCap][aid%self._botCap]
- index = self._valueIndex[cid/self._botCap][cid%self._botCap]
+ cid = self._firstChild[aid / self._botCap][aid % self._botCap]
+ index = self._valueIndex[cid / self._botCap][cid % self._botCap]
if (index > 0):
- return self._value[index/self._botCap][index%self._botCap]
+ return self._value[index / self._botCap][index % self._botCap]
else:
return ""
# Get the hash value of a node.
# @param eid element id
def getHashValue(self, eid):
- return self._hashValue[eid/self._botCap][eid%self._botCap]
+ return self._hashValue[eid / self._botCap][eid % self._botCap]
# Get the CDATA section position list of a text node.
# @param eid element id
@@ -321,7 +344,7 @@ class XTree:
# Get the childern count of a node.
# @param eid element id
def getChildrenCount(self, eid):
- return self._childrenCount[eid/self._botCap][eid%self._botCap]
+ return self._childrenCount[eid / self._botCap][eid % self._botCap]
# Get the # of all decendents of a node.
# @param eid element id
@@ -335,39 +358,39 @@ class XTree:
cid = self._firstChild[topid][botid]
while (cid > NULL_NODE):
count += self.getDecendentsCount(cid)
- cid = self._nextSibling[cid/self._botCap][cid%self._botCap]
+ cid = self._nextSibling[cid / self._botCap][cid % self._botCap]
return count
# Get the value index of a node
# @param eid element id
def getValueIndex(self, eid):
- return self._valueIndex[eid/self._botCap][eid%self._botCap]
+ return self._valueIndex[eid / self._botCap][eid % self._botCap]
# Get the value of a leaf node
# @param index value index
def getValue(self, index):
- return self._value[index/self._botCap][index%self._botCap]
+ return self._value[index / self._botCap][index % self._botCap]
# Get the tag of an element node
# @param eid element id
def getTag(self, eid):
- index = self._valueIndex[eid/self._botCap][eid%self._botCap]
+ index = self._valueIndex[eid / self._botCap][eid % self._botCap]
return self._value[0][index]
# Get the text value of a leaf node
# @param eid element id
def getText(self, eid):
- index = self._valueIndex[eid/self._botCap][eid%self._botCap]
+ index = self._valueIndex[eid / self._botCap][eid % self._botCap]
if (index >= self._botCap):
- return self._value[index/self._botCap][index%self._botCap]
+ return self._value[index / self._botCap][index % self._botCap]
else:
return ""
# Check if a node an element node.
# @param eid element id
def isElement(self, eid):
- vindex = self._valueIndex[eid/self._botCap][eid%self._botCap]
+ vindex = self._valueIndex[eid / self._botCap][eid % self._botCap]
if (vindex < self._botCap):
return True
else:
@@ -376,12 +399,12 @@ class XTree:
# Check if a node is an attribute node.
# @param eid element id
def isAttribute(self, eid):
- return self._isAttribute[eid/self._botCap][eid%self._botCap]
+ return self._isAttribute[eid / self._botCap][eid % self._botCap]
# Check if a node an leaf text node.
# @param edi element id
def isLeaf(self, eid):
- index = self._valueIndex[eid/self._botCap][eid%self._botCap]
+ index = self._valueIndex[eid / self._botCap][eid % self._botCap]
if (index < self._botCap):
return False
else:
@@ -390,7 +413,7 @@ class XTree:
# End -- methods for accessing a tree.
# For testing purpose.
- def dump(self, eid = None):
+ def dump(self, eid=None):
if eid:
topid = eid / self._botCap
botid = eid % self._botCap
@@ -407,7 +430,7 @@ class XTree:
self._value[vtopid][vbotid]
else:
print "eid\tfirstC\tnextS\tattr?\tcCount\thash\tmatch\tvalue"
- for i in range(self._root,self._elementIndex+1):
+ for i in range(self._root, self._elementIndex + 1):
topid = i / self._botCap
botid = i % self._botCap
vid = self._valueIndex[topid][botid]
@@ -421,3 +444,4 @@ class XTree:
self._hashValue[topid][botid] + "\t" + \
self._matching[topid][botid] + "\t" + \
self._value[vtopid][vbotid]
+
diff --git a/diff.xml b/diff.xml
new file mode 100644
index 0000000..af4ca81
--- /dev/null
+++ b/diff.xml
@@ -0,0 +1,12 @@
+<list>
+<a>4<?UPDATE FROM "1"?></a>
+<b><?DELETE b?>
+2</b>
+<d><daughter><?DELETE daughter?>
+Maruška</daughter>
+<son><?INSERT son?>
+Janíček</son>
+</d>
+<c><?INSERT c?>
+3</c>
+</list>
diff --git a/new.xml b/new.xml
new file mode 100644
index 0000000..9abd148
--- /dev/null
+++ b/new.xml
@@ -0,0 +1,7 @@
+<list>
+ <a>4</a>
+ <c>3</c>
+ <d>
+ <son>Janíček</son>
+ </d>
+</list>
diff --git a/old.xml b/old.xml
new file mode 100644
index 0000000..898d8c0
--- /dev/null
+++ b/old.xml
@@ -0,0 +1,7 @@
+<list>
+ <a>1</a>
+ <b>2</b>
+ <d>
+ <daughter>Maruška</daughter>
+ </d>
+</list>
diff --git a/test_XDiff.py b/test_XDiff.py
new file mode 100644
index 0000000..8eb8072
--- /dev/null
+++ b/test_XDiff.py
@@ -0,0 +1,9 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import unittest
+import XDiff
+
+class TestXDiff(unittest.TestCase):
+ def test_XDiff(self):
+ XDiff.XDiff('old.xml', 'new.xml', 'diff.xml')
+ self.assertTrue(True, "we have managed to get through XDiff.") \ No newline at end of file