aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Matěj Cepl <mcepl@redhat.com> 2011-11-16 12:23:40 +0100
committer Matěj Cepl <mcepl@redhat.com> 2011-11-16 13:10:54 +0100
commit 00212166f380611d9472f2280ff3566863c9c5b4 (patch)
tree ffd052622663bc82ec7517bd9d135cfcbf1f69b0
parent c55b0ba2a5aef15ef3e0aa697abd24a2b4759b90 (diff)
download json_diff-00212166f380611d9472f2280ff3566863c9c5b4.tar.gz
Switch back to optparse ... no need to have an additional dependency.
-rw-r--r-- .ditz/issue-5f4f6fef53a0907e3565ff2e6888fa916fe463e4.yaml 8
-rw-r--r-- .ditz/issue-60aea6be428d95ae1712703c141a069cac487fef.yaml 22
-rw-r--r-- .ditz/issue-a9e842efd5346c069a8f4f7780408556586eca47.yaml 22
-rw-r--r-- .ditz/issue-b10a9a16798679380fc9a2c184e9fe0d13e5abcf.yaml 33
-rw-r--r-- .ditz/issue-fb675aa395503fb3e4fdbcd2ab22da02f4345d47.yaml 20
-rwxr-xr-x json_diff.py 38
-rw-r--r-- odict.py 328
7 files changed, 453 insertions, 18 deletions
diff --git a/.ditz/issue-5f4f6fef53a0907e3565ff2e6888fa916fe463e4.yaml b/.ditz/issue-5f4f6fef53a0907e3565ff2e6888fa916fe463e4.yaml
index c5d55a9..54670f7 100644
--- a/.ditz/issue-5f4f6fef53a0907e3565ff2e6888fa916fe463e4.yaml
+++ b/.ditz/issue-5f4f6fef53a0907e3565ff2e6888fa916fe463e4.yaml
@@ -24,8 +24,8 @@ type: :bugfix
component: json_diff
release:
reporter: Matej Cepl <mcepl@redhat.com>
-status: :unstarted
-disposition:
+status: :closed
+disposition: :fixed
creation_time: 2011-11-07 09:58:56.794804 Z
references: []
@@ -35,3 +35,7 @@ log_events:
- Matej Cepl <mcepl@redhat.com>
- created
- ""
+- - 2011-11-08 18:11:14.760985 Z
+ - Matej Cepl <mcepl@redhat.com>
+ - closed with disposition fixed
+ - ""
diff --git a/.ditz/issue-60aea6be428d95ae1712703c141a069cac487fef.yaml b/.ditz/issue-60aea6be428d95ae1712703c141a069cac487fef.yaml
new file mode 100644
index 0000000..a09e494
--- /dev/null
+++ b/.ditz/issue-60aea6be428d95ae1712703c141a069cac487fef.yaml
@@ -0,0 +1,22 @@
+--- !ditz.rubyforge.org,2008-03-06/issue
+title: RPM package
+desc: ""
+type: :task
+component: json_diff
+release:
+reporter: Matej Cepl <mcepl@redhat.com>
+status: :unstarted
+disposition:
+creation_time: 2011-11-16 10:44:39.832446 Z
+references: []
+
+id: 60aea6be428d95ae1712703c141a069cac487fef
+log_events:
+- - 2011-11-16 10:44:40.851325 Z
+ - Matej Cepl <mcepl@redhat.com>
+ - created
+ - ""
+- - 2011-11-16 10:45:03.435355 Z
+ - Matej Cepl <mcepl@redhat.com>
+ - commented
+ - https://fedoraproject.org/wiki/Packaging:Python
diff --git a/.ditz/issue-a9e842efd5346c069a8f4f7780408556586eca47.yaml b/.ditz/issue-a9e842efd5346c069a8f4f7780408556586eca47.yaml
new file mode 100644
index 0000000..f11787a
--- /dev/null
+++ b/.ditz/issue-a9e842efd5346c069a8f4f7780408556586eca47.yaml
@@ -0,0 +1,22 @@
+--- !ditz.rubyforge.org,2008-03-06/issue
+title: eliminate argparse
+desc: ""
+type: :bugfix
+component: json_diff
+release:
+reporter: Matej Cepl <mcepl@redhat.com>
+status: :closed
+disposition: :fixed
+creation_time: 2011-11-16 11:09:26.710658 Z
+references: []
+
+id: a9e842efd5346c069a8f4f7780408556586eca47
+log_events:
+- - 2011-11-16 11:09:40.591262 Z
+ - Matej Cepl <mcepl@redhat.com>
+ - created
+ - We don't have argparse in python 2.6 and it is just a waste to use it.
+- - 2011-11-16 12:10:05.602830 Z
+ - Matej Cepl <mcepl@redhat.com>
+ - closed with disposition fixed
+ - done.
diff --git a/.ditz/issue-b10a9a16798679380fc9a2c184e9fe0d13e5abcf.yaml b/.ditz/issue-b10a9a16798679380fc9a2c184e9fe0d13e5abcf.yaml
new file mode 100644
index 0000000..d95caff
--- /dev/null
+++ b/.ditz/issue-b10a9a16798679380fc9a2c184e9fe0d13e5abcf.yaml
@@ -0,0 +1,33 @@
+--- !ditz.rubyforge.org,2008-03-06/issue
+title: -i option to compare only mentioned keys
+desc: ""
+type: :feature
+component: json_diff
+release:
+reporter: Matej Cepl <mcepl@redhat.com>
+status: :unstarted
+disposition:
+creation_time: 2011-11-16 11:06:20.989309 Z
+references: []
+
+id: b10a9a16798679380fc9a2c184e9fe0d13e5abcf
+log_events:
+- - 2011-11-16 11:06:21.529346 Z
+ - Matej Cepl <mcepl@redhat.com>
+ - created
+ - ""
+- - 2011-11-16 11:08:22.147340 Z
+ - Matej Cepl <mcepl@redhat.com>
+ - commented
+ - |-
+ Right, this code
+ for name in keys:
+ # Explicitly excluded arguments
+ logging.debug("name = %s (inc = %s, excl = %s)", name,
+ unicode(self.included_attributes), unicode(self.excluded_attributes))
+ if ((self.included_attributes and (name not in self.included_attributes)) or
+ (name in self.excluded_attributes)):
+ continue
+ # old_obj is missing
+ is wrong. We need to dive into all dictionaries and arrays and only down there
+ to decide.
diff --git a/.ditz/issue-fb675aa395503fb3e4fdbcd2ab22da02f4345d47.yaml b/.ditz/issue-fb675aa395503fb3e4fdbcd2ab22da02f4345d47.yaml
new file mode 100644
index 0000000..175674b
--- /dev/null
+++ b/.ditz/issue-fb675aa395503fb3e4fdbcd2ab22da02f4345d47.yaml
@@ -0,0 +1,20 @@
+--- !ditz.rubyforge.org,2008-03-06/issue
+title: test_large_with_exclusions is broken
+desc: |-
+ There is something wrong with comparing two large dicts.
+ Tried to fix it by using odict instead, but it doesn't seem to help.
+type: :bugfix
+component: json_diff
+release:
+reporter: Matej Cepl <mcepl@redhat.com>
+status: :unstarted
+disposition:
+creation_time: 2011-11-08 18:05:08.687378 Z
+references: []
+
+id: fb675aa395503fb3e4fdbcd2ab22da02f4345d47
+log_events:
+- - 2011-11-08 18:05:10.458418 Z
+ - Matej Cepl <mcepl@redhat.com>
+ - created
+ - ""
diff --git a/json_diff.py b/json_diff.py
index f460af7..0362f61 100755
--- a/json_diff.py
+++ b/json_diff.py
@@ -22,10 +22,12 @@ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
-from __future__ import division, absolute_import
+from __future__ import division, absolute_import, print_function
import json
import logging
-import argparse
+import odict
+
+from optparse import OptionParser
__author__ = "Matěj Cepl"
__version__ = "0.1.0"
@@ -67,7 +69,7 @@ td {
"""
class HTMLFormatter(object):
-
+
def __init__(self, diff_object):
self.diff = diff_object
@@ -128,7 +130,6 @@ class HTMLFormatter(object):
return out_str.strip()
-
def __str__(self):
return self._generate_page(self.diff).encode("utf-8")
@@ -262,22 +263,27 @@ class Comparator(object):
if __name__ == "__main__":
- parser = argparse.ArgumentParser(description="Generates diff between two JSON files.")
- parser.add_argument("filenames", action="append", nargs=2,
- metavar="FILENAME", help="names of the old and new JSON files")
- parser.add_argument("-x", "--exclude",
- action="append", dest="exclude", default=[],
+ usage = "usage: %prog [options] old.json new.json"
+ description = "Generates diff between two JSON files."
+ parser = OptionParser(usage=usage)
+ parser.add_option("-x", "--exclude",
+ action="append", dest="exclude", metavar="ATTR", default=[],
help="attributes which should be ignored when comparing")
- parser.add_argument("-H", "--HTML",
- action="store_true", dest="HTMLoutput", default=False,
+ parser.add_option("-i", "--include",
+ action="append", dest="include", metavar="ATTR", default=[],
+ help="attributes which should be exclusively used when comparing")
+ parser.add_option("-H", "--HTML",
+ action="store_true", dest="HTMLoutput", metavar="BOOL", default=False,
help="program should output to HTML report")
- parser.add_argument('--version', action='version', version='%(prog)s 0.1.1')
- options = parser.parse_args()
+ (options, args) = parser.parse_args()
+
+ if len(args) != 2:
+ parser.error("Script requires two positional arguments, names for old and new JSON file.")
- diff = Comparator(file(options.filenames[0][0]), file(options.filenames[0][1]), options.exclude)
+ diff = Comparator(file(args[0]), file(args[1]), options.exclude, options.include)
if options.HTMLoutput:
diff_res = diff.compare_dicts()
logging.debug("diff_res:\n%s", json.dumps(diff_res, indent=True))
- print HTMLFormatter(diff_res)
+ print(HTMLFormatter(diff_res))
else:
- print json.dumps(diff.compare_dicts(), indent=4, ensure_ascii=False).encode("utf-8")
+ print(json.dumps(diff.compare_dicts(), indent=4, ensure_ascii=False).encode("utf-8")) \ No newline at end of file
diff --git a/odict.py b/odict.py
new file mode 100644
index 0000000..3bf708a
--- /dev/null
+++ b/odict.py
@@ -0,0 +1,328 @@
+# -*- coding: utf-8 -*-
+"""
+ odict
+ ~~~~~
+
+ This module is an example implementation of an ordered dict for the
+ collections module. It's not written for performance (it actually
+ performs pretty badly) but to show how the API works.
+
+
+ Questions and Answers
+ =====================
+
+ Why would anyone need ordered dicts?
+
+ Dicts in python are unordered which means that the order of items when
+ iterating over dicts is undefined. As a matter of fact it is most of
+ the time useless and differs from implementation to implementation.
+
+ Many developers stumble upon that problem sooner or later when
+ comparing the output of doctests which often does not match the order
+ the developer thought it would.
+
+ Also XML systems such as Genshi have their problems with unordered
+ dicts as the input and output ordering of tag attributes is often
+ mixed up because the ordering is lost when converting the data into
+ a dict. Switching to lists is often not possible because the
+ complexity of a lookup is too high.
+
+ Another very common case is metaprogramming. The default namespace
+ of a class in python is a dict. With Python 3 it becomes possible
+ to replace it with a different object which could be an ordered dict.
+ Django is already doing something similar with a hack that assigns
+ numbers to some descriptors initialized in the class body of a
+ specific subclass to restore the ordering after class creation.
+
+ When porting code from programming languages such as PHP and Ruby
+ where the item-order in a dict is guaranteed it's also a great help
+ to have an equivalent data structure in Python to ease the transition.
+
+ Where are new keys added?
+
+ At the end. This behavior is consistent with Ruby 1.9 Hashmaps
+ and PHP Arrays. It also matches what common ordered dict
+ implementations do currently.
+
+ What happens if an existing key is reassigned?
+
+ The key is *not* moved. This is consistent with existing
+ implementations and can be changed by a subclass very easily::
+
+ class movingodict(odict):
+ def __setitem__(self, key, value):
+ self.pop(key, None)
+ odict.__setitem__(self, key, value)
+
+ Moving keys to the end of an ordered dict on reassignment is not
+ very useful for most applications.
+
+ Does it mean the dict keys are sorted by a sort expression?
+
+ That's not the case. The odict only guarantees that there is an order
+ and that newly inserted keys are inserted at the end of the dict. If
+ you want to sort it you can do so, but newly added keys are again added
+ at the end of the dict.
+
+ I initialized the odict with a dict literal but the keys are not
+ ordered like they should!
+
+ Dict literals in Python generate dict objects and as such the order of
+ their items is not guaranteed. Before they are passed to the odict
+ constructor they are already unordered.
+
+ What happens if keys appear multiple times in the list passed to the
+ constructor?
+
+ The same as for the dict. The latter item overrides the former. This
+ has the side-effect that the position of the first key is used because
+ the key is actually overwritten:
+
+ >>> odict([('a', 1), ('b', 2), ('a', 3)])
+ odict.odict([('a', 3), ('b', 2)])
+
+ This behavior is consistent with existing implementations in Python
+ and the PHP array and the hashmap in Ruby 1.9.
+
+ This odict doesn't scale!
+
+ Yes it doesn't. The delitem operation is O(n). This file is a
+ mockup of a real odict that could be implemented for collections
+ based on a linked list.
+
+ Why is there no .insert()?
+
+ There are few situations where you really want to insert a key at
+ a specified index. To not make the API too complex the proposed
+ solution for this situation is creating a list of items, manipulating
+ that and converting it back into an odict:
+
+ >>> d = odict([('a', 42), ('b', 23), ('c', 19)])
+ >>> l = d.items()
+ >>> l.insert(1, ('x', 0))
+ >>> odict(l)
+ odict.odict([('a', 42), ('x', 0), ('b', 23), ('c', 19)])
+
+ :copyright: (c) 2008 by Armin Ronacher and PEP 273 authors.
+ :license: modified BSD license.
+"""
+from itertools import izip, imap
+from copy import deepcopy
+
+missing = object()
+
+
+class odict(dict):
+    """
+    Ordered dict example implementation.
+
+    This is the proposed interface for an ordered dict as proposed on the
+    Python mailinglist (proposal_).
+
+    It's a dict subclass and provides some list functions. The implementation
+    of this class is inspired by the implementation of Babel but incorporates
+    some ideas from the `ordereddict`_ and Django's ordered dict.
+
+    The constructor and `update()` both accept iterables of tuples as well as
+    mappings:
+
+    >>> d = odict([('a', 'b'), ('c', 'd')])
+    >>> d.update({'foo': 'bar'})
+    >>> d
+    odict.odict([('a', 'b'), ('c', 'd'), ('foo', 'bar')])
+
+    Keep in mind that when updating from dict-literals the order is not
+    preserved as these dicts are unsorted!
+
+    You can copy an odict like a dict by using the constructor, `copy.copy`
+    or the `copy` method and make deep copies with `copy.deepcopy`:
+
+    >>> from copy import copy, deepcopy
+    >>> copy(d)
+    odict.odict([('a', 'b'), ('c', 'd'), ('foo', 'bar')])
+    >>> d.copy()
+    odict.odict([('a', 'b'), ('c', 'd'), ('foo', 'bar')])
+    >>> odict(d)
+    odict.odict([('a', 'b'), ('c', 'd'), ('foo', 'bar')])
+    >>> d['spam'] = []
+    >>> d2 = deepcopy(d)
+    >>> d2['spam'].append('eggs')
+    >>> d
+    odict.odict([('a', 'b'), ('c', 'd'), ('foo', 'bar'), ('spam', [])])
+    >>> d2
+    odict.odict([('a', 'b'), ('c', 'd'), ('foo', 'bar'), ('spam', ['eggs'])])
+
+    All iteration methods as well as `keys`, `values` and `items` return
+    the values ordered by the time the key-value pair is inserted:
+
+    >>> d.keys()
+    ['a', 'c', 'foo', 'spam']
+    >>> d.values()
+    ['b', 'd', 'bar', []]
+    >>> d.items()
+    [('a', 'b'), ('c', 'd'), ('foo', 'bar'), ('spam', [])]
+    >>> list(d.iterkeys())
+    ['a', 'c', 'foo', 'spam']
+    >>> list(d.itervalues())
+    ['b', 'd', 'bar', []]
+    >>> list(d.iteritems())
+    [('a', 'b'), ('c', 'd'), ('foo', 'bar'), ('spam', [])]
+
+    Index based lookup is supported too by `byindex` which returns the
+    key/value pair for an index:
+
+    >>> d.byindex(2)
+    ('foo', 'bar')
+
+    You can reverse the odict as well:
+
+    >>> d.reverse()
+    >>> d
+    odict.odict([('spam', []), ('foo', 'bar'), ('c', 'd'), ('a', 'b')])
+
+    And sort it like a list:
+
+    >>> d.sort(key=lambda x: x[0].lower())
+    >>> d
+    odict.odict([('a', 'b'), ('c', 'd'), ('foo', 'bar'), ('spam', [])])
+
+    .. _proposal: http://thread.gmane.org/gmane.comp.python.devel/95316
+    .. _ordereddict: http://www.xs4all.nl/~anthon/Python/ordereddict/
+    """
+
+    def __init__(self, *args, **kwargs):
+        """Build an odict from the same arguments that `update()` accepts."""
+        dict.__init__(self)
+        # Insertion order is tracked in a list next to the underlying dict.
+        self._keys = []
+        self.update(*args, **kwargs)
+
+    def __delitem__(self, key):
+        """Delete ``key``; dict raises KeyError before the order list is touched."""
+        dict.__delitem__(self, key)
+        self._keys.remove(key)
+
+    def __setitem__(self, key, item):
+        """Store ``item`` under ``key``; only new keys are appended to the order."""
+        if key not in self:
+            self._keys.append(key)
+        dict.__setitem__(self, key, item)
+
+    def __deepcopy__(self, memo=None):
+        """Return a deep copy that keeps the key order.
+
+        The new instance is registered in ``memo`` before the items are
+        copied, so a structure that references itself resolves to the copy.
+        """
+        if memo is None:
+            memo = {}
+        d = memo.get(id(self), missing)
+        if d is not missing:
+            return d
+        memo[id(self)] = d = self.__class__()
+        dict.__init__(d, deepcopy(self.items(), memo))
+        d._keys = self._keys[:]
+        return d
+
+    def __getstate__(self):
+        """Return the plain dict contents together with the key order."""
+        return {'items': dict(self), 'keys': self._keys}
+
+    def __setstate__(self, d):
+        """Restore the state produced by `__getstate__`."""
+        self._keys = d['keys']
+        # dict.update is called unbound, so the instance has to be passed
+        # explicitly.  Without it the call updated the state's own 'items'
+        # dict with nothing and the restored odict had keys but no values.
+        dict.update(self, d['items'])
+
+    def __reversed__(self):
+        """Iterate over the keys in reverse insertion order."""
+        return reversed(self._keys)
+
+    def __eq__(self, other):
+        """Compare for equality.
+
+        Two odicts are equal only when they hold the same items in the same
+        order; comparison with any other object falls back to dict equality.
+        """
+        if isinstance(other, odict):
+            if not dict.__eq__(self, other):
+                return False
+            return self.items() == other.items()
+        return dict.__eq__(self, other)
+
+    def __ne__(self, other):
+        """Return the negation of `__eq__`."""
+        return not self.__eq__(other)
+
+    def __cmp__(self, other):
+        """Order against another odict by item list, against a dict like a dict."""
+        if isinstance(other, odict):
+            return cmp(self.items(), other.items())
+        elif isinstance(other, dict):
+            return dict.__cmp__(self, other)
+        return NotImplemented
+
+    @classmethod
+    def fromkeys(cls, iterable, default=None):
+        """Create an odict whose keys come from ``iterable``, all set to ``default``."""
+        return cls((key, default) for key in iterable)
+
+    def clear(self):
+        """Remove every item and forget the key order."""
+        del self._keys[:]
+        dict.clear(self)
+
+    def copy(self):
+        """Return a shallow copy of the same class."""
+        return self.__class__(self)
+
+    def items(self):
+        """Return a list of ``(key, value)`` pairs in key order."""
+        return zip(self._keys, self.values())
+
+    def iteritems(self):
+        """Iterate over ``(key, value)`` pairs in key order."""
+        return izip(self._keys, self.itervalues())
+
+    def keys(self):
+        """Return a copy of the ordered key list."""
+        return self._keys[:]
+
+    def iterkeys(self):
+        """Iterate over the keys in order."""
+        return iter(self._keys)
+
+    def pop(self, key, default=missing):
+        """Remove ``key`` and return its value.
+
+        Returns ``default`` when the key is absent and a default was given;
+        raises KeyError when the key is absent and no default was given.
+        """
+        if key not in self:
+            if default is missing:
+                raise KeyError(key)
+            return default
+        # Keep the order list in sync on every successful removal; the
+        # earlier code skipped this whenever no default was supplied, which
+        # left a stale key behind in keys()/items().
+        self._keys.remove(key)
+        return dict.pop(self, key)
+
+    def popitem(self, key=missing):
+        """Remove one item and return it as a ``(key, value)`` pair.
+
+        Without ``key`` the most recently inserted item is removed; with
+        ``key`` that specific item is removed.  Raises KeyError when the
+        odict is empty or the requested key is not present.
+        """
+        if key is missing:
+            if not self._keys:
+                raise KeyError('popitem(): dictionary is empty')
+            key = self._keys[-1]
+        elif key not in self:
+            raise KeyError(key)
+        self._keys.remove(key)
+        # dict.popitem() takes no key and removes an arbitrary item, so the
+        # chosen key is removed with dict.pop() instead.
+        return key, dict.pop(self, key)
+
+    def setdefault(self, key, default=None):
+        """Return the value for ``key``, inserting ``default`` first if absent."""
+        if key not in self:
+            self._keys.append(key)
+        # The value has to be returned to honour the dict.setdefault contract.
+        return dict.setdefault(self, key, default)
+
+    def update(self, *args, **kwargs):
+        """Merge items from at most one mapping/iterable of pairs, then keywords.
+
+        Raises TypeError when more than one positional argument is given.
+        """
+        sources = []
+        if len(args) == 1:
+            if hasattr(args[0], 'iteritems'):
+                sources.append(args[0].iteritems())
+            else:
+                sources.append(iter(args[0]))
+        elif args:
+            raise TypeError('expected at most one positional argument')
+        if kwargs:
+            sources.append(kwargs.iteritems())
+        # Go through __setitem__ so that new keys are recorded in order.
+        for iterable in sources:
+            for key, val in iterable:
+                self[key] = val
+
+    def values(self):
+        """Return a list of the values in key order."""
+        return map(self.get, self._keys)
+
+    def itervalues(self):
+        """Iterate over the values in key order."""
+        return imap(self.get, self._keys)
+
+    def index(self, item):
+        """Return the position of the key ``item`` in the ordering."""
+        return self._keys.index(item)
+
+    def byindex(self, item):
+        """Return the ``(key, value)`` pair stored at position ``item``."""
+        key = self._keys[item]
+        return (key, dict.__getitem__(self, key))
+
+    def reverse(self):
+        """Reverse the key order in place."""
+        self._keys.reverse()
+
+    def sort(self, *args, **kwargs):
+        """Sort the key order in place.
+
+        All arguments are handed to ``list.sort`` on the key list, so a
+        ``key`` function receives the dictionary key, not a key/value pair.
+        """
+        self._keys.sort(*args, **kwargs)
+
+    def __repr__(self):
+        """Return ``odict.odict([...])`` with the items in key order."""
+        return 'odict.odict(%r)' % self.items()
+
+    __copy__ = copy
+    __iter__ = iterkeys
+
+
+if __name__ == '__main__':
+ import doctest
+ doctest.testmod()