#!/usr/bin/python # -*- coding: utf-8 -*- # ****************************************************************************** # # Copyright (C) 2006-2009 Olivier Tilloy # # This file is part of the pyexiv2 distribution. # # pyexiv2 is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # pyexiv2 is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with pyexiv2; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, 5th Floor, Boston, MA 02110-1301 USA. # # Author: Olivier Tilloy # # ****************************************************************************** """ Manipulation of EXIF, IPTC and XMP metadata and thumbnails embedded in images. The L{ImageMetadata} class provides read/write access to all the metadata and the various thumbnails embedded in an image file such as JPEG and TIFF files. Metadata is accessed through subclasses of L{MetadataTag} and the tag values are conveniently wrapped in python objects. A tag containing a date/time information for the image (e.g. C{Exif.Photo.DateTimeOriginal}) will be represented by a python C{datetime.datetime} object. This module is a python layer on top of the low-level python binding of the C++ library Exiv2, libpyexiv2. A typical use of this binding would be: >>> import pyexiv2 >>> metadata = pyexiv2.ImageMetadata('test/smiley.jpg') >>> metadata.read() >>> print metadata.exif_keys ['Exif.Image.ImageDescription', 'Exif.Image.XResolution', 'Exif.Image.YResolution', 'Exif.Image.ResolutionUnit', 'Exif.Image.Software', 'Exif.Image.DateTime', 'Exif.Image.Artist', 'Exif.Image.Copyright', 'Exif.Image.ExifTag', 'Exif.Photo.Flash', 'Exif.Photo.PixelXDimension', 'Exif.Photo.PixelYDimension'] >>> print metadata['Exif.Image.DateTime'].value 2004-07-13 21:23:44 >>> import datetime >>> metadata['Exif.Image.DateTime'].value = datetime.datetime.today() >>> metadata.write() """ import libexiv2python import os import time import datetime import re __version__ = (0, 2, 1) __exiv2_version__ = libexiv2python.__exiv2_version__ class FixedOffset(datetime.tzinfo): """ Fixed positive or negative offset from a local time east from UTC. @ivar sign: the sign of the offset ('+' or '-') @type sign: C{str} @ivar hours: the absolute number of hours of the offset @type hours: C{int} @ivar minutes: the absolute number of minutes of the offset @type minutes: C{int} """ def __init__(self, sign='+', hours=0, minutes=0): """ Initialize an offset from a sign ('+' or '-') and an absolute value expressed in hours and minutes. No check on the validity of those values is performed, it is the responsibility of the caller to pass valid values. @param sign: the sign of the offset ('+' or '-') @type sign: C{str} @param hours: an absolute number of hours @type hours: C{int} @param minutes: an absolute number of minutes @type minutes: C{int} """ self.sign = sign self.hours = hours self.minutes = minutes def utcoffset(self, dt): """ Return offset of local time from UTC, in minutes east of UTC. If local time is west of UTC, this value will be negative. @param dt: the local time @type dt: C{datetime.time} @return: a whole number of minutes in the range -1439 to 1439 inclusive @rtype: C{datetime.timedelta} """ total = self.hours * 60 + self.minutes if self.sign == '-': total = -total return datetime.timedelta(minutes = total) def dst(self, dt): """ Return the daylight saving time (DST) adjustment. In this implementation, it is always nil. @param dt: the local time @type dt: C{datetime.time} @return: the DST adjustment (always nil) @rtype: C{datetime.timedelta} """ return datetime.timedelta(0) def tzname(self, dt): """ Return a string representation of the offset in the format '±%H:%M'. If the offset is nil, the representation is, by convention, 'Z'. @param dt: the local time @type dt: C{datetime.time} @return: a human-readable representation of the offset @rtype: C{str} """ if self.hours == 0 and self.minutes == 0: return 'Z' else: return '%s%02d:%02d' % (self.sign, self.hours, self.minutes) def __equal__(self, other): """ Test equality between this offset and another offset. @param other: another offset @type other: L{FixedOffset} @return: C{True} if the offset are equal, C{False} otherwise @rtype: C{bool} """ return (self.sign == other.sign) and (self.hours == other.hours) and \ (self.minutes == other.minutes) def UndefinedToString(undefined): """ Convert an undefined string into its corresponding sequence of bytes. The undefined string must contain the ascii codes of a sequence of bytes, each followed by a blank space (e.g. "48 50 50 49 " will be converted into "0221"). The Undefined type is part of the EXIF specification. @param undefined: an undefined string @type undefined: C{str} @return: the corresponding decoded string @rtype: C{str} """ return ''.join(map(lambda x: chr(int(x)), undefined.rstrip().split(' '))) def StringToUndefined(sequence): """ Convert a string into its undefined form. The undefined form contains a sequence of ascii codes, each followed by a blank space (e.g. "0221" will be converted into "48 50 50 49 "). The Undefined type is part of the EXIF specification. @param sequence: a sequence of bytes @type sequence: C{str} @return: the corresponding undefined string @rtype: C{str} """ return ''.join(map(lambda x: '%d ' % ord(x), sequence)) class Rational(object): """ A class representing a rational number. Its numerator and denominator are read-only properties. """ _format_re = re.compile(r'(?P-?\d+)/(?P\d+)') def __init__(self, numerator, denominator): """ Constructor. @param numerator: the numerator @type numerator: C{long} @param denominator: the denominator @type denominator: C{long} @raise ZeroDivisionError: if the denominator equals zero """ if denominator == 0: msg = 'Denominator of a rational number cannot be zero.' raise ZeroDivisionError(msg) self._numerator = long(numerator) self._denominator = long(denominator) @property def numerator(self): return self._numerator @property def denominator(self): return self._denominator @staticmethod def from_string(string): """ Instantiate a Rational from a string formatted as C{[-]numerator/denominator}. @param string: a string representation of a rational number @type string: C{str} @return: the rational number parsed @rtype: L{Rational} @raise ValueError: if the format of the string is invalid """ match = Rational._format_re.match(string) if match is None: raise ValueError('Invalid format for a rational: %s' % string) gd = match.groupdict() return Rational(long(gd['numerator']), long(gd['denominator'])) def to_float(self): """ @return: a floating point number approximation of the value @rtype: C{float} """ return float(self._numerator) / self._denominator def __eq__(self, other): """ Compare two rational numbers for equality. Two rational numbers are equal if their reduced forms are equal. @param other: the rational number to compare to self for equality @type other: L{Rational} @return: C{True} if equal, C{False} otherwise @rtype: C{bool} """ return (self._numerator * other._denominator) == \ (other._numerator * self._denominator) def __str__(self): """ Return a string representation of the rational number. """ return '%d/%d' % (self._numerator, self._denominator) class ListenerInterface(object): """ Interface that an object that wants to listen to changes on another object should implement. """ def contents_changed(self): """ React on changes on the object observed. Override to implement specific behaviours. """ raise NotImplementedError() class NotifyingList(list): """ A simplistic implementation of a notifying list. Any changes to the list are notified in a synchronous way to all previously registered listeners. A listener must implement the L{ListenerInterface}. """ # Useful documentation: # file:///usr/share/doc/python2.5/html/lib/typesseq-mutable.html # http://docs.python.org/reference/datamodel.html#additional-methods-for-emulation-of-sequence-types def __init__(self, items=[]): super(NotifyingList, self).__init__(items) self._listeners = set() def register_listener(self, listener): """ Register a new listener to be notified of changes. @param listener: any object that listens for changes @type listener: any class that implements the L{ListenerInterface} """ self._listeners.add(listener) def unregister_listener(self, listener): """ Unregister a previously registered listener. @param listener: a previously registered listener @type listener: any class that implements the L{ListenerInterface} @raise KeyError: if the listener was not previously registered """ self._listeners.remove(listener) def _notify_listeners(self, *args): for listener in self._listeners: listener.contents_changed(*args) def __setitem__(self, index, item): # FIXME: support slice arguments for extended slicing super(NotifyingList, self).__setitem__(index, item) self._notify_listeners() def __delitem__(self, index): # FIXME: support slice arguments for extended slicing super(NotifyingList, self).__delitem__(index) self._notify_listeners() def append(self, item): super(NotifyingList, self).append(item) self._notify_listeners() def extend(self, items): super(NotifyingList, self).extend(items) self._notify_listeners() def insert(self, index, item): super(NotifyingList, self).insert(index, item) self._notify_listeners() def pop(self, index=None): if index is None: item = super(NotifyingList, self).pop() else: item = super(NotifyingList, self).pop(index) self._notify_listeners() return item def remove(self, item): super(NotifyingList, self).remove(item) self._notify_listeners() def reverse(self): super(NotifyingList, self).reverse() self._notify_listeners() def sort(self, cmp=None, key=None, reverse=False): super(NotifyingList, self).sort(cmp, key, reverse) self._notify_listeners() def __iadd__(self, other): self = super(NotifyingList, self).__iadd__(other) self._notify_listeners() return self def __imul__(self, coefficient): self = super(NotifyingList, self).__imul__(coefficient) self._notify_listeners() return self def __setslice__(self, i, j, items): # __setslice__ is deprecated but needs to be overridden for completeness super(NotifyingList, self).__setslice__(i, j, items) self._notify_listeners() def __delslice__(self, i, j): # __delslice__ is deprecated but needs to be overridden for completeness deleted = self[i:j] super(NotifyingList, self).__delslice__(i, j) if deleted: self._notify_listeners() class MetadataTag(object): """ A generic metadata tag. It is meant to be subclassed to implement specific tag types behaviours. @ivar key: a unique key that identifies the tag @type key: C{str} @ivar name: the short internal name that identifies the tag within its scope @type name: C{str} @ivar label: a human readable label for the tag @type label: C{str} @ivar description: a description of the function of the tag @type description: C{str} @ivar type: the data type name @type type: C{str} @ivar raw_value: the raw value of the tag as provided by exiv2 @type raw_value: C{str} @ivar metadata: reference to the containing metadata if any @type metadata: L{pyexiv2.ImageMetadata} """ def __init__(self, key, name, label, description, type, value): self.key = key self.name = name # FIXME: all attributes that may contain a localized string should be # unicode. self.label = label self.description = description self.type = type self.raw_value = value self.metadata = None def __str__(self): """ Return a string representation of the value of the tag suitable to pass to libexiv2 to set it. @rtype: C{str} """ return self.raw_value def __repr__(self): """ Return a string representation of the tag for debugging purposes. @rtype: C{str} """ return '<%s [%s] = %s>' % (self.key, self.type, self.raw_value) class ExifValueError(ValueError): """ Exception raised when failing to parse the value of an EXIF tag. @ivar value: the value that fails to be parsed @type value: C{str} @ivar type: the EXIF type of the tag @type type: C{str} """ def __init__(self, value, type): self.value = value self.type = type def __str__(self): return 'Invalid value for EXIF type [%s]: [%s]' % \ (self.type, self.value) class ExifTag(MetadataTag, ListenerInterface): """ An EXIF metadata tag. This tag has an additional field that contains the value of the tag formatted as a human readable string. @ivar fvalue: the value of the tag formatted as a human readable string @type fvalue: C{str} """ # According to the EXIF specification, the only accepted format for an Ascii # value representing a datetime is '%Y:%m:%d %H:%M:%S', but it seems that # others formats can be found in the wild. _datetime_formats = ('%Y:%m:%d %H:%M:%S', '%Y-%m-%d %H:%M:%S', '%Y-%m-%dT%H:%M:%SZ') _date_formats = ('%Y:%m:%d',) def __init__(self, key, name, label, description, type, value, fvalue): super(ExifTag, self).__init__(key, name, label, description, type, value) self.fvalue = fvalue self._init_values() def _init_values(self): # Initial conversion of the raw values to their corresponding python # types. if self.type in ('Short', 'Long', 'SLong', 'Rational', 'SRational'): # May contain multiple values values = self.raw_value.split() if len(values) > 1: # Make values a notifying list values = map(self._convert_to_python, values) self._value = NotifyingList(values) self._value.register_listener(self) return self._value = self._convert_to_python(self.raw_value) def _get_value(self): return self._value def _set_value(self, new_value): if self.metadata is not None: if isinstance(new_value, (list, tuple)): raw_values = map(self._convert_to_string, new_value) raw_value = ' '.join(raw_values) else: raw_value = self._convert_to_string(new_value) self.metadata._set_exif_tag_value(self.key, raw_value) if isinstance(self._value, NotifyingList): self._value.unregister_listener(self) if isinstance(new_value, NotifyingList): # Already a notifying list self._value = new_value self._value.register_listener(self) elif isinstance(new_value, (list, tuple)): # Make the values a notifying list self._value = NotifyingList(new_value) self._value.register_listener(self) else: # Single value self._value = new_value def _del_value(self): if self.metadata is not None: self.metadata._delete_exif_tag(self.key) if isinstance(self._value, NotifyingList): self._value.unregister_listener(self) del self._value """the value of the tag converted to its corresponding python type""" value = property(fget=_get_value, fset=_set_value, fdel=_del_value, doc=None) def contents_changed(self): """ Implementation of the L{ListenerInterface}. React on changes to the list of values of the tag. """ # self._value is a list of value and its contents changed. self._set_value(self._value) def _convert_to_python(self, value): """ Convert one raw value to its corresponding python type. @param value: the raw value to be converted @type value: C{str} @return: the value converted to its corresponding python type @rtype: depends on C{self.type} (DOCME) @raise ExifValueError: if the conversion fails """ if self.type == 'Ascii': # The value may contain a Datetime for format in self._datetime_formats: try: t = time.strptime(value, format) except ValueError: continue else: return datetime.datetime(*t[:6]) # Or a Date (e.g. Exif.GPSInfo.GPSDateStamp) for format in self._date_formats: try: t = time.strptime(value, format) except ValueError: continue else: return datetime.date(*t[:3]) # Default to string. # There is currently no charset conversion. # TODO: guess the encoding and decode accordingly into unicode # where relevant. return value elif self.type == 'Byte': return value elif self.type == 'Short': try: return int(value) except ValueError: raise ExifValueError(value, self.type) elif self.type in ('Long', 'SLong'): try: return long(value) except ValueError: raise ExifValueError(value, self.type) elif self.type in ('Rational', 'SRational'): try: r = Rational.from_string(value) except (ValueError, ZeroDivisionError): raise ExifValueError(value, self.type) else: if self.type == 'Rational' and r.numerator < 0: raise ExifValueError(value, self.type) return r elif self.type == 'Undefined': # There is currently no charset conversion. # TODO: guess the encoding and decode accordingly into unicode # where relevant. return self.fvalue raise ExifValueError(value, self.type) def _convert_to_string(self, value): """ Convert one value to its corresponding string representation, suitable to pass to libexiv2. @param value: the value to be converted @type value: depends on C{self.type} (DOCME) @return: the value converted to its corresponding string representation @rtype: C{str} @raise ExifValueError: if the conversion fails """ if self.type == 'Ascii': if type(value) is datetime.datetime: return value.strftime(self._datetime_formats[0]) elif type(value) is datetime.date: if self.key == 'Exif.GPSInfo.GPSDateStamp': # Special case return value.strftime(self._date_formats[0]) else: return value.strftime('%s 00:00:00' % self._date_formats[0]) elif type(value) is unicode: try: return value.encode('utf-8') except UnicodeEncodeError: raise ExifValueError(value, self.type) elif type(value) is str: return value else: raise ExifValueError(value, self.type) elif self.type == 'Byte': if type(value) is unicode: try: return value.encode('utf-8') except UnicodeEncodeError: raise ExifValueError(value, self.type) elif type(value) is str: return value else: raise ExifValueError(value, self.type) elif self.type == 'Short': if type(value) is int and value >= 0: return str(value) else: raise ExifValueError(value, self.type) elif self.type == 'Long': if type(value) in (int, long) and value >= 0: return str(value) else: raise ExifValueError(value, self.type) elif self.type == 'SLong': if type(value) in (int, long): return str(value) else: raise ExifValueError(value, self.type) elif self.type == 'Rational': if type(value) is Rational and value.numerator >= 0: return str(value) else: raise ExifValueError(value, self.type) elif self.type == 'SRational': if type(value) is Rational: return str(value) else: raise ExifValueError(value, self.type) elif self.type == 'Undefined': if type(value) is unicode: try: return value.encode('utf-8') except UnicodeEncodeError: raise ExifValueError(value, self.type) elif type(value) is str: return value else: raise ExifValueError(value, self.type) raise ExifValueError(value, self.type) def __str__(self): """ Return a string representation of the value of the EXIF tag suitable to pass to libexiv2 to set it. @rtype: C{str} """ return self._convert_to_string(self.value) def __repr__(self): """ Return a string representation of the EXIF tag for debugging purposes. @rtype: C{str} """ left = '%s [%s]' % (self.key, self.type) if self.type == 'Undefined' and len(self._value) > 100: right = '(Binary value suppressed)' else: right = self.fvalue return '<%s = %s>' % (left, right) class IptcValueError(ValueError): """ Exception raised when failing to parse the value of an IPTC tag. @ivar value: the value that fails to be parsed @type value: C{str} @ivar type: the IPTC type of the tag @type type: C{str} """ def __init__(self, value, type): self.value = value self.type = type def __str__(self): return 'Invalid value for IPTC type [%s]: [%s]' % \ (self.type, self.value) class IptcTag(MetadataTag): """ An IPTC metadata tag. This tag can have several values (tags that have the repeatable property). """ # strptime is not flexible enough to handle all valid Time formats, we use a # custom regular expression _time_zone_re = r'(?P\+|-)(?P\d{2}):(?P\d{2})' _time_re = re.compile(r'(?P\d{2}):(?P\d{2}):(?P\d{2})(?P%s)' % _time_zone_re) def __init__(self, key, name, label, description, type, values): super(IptcTag, self).__init__(key, name, label, description, type, values) self._init_values() def _init_values(self): # Initial conversion of the raw values to their corresponding python # types. values = map(self._convert_to_python, self.raw_value) # Make values a notifying list self._values = NotifyingList(values) self._values.register_listener(self) def _get_values(self): return self._values def _set_values(self, new_values): if self.metadata is not None: raw_values = map(self._convert_to_string, new_values) self.metadata._set_iptc_tag_values(self.key, raw_values) # Make values a notifying list if needed if isinstance(new_values, NotifyingList): self._values = new_values else: self._values = NotifyingList(new_values) def _del_values(self): if self.metadata is not None: self.metadata._delete_iptc_tag(self.key) del self._values """the list of values of the tag converted to their corresponding python type""" values = property(fget=_get_values, fset=_set_values, fdel=_del_values, doc=None) def contents_changed(self): """ Implementation of the L{ListenerInterface}. React on changes to the list of values of the tag. """ # The contents of self._values was changed. # The following is a quick, non optimal solution. self._set_values(self._values) def _convert_to_python(self, value): """ Convert one raw value to its corresponding python type. @param value: the raw value to be converted @type value: C{str} @return: the value converted to its corresponding python type @rtype: depends on C{self.type} (DOCME) @raise IptcValueError: if the conversion fails """ if self.type == 'Short': try: return int(value) except ValueError: raise IptcValueError(value, self.type) elif self.type == 'String': try: return unicode(value, 'utf-8') except TypeError: raise IptcValueError(value, self.type) elif self.type == 'Date': # According to the IPTC specification, the format for a string field # representing a date is '%Y%m%d'. However, the string returned by # exiv2 using method DateValue::toString() is formatted using # pattern '%Y-%m-%d'. format = '%Y-%m-%d' try: t = time.strptime(value, format) return datetime.date(*t[:3]) except ValueError: raise IptcValueError(value, self.type) elif self.type == 'Time': # According to the IPTC specification, the format for a string field # representing a time is '%H%M%S±%H%M'. However, the string returned # by exiv2 using method TimeValue::toString() is formatted using # pattern '%H:%M:%S±%H:%M'. match = IptcTag._time_re.match(value) if match is None: raise IptcValueError(value, self.type) gd = match.groupdict() try: tzinfo = FixedOffset(gd['sign'], int(gd['ohours']), int(gd['ominutes'])) except TypeError: raise IptcValueError(value, self.type) try: return datetime.time(int(gd['hours']), int(gd['minutes']), int(gd['seconds']), tzinfo=tzinfo) except (TypeError, ValueError): raise IptcValueError(value, self.type) elif self.type == 'Undefined': # Binary data, return it unmodified return value raise IptcValueError(value, self.type) def _convert_to_string(self, value): """ Convert one value to its corresponding string representation, suitable to pass to libexiv2. @param value: the value to be converted @type value: depends on C{self.type} (DOCME) @return: the value converted to its corresponding string representation @rtype: C{str} @raise IptcValueError: if the conversion fails """ if self.type == 'Short': if type(value) is int: return str(value) else: raise IptcValueError(value, self.type) elif self.type == 'String': if type(value) is unicode: try: return value.encode('utf-8') except UnicodeEncodeError: raise IptcValueError(value, self.type) elif type(value) is str: return value else: raise IptcValueError(value, self.type) elif self.type == 'Date': if type(value) in (datetime.date, datetime.datetime): # ISO 8601 date format return value.strftime('%Y%m%d') else: raise IptcValueError(value, self.type) elif self.type == 'Time': if type(value) in (datetime.time, datetime.datetime): r = value.strftime('%H%M%S') if value.tzinfo is not None: r += value.strftime('%z') else: r += '+0000' return r else: raise IptcValueError(value, self.type) elif self.type == 'Undefined': if type(value) is str: return value else: raise IptcValueError(value, self.type) raise IptcValueError(value, self.type) def to_string(self): """ Return a list of string representations of the IPTC tag values suitable to pass to libexiv2 to set the value of the tag. @rtype: C{list} of C{str} """ return map(self._convert_to_string, self.values) def __str__(self): """ Return a string representation of the IPTC tag for debugging purposes. @rtype: C{str} """ r = 'Key = ' + self.key + os.linesep + \ 'Name = ' + self.name + os.linesep + \ 'Label = ' + self.label + os.linesep + \ 'Description = ' + self.description + os.linesep + \ 'Type = ' + self.type + os.linesep + \ 'Values = ' + str(self.values) return r class XmpValueError(ValueError): """ Exception raised when failing to parse the value of an XMP tag. @ivar value: the value that fails to be parsed @type value: C{str} @ivar type: the XMP type of the tag @type type: C{str} """ def __init__(self, value, type): self.value = value self.type = type def __str__(self): return 'Invalid value for XMP type [%s]: [%s]' % \ (self.type, self.value) class XmpTag(MetadataTag): """ An XMP metadata tag. """ # strptime is not flexible enough to handle all valid Date formats, we use a # custom regular expression _time_zone_re = r'Z|((?P\+|-)(?P\d{2}):(?P\d{2}))' _time_re = r'(?P\d{2})(:(?P\d{2})(:(?P\d{2})(.(?P\d+))?)?(?P%s))?' % _time_zone_re _date_re = re.compile(r'(?P\d{4})(-(?P\d{2})(-(?P\d{2})(T(?P