#!/usr/bin/python # -*- coding: utf-8 -*- # ****************************************************************************** # # Copyright (C) 2006-2009 Olivier Tilloy # # This file is part of the pyexiv2 distribution. # # pyexiv2 is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # pyexiv2 is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with pyexiv2; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, 5th Floor, Boston, MA 02110-1301 USA. # # # File: pyexiv2.py # Author(s): Olivier Tilloy # # ****************************************************************************** """ Manipulation of EXIF, IPTC and XMP metadata embedded in image files. This module provides a single class, Image, and utility functions to manipulate EXIF, IPTC and XMP metadata embedded in image files such as JPEG and TIFF files. EXIF, IPTC and XMP metadata can be accessed in both read and write modes. This module is a higher-level interface to the Python binding of the excellent C++ library Exiv2, libpyexiv2. Its only class, Image, inherits from libpyexiv2.Image and provides convenient methods for the manipulation of EXIF and IPTC metadata using Python's built-in types and modules such as datetime. These methods should be preferred to the ones directly provided by libpyexiv2.Image. 
class FixedOffset(datetime.tzinfo):

    """
    Fixed offset from a local time east from UTC.

    Represent a fixed (positive or negative) offset from a local time in hours
    and minutes.

    Public methods:
    utcoffset -- return offset of local time from UTC, in minutes east of UTC
    dst -- return the daylight saving time (DST) adjustment, here always 0
    tzname -- return a string representation of the offset ('Z' when null)
    """

    def __init__(self, sign='+', hours=0, minutes=0):
        """
        Constructor.

        Construct a FixedOffset object from an offset sign ('+' or '-') and an
        offset absolute value expressed in hours and minutes.
        No check on the validity of those values is performed, it is the
        responsibility of the caller to pass correct values to the constructor.

        Keyword arguments:
        sign -- the sign of the offset ('+' or '-')
        hours -- the absolute number of hours of the offset
        minutes -- the absolute number of minutes of the offset
        """
        self.sign = sign
        self.hours = hours
        self.minutes = minutes

    def utcoffset(self, dt):
        """
        Return offset of local time from UTC, in minutes east of UTC.

        The result is a datetime.timedelta built from the stored hours and
        minutes; it is negative when the sign is '-' (local time west of UTC).

        Keyword arguments:
        dt -- the datetime.time object representing the local time (unused)
        """
        total = self.hours * 60 + self.minutes
        if self.sign == '-':
            total = -total
        return datetime.timedelta(minutes=total)

    def dst(self, dt):
        """
        Return the daylight saving time (DST) adjustment.

        In this implementation, it is always nil, and the method returns
        datetime.timedelta(0).

        Keyword arguments:
        dt -- the datetime.time object representing the local time (unused)
        """
        return datetime.timedelta(0)

    def tzname(self, dt):
        """
        Return a string representation of the offset.

        Return 'Z' when both hours and minutes are zero, otherwise the offset
        formatted as sign, two-digit hours, ':' and two-digit minutes
        (e.g. '+01:30').

        Keyword arguments:
        dt -- the datetime.time object representing the local time (unused)
        """
        if self.hours == 0 and self.minutes == 0:
            return 'Z'
        return '%s%02d:%02d' % (self.sign, self.hours, self.minutes)

    def __eq__(self, other):
        """
        Compare two offsets field by field (sign, hours, minutes).

        Return NotImplemented for objects that are not FixedOffset instances
        so that Python can fall back to its default comparison.
        """
        if not isinstance(other, FixedOffset):
            return NotImplemented
        return (self.sign, self.hours, self.minutes) == \
               (other.sign, other.hours, other.minutes)

    def __ne__(self, other):
        """
        Negation of __eq__ (Python 2 does not derive it automatically).
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        """
        Hash on the same fields as __eq__ so equal offsets hash alike.
        """
        return hash((self.sign, self.hours, self.minutes))

    # The comparison used to be spelled __equal__, which Python never calls
    # for '=='. The old name is kept as an alias for any explicit caller.
    __equal__ = __eq__


def UndefinedToString(undefined):
    """
    Convert an undefined string into its corresponding sequence of bytes.

    Convert a string containing the ascii codes of a sequence of bytes, each
    followed by a blank space, into the corresponding string (e.g.
    "48 50 50 49 " will be converted into "0221").
    The Undefined type is defined in the EXIF specification.

    An empty (or blank) string yields an empty string. A token that is not an
    integer still raises ValueError.

    Keyword arguments:
    undefined -- the string containing the ascii codes of a sequence of bytes
    """
    # split() without argument ignores leading/trailing blanks and collapses
    # runs of whitespace, so no empty token ever reaches int().
    return ''.join([chr(int(code)) for code in undefined.split()])
def StringToUndefined(sequence):
    """
    Convert a string containing a sequence of bytes into its undefined form.

    Each character of the input is replaced by its ascii code followed by a
    blank space (e.g. "0221" will be converted into "48 50 50 49 ").
    The Undefined type is defined in the EXIF specification.

    Keyword arguments:
    sequence -- the string containing the sequence of bytes
    """
    codes = ['%d ' % ord(char) for char in sequence]
    return ''.join(codes)


def StringToDateTime(string):
    """
    Try to convert a string containing a date and time to a datetime object.

    Several strptime patterns are tried in turn; the first one that yields a
    valid datetime wins. If none does, the string is returned unchanged.

    Keyword arguments:
    string -- the string potentially containing a date and time
    """
    # According to the EXIF specification [http://www.exif.org/Exif2-2.PDF],
    # the only accepted format for a string field representing a datetime is
    # '%Y-%m-%d %H:%M:%S', but other formats can be found in the wild, so
    # this tuple could be extended to include new exotic formats.
    # TODO: move the declaration of these patterns at module level
    known_patterns = ('%Y-%m-%d %H:%M:%S',
                      '%Y:%m:%d %H:%M:%S',
                      '%Y-%m-%dT%H:%M:%SZ')
    for pattern in known_patterns:
        try:
            fields = time.strptime(string, pattern)[:6]
            # Kept inside the try: strptime accepts leap seconds that the
            # datetime constructor rejects with ValueError.
            return datetime.datetime(*fields)
        except ValueError:
            # this pattern does not apply, try the next one
            continue
    # nothing matched, hand the original string back untouched
    return string
def StringToDate(string):
    """
    Try to convert a string containing a date to a date object.

    The conversion is attempted with time.strptime and the pattern
    '%Y-%m-%d'. If the string does not follow that pattern, it is returned
    unchanged.

    Keyword arguments:
    string -- the string potentially containing a date
    """
    # According to the IPTC specification
    # [http://www.iptc.org/std/IIM/4.1/specification/IIMV4.1.pdf], the format
    # for a string field representing a date is '%Y%m%d'.
    # However, the string returned by exiv2 using method DateValue::toString()
    # is formatted using pattern '%Y-%m-%d'.
    pattern = '%Y-%m-%d'
    try:
        parsed = time.strptime(string, pattern)
        return datetime.date(*parsed[:3])
    except ValueError:
        # the pattern does not match, do nothing
        return string


# Shape of a time as rendered by exiv2's TimeValue::toString(): two-digit
# hours, minutes and seconds followed by a signed two-digit hh:mm offset.
_TIME_WITH_OFFSET_RE = re.compile(
    r'([0-9]{2}):([0-9]{2}):([0-9]{2})([+-])([0-9]{2}):([0-9]{2})\Z')


def StringToTime(string):
    """
    Try to convert a string containing a time to a time object.

    The string must match, in full, the pattern HH:MM:SS followed by a sign
    and an HH:MM offset, every field being exactly two decimal digits. If it
    does not, or if a field is out of range (including an offset of 24 hours
    or more), the string is returned unchanged.

    Keyword arguments:
    string -- the string potentially containing a time
    """
    # According to the IPTC specification
    # [http://www.iptc.org/std/IIM/4.1/specification/IIMV4.1.pdf], the format
    # for a string field representing a time is HHMMSS followed by a signed
    # HHMM offset. However, the string returned by exiv2 using method
    # TimeValue::toString() separates the fields with colons.
    match = _TIME_WITH_OFFSET_RE.match(string)
    if match is None:
        # the string is not correctly formatted, do nothing
        return string

    hours, minutes, seconds, offsetSign, offsetHours, offsetMinutes = \
        match.groups()
    offsetHours = int(offsetHours)
    offsetMinutes = int(offsetMinutes)
    if offsetHours > 23 or offsetMinutes > 59:
        # A tzinfo must report an offset strictly smaller than one day;
        # accepting it here would only postpone the failure.
        return string

    try:
        localTime = datetime.time(int(hours), int(minutes), int(seconds))
    except ValueError:
        # the values are out of range, do nothing
        return string

    offset = FixedOffset(offsetSign, offsetHours, offsetMinutes)
    return localTime.replace(tzinfo=offset)
try:
    long
except NameError:
    # Interpreters without a separate long type: int is already unbounded.
    long = int

# Signed numerator, slash, signed non-zero denominator without leading zero.
_RATIONAL_RE = re.compile(r'(-?[0-9]+)/(-?[1-9][0-9]*)')


class Rational(object):

    """
    A class representing a rational number.
    """

    def __init__(self, numerator, denominator):
        """
        Constructor.

        @param numerator:   the numerator
        @type numerator:    C{long}
        @param denominator: the denominator
        @type denominator:  C{long}

        @raise C{ZeroDivisionError}: if the denominator equals zero, either
                                     as passed or once converted to C{long}
        """
        if denominator == 0:
            msg = 'Denominator of a rational number cannot be zero.'
            raise ZeroDivisionError(msg)
        converted = long(denominator)
        if converted == 0:
            # e.g. 0.5 or '0': not equal to 0 as passed, but stored as 0
            msg = 'Denominator of a rational number cannot be zero.'
            raise ZeroDivisionError(msg)
        self.numerator = long(numerator)
        self.denominator = converted

    def __eq__(self, other):
        """
        Compare two rational numbers for equality.

        Two rational numbers are equal if their reduced forms are equal,
        which is checked by comparing the cross products.

        @param other: the rational number to compare to self for equality
        @type other:  L{Rational}

        @return: C{True} if equal, C{False} otherwise; C{NotImplemented} if
                 other is not a L{Rational}
        @rtype:  C{bool}
        """
        if not isinstance(other, Rational):
            return NotImplemented
        return (self.numerator * other.denominator) == \
               (other.numerator * self.denominator)

    def __ne__(self, other):
        """
        Negation of L{__eq__} (Python 2 does not derive it automatically).
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        """
        Hash of the reduced form with a positive denominator, so that
        rational numbers that compare equal also hash alike.
        """
        numerator, denominator = self.numerator, self.denominator
        if denominator < 0:
            numerator, denominator = -numerator, -denominator
        # Euclid's algorithm on the absolute values
        divisor, remainder = abs(numerator), denominator
        while remainder:
            divisor, remainder = remainder, divisor % remainder
        divisor = divisor or 1
        return hash((numerator // divisor, denominator // divisor))

    def __repr__(self):
        """
        Return an unambiguous representation, e.g. C{Rational(1, 2)}.
        """
        return 'Rational(%d, %d)' % (self.numerator, self.denominator)

    def __str__(self):
        """
        Return a string representation of the rational number.
        """
        return '%d/%d' % (self.numerator, self.denominator)


def StringToRational(string):
    """
    Try to convert a string containing a rational number to a Rational object.

    The beginning of the string is matched against a regular expression of
    the form numerator/denominator. If the pattern does not match, the
    Rational object with numerator=0 and denominator=1 is returned.

    Keyword arguments:
    string -- the string potentially containing a rational number
    """
    match = _RATIONAL_RE.match(string)
    if match is None:
        return Rational(0, 1)
    numerator, denominator = match.groups()
    return Rational(long(numerator), long(denominator))
try:
    unicode
except NameError:
    # Interpreters without the Python 2 text/long builtins: str is already
    # Unicode text and int is already unbounded.
    unicode = str
    long = int


class MetadataTag(object):

    """
    A generic metadata tag.

    TODO: determine which attributes are common to all types of tags (EXIF,
          IPTC and XMP), and which are specific.
    """

    def __init__(self, key, name, label, description, xtype, value):
        """
        Constructor.

        Store every argument on the instance; the raw value is kept in the
        private attribute C{_value}.
        """
        self.key = key
        self.name = name
        self.label = label
        self.description = description
        self.xtype = xtype
        self._value = value

    def __str__(self):
        """
        Return a string representation of the metadata tag.

        One 'Field = value' entry per line, separated by C{os.linesep}.
        """
        lines = ['Key = ' + self.key,
                 'Name = ' + self.name,
                 'Label = ' + self.label,
                 'Description = ' + self.description,
                 'Type = ' + self.xtype,
                 'Raw value = ' + str(self._value)]
        return os.linesep.join(lines)


class ExifValueError(ValueError):

    """
    Raised when a value cannot be converted from or to a given EXIF type.
    """

    def __init__(self, value, xtype):
        self.value = value
        self.xtype = xtype

    def __str__(self):
        return 'Invalid value for EXIF type [%s]: [%s]' % \
               (self.xtype, self.value)


class ExifTag(MetadataTag):

    """
    An EXIF metadata tag has an additional field that contains the value
    of the tag formatted as a human readable string.
    """

    # According to the EXIF specification, the only accepted format for an
    # Ascii value representing a datetime is '%Y-%m-%d %H:%M:%S', but it
    # seems that others formats can be found in the wild.
    _datetime_formats = ('%Y-%m-%d %H:%M:%S',
                         '%Y:%m:%d %H:%M:%S',
                         '%Y-%m-%dT%H:%M:%SZ')

    def __init__(self, key, name, label, description, xtype, value, fvalue):
        """
        Constructor.

        On top of the generic attributes, store the formatted value in
        C{fvalue} and the value converted to a python type in C{value}.

        @raise L{ExifValueError}: if the value cannot be converted
        """
        super(ExifTag, self).__init__(key, name, label, description, xtype,
                                      value)
        self.fvalue = fvalue
        self.value = ExifTag._convert_to_python(value, xtype, fvalue)

    @staticmethod
    def _convert_to_python(value, xtype, fvalue):
        """
        Convert a value to its corresponding python type.

        @param value:  the value to be converted, as a string
        @type value:   C{str}
        @param xtype:  the EXIF type of the value
        @type xtype:   C{str}
        @param fvalue: the value formatted as a human-readable string by exiv2
        @type fvalue:  C{str}

        @return: the value converted to its corresponding python type
        @rtype:  depends on xtype (DOCME)

        @raise L{ExifValueError}: if the conversion fails
        """
        if xtype == 'Ascii':
            # The value may contain a Datetime
            for pattern in ExifTag._datetime_formats:
                try:
                    fields = time.strptime(value, pattern)[:6]
                    # strptime accepts leap seconds that datetime rejects
                    # with ValueError: treat those as "not a datetime".
                    return datetime.datetime(*fields)
                except (TypeError, ValueError):
                    continue
            # Default to string
            try:
                return unicode(value, 'utf-8')
            except (TypeError, UnicodeDecodeError):
                # wrong input type, or bytes that are not valid UTF-8
                raise ExifValueError(value, xtype)

        elif xtype == 'Short':
            try:
                return int(value)
            except ValueError:
                raise ExifValueError(value, xtype)

        elif xtype in ('Long', 'SLong'):
            try:
                return long(value)
            except ValueError:
                raise ExifValueError(value, xtype)

        elif xtype == 'Undefined':
            try:
                return unicode(fvalue, 'utf-8')
            except (TypeError, UnicodeDecodeError):
                raise ExifValueError(fvalue, xtype)

        # TODO: other types

        raise ExifValueError(value, xtype)

    @staticmethod
    def _convert_to_string(value, xtype):
        """
        Convert a value to its corresponding string representation.

        Type checks are exact (C{type(value) is ...}), so subclasses such as
        C{bool} are rejected where an C{int} is expected.

        @param value: the value to be converted
        @type value:  depends on xtype (DOCME)
        @param xtype: the EXIF type of the value
        @type xtype:  C{str}

        @return: the value converted to its corresponding string representation
        @rtype:  C{str}

        @raise L{ExifValueError}: if the conversion fails
        """
        value_type = type(value)

        if xtype == 'Ascii':
            if value_type is datetime.datetime:
                return value.strftime('%Y-%m-%d %H:%M:%S')
            elif value_type is datetime.date:
                return value.strftime('%Y-%m-%d 00:00:00')
            elif value_type is unicode:
                try:
                    return value.encode('utf-8')
                except UnicodeEncodeError:
                    raise ExifValueError(value, xtype)
            elif value_type is str:
                return value
            raise ExifValueError(value, xtype)

        elif xtype == 'Short':
            if value_type is int and value >= 0:
                return str(value)
            raise ExifValueError(value, xtype)

        elif xtype == 'Long':
            if value_type in (int, long) and value >= 0:
                return str(value)
            raise ExifValueError(value, xtype)

        elif xtype == 'SLong':
            if value_type in (int, long):
                return str(value)
            raise ExifValueError(value, xtype)

        # TODO: other types

        raise ExifValueError(value, xtype)

    def __str__(self):
        """
        Return a string representation of the EXIF tag.

        One 'Field = value' entry per line, separated by C{os.linesep}.
        """
        lines = ['Key = ' + self.key,
                 'Name = ' + self.name,
                 'Label = ' + self.label,
                 'Description = ' + self.description,
                 'Type = ' + self.xtype,
                 'Value = ' + str(self.value),
                 'Formatted value = ' + self.fvalue]
        return os.linesep.join(lines)


class IptcValueError(ValueError):

    """
    Raised when a value cannot be converted from or to a given IPTC type.
    """

    def __init__(self, value, xtype):
        self.value = value
        self.xtype = xtype

    def __str__(self):
        return 'Invalid value for IPTC type [%s]: [%s]' % \
               (self.xtype, self.value)
try:
    unicode
except NameError:
    # Interpreters without the Python 2 text builtin: str is Unicode text.
    unicode = str


class IptcTag(MetadataTag):

    """
    An IPTC metadata tag can have several values (tags that have the repeatable
    property).
    """

    # strptime is not flexible enough to handle all valid Time formats, we use
    # a custom regular expression.
    # The group names sign/ohours/ominutes/hours/minutes/seconds are the keys
    # read back by _convert_to_python; the name of the enclosing time zone
    # group ('tzd') is never read and is a reconstruction.
    _time_zone_re = r'(?P<sign>\+|-)(?P<ohours>\d{2}):(?P<ominutes>\d{2})'
    _time_re = re.compile(
        r'(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})(?P<tzd>%s)'
        % _time_zone_re)

    def __init__(self, key, name, label, description, xtype, values):
        """
        Constructor.

        The raw values are kept by the base class; C{values} holds the list
        of values converted to their python type.

        @raise L{IptcValueError}: if one of the values cannot be converted
        """
        super(IptcTag, self).__init__(key, name, label, description, xtype,
                                      values)
        self.values = [IptcTag._convert_to_python(raw, xtype)
                       for raw in values]

    @staticmethod
    def _convert_to_python(value, xtype):
        """
        Convert a value to its corresponding python type.

        @param value: the value to be converted, as a string
        @type value:  C{str}
        @param xtype: the IPTC type of the value
        @type xtype:  C{str}

        @return: the value converted to its corresponding python type
        @rtype:  depends on xtype (DOCME)

        @raise L{IptcValueError}: if the conversion fails
        """
        if xtype == 'Short':
            try:
                return int(value)
            except ValueError:
                raise IptcValueError(value, xtype)

        elif xtype == 'String':
            try:
                return unicode(value, 'utf-8')
            except (TypeError, UnicodeDecodeError):
                # wrong input type, or bytes that are not valid UTF-8
                raise IptcValueError(value, xtype)

        elif xtype == 'Date':
            # According to the IPTC specification, the format for a string
            # field representing a date is '%Y%m%d'. However, the string
            # returned by exiv2 using method DateValue::toString() is
            # formatted using pattern '%Y-%m-%d'.
            pattern = '%Y-%m-%d'
            try:
                parsed = time.strptime(value, pattern)
                return datetime.date(*parsed[:3])
            except (TypeError, ValueError):
                raise IptcValueError(value, xtype)

        elif xtype == 'Time':
            # According to the IPTC specification, the format for a string
            # field representing a time is HHMMSS followed by a signed HHMM
            # offset. However, the string returned by exiv2 using method
            # TimeValue::toString() separates the fields with colons.
            try:
                match = IptcTag._time_re.match(value)
            except TypeError:
                raise IptcValueError(value, xtype)
            if match is None:
                raise IptcValueError(value, xtype)
            gd = match.groupdict()
            ohours = int(gd['ohours'])
            ominutes = int(gd['ominutes'])
            if ohours > 23 or ominutes > 59:
                # A tzinfo must report an offset strictly smaller than one
                # day; accepting it would only postpone the failure.
                raise IptcValueError(value, xtype)
            tzinfo = FixedOffset(gd['sign'], ohours, ominutes)
            try:
                return datetime.time(int(gd['hours']), int(gd['minutes']),
                                     int(gd['seconds']), tzinfo=tzinfo)
            except (TypeError, ValueError):
                raise IptcValueError(value, xtype)

        elif xtype == 'Undefined':
            # Binary data, return it unmodified
            return value

        raise IptcValueError(value, xtype)

    @staticmethod
    def _convert_to_string(value, xtype):
        """
        Convert a value to its corresponding string representation.

        Type checks are exact (C{type(value) is ...}).

        @param value: the value to be converted
        @type value:  depends on xtype (DOCME)
        @param xtype: the IPTC type of the value
        @type xtype:  C{str}

        @return: the value converted to its corresponding string representation
        @rtype:  C{str}

        @raise L{IptcValueError}: if the conversion fails
        """
        value_type = type(value)

        if xtype == 'Short':
            if value_type is int:
                return str(value)
            raise IptcValueError(value, xtype)

        elif xtype == 'String':
            if value_type is unicode:
                try:
                    return value.encode('utf-8')
                except UnicodeEncodeError:
                    raise IptcValueError(value, xtype)
            elif value_type is str:
                return value
            raise IptcValueError(value, xtype)

        elif xtype == 'Date':
            if value_type in (datetime.date, datetime.datetime):
                # ISO 8601 date format
                return value.strftime('%Y%m%d')
            raise IptcValueError(value, xtype)

        elif xtype == 'Time':
            if value_type in (datetime.time, datetime.datetime):
                r = value.strftime('%H%M%S')
                if value.tzinfo is not None:
                    r += value.strftime('%z')
                else:
                    # naive values are written with a null offset
                    r += '+0000'
                return r
            raise IptcValueError(value, xtype)

        elif xtype == 'Undefined':
            if value_type is str:
                return value
            raise IptcValueError(value, xtype)

        raise IptcValueError(value, xtype)

    def __str__(self):
        """
        Return a string representation of the IPTC tag.

        One 'Field = value' entry per line, separated by C{os.linesep}.
        """
        lines = ['Key = ' + self.key,
                 'Name = ' + self.name,
                 'Label = ' + self.label,
                 'Description = ' + self.description,
                 'Type = ' + self.xtype,
                 'Values = ' + str(self.values)]
        return os.linesep.join(lines)


class XmpValueError(ValueError):

    """
    Raised when a value cannot be converted from or to a given XMP type.
    """

    def __init__(self, value, xtype):
        self.value = value
        self.xtype = xtype

    def __str__(self):
        return 'Invalid value for XMP type [%s]: [%s]' % \
               (self.xtype, self.value)