aboutsummaryrefslogtreecommitdiffstats
path: root/src/pyexiv2.py
blob: 9254d03a2a77998bccebba85a15fdef6e090bdbd (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
#!/usr/bin/python
# -*- coding: utf-8 -*-

# ******************************************************************************
#
# Copyright (C) 2006-2007 Olivier Tilloy <olivier@tilloy.net>
#
# This file is part of the pyexiv2 distribution.
#
# pyexiv2 is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# pyexiv2 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with pyexiv2; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, 5th Floor, Boston, MA 02110-1301 USA.
#
#
# File:      pyexiv2.py
# Author(s): Olivier Tilloy <olivier@tilloy.net>
#
# ******************************************************************************

"""
Manipulation of EXIF and IPTC metadata embedded in image files.

This module provides a main class, Image, a small helper class, FixedOffset,
and utility functions to manipulate EXIF and IPTC metadata embedded in image
files such as JPEG and TIFF files.
EXIF and IPTC metadata can be accessed in both read and write modes.

This module is a higher-level interface to libpyexiv2, the Python binding of
the excellent C++ library Exiv2.
Its main class, Image, inherits from libpyexiv2.Image and provides convenient
methods for the manipulation of EXIF and IPTC metadata using Python's built-in
types and modules such as datetime.
These methods should be preferred to the ones directly provided by
libpyexiv2.Image.

A typical use of this binding would be:

>>> import pyexiv2
>>> import datetime
>>> image = pyexiv2.Image('test/smiley.jpg')
>>> image.readMetadata()
>>> print image.exifKeys()
['Exif.Image.ImageDescription', 'Exif.Image.XResolution', 'Exif.Image.YResolution', 'Exif.Image.ResolutionUnit', 'Exif.Image.Software', 'Exif.Image.DateTime', 'Exif.Image.Artist', 'Exif.Image.Copyright', 'Exif.Image.ExifTag', 'Exif.Photo.Flash', 'Exif.Photo.PixelXDimension', 'Exif.Photo.PixelYDimension']
>>> print image['Exif.Image.DateTime']
2004-07-13 21:23:44
>>> image['Exif.Image.DateTime'] = datetime.datetime.today()
>>> image.writeMetadata()

"""

import libpyexiv2

import time
import datetime
import re

class FixedOffset(datetime.tzinfo):

	"""
	Time zone defined by a constant offset from UTC.

	The offset is stored as a sign ('+' or '-') together with an absolute
	number of hours and minutes. It does not depend on the date or time it is
	applied to.

	Public methods:
	utcoffset -- return the offset from UTC as a datetime.timedelta
	dst -- return the daylight saving time (DST) adjustment, here always 0
	tzname -- return a string representation of the offset, format '±%H:%M'
	"""

	def __init__(self, offsetSign='+', offsetHours=0, offsetMinutes=0):
		"""
		Constructor.

		Store the three components of the offset exactly as given.
		The values are not validated: the caller is responsible for passing a
		sign that is '+' or '-' and sensible hours and minutes.

		Keyword arguments:
		offsetSign -- the sign of the offset ('+' or '-')
		offsetHours -- the absolute number of hours of the offset
		offsetMinutes -- the absolute number of minutes of the offset
		"""
		self.offsetSign = offsetSign
		self.offsetHours = offsetHours
		self.offsetMinutes = offsetMinutes

	def utcoffset(self, dt):
		"""
		Return the offset of local time from UTC as a datetime.timedelta.

		The result is positive east of UTC and negative (sign '-') west of it.

		Keyword arguments:
		dt -- the datetime.time object representing the local time (unused)
		"""
		magnitude = datetime.timedelta(
			minutes=self.offsetHours * 60 + self.offsetMinutes)
		# Any sign other than '-' is treated as positive.
		if self.offsetSign == '-':
			return -magnitude
		return magnitude

	def dst(self, dt):
		"""
		Return the daylight saving time (DST) adjustment.

		A fixed offset has no DST, so the result is always
		datetime.timedelta(0).

		Keyword arguments:
		dt -- the datetime.time object representing the local time (unused)
		"""
		return datetime.timedelta(0)

	def tzname(self, dt):
		"""
		Return a string representation of the offset.

		The string has the format '±%H:%M', e.g. '+02:00' or '-05:30'.

		Keyword arguments:
		dt -- the datetime.time object representing the local time (unused)
		"""
		return self.offsetSign + '%02d:%02d' % (self.offsetHours,
		                                        self.offsetMinutes)

def UndefinedToString(undefined):
	"""
	Convert an undefined string into its corresponding sequence of bytes.

	Convert a string containing the decimal ascii codes of a sequence of bytes,
	separated by blank spaces, into the corresponding string (e.g.
	"48 50 50 49 " will be converted into "0221").
	An empty (or blank-only) input yields an empty string.
	The Undefined type is defined in the EXIF specification.

	Keyword arguments:
	undefined -- the string containing the ascii codes of a sequence of bytes

	Raises ValueError if one of the items is not a valid integer or is outside
	the range accepted by chr().
	"""
	# split() without argument drops leading/trailing blanks and collapses
	# runs of whitespace, so an empty value or a doubled separator no longer
	# produces an empty item that int() would reject.
	return ''.join([chr(int(code)) for code in undefined.split()])

def StringToUndefined(sequence):
	"""
	Convert a string containing a sequence of bytes into its undefined form.

	Each character of the input is replaced by its decimal ascii code followed
	by a blank space (e.g. "0221" will be converted into "48 50 50 49 ").
	This is the inverse of UndefinedToString().
	The Undefined type is defined in the EXIF specification.

	Keyword arguments:
	sequence -- the string containing the sequence of bytes
	"""
	codes = ['%d ' % ord(character) for character in sequence]
	return ''.join(codes)

def StringToDateTime(string):
	"""
	Try to convert a string containing a date and time to a datetime object.

	The string is parsed with time.strptime() against a short list of known
	patterns, tried in order; the first one that matches wins.
	If no pattern matches, the string is returned unchanged.

	Keyword arguments:
	string -- the string potentially containing a date and time
	"""
	# Patterns worth trying: dash- and colon-separated dates followed by a
	# time, and a 'T'/'Z' delimited form. Other formats seen in the wild can
	# be appended here.
	candidatePatterns = (
		'%Y-%m-%d %H:%M:%S',
		'%Y:%m:%d %H:%M:%S',
		'%Y-%m-%dT%H:%M:%SZ',
	)

	for pattern in candidatePatterns:
		try:
			fields = time.strptime(string, pattern)
			return datetime.datetime(*fields[:6])
		except ValueError:
			# this pattern does not fit, move on to the next one
			continue

	# nothing matched: hand the original string back untouched
	return string

def StringToDate(string):
	"""
	Try to convert a string containing a date to a date object.

	The string is parsed with time.strptime() using the pattern '%Y-%m-%d'.
	If it does not fit that pattern, the string is returned unchanged.

	Keyword arguments:
	string -- the string potentially containing a date
	"""
	# The IPTC specification
	# [http://www.iptc.org/std/IIM/4.1/specification/IIMV4.1.pdf] stores dates
	# as '%Y%m%d', but exiv2's DateValue::toString() hands them over as
	# '%Y-%m-%d', which is therefore the pattern parsed here.
	try:
		year, month, day = time.strptime(string, '%Y-%m-%d')[:3]
		return datetime.date(year, month, day)
	except ValueError:
		# not a date in the expected format: leave the string as it is
		return string

def StringToTime(string):
	"""
	Try to convert a string containing a time to a time object.

	Try to convert a string formatted as '%H:%M:%S±%H:%M' to the corresponding
	timezone-aware datetime.time object, whose tzinfo is a FixedOffset built
	from the trailing UTC offset.
	If the string does not have exactly that shape, or if one of its fields
	(including the UTC offset) is out of range, it is returned unchanged.

	Keyword arguments:
	string -- the string potentially containing a time
	"""
	# According to the IPTC specification
	# [http://www.iptc.org/std/IIM/4.1/specification/IIMV4.1.pdf], the format
	# for a string field representing a time is '%H%M%S±%H%M'.
	# However, the string returned by exiv2 using method TimeValue::toString()
	# is formatted using pattern '%H:%M:%S±%H:%M'.

	# Every numeric field must be exactly two decimal digits: this rejects
	# values such as '+1' or ' 1' that int() alone would silently accept.
	match = re.match(
		r'([0-9]{2}):([0-9]{2}):([0-9]{2})([+-])([0-9]{2}):([0-9]{2})\Z',
		string)
	if match is None:
		# the string is not correctly formatted, do nothing
		return string

	hours, minutes, seconds = [int(field) for field in match.group(1, 2, 3)]
	offsetSign = match.group(4)
	offsetHours = int(match.group(5))
	offsetMinutes = int(match.group(6))

	# FixedOffset performs no validation, and datetime only accepts UTC
	# offsets strictly smaller than one day; reject anything else now rather
	# than returning a time object whose utcoffset() fails later.
	if offsetHours > 23 or offsetMinutes > 59:
		return string

	try:
		offset = FixedOffset(offsetSign, offsetHours, offsetMinutes)
		return datetime.time(hours, minutes, seconds, tzinfo=offset)
	except ValueError:
		# the time values are out of range, do nothing
		return string

class Image(libpyexiv2.Image):

	"""
	Provide convenient methods for the manipulation of EXIF and IPTC metadata.

	Provide convenient methods for the manipulation of EXIF and IPTC metadata
	embedded in image files such as JPEG and TIFF files, using Python's built-in
	types and modules such as datetime.
	"""

	def __init__(self, filename):
		"""
		Constructor.

		Initialise the underlying libpyexiv2.Image with the given file name and
		create the two (initially empty) caches of tag values.

		Keyword arguments:
		filename -- passed as is to libpyexiv2.Image.__init__
		"""
		libpyexiv2.Image.__init__(self, filename)
		# Values already read or written through the [] operator, indexed by
		# tag key; one cache per metadata family.
		self.__exifTagsDict = dict()
		self.__iptcTagsDict = dict()

	def __getExifTagValue(self, key):
		"""
		Get the value associated to a key in EXIF metadata.

		The raw (type, value) pair of strings is fetched with __getExifTag()
		and the value is converted according to the type:
		'Byte' -- the string unchanged
		'Ascii' -- a datetime if the string looks like one, else the string
		'Short' -- an int
		'Long', 'SLong' -- a long
		'Rational', 'SRational' -- a (numerator, denominator) tuple of longs,
		(0, 1) when the string cannot be parsed as a fraction
		'Undefined' -- the decoded sequence of bytes as a string
		Any other type yields None.

		Keyword arguments:
		key -- the EXIF key of the requested metadata tag
		"""
		tagType, tagValue = self.__getExifTag(key)

		if tagType == 'Byte':
			return tagValue
		if tagType == 'Ascii':
			# the string may well be a date and time, try to interpret it
			return StringToDateTime(tagValue)
		if tagType == 'Short':
			return int(tagValue)
		if tagType in ('Long', 'SLong'):
			return long(tagValue)
		if tagType in ('Rational', 'SRational'):
			# Rationals are represented by plain tuples.
			# TODO: define a rational type?
			if tagType == 'Rational':
				fraction = re.compile("([0-9]+)/([1-9][0-9]*)")
			else:
				# the signed flavour accepts a minus sign on either part
				fraction = re.compile("(-?[0-9]+)/(-?[1-9][0-9]*)")
			found = fraction.match(tagValue)
			if found is None:
				return long(0), long(1)
			numerator, denominator = map(long, found.groups())
			return numerator, denominator
		if tagType == 'Undefined':
			# tagValue lists the codes of a sequence of bytes, each code
			# being followed by a blank space (e.g. "48 50 50 49 " for
			# "0221").
			# Note: for tag "Exif.Photo.UserComment", method
			# __getExifTagToString() is a better choice as it directly gives
			# the value as a human-readable string.
			return UndefinedToString(tagValue)
		return None

	def __setExifTagValue(self, key, value):
		"""
		Set the value associated to a key in EXIF metadata.

		The value is converted to the string form handed to __setExifTag():
		a datetime.datetime is formatted as '%Y:%m:%d %H:%M:%S', a tuple is
		written as 'first/second' (a rational), and anything else goes through
		str().
		The dispatch is done on the exact class of the value; instances of
		subclasses take the str() route.

		Keyword arguments:
		key -- the EXIF key of the requested metadata tag
		value -- the new value for the requested metadata tag
		"""
		valueClass = value.__class__
		if valueClass == datetime.datetime:
			text = value.strftime('%Y:%m:%d %H:%M:%S')
		elif valueClass == tuple:
			text = '%s/%s' % (str(value[0]), str(value[1]))
		else:
			# Numbers and strings alike.
			# Warning: a genuine string (type 'Ascii') cannot be told apart
			# from one meant as a sequence of bytes (type 'Undefined'); for
			# the latter the caller has to pass a string already formatted
			# with utility function StringToUndefined().
			text = str(value)
		self.__setExifTag(key, text)

	def __convertIptcTagValueToPythonType(self, typeAndValue):
		"""
		Types a tag value using Python's built-in types or modules.

		Whenever possible, the value is typed using Python's built-in types or
		modules such as date when the value represents a date (e.g. the IPTC tag
		'Iptc.Application2.DateCreated'):
		'Short' -- an int
		'String', 'Undefined' -- the string unchanged
		'Date' -- a datetime.date if the string can be parsed, else the string
		'Time' -- a datetime.time if the string can be parsed, else the string
		Any other type yields None.

		Keyword arguments:
		typeAndValue -- a (tagType, tagValue) pair: the type and the value of
		the IPTC tag as strings
		"""
		# The pair is unpacked here rather than in the signature: tuple
		# parameters are not available in every Python version, and callers
		# pass a single positional argument either way.
		tagType, tagValue = typeAndValue
		if tagType == 'Short':
			return int(tagValue)
		elif tagType == 'Date':
			return StringToDate(tagValue)
		elif tagType == 'Time':
			return StringToTime(tagValue)
		elif tagType == 'String' or tagType == 'Undefined':
			return tagValue
		return None

	def __getIptcTagValue(self, key):
		"""
		Get the value(s) associated to a key in IPTC metadata.

		Every (type, value) pair returned by __getIptcTag() for the key is
		converted with __convertIptcTagValueToPythonType(), so that dates,
		times and numbers come back as Python objects whenever possible.
		The result is always a list, with one item per repetition of the tag.

		Keyword arguments:
		key -- the IPTC key of the requested metadata tag
		"""
		return [self.__convertIptcTagValueToPythonType(typeAndValue)
		        for typeAndValue in self.__getIptcTag(key)]

	def __setIptcTagValue(self, key, value, index=0):
		"""
		Set the value associated to a key in IPTC metadata.

		The value is converted to the string form handed to __setIptcTag():
		a datetime.date is formatted as '%Y-%m-%d', a datetime.time as
		'%H:%M:%S%Z', and anything else goes through str().
		The dispatch is done on the exact class of the value.
		For a repeatable tag, index (starting from 0 like a list index) selects
		the repetition to set; it is passed on unchanged to __setIptcTag().

		Keyword arguments:
		key -- the IPTC key of the requested metadata tag
		value -- the new value for the requested metadata tag
		index -- the index of the tag repetition to set (default value: 0)

		Raises IndexError if index is negative.
		"""
		if index < 0:
			raise IndexError('Index must be greater than or equal to zero')

		valueClass = value.__class__
		if valueClass == datetime.date:
			text = value.strftime('%Y-%m-%d')
		elif valueClass == datetime.time:
			# The only legal format for a time is '%H:%M:%S±%H:%M', yet a
			# time without UTC offset (format '%H:%M:%S') can still be set
			# as exiv2 is permissive.
			text = value.strftime('%H:%M:%S%Z')
		else:
			# Numbers and strings alike.
			# Warning: a genuine string (type 'String') cannot be told apart
			# from a value of type 'Undefined'.
			# FIXME: for tags of type 'Undefined', this does not seem to
			# work...
			text = str(value)
		self.__setIptcTag(key, text, index)

	def __getitem__(self, key):
		"""
		Read access implementation of the [] operator on Image objects.

		Get the value associated to a key in EXIF/IPTC metadata.
		The first access reads and converts the value, then stores it in an
		internal dictionary; later accesses are served from that dictionary.

		Values are typed with Python's built-in types or modules whenever
		possible (e.g. a datetime for the EXIF tag
		'Exif.Photo.DateTimeOriginal', a date for the IPTC tag
		'Iptc.Application2.DateCreated').

		For an IPTC tag with exactly one value, that value is returned on its
		own; with several repetitions, a tuple of the values is returned.

		Keyword arguments:
		key -- the [EXIF|IPTC] key of the requested metadata tag

		Raises TypeError if key is not a str, and IndexError if key does not
		start with 'Exif' or 'Iptc'.
		"""
		if key.__class__ is not str:
			raise TypeError('Key must be of type string')

		family = key[:4]
		if family == 'Exif':
			cache = self.__exifTagsDict
			if key not in cache:
				cache[key] = self.__getExifTagValue(key)
			return cache[key]

		if family == 'Iptc':
			cache = self.__iptcTagsDict
			if key not in cache:
				values = self.__getIptcTagValue(key)
				count = len(values)
				if count == 1:
					# a lone value is not wrapped in a container
					values = values[0]
				elif count > 1:
					values = tuple(values)
				cache[key] = values
			return cache[key]

		# The message below mimics exiv2's standard error message; any future
		# change on exiv2's side should be mirrored here.
		# As a future development, consider i18n for error messages.
		raise IndexError("Invalid key `" + key + "'")

	def __setitem__(self, key, value):
		"""
		Write access implementation of the [] operator on Image objects.

		Set the value associated to a key in EXIF/IPTC metadata.
		The value is cached in an internal dictionary for later accesses.

		The new value passed should be typed using Python's built-in types or
		modules such as datetime when the value contains a date and a time
		(e.g. the EXIF tag 'Exif.Photo.DateTimeOriginal' or the IPTC tags
		'Iptc.Application2.DateCreated' and 'Iptc.Application2.TimeCreated'),
		the method takes care of converting it before setting the internal tag
		value.

		Passing None deletes the tag. This works whether or not the tag has
		been cached by a previous access.

		If key references a repeatable tag (IPTC only), value can be a list or
		a tuple of values (the new values will overwrite the old ones, and an
		empty sequence will unset the tag).

		Keyword arguments:
		key -- the [EXIF|IPTC] key of the requested metadata tag
		value -- the new value for the requested metadata tag

		Raises TypeError if key is not a str, and IndexError if key does not
		start with 'Exif' or 'Iptc'.
		"""
		if key.__class__ is not str:
			raise TypeError('Key must be of type string')
		tagFamily = key[:4]
		if tagFamily == 'Exif':
			if value is not None:
				self.__setExifTagValue(key, value)
				self.__exifTagsDict[key] = value
			else:
				self.__deleteExifTag(key)
				# The tag is only in the cache if it was accessed through []
				# before; pop() avoids a KeyError after a successful
				# deletion when it was not.
				self.__exifTagsDict.pop(key, None)
		elif tagFamily == 'Iptc':
			# The case of IPTC tags is a bit trickier since some tags are
			# repeatable. To simplify the process, parameter 'value' is
			# transformed into a tuple if it is not already one and then each of
			# its values is processed (set, that is) in a loop.
			newValues = value
			if newValues is None:
				# Setting the value to None does not really make sense, but can
				# in a way be seen as equivalent to deleting it, so this
				# behaviour is simulated by providing an empty tuple.
				newValues = ()
			if newValues.__class__ is not tuple:
				if newValues.__class__ is list:
					# For flexibility, passing a list instead of a tuple works
					newValues = tuple(newValues)
				else:
					# Interpret the value as a single element
					newValues = (newValues,)
			# NOTE(review): the previous values are taken from the cache only,
			# so repetitions present in the image but never read through []
			# are not known here and are not removed below.
			try:
				oldValues = self.__iptcTagsDict[key]
				if oldValues.__class__ is not tuple:
					oldValues = (oldValues,)
			except KeyError:
				# The tag is not cached yet
				oldValues = ()
			newCount = len(newValues)
			# First overwrite (or append) one repetition per new value...
			for i in xrange(newCount):
				self.__setIptcTagValue(key, newValues[i], i)
			# ...then drop the trailing repetitions that have no new value.
			# Each deletion targets the same index, the first one past the
			# new values. The two steps are separate loops so that an
			# IndexError raised while setting a value is reported to the
			# caller instead of being mistaken for "no more new values".
			for i in xrange(newCount, len(oldValues)):
				self.__deleteIptcTag(key, newCount)
			if newCount > 0:
				if newCount == 1:
					# a lone value is cached without its container
					newValues = newValues[0]
				self.__iptcTagsDict[key] = newValues
			else:
				# Same as for EXIF: the key may be absent from the cache.
				self.__iptcTagsDict.pop(key, None)
		else:
			raise IndexError("Invalid key `" + key + "'")

	def __delitem__(self, key):
		"""
		Implementation of the del operator for deletion on Image objects.

		Delete the value associated to a key in EXIF/IPTC metadata, and drop
		it from the internal cache if it is there.

		If key references a repeatable tag (IPTC only), all the associated
		values will be deleted.

		Keyword arguments:
		key -- the [EXIF|IPTC] key of the requested metadata tag

		Raises TypeError if key is not a str, and IndexError if key does not
		start with 'Exif' or 'Iptc'.
		"""
		if key.__class__ is not str:
			raise TypeError('Key must be of type string')
		tagFamily = key[:4]
		if tagFamily == 'Exif':
			self.__deleteExifTag(key)
			# The tag is only cached if it was accessed through [] before;
			# pop() avoids a KeyError after a successful deletion otherwise.
			self.__exifTagsDict.pop(key, None)
		elif tagFamily == 'Iptc':
			try:
				oldValues = self.__iptcTagsDict[key]
			except KeyError:
				# Not cached: ask for the raw list of repetitions instead
				oldValues = self.__getIptcTag(key)
			else:
				# A lone value is cached without a container (see
				# __getitem__ and __setitem__); wrap it so that len() counts
				# repetitions and not, say, the characters of a string.
				if oldValues.__class__ is not tuple:
					oldValues = (oldValues,)
			# Deleting repetition 0 once per repetition empties the tag
			for i in xrange(len(oldValues)):
				self.__deleteIptcTag(key, 0)
			self.__iptcTagsDict.pop(key, None)
		else:
			raise IndexError("Invalid key `" + key + "'")

	def cacheAllExifTags(self):
		"""
		Cache the EXIF tag values for faster subsequent access.

		Go through every key reported by exifKeys() and read it with the []
		operator, which stores the converted value in the internal dictionary.
		"""
		for exifKey in self.exifKeys():
			# the returned value is not needed, only the caching side effect
			self.__getitem__(exifKey)

	def cacheAllIptcTags(self):
		"""
		Cache the IPTC tag values for faster subsequent access.

		Go through every key reported by iptcKeys() and read it with the []
		operator, which stores the converted value in the internal dictionary.
		"""
		for iptcKey in self.iptcKeys():
			# the returned value is not needed, only the caching side effect
			self.__getitem__(iptcKey)

def _test():
	"""
	Placeholder for the self-tests of the module.

	For now it only prints a start and an end message.
	"""
	print('testing library pyexiv2...')
	# TODO: various tests
	print('done.')

# Run the (placeholder) self-tests when the module is executed as a script
# rather than imported.
if __name__ == '__main__':
	_test()