From c3cfb0f832b4be5b3427490e55e7f3f63db583f2 Mon Sep 17 00:00:00 2001 From: Olivier Tilloy Date: Sun, 12 Dec 2010 22:12:17 +0100 Subject: Try and decode Exif.Photo.UserComment according to its charset if specified. --- src/exiv2wrapper.cpp | 20 ++++++++++++++++++-- src/exiv2wrapper.hpp | 8 +++++++- src/exiv2wrapper_python.cpp | 1 + src/pyexiv2/exif.py | 29 +++++++++++++++++++++++++---- 4 files changed, 51 insertions(+), 7 deletions(-) diff --git a/src/exiv2wrapper.cpp b/src/exiv2wrapper.cpp index 4004f20..293bb0b 100644 --- a/src/exiv2wrapper.cpp +++ b/src/exiv2wrapper.cpp @@ -230,7 +230,7 @@ const ExifTag Image::getExifTag(std::string key) throw Exiv2::Error(KEY_NOT_FOUND, key); } - return ExifTag(key, &(*_exifData)[key], _exifData); + return ExifTag(key, &(*_exifData)[key], _exifData, _image->byteOrder()); } void Image::deleteExifTag(std::string key) @@ -450,6 +450,12 @@ std::string Image::getDataBuffer() const return buffer; } +Exiv2::ByteOrder Image::getByteOrder() const +{ + CHECK_METADATA_READ + return _image->byteOrder(); +} + Exiv2::ExifThumb* Image::_getExifThumbnail() { CHECK_METADATA_READ @@ -510,7 +516,10 @@ void Image::setExifThumbnailFromData(const std::string& data) } -ExifTag::ExifTag(const std::string& key, Exiv2::Exifdatum* datum, Exiv2::ExifData* data): _key(key) +ExifTag::ExifTag(const std::string& key, + Exiv2::Exifdatum* datum, Exiv2::ExifData* data, + Exiv2::ByteOrder byteOrder): + _key(key), _byteOrder(byteOrder) { if (datum != 0 && data != 0) { @@ -580,6 +589,8 @@ void ExifTag::setParentImage(Image& image) delete _datum; _datum = &(*_data)[_key.key()]; _datum->setValue(value); + + _byteOrder = image.getByteOrder(); } const std::string ExifTag::getKey() @@ -627,6 +638,11 @@ const std::string ExifTag::getHumanValue() return _datum->print(_data); } +int ExifTag::getByteOrder() +{ + return _byteOrder; +} + IptcTag::IptcTag(const std::string& key, Exiv2::IptcData* data): _key(key) { diff --git a/src/exiv2wrapper.hpp b/src/exiv2wrapper.hpp index 
5058d56..93b7823 100644 --- a/src/exiv2wrapper.hpp +++ b/src/exiv2wrapper.hpp @@ -42,7 +42,9 @@ class ExifTag { public: // Constructor - ExifTag(const std::string& key, Exiv2::Exifdatum* datum=0, Exiv2::ExifData* data=0); + ExifTag(const std::string& key, + Exiv2::Exifdatum* datum=0, Exiv2::ExifData* data=0, + Exiv2::ByteOrder byteOrder=Exiv2::invalidByteOrder); ~ExifTag(); @@ -58,6 +60,7 @@ public: const std::string getSectionDescription(); const std::string getRawValue(); const std::string getHumanValue(); + int getByteOrder(); private: Exiv2::ExifKey _key; @@ -69,6 +72,7 @@ private: std::string _description; std::string _sectionName; std::string _sectionDescription; + int _byteOrder; }; @@ -250,6 +254,8 @@ public: Exiv2::IptcData* getIptcData() { return _iptcData; }; Exiv2::XmpData* getXmpData() { return _xmpData; }; + Exiv2::ByteOrder getByteOrder() const; + private: std::string _filename; Exiv2::byte* _data; diff --git a/src/exiv2wrapper_python.cpp b/src/exiv2wrapper_python.cpp index 5b793f7..a718afe 100644 --- a/src/exiv2wrapper_python.cpp +++ b/src/exiv2wrapper_python.cpp @@ -64,6 +64,7 @@ BOOST_PYTHON_MODULE(libexiv2python) .def("_getSectionDescription", &ExifTag::getSectionDescription) .def("_getRawValue", &ExifTag::getRawValue) .def("_getHumanValue", &ExifTag::getHumanValue) + .def("_getByteOrder", &ExifTag::getByteOrder) ; class_<IptcTag>("_IptcTag", init<std::string>()) diff --git a/src/pyexiv2/exif.py b/src/pyexiv2/exif.py index 87ce581..f3e97ac 100644 --- a/src/pyexiv2/exif.py +++ b/src/pyexiv2/exif.py @@ -36,6 +36,7 @@ from pyexiv2.utils import Rational, Fraction, \ import time import datetime +import sys class ExifValueError(ValueError): @@ -264,10 +265,30 @@ class ExifTag(ListenerInterface): return value elif self.type == 'Comment': - # There is currently no charset conversion. - # TODO: guess the encoding and decode accordingly into unicode - # where relevant. 
- return value + if value.startswith('charset='): + charset, val = value.split(' ', 1) + charset = charset.split('=')[1].strip('"') + encoding = sys.getdefaultencoding() + if charset == 'Ascii': + encoding = 'ascii' + elif charset == 'Jis': + encoding = 'shift_jis' + elif charset == 'Unicode': + byte_order = self._tag._getByteOrder() + if byte_order == 1: + # little endian (II) + encoding = 'utf-16le' + elif byte_order == 2: + # big endian (MM) + encoding = 'utf-16be' + elif charset == 'Undefined': + pass + elif charset == 'InvalidCharsetId': + pass + return val.decode(encoding, 'replace') + else: + # No encoding defined. + return value elif self.type in ('Short', 'SShort'): try: -- cgit