aboutsummaryrefslogtreecommitdiffstats
path: root/src/tools/pygettext.py
diff options
context:
space:
mode:
authorshnavid <shnavid@ef72aa8b-4018-0410-8976-d6e080ef94d8>2007-07-16 18:13:17 +0000
committershnavid <shnavid@ef72aa8b-4018-0410-8976-d6e080ef94d8>2007-07-16 18:13:17 +0000
commit45e3a925e50005255eb336f6efed3fcb84ce64e2 (patch)
tree107dce0b7620138f3e4b695b4ec2479a8024bea0 /src/tools/pygettext.py
parentf9f2998b8836b15be851f04c518f1e6191b665aa (diff)
downloadsos-45e3a925e50005255eb336f6efed3fcb84ce64e2.tar.gz
* added translation tools (to generate locale)
git-svn-id: svn+ssh://svn.fedorahosted.org/svn/sos/trunk@209 ef72aa8b-4018-0410-8976-d6e080ef94d8
Diffstat (limited to 'src/tools/pygettext.py')
-rw-r--r--src/tools/pygettext.py762
1 files changed, 762 insertions, 0 deletions
diff --git a/src/tools/pygettext.py b/src/tools/pygettext.py
new file mode 100644
index 00000000..040b5c7f
--- /dev/null
+++ b/src/tools/pygettext.py
@@ -0,0 +1,762 @@
+#! /usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+# Originally written by Barry Warsaw <barry@zope.com>
+#
+# Minimally patched to make it even more xgettext compatible
+# by Peter Funk <pf@artcom-gmbh.de>
+#
+# 2002-11-22 Jürgen Hermann <jh@web.de>
+# Added checks that _() only contains string literals, and
+# command line args are resolved to module lists, i.e. you
+# can now pass a filename, a module or package name, or a
+# directory (including globbing chars, important for Win32).
+# Made docstring fit in 80 chars wide displays using pydoc.
+#
+
+import codecs
+
+# for selftesting
+import re
+try:
+ import fintl
+ _ = fintl.gettext
+except ImportError:
+ _ = lambda s: s
+
+__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
+
+Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
+internationalization of C programs. Most of these tools are independent of
+the programming language and can be used from within Python programs.
+Martin von Loewis' work[1] helps considerably in this regard.
+
+There's one problem though; xgettext is the program that scans source code
+looking for message strings, but it groks only C (or C++). Python
+introduces a few wrinkles, such as dual quoting characters, triple quoted
+strings, and raw strings. xgettext understands none of this.
+
+Enter pygettext, which uses Python's standard tokenize module to scan
+Python source code, generating .pot files identical to what GNU xgettext[2]
+generates for C and C++ code. From there, the standard GNU tools can be
+used.
+
+A word about marking Python strings as candidates for translation. GNU
+xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
+and gettext_noop. But those can be a lot of text to include all over your
+code. C and C++ have a trick: they use the C preprocessor. Most
+internationalized C source includes a #define for gettext() to _() so that
+what has to be written in the source is much less. Thus these are both
+translatable strings:
+
+ gettext("Translatable String")
+ _("Translatable String")
+
+Python of course has no preprocessor so this doesn't work so well. Thus,
+pygettext searches only for _() by default, but see the -k/--keyword flag
+below for how to augment this.
+
+ [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
+ [2] http://www.gnu.org/software/gettext/gettext.html
+
+NOTE: pygettext attempts to be option and feature compatible with GNU
+xgettext where ever possible. However some options are still missing or are
+not fully implemented. Also, xgettext's use of command line switches with
+option arguments is broken, and in these cases, pygettext just defines
+additional switches.
+
+Usage: pygettext [options] inputfile ...
+
+Options:
+
+ -a
+ --extract-all
+ Extract all strings.
+
+ -d name
+ --default-domain=name
+ Rename the default output file from messages.pot to name.pot.
+
+ -E
+ --escape
+ Replace non-ASCII characters with octal escape sequences.
+
+ -D
+ --docstrings
+ Extract module, class, method, and function docstrings. These do
+ not need to be wrapped in _() markers, and in fact cannot be for
+ Python to consider them docstrings. (See also the -X option).
+
+ -h
+ --help
+ Print this help message and exit.
+
+ -k word
+ --keyword=word
+ Keywords to look for in addition to the default set, which are:
+ %(DEFAULTKEYWORDS)s
+
+ You can have multiple -k flags on the command line.
+
+ -K
+ --no-default-keywords
+ Disable the default set of keywords (see above). Any keywords
+ explicitly added with the -k/--keyword option are still recognized.
+
+ --no-location
+ Do not write filename/lineno location comments.
+
+ -n
+ --add-location
+ Write filename/lineno location comments indicating where each
+ extracted string is found in the source. These lines appear before
+ each msgid. The style of comments is controlled by the -S/--style
+ option. This is the default.
+
+ -o filename
+ --output=filename
+ Rename the default output file from messages.pot to filename. If
+ filename is `-' then the output is sent to standard out.
+
+ -p dir
+ --output-dir=dir
+ Output files will be placed in directory dir.
+
+ -S stylename
+ --style stylename
+ Specify which style to use for location comments. Two styles are
+ supported:
+
+ Solaris # File: filename, line: line-number
+ GNU #: filename:line
+
+ The style name is case insensitive. GNU style is the default.
+
+ -v
+ --verbose
+ Print the names of the files being processed.
+
+ -V
+ --version
+ Print the version of pygettext and exit.
+
+ -w columns
+ --width=columns
+ Set width of output to columns.
+
+ -x filename
+ --exclude-file=filename
+ Specify a file that contains a list of strings that are not be
+ extracted from the input files. Each string to be excluded must
+ appear on a line by itself in the file.
+
+ -X filename
+ --no-docstrings=filename
+ Specify a file that contains a list of files (one per line) that
+ should not have their docstrings extracted. This is only useful in
+ conjunction with the -D option above.
+
+If `inputfile' is -, standard input is read.
+""")
+
+import os
+import imp
+import sys
+import glob
+import time
+import getopt
+import token
+import tokenize
+import operator
+import codecs
+
+from elementtree.ElementTree import ElementTree, XML
+
+__version__ = '1.5'
+
+default_keywords = ['_']
+DEFAULTKEYWORDS = ', '.join(default_keywords)
+
+EMPTYSTRING = ''
+
+
+
+# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
+# there.
+pot_header = _('''\
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR ORGANIZATION
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\\n"
+"POT-Creation-Date: %(time)s\\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
+"Language-Team: LANGUAGE <LL@li.org>\\n"
+"MIME-Version: 1.0\\n"
+"Content-Type: text/plain; charset=%(charset)s\\n"
+"Content-Transfer-Encoding: %(charset)s\\n"
+"Generated-By: pygettext.py %(version)s\\n"
+
+''')
+
+
+def usage(code, msg=''):
+ print >> sys.stderr, __doc__ % globals()
+ if msg:
+ print >> sys.stderr, msg
+ sys.exit(code)
+
+
+
+escapes = []
+
+def make_escapes(pass_iso8859):
+ global escapes
+ if pass_iso8859:
+ # Allow iso-8859 characters to pass through so that e.g. 'msgid
+ # "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise we
+ # escape any character outside the 32..126 range.
+ mod = 128
+ else:
+ mod = 256
+ for i in range(256):
+ if 32 <= (i % mod) <= 126:
+ escapes.append(chr(i))
+ else:
+ escapes.append("\\%03o" % i)
+ escapes[ord('\\')] = '\\\\'
+ escapes[ord('\t')] = '\\t'
+ escapes[ord('\r')] = '\\r'
+ escapes[ord('\n')] = '\\n'
+ escapes[ord('\"')] = '\\"'
+
+
+def escape_ascii(s):
+ "Escapes all text outside of 7bit ASCII plus control characters and Python literals."
+ global escapes
+ s = list(s)
+ for i in range(len(s)):
+ s[i] = escapes[ord(s[i])]
+ return EMPTYSTRING.join(s)
+
+def escape_unicode(s):
+ "Escapes control characters and Python literals only leaving non-ascii text intact."
+ #for sp in ('\t', '\r', '\n', '\"', '\\'):
+ s = s.replace('\\', '\\\\')
+ s = s.replace('\t', '\\t')
+ s = s.replace('\r', '\\r')
+ s = s.replace('\n', '\\n')
+ s = s.replace('\"', '\\"')
+ # escape control chars
+ def repl(m): return "\\%03o" % ord(m.group(0))
+ s = re.sub('[\001-\037]', repl, s)
+ return s
+
+def safe_eval(s):
+ # unwrap quotes, safely
+ return eval(s, {'__builtins__':{}}, {})
+
+
+def normalize(s, escape=False):
+ # This converts the various Python string types into a format that is
+ # appropriate for .po files, namely much closer to C style.
+ lines = s.split('\n')
+ if len(lines) == 1:
+ s = '"' + escape_unicode(s) + '"'
+ else:
+ if not lines[-1]:
+ del lines[-1]
+ lines[-1] = lines[-1] + '\n'
+ for i in range(len(lines)):
+ lines[i] = escape_unicode(lines[i])
+ lineterm = '\\n"\n"'
+ s = '""\n"' + lineterm.join(lines) + '"'
+ if isinstance(s, unicode):
+ s = s.encode('utf-8')
+ if escape:
+ def repl(m): return "\\%03o" % ord(m.group(0))
+ s = re.sub('[\200-\377]', repl, s)
+ return s
+
+
+def containsAny(str, set):
+ """Check whether 'str' contains ANY of the chars in 'set'"""
+ return 1 in [c in str for c in set]
+
+
+def _visit_pyfiles(list, dirname, names):
+ """Helper for getFilesForName()."""
+ # get extension for python source files
+ if not globals().has_key('_py_ext'):
+ global _py_ext
+ _py_ext = [triple[0] for triple in imp.get_suffixes()
+ if triple[2] == imp.PY_SOURCE][0]
+
+ # don't recurse into CVS directories
+ if 'CVS' in names:
+ names.remove('CVS')
+ if '.svn' in names:
+ names.remove('.svn')
+
+ # add all *.py files to list
+ list.extend(
+ [os.path.join(dirname, file) for file in names
+ if os.path.splitext(file)[1] == _py_ext]
+ )
+
+
+def _get_modpkg_path(dotted_name, pathlist=None):
+ """Get the filesystem path for a module or a package.
+
+ Return the file system path to a file for a module, and to a directory for
+ a package. Return None if the name is not found, or is a builtin or
+ extension module.
+ """
+ # split off top-most name
+ parts = dotted_name.split('.', 1)
+
+ if len(parts) > 1:
+ # we have a dotted path, import top-level package
+ try:
+ file, pathname, description = imp.find_module(parts[0], pathlist)
+ if file: file.close()
+ except ImportError:
+ return None
+
+ # check if it's indeed a package
+ if description[2] == imp.PKG_DIRECTORY:
+ # recursively handle the remaining name parts
+ pathname = _get_modpkg_path(parts[1], [pathname])
+ else:
+ pathname = None
+ else:
+ # plain name
+ try:
+ file, pathname, description = imp.find_module(
+ dotted_name, pathlist)
+ if file:
+ file.close()
+ if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
+ pathname = None
+ except ImportError:
+ pathname = None
+
+ return pathname
+
+
+def getFilesForName(name):
+ """Get a list of module files for a filename, a module or package name,
+ or a directory.
+ """
+ if not os.path.exists(name):
+ # check for glob chars
+ if containsAny(name, "*?[]"):
+ files = glob.glob(name)
+ list = []
+ for file in files:
+ list.extend(getFilesForName(file))
+ return list
+
+ # try to find module or package
+ name = _get_modpkg_path(name)
+ if not name:
+ return []
+
+ if os.path.isdir(name):
+ # find all python files in directory
+ list = []
+ os.path.walk(name, _visit_pyfiles, list)
+ return list
+ elif os.path.exists(name):
+ # a single file
+ return [name]
+
+ return []
+
+
+class TokenEater:
+ def __init__(self, options):
+ self.__options = options
+ self.__messages = {}
+ self.__state = self.__waiting
+ self.__data = []
+ self.__lineno = -1
+ self.__freshmodule = 1
+ self.__curfile = None
+ self.__encoding = None
+
+ def __call__(self, ttype, tstring, stup, etup, line):
+ # dispatch
+## import token
+## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
+## 'tstring:', tstring
+ self.__state(ttype, tstring, stup[0])
+
+ def __waiting(self, ttype, tstring, lineno):
+ opts = self.__options
+ # Do docstring extractions, if enabled
+ if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
+ # module docstring?
+ if self.__freshmodule:
+ if ttype == tokenize.STRING:
+ self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
+ self.__freshmodule = 0
+ elif ttype not in (tokenize.COMMENT, tokenize.NL):
+ self.__freshmodule = 0
+ return
+ # class docstring?
+ if ttype == tokenize.NAME and tstring in ('class', 'def'):
+ self.__state = self.__suiteseen
+ return
+ if ttype == tokenize.NAME and tstring in opts.keywords:
+ self.__state = self.__keywordseen
+
+ def __suiteseen(self, ttype, tstring, lineno):
+ # ignore anything until we see the colon
+ if ttype == tokenize.OP and tstring == ':':
+ self.__state = self.__suitedocstring
+
+ def __suitedocstring(self, ttype, tstring, lineno):
+ # ignore any intervening noise
+ if ttype == tokenize.STRING:
+ self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
+ self.__state = self.__waiting
+ elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
+ tokenize.COMMENT):
+ # there was no class docstring
+ self.__state = self.__waiting
+
+ def __keywordseen(self, ttype, tstring, lineno):
+ if ttype == tokenize.OP and tstring == '(':
+ self.__data = []
+ self.__lineno = lineno
+ self.__state = self.__openseen
+ else:
+ self.__state = self.__waiting
+
+ def __openseen(self, ttype, tstring, lineno):
+ if ttype == tokenize.OP and tstring == ')':
+ # We've seen the last of the translatable strings. Record the
+ # line number of the first line of the strings and update the list
+ # of messages seen. Reset state for the next batch. If there
+ # were no strings inside _(), then just ignore this entry.
+ if self.__data:
+ self.__addentry(EMPTYSTRING.join(self.__data))
+ self.__state = self.__waiting
+ elif ttype == tokenize.STRING:
+ self.__data.append(safe_eval(tstring))
+ elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
+ token.NEWLINE, tokenize.NL]:
+ # warn if we see anything else than STRING or whitespace
+ print >> sys.stderr, _(
+ '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
+ ) % {
+ 'token': tstring,
+ 'file': self.__curfile,
+ 'lineno': self.__lineno
+ }
+ self.__state = self.__waiting
+
+ def __addentry(self, msg, lineno=None, isdocstring=0, iskidstring=0):
+ # tokenize module always return unicode strings
+ # even when they are in fact coded string instances
+ # to deal with this we use a hack:
+ # evaluate string's representation without leading "u"
+ # to force interpration as coded string
+ # then we decode it using already known file's encoding
+ if not iskidstring:
+ if type(msg) is str:
+ msg = eval(repr(msg))
+ else:
+ msg = eval(repr(msg)[1:])
+ msg = msg.decode(self.__encoding)
+ if lineno is None:
+ lineno = self.__lineno
+ if not msg in self.__options.toexclude:
+ entry = (self.__curfile, lineno)
+ self.__messages.setdefault(msg, {})[entry] = isdocstring
+
+ def set_filename(self, filename):
+ self.__curfile = filename
+ self.__freshmodule = 1
+
+ def set_file_encoding(self, fp):
+ """Searches for -*- coding: -*- magic comment to find out file encoding."""
+ self.__encoding = 'utf-8' # reset to default for each new file
+ for line in fp.readlines()[:5]:
+ m = re.match('#\s*-\*-\s+coding:\s+(\w+)\s+-\*-', line)
+ if m:
+ self.__encoding = m.group(1)
+ break
+ fp.seek(0)
+
+ def contains_inline_python(self,msg):
+ if '${' in msg and not '$${' in msg: return True
+ return False
+
+ def strip_namespace_uri(self,tag):
+ return tag.split('}')[-1]
+
+ def get_text_node(self,node):
+ tag = re.sub('({[^}]+})?(\w+)', '\\2', node.tag)
+
+ if node.text:
+ msg = node.text.strip()
+ if msg and not self.contains_inline_python(msg):
+ if tag not in ['script','style']:
+ self.__addentry(msg,self.strip_namespace_uri(node.tag), iskidstring=1)
+
+ if node.getchildren():
+ for child in node: self.get_text_node(child)
+
+ if node.tail:
+ msg = node.tail.strip()
+ if msg and not self.contains_inline_python(msg):
+ self.__addentry(msg,self.strip_namespace_uri(node.tag), iskidstring=1)
+
+ def extract_kid_strings(self):
+ if not self.__curfile: return
+ f = None
+ try:
+ file = open(self.__curfile)
+ f = ElementTree(XML( fixentities(file.read() )))
+ except Exception, e:
+ print 'Skip %s: %s' % (self.__curfile, e)
+ return
+
+ node = f.getroot()
+ self.get_text_node(node)
+
+ def write(self, fp):
+ options = self.__options
+ # format without tz information
+ # because %Z is timezone's name, not offset
+ # and, say, on localized Windows XP this is non-ascii string
+ timestamp = time.strftime('%Y-%m-%d %H:%M')
+ # The time stamp in the header doesn't have the same format as that
+ # generated by xgettext...
+ t = {'time': timestamp, 'version': __version__, 'charset':'utf-8'}
+ print >> fp, pot_header % t
+ # Sort the entries. First sort each particular entry's keys, then
+ # sort all the entries by their first item.
+ reverse = {}
+ for k, v in self.__messages.items():
+ keys = v.keys()
+ keys.sort()
+ reverse.setdefault(tuple(keys), []).append((k, v))
+ rkeys = reverse.keys()
+ rkeys.sort()
+ for rkey in rkeys:
+ rentries = reverse[rkey]
+ rentries.sort()
+ for k, v in rentries:
+ isdocstring = 0
+ # If the entry was gleaned out of a docstring, then add a
+ # comment stating so. This is to aid translators who may wish
+ # to skip translating some unimportant docstrings.
+ if reduce(operator.__add__, v.values()):
+ isdocstring = 1
+ # k is the message string, v is a dictionary-set of (filename,
+ # lineno) tuples. We want to sort the entries in v first by
+ # file name and then by line number.
+ v = v.keys()
+ v.sort()
+ if not options.writelocations:
+ pass
+ # location comments are different b/w Solaris and GNU:
+ elif options.locationstyle == options.SOLARIS:
+ for filename, lineno in v:
+ d = {'filename': filename, 'lineno': lineno}
+ print >>fp, _(
+ '# File: %(filename)s, line: %(lineno)s') % d
+ elif options.locationstyle == options.GNU:
+ # fit as many locations on one line, as long as the
+ # resulting line length doesn't exceeds 'options.width'
+ locline = '#:'
+ for filename, lineno in v:
+ d = {'filename': filename, 'lineno': lineno}
+ s = _(' %(filename)s:%(lineno)s') % d
+ if len(locline) + len(s) <= options.width:
+ locline = locline + s
+ else:
+ print >> fp, locline
+ locline = "#:" + s
+ if len(locline) > 2:
+ print >> fp, locline
+ if isdocstring:
+ print >> fp, '#, docstring'
+ if k: # do not output empty msgid
+ print >> fp, 'msgid', normalize(k, options.escape)
+ print >> fp, 'msgstr ""\n'
+
+
+def main():
+ global default_keywords
+ try:
+ opts, args = getopt.getopt(
+ sys.argv[1:],
+ 'ad:UDEhk:Kno:p:S:Vvw:x:X:',
+ ['extract-all', 'default-domain=', 'escape', 'help',
+ 'keyword=', 'no-default-keywords',
+ 'add-location', 'no-location', 'output=', 'output-dir=',
+ 'style=', 'verbose', 'version', 'width=', 'exclude-file=',
+ 'docstrings', 'no-docstrings', 'support-unicode',
+ ])
+ except getopt.error, msg:
+ usage(1, msg)
+
+ # for holding option values
+ class Options:
+ # constants
+ GNU = 1
+ SOLARIS = 2
+ # defaults
+ extractall = 0 # FIXME: currently this option has no effect at all.
+ escape = 0
+ keywords = []
+ outpath = ''
+ outfile = 'messages.pot'
+ writelocations = 1
+ locationstyle = GNU
+ verbose = 0
+ width = 78
+ excludefilename = ''
+ docstrings = 0
+ nodocstrings = {}
+
+ options = Options()
+ locations = {'gnu' : options.GNU,
+ 'solaris' : options.SOLARIS,
+ }
+
+ # parse options
+ for opt, arg in opts:
+ if opt in ('-h', '--help'):
+ usage(0)
+ elif opt in ('-a', '--extract-all'):
+ options.extractall = 1
+ elif opt in ('-d', '--default-domain'):
+ options.outfile = arg + '.pot'
+ elif opt in ('-E', '--escape'):
+ options.escape = 1
+ elif opt in ('-D', '--docstrings'):
+ options.docstrings = 1
+ elif opt in ('-k', '--keyword'):
+ options.keywords.append(arg)
+ elif opt in ('-K', '--no-default-keywords'):
+ default_keywords = []
+ elif opt in ('-n', '--add-location'):
+ options.writelocations = 1
+ elif opt in ('--no-location',):
+ options.writelocations = 0
+ elif opt in ('-S', '--style'):
+ options.locationstyle = locations.get(arg.lower())
+ if options.locationstyle is None:
+ usage(1, _('Invalid value for --style: %s') % arg)
+ elif opt in ('-o', '--output'):
+ options.outfile = arg
+ elif opt in ('-p', '--output-dir'):
+ options.outpath = arg
+ elif opt in ('-v', '--verbose'):
+ options.verbose = 1
+ elif opt in ('-V', '--version'):
+ print _('pygettext.py (xgettext for Python) %s') % __version__
+ sys.exit(0)
+ elif opt in ('-w', '--width'):
+ try:
+ options.width = int(arg)
+ except ValueError:
+ usage(1, _('--width argument must be an integer: %s') % arg)
+ elif opt in ('-x', '--exclude-file'):
+ options.excludefilename = arg
+ elif opt in ('-X', '--no-docstrings'):
+ fp = open(arg)
+ try:
+ while 1:
+ line = fp.readline()
+ if not line:
+ break
+ options.nodocstrings[line[:-1]] = 1
+ finally:
+ fp.close()
+
+ # calculate escapes
+ make_escapes(0)
+
+ # calculate all keywords
+ options.keywords.extend(default_keywords)
+
+ # initialize list of strings to exclude
+ if options.excludefilename:
+ try:
+ fp = open(options.excludefilename)
+ options.toexclude = fp.readlines()
+ fp.close()
+ except IOError:
+ print >> sys.stderr, _(
+ "Can't read --exclude-file: %s") % options.excludefilename
+ sys.exit(1)
+ else:
+ options.toexclude = []
+
+ # resolve args to module lists
+ expanded = []
+ for arg in args:
+ if arg == '-':
+ expanded.append(arg)
+ else:
+ expanded.extend(getFilesForName(arg))
+ args = expanded
+
+ # slurp through all the files
+ eater = TokenEater(options)
+ for filename in args:
+ if filename == '-':
+ if options.verbose:
+ print _('Reading standard input')
+ fp = sys.stdin
+ closep = 0
+ else:
+ if options.verbose:
+ print _('Working on %s') % filename
+ fp = open(filename)
+ eater.set_file_encoding(fp)
+ closep = 1
+ try:
+ eater.set_filename(filename)
+ try:
+ tokenize.tokenize(fp.readline, eater)
+ except tokenize.TokenError, e:
+ print >> sys.stderr, '%s: %s, line %d, column %d' % (
+ e[0], filename, e[1][0], e[1][1])
+ finally:
+ if closep:
+ fp.close()
+
+ if os.path.splitext(filename)[-1].lower() == '.kid': eater.extract_kid_strings()
+
+ # write the output
+ if options.outfile == '-':
+ fp = sys.stdout
+ closep = 0
+ else:
+ if options.outpath:
+ options.outfile = os.path.join(options.outpath, options.outfile)
+ fp = open(options.outfile, 'wt')
+ closep = 1
+ try:
+ eater.write(fp)
+ finally:
+ if closep:
+ fp.close()
+
+
+if __name__ == '__main__':
+ main()
+ # some more test strings
+ _(u'a unicode string')
+ # this one creates a warning
+ _('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
+ _('more' 'than' 'one' 'string')