aboutsummaryrefslogtreecommitdiffstats
path: root/trunk/src/tools
diff options
context:
space:
mode:
Diffstat (limited to 'trunk/src/tools')
-rw-r--r--trunk/src/tools/msgfmt.py203
-rw-r--r--trunk/src/tools/pygettext.py762
2 files changed, 0 insertions, 965 deletions
diff --git a/trunk/src/tools/msgfmt.py b/trunk/src/tools/msgfmt.py
deleted file mode 100644
index 8a2d4e66..00000000
--- a/trunk/src/tools/msgfmt.py
+++ /dev/null
@@ -1,203 +0,0 @@
-#! /usr/bin/env python
-# -*- coding: iso-8859-1 -*-
-# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
-
-"""Generate binary message catalog from textual translation description.
-
-This program converts a textual Uniforum-style message catalog (.po file) into
-a binary GNU catalog (.mo file). This is essentially the same function as the
-GNU msgfmt program, however, it is a simpler implementation.
-
-Usage: msgfmt.py [OPTIONS] filename.po
-
-Options:
- -o file
- --output-file=file
- Specify the output file to write to. If omitted, output will go to a
- file named filename.mo (based off the input file name).
-
- -h
- --help
- Print this message and exit.
-
- -V
- --version
- Display version information and exit.
-"""
-
-import sys
-import os
-import getopt
-import struct
-import array
-
-__version__ = "1.1"
-
-MESSAGES = {}
-
-
-
-def usage(code, msg=''):
- print >> sys.stderr, __doc__
- if msg:
- print >> sys.stderr, msg
- sys.exit(code)
-
-
-
-def add(id, str, fuzzy):
- "Add a non-fuzzy translation to the dictionary."
- global MESSAGES
- if not fuzzy and str:
- MESSAGES[id] = str
-
-
-
-def generate():
- "Return the generated output."
- global MESSAGES
- keys = MESSAGES.keys()
- # the keys are sorted in the .mo file
- keys.sort()
- offsets = []
- ids = strs = ''
- for id in keys:
- # For each string, we need size and file offset. Each string is NUL
- # terminated; the NUL does not count into the size.
- offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id])))
- ids += id + '\0'
- strs += MESSAGES[id] + '\0'
- output = ''
- # The header is 7 32-bit unsigned integers. We don't use hash tables, so
- # the keys start right after the index tables.
- # translated string.
- keystart = 7*4+16*len(keys)
- # and the values start after the keys
- valuestart = keystart + len(ids)
- koffsets = []
- voffsets = []
- # The string table first has the list of keys, then the list of values.
- # Each entry has first the size of the string, then the file offset.
- for o1, l1, o2, l2 in offsets:
- koffsets += [l1, o1+keystart]
- voffsets += [l2, o2+valuestart]
- offsets = koffsets + voffsets
- output = struct.pack("Iiiiiii",
- 0x950412deL, # Magic
- 0, # Version
- len(keys), # # of entries
- 7*4, # start of key index
- 7*4+len(keys)*8, # start of value index
- 0, 0) # size and offset of hash table
- output += array.array("i", offsets).tostring()
- output += ids
- output += strs
- return output
-
-
-
-def make(filename, outfile):
- ID = 1
- STR = 2
-
- # Compute .mo name from .po name and arguments
- if filename.endswith('.po'):
- infile = filename
- else:
- infile = filename + '.po'
- if outfile is None:
- outfile = os.path.splitext(infile)[0] + '.mo'
-
- try:
- lines = open(infile).readlines()
- except IOError, msg:
- print >> sys.stderr, msg
- sys.exit(1)
-
- section = None
- fuzzy = 0
-
- # Parse the catalog
- lno = 0
- for l in lines:
- lno += 1
- # If we get a comment line after a msgstr, this is a new entry
- if l[0] == '#' and section == STR:
- add(msgid, msgstr, fuzzy)
- section = None
- fuzzy = 0
- # Record a fuzzy mark
- if l[:2] == '#,' and l.find('fuzzy'):
- fuzzy = 1
- # Skip comments
- if l[0] == '#':
- continue
- # Now we are in a msgid section, output previous section
- if l.startswith('msgid'):
- if section == STR:
- add(msgid, msgstr, fuzzy)
- section = ID
- l = l[5:]
- msgid = msgstr = ''
- # Now we are in a msgstr section
- elif l.startswith('msgstr'):
- section = STR
- l = l[6:]
- # Skip empty lines
- l = l.strip()
- if not l:
- continue
- # XXX: Does this always follow Python escape semantics?
- l = eval(l)
- if section == ID:
- msgid += l
- elif section == STR:
- msgstr += l
- else:
- print >> sys.stderr, 'Syntax error on %s:%d' % (infile, lno), \
- 'before:'
- print >> sys.stderr, l
- sys.exit(1)
- # Add last entry
- if section == STR:
- add(msgid, msgstr, fuzzy)
-
- # Compute output
- output = generate()
-
- try:
- open(outfile,"wb").write(output)
- except IOError,msg:
- print >> sys.stderr, msg
-
-
-
-def main():
- try:
- opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
- ['help', 'version', 'output-file='])
- except getopt.error, msg:
- usage(1, msg)
-
- outfile = None
- # parse options
- for opt, arg in opts:
- if opt in ('-h', '--help'):
- usage(0)
- elif opt in ('-V', '--version'):
- print >> sys.stderr, "msgfmt.py", __version__
- sys.exit(0)
- elif opt in ('-o', '--output-file'):
- outfile = arg
- # do it
- if not args:
- print >> sys.stderr, 'No input file given'
- print >> sys.stderr, "Try `msgfmt --help' for more information."
- return
-
- for filename in args:
- make(filename, outfile)
-
-
-if __name__ == '__main__':
- main()
diff --git a/trunk/src/tools/pygettext.py b/trunk/src/tools/pygettext.py
deleted file mode 100644
index 040b5c7f..00000000
--- a/trunk/src/tools/pygettext.py
+++ /dev/null
@@ -1,762 +0,0 @@
-#! /usr/bin/env python
-# -*- coding: iso-8859-1 -*-
-# Originally written by Barry Warsaw <barry@zope.com>
-#
-# Minimally patched to make it even more xgettext compatible
-# by Peter Funk <pf@artcom-gmbh.de>
-#
-# 2002-11-22 Jürgen Hermann <jh@web.de>
-# Added checks that _() only contains string literals, and
-# command line args are resolved to module lists, i.e. you
-# can now pass a filename, a module or package name, or a
-# directory (including globbing chars, important for Win32).
-# Made docstring fit in 80 chars wide displays using pydoc.
-#
-
-import codecs
-
-# for selftesting
-import re
-try:
- import fintl
- _ = fintl.gettext
-except ImportError:
- _ = lambda s: s
-
-__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
-
-Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
-internationalization of C programs. Most of these tools are independent of
-the programming language and can be used from within Python programs.
-Martin von Loewis' work[1] helps considerably in this regard.
-
-There's one problem though; xgettext is the program that scans source code
-looking for message strings, but it groks only C (or C++). Python
-introduces a few wrinkles, such as dual quoting characters, triple quoted
-strings, and raw strings. xgettext understands none of this.
-
-Enter pygettext, which uses Python's standard tokenize module to scan
-Python source code, generating .pot files identical to what GNU xgettext[2]
-generates for C and C++ code. From there, the standard GNU tools can be
-used.
-
-A word about marking Python strings as candidates for translation. GNU
-xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
-and gettext_noop. But those can be a lot of text to include all over your
-code. C and C++ have a trick: they use the C preprocessor. Most
-internationalized C source includes a #define for gettext() to _() so that
-what has to be written in the source is much less. Thus these are both
-translatable strings:
-
- gettext("Translatable String")
- _("Translatable String")
-
-Python of course has no preprocessor so this doesn't work so well. Thus,
-pygettext searches only for _() by default, but see the -k/--keyword flag
-below for how to augment this.
-
- [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
- [2] http://www.gnu.org/software/gettext/gettext.html
-
-NOTE: pygettext attempts to be option and feature compatible with GNU
-xgettext where ever possible. However some options are still missing or are
-not fully implemented. Also, xgettext's use of command line switches with
-option arguments is broken, and in these cases, pygettext just defines
-additional switches.
-
-Usage: pygettext [options] inputfile ...
-
-Options:
-
- -a
- --extract-all
- Extract all strings.
-
- -d name
- --default-domain=name
- Rename the default output file from messages.pot to name.pot.
-
- -E
- --escape
- Replace non-ASCII characters with octal escape sequences.
-
- -D
- --docstrings
- Extract module, class, method, and function docstrings. These do
- not need to be wrapped in _() markers, and in fact cannot be for
- Python to consider them docstrings. (See also the -X option).
-
- -h
- --help
- Print this help message and exit.
-
- -k word
- --keyword=word
- Keywords to look for in addition to the default set, which are:
- %(DEFAULTKEYWORDS)s
-
- You can have multiple -k flags on the command line.
-
- -K
- --no-default-keywords
- Disable the default set of keywords (see above). Any keywords
- explicitly added with the -k/--keyword option are still recognized.
-
- --no-location
- Do not write filename/lineno location comments.
-
- -n
- --add-location
- Write filename/lineno location comments indicating where each
- extracted string is found in the source. These lines appear before
- each msgid. The style of comments is controlled by the -S/--style
- option. This is the default.
-
- -o filename
- --output=filename
- Rename the default output file from messages.pot to filename. If
- filename is `-' then the output is sent to standard out.
-
- -p dir
- --output-dir=dir
- Output files will be placed in directory dir.
-
- -S stylename
- --style stylename
- Specify which style to use for location comments. Two styles are
- supported:
-
- Solaris # File: filename, line: line-number
- GNU #: filename:line
-
- The style name is case insensitive. GNU style is the default.
-
- -v
- --verbose
- Print the names of the files being processed.
-
- -V
- --version
- Print the version of pygettext and exit.
-
- -w columns
- --width=columns
- Set width of output to columns.
-
- -x filename
- --exclude-file=filename
- Specify a file that contains a list of strings that are not be
- extracted from the input files. Each string to be excluded must
- appear on a line by itself in the file.
-
- -X filename
- --no-docstrings=filename
- Specify a file that contains a list of files (one per line) that
- should not have their docstrings extracted. This is only useful in
- conjunction with the -D option above.
-
-If `inputfile' is -, standard input is read.
-""")
-
-import os
-import imp
-import sys
-import glob
-import time
-import getopt
-import token
-import tokenize
-import operator
-import codecs
-
-from elementtree.ElementTree import ElementTree, XML
-
-__version__ = '1.5'
-
-default_keywords = ['_']
-DEFAULTKEYWORDS = ', '.join(default_keywords)
-
-EMPTYSTRING = ''
-
-
-
-# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
-# there.
-pot_header = _('''\
-# SOME DESCRIPTIVE TITLE.
-# Copyright (C) YEAR ORGANIZATION
-# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
-#
-msgid ""
-msgstr ""
-"Project-Id-Version: PACKAGE VERSION\\n"
-"POT-Creation-Date: %(time)s\\n"
-"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
-"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
-"Language-Team: LANGUAGE <LL@li.org>\\n"
-"MIME-Version: 1.0\\n"
-"Content-Type: text/plain; charset=%(charset)s\\n"
-"Content-Transfer-Encoding: %(charset)s\\n"
-"Generated-By: pygettext.py %(version)s\\n"
-
-''')
-
-
-def usage(code, msg=''):
- print >> sys.stderr, __doc__ % globals()
- if msg:
- print >> sys.stderr, msg
- sys.exit(code)
-
-
-
-escapes = []
-
-def make_escapes(pass_iso8859):
- global escapes
- if pass_iso8859:
- # Allow iso-8859 characters to pass through so that e.g. 'msgid
- # "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise we
- # escape any character outside the 32..126 range.
- mod = 128
- else:
- mod = 256
- for i in range(256):
- if 32 <= (i % mod) <= 126:
- escapes.append(chr(i))
- else:
- escapes.append("\\%03o" % i)
- escapes[ord('\\')] = '\\\\'
- escapes[ord('\t')] = '\\t'
- escapes[ord('\r')] = '\\r'
- escapes[ord('\n')] = '\\n'
- escapes[ord('\"')] = '\\"'
-
-
-def escape_ascii(s):
- "Escapes all text outside of 7bit ASCII plus control characters and Python literals."
- global escapes
- s = list(s)
- for i in range(len(s)):
- s[i] = escapes[ord(s[i])]
- return EMPTYSTRING.join(s)
-
-def escape_unicode(s):
- "Escapes control characters and Python literals only leaving non-ascii text intact."
- #for sp in ('\t', '\r', '\n', '\"', '\\'):
- s = s.replace('\\', '\\\\')
- s = s.replace('\t', '\\t')
- s = s.replace('\r', '\\r')
- s = s.replace('\n', '\\n')
- s = s.replace('\"', '\\"')
- # escape control chars
- def repl(m): return "\\%03o" % ord(m.group(0))
- s = re.sub('[\001-\037]', repl, s)
- return s
-
-def safe_eval(s):
- # unwrap quotes, safely
- return eval(s, {'__builtins__':{}}, {})
-
-
-def normalize(s, escape=False):
- # This converts the various Python string types into a format that is
- # appropriate for .po files, namely much closer to C style.
- lines = s.split('\n')
- if len(lines) == 1:
- s = '"' + escape_unicode(s) + '"'
- else:
- if not lines[-1]:
- del lines[-1]
- lines[-1] = lines[-1] + '\n'
- for i in range(len(lines)):
- lines[i] = escape_unicode(lines[i])
- lineterm = '\\n"\n"'
- s = '""\n"' + lineterm.join(lines) + '"'
- if isinstance(s, unicode):
- s = s.encode('utf-8')
- if escape:
- def repl(m): return "\\%03o" % ord(m.group(0))
- s = re.sub('[\200-\377]', repl, s)
- return s
-
-
-def containsAny(str, set):
- """Check whether 'str' contains ANY of the chars in 'set'"""
- return 1 in [c in str for c in set]
-
-
-def _visit_pyfiles(list, dirname, names):
- """Helper for getFilesForName()."""
- # get extension for python source files
- if not globals().has_key('_py_ext'):
- global _py_ext
- _py_ext = [triple[0] for triple in imp.get_suffixes()
- if triple[2] == imp.PY_SOURCE][0]
-
- # don't recurse into CVS directories
- if 'CVS' in names:
- names.remove('CVS')
- if '.svn' in names:
- names.remove('.svn')
-
- # add all *.py files to list
- list.extend(
- [os.path.join(dirname, file) for file in names
- if os.path.splitext(file)[1] == _py_ext]
- )
-
-
-def _get_modpkg_path(dotted_name, pathlist=None):
- """Get the filesystem path for a module or a package.
-
- Return the file system path to a file for a module, and to a directory for
- a package. Return None if the name is not found, or is a builtin or
- extension module.
- """
- # split off top-most name
- parts = dotted_name.split('.', 1)
-
- if len(parts) > 1:
- # we have a dotted path, import top-level package
- try:
- file, pathname, description = imp.find_module(parts[0], pathlist)
- if file: file.close()
- except ImportError:
- return None
-
- # check if it's indeed a package
- if description[2] == imp.PKG_DIRECTORY:
- # recursively handle the remaining name parts
- pathname = _get_modpkg_path(parts[1], [pathname])
- else:
- pathname = None
- else:
- # plain name
- try:
- file, pathname, description = imp.find_module(
- dotted_name, pathlist)
- if file:
- file.close()
- if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
- pathname = None
- except ImportError:
- pathname = None
-
- return pathname
-
-
-def getFilesForName(name):
- """Get a list of module files for a filename, a module or package name,
- or a directory.
- """
- if not os.path.exists(name):
- # check for glob chars
- if containsAny(name, "*?[]"):
- files = glob.glob(name)
- list = []
- for file in files:
- list.extend(getFilesForName(file))
- return list
-
- # try to find module or package
- name = _get_modpkg_path(name)
- if not name:
- return []
-
- if os.path.isdir(name):
- # find all python files in directory
- list = []
- os.path.walk(name, _visit_pyfiles, list)
- return list
- elif os.path.exists(name):
- # a single file
- return [name]
-
- return []
-
-
-class TokenEater:
- def __init__(self, options):
- self.__options = options
- self.__messages = {}
- self.__state = self.__waiting
- self.__data = []
- self.__lineno = -1
- self.__freshmodule = 1
- self.__curfile = None
- self.__encoding = None
-
- def __call__(self, ttype, tstring, stup, etup, line):
- # dispatch
-## import token
-## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
-## 'tstring:', tstring
- self.__state(ttype, tstring, stup[0])
-
- def __waiting(self, ttype, tstring, lineno):
- opts = self.__options
- # Do docstring extractions, if enabled
- if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
- # module docstring?
- if self.__freshmodule:
- if ttype == tokenize.STRING:
- self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
- self.__freshmodule = 0
- elif ttype not in (tokenize.COMMENT, tokenize.NL):
- self.__freshmodule = 0
- return
- # class docstring?
- if ttype == tokenize.NAME and tstring in ('class', 'def'):
- self.__state = self.__suiteseen
- return
- if ttype == tokenize.NAME and tstring in opts.keywords:
- self.__state = self.__keywordseen
-
- def __suiteseen(self, ttype, tstring, lineno):
- # ignore anything until we see the colon
- if ttype == tokenize.OP and tstring == ':':
- self.__state = self.__suitedocstring
-
- def __suitedocstring(self, ttype, tstring, lineno):
- # ignore any intervening noise
- if ttype == tokenize.STRING:
- self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
- self.__state = self.__waiting
- elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
- tokenize.COMMENT):
- # there was no class docstring
- self.__state = self.__waiting
-
- def __keywordseen(self, ttype, tstring, lineno):
- if ttype == tokenize.OP and tstring == '(':
- self.__data = []
- self.__lineno = lineno
- self.__state = self.__openseen
- else:
- self.__state = self.__waiting
-
- def __openseen(self, ttype, tstring, lineno):
- if ttype == tokenize.OP and tstring == ')':
- # We've seen the last of the translatable strings. Record the
- # line number of the first line of the strings and update the list
- # of messages seen. Reset state for the next batch. If there
- # were no strings inside _(), then just ignore this entry.
- if self.__data:
- self.__addentry(EMPTYSTRING.join(self.__data))
- self.__state = self.__waiting
- elif ttype == tokenize.STRING:
- self.__data.append(safe_eval(tstring))
- elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
- token.NEWLINE, tokenize.NL]:
- # warn if we see anything else than STRING or whitespace
- print >> sys.stderr, _(
- '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
- ) % {
- 'token': tstring,
- 'file': self.__curfile,
- 'lineno': self.__lineno
- }
- self.__state = self.__waiting
-
- def __addentry(self, msg, lineno=None, isdocstring=0, iskidstring=0):
- # tokenize module always return unicode strings
- # even when they are in fact coded string instances
- # to deal with this we use a hack:
- # evaluate string's representation without leading "u"
- # to force interpration as coded string
- # then we decode it using already known file's encoding
- if not iskidstring:
- if type(msg) is str:
- msg = eval(repr(msg))
- else:
- msg = eval(repr(msg)[1:])
- msg = msg.decode(self.__encoding)
- if lineno is None:
- lineno = self.__lineno
- if not msg in self.__options.toexclude:
- entry = (self.__curfile, lineno)
- self.__messages.setdefault(msg, {})[entry] = isdocstring
-
- def set_filename(self, filename):
- self.__curfile = filename
- self.__freshmodule = 1
-
- def set_file_encoding(self, fp):
- """Searches for -*- coding: -*- magic comment to find out file encoding."""
- self.__encoding = 'utf-8' # reset to default for each new file
- for line in fp.readlines()[:5]:
- m = re.match('#\s*-\*-\s+coding:\s+(\w+)\s+-\*-', line)
- if m:
- self.__encoding = m.group(1)
- break
- fp.seek(0)
-
- def contains_inline_python(self,msg):
- if '${' in msg and not '$${' in msg: return True
- return False
-
- def strip_namespace_uri(self,tag):
- return tag.split('}')[-1]
-
- def get_text_node(self,node):
- tag = re.sub('({[^}]+})?(\w+)', '\\2', node.tag)
-
- if node.text:
- msg = node.text.strip()
- if msg and not self.contains_inline_python(msg):
- if tag not in ['script','style']:
- self.__addentry(msg,self.strip_namespace_uri(node.tag), iskidstring=1)
-
- if node.getchildren():
- for child in node: self.get_text_node(child)
-
- if node.tail:
- msg = node.tail.strip()
- if msg and not self.contains_inline_python(msg):
- self.__addentry(msg,self.strip_namespace_uri(node.tag), iskidstring=1)
-
- def extract_kid_strings(self):
- if not self.__curfile: return
- f = None
- try:
- file = open(self.__curfile)
- f = ElementTree(XML( fixentities(file.read() )))
- except Exception, e:
- print 'Skip %s: %s' % (self.__curfile, e)
- return
-
- node = f.getroot()
- self.get_text_node(node)
-
- def write(self, fp):
- options = self.__options
- # format without tz information
- # because %Z is timezone's name, not offset
- # and, say, on localized Windows XP this is non-ascii string
- timestamp = time.strftime('%Y-%m-%d %H:%M')
- # The time stamp in the header doesn't have the same format as that
- # generated by xgettext...
- t = {'time': timestamp, 'version': __version__, 'charset':'utf-8'}
- print >> fp, pot_header % t
- # Sort the entries. First sort each particular entry's keys, then
- # sort all the entries by their first item.
- reverse = {}
- for k, v in self.__messages.items():
- keys = v.keys()
- keys.sort()
- reverse.setdefault(tuple(keys), []).append((k, v))
- rkeys = reverse.keys()
- rkeys.sort()
- for rkey in rkeys:
- rentries = reverse[rkey]
- rentries.sort()
- for k, v in rentries:
- isdocstring = 0
- # If the entry was gleaned out of a docstring, then add a
- # comment stating so. This is to aid translators who may wish
- # to skip translating some unimportant docstrings.
- if reduce(operator.__add__, v.values()):
- isdocstring = 1
- # k is the message string, v is a dictionary-set of (filename,
- # lineno) tuples. We want to sort the entries in v first by
- # file name and then by line number.
- v = v.keys()
- v.sort()
- if not options.writelocations:
- pass
- # location comments are different b/w Solaris and GNU:
- elif options.locationstyle == options.SOLARIS:
- for filename, lineno in v:
- d = {'filename': filename, 'lineno': lineno}
- print >>fp, _(
- '# File: %(filename)s, line: %(lineno)s') % d
- elif options.locationstyle == options.GNU:
- # fit as many locations on one line, as long as the
- # resulting line length doesn't exceeds 'options.width'
- locline = '#:'
- for filename, lineno in v:
- d = {'filename': filename, 'lineno': lineno}
- s = _(' %(filename)s:%(lineno)s') % d
- if len(locline) + len(s) <= options.width:
- locline = locline + s
- else:
- print >> fp, locline
- locline = "#:" + s
- if len(locline) > 2:
- print >> fp, locline
- if isdocstring:
- print >> fp, '#, docstring'
- if k: # do not output empty msgid
- print >> fp, 'msgid', normalize(k, options.escape)
- print >> fp, 'msgstr ""\n'
-
-
-def main():
- global default_keywords
- try:
- opts, args = getopt.getopt(
- sys.argv[1:],
- 'ad:UDEhk:Kno:p:S:Vvw:x:X:',
- ['extract-all', 'default-domain=', 'escape', 'help',
- 'keyword=', 'no-default-keywords',
- 'add-location', 'no-location', 'output=', 'output-dir=',
- 'style=', 'verbose', 'version', 'width=', 'exclude-file=',
- 'docstrings', 'no-docstrings', 'support-unicode',
- ])
- except getopt.error, msg:
- usage(1, msg)
-
- # for holding option values
- class Options:
- # constants
- GNU = 1
- SOLARIS = 2
- # defaults
- extractall = 0 # FIXME: currently this option has no effect at all.
- escape = 0
- keywords = []
- outpath = ''
- outfile = 'messages.pot'
- writelocations = 1
- locationstyle = GNU
- verbose = 0
- width = 78
- excludefilename = ''
- docstrings = 0
- nodocstrings = {}
-
- options = Options()
- locations = {'gnu' : options.GNU,
- 'solaris' : options.SOLARIS,
- }
-
- # parse options
- for opt, arg in opts:
- if opt in ('-h', '--help'):
- usage(0)
- elif opt in ('-a', '--extract-all'):
- options.extractall = 1
- elif opt in ('-d', '--default-domain'):
- options.outfile = arg + '.pot'
- elif opt in ('-E', '--escape'):
- options.escape = 1
- elif opt in ('-D', '--docstrings'):
- options.docstrings = 1
- elif opt in ('-k', '--keyword'):
- options.keywords.append(arg)
- elif opt in ('-K', '--no-default-keywords'):
- default_keywords = []
- elif opt in ('-n', '--add-location'):
- options.writelocations = 1
- elif opt in ('--no-location',):
- options.writelocations = 0
- elif opt in ('-S', '--style'):
- options.locationstyle = locations.get(arg.lower())
- if options.locationstyle is None:
- usage(1, _('Invalid value for --style: %s') % arg)
- elif opt in ('-o', '--output'):
- options.outfile = arg
- elif opt in ('-p', '--output-dir'):
- options.outpath = arg
- elif opt in ('-v', '--verbose'):
- options.verbose = 1
- elif opt in ('-V', '--version'):
- print _('pygettext.py (xgettext for Python) %s') % __version__
- sys.exit(0)
- elif opt in ('-w', '--width'):
- try:
- options.width = int(arg)
- except ValueError:
- usage(1, _('--width argument must be an integer: %s') % arg)
- elif opt in ('-x', '--exclude-file'):
- options.excludefilename = arg
- elif opt in ('-X', '--no-docstrings'):
- fp = open(arg)
- try:
- while 1:
- line = fp.readline()
- if not line:
- break
- options.nodocstrings[line[:-1]] = 1
- finally:
- fp.close()
-
- # calculate escapes
- make_escapes(0)
-
- # calculate all keywords
- options.keywords.extend(default_keywords)
-
- # initialize list of strings to exclude
- if options.excludefilename:
- try:
- fp = open(options.excludefilename)
- options.toexclude = fp.readlines()
- fp.close()
- except IOError:
- print >> sys.stderr, _(
- "Can't read --exclude-file: %s") % options.excludefilename
- sys.exit(1)
- else:
- options.toexclude = []
-
- # resolve args to module lists
- expanded = []
- for arg in args:
- if arg == '-':
- expanded.append(arg)
- else:
- expanded.extend(getFilesForName(arg))
- args = expanded
-
- # slurp through all the files
- eater = TokenEater(options)
- for filename in args:
- if filename == '-':
- if options.verbose:
- print _('Reading standard input')
- fp = sys.stdin
- closep = 0
- else:
- if options.verbose:
- print _('Working on %s') % filename
- fp = open(filename)
- eater.set_file_encoding(fp)
- closep = 1
- try:
- eater.set_filename(filename)
- try:
- tokenize.tokenize(fp.readline, eater)
- except tokenize.TokenError, e:
- print >> sys.stderr, '%s: %s, line %d, column %d' % (
- e[0], filename, e[1][0], e[1][1])
- finally:
- if closep:
- fp.close()
-
- if os.path.splitext(filename)[-1].lower() == '.kid': eater.extract_kid_strings()
-
- # write the output
- if options.outfile == '-':
- fp = sys.stdout
- closep = 0
- else:
- if options.outpath:
- options.outfile = os.path.join(options.outpath, options.outfile)
- fp = open(options.outfile, 'wt')
- closep = 1
- try:
- eater.write(fp)
- finally:
- if closep:
- fp.close()
-
-
-if __name__ == '__main__':
- main()
- # some more test strings
- _(u'a unicode string')
- # this one creates a warning
- _('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
- _('more' 'than' 'one' 'string')