diff options
author | Matěj Cepl <mcepl@cepl.eu> | 2020-06-18 10:33:02 +0200 |
---|---|---|
committer | Matěj Cepl <mcepl@cepl.eu> | 2020-06-18 10:34:56 +0200 |
commit | c4a7abd99f96f37ef0b18b1fd8e3660470a98829 (patch) | |
tree | 53dcb0e00cb7cf3c144948b25eb65f7fe79ac5ef | |
parent | b5a7cf08178b587942c63eb119f66a5411503d1a (diff) | |
download | epubgrep-0.4.0.tar.gz |
Make epubgrep concurrent for speed improvement. (tag: 0.4.0)
Surprisingly, ProcessPoolExecutor is vastly faster and less
memory-demanding than ThreadPoolExecutor.
-rwxr-xr-x | epubgrep.py | 54 | ||||
-rw-r--r-- | setup.py | 2 |
2 files changed, 38 insertions, 18 deletions
diff --git a/epubgrep.py b/epubgrep.py index 0451c17..cf70bcf 100755 --- a/epubgrep.py +++ b/epubgrep.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import argparse +import concurrent.futures import logging import os.path import re @@ -17,7 +18,8 @@ log = logging.getLogger('epubgrep') def get_chapter_title(mdata: List[Dict[str, Any]], fname: str) -> Optional[Tuple[str, int]]: if mdata is not None: - found_list = [(x['title'], x['index']) for x in mdata if x['src'] == fname] + found_list = [(x['title'], x['index']) + for x in mdata if x['src'] == fname] if len(found_list) > 0: chap_title = found_list[0][0].strip(' \t.0123456789') return chap_title, found_list[0][1] @@ -29,12 +31,14 @@ def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, col assert os.path.isfile(filename), "{} is not EPub file.".format(filename) sought_RE = re.compile('(' + pattern + ')', flags) count = 0 + out_title = '' + out = '' mline = flags & re.M == re.M try: metadata = epub_meta.get_epub_metadata(filename) - except epub_meta.EPubException as ex: + except epub_meta.EPubException: log.exception('Failed to open {}'.format(filename)) book = zipfile.ZipFile(filename) printed_booktitle = False @@ -46,31 +50,35 @@ def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, col res = sought_RE.search(decoded_str) if res: if not printed_booktitle: - print('{}'.format(filename)) + out += '{}\n'.format(filename) printed_booktitle = True if counting: count += 1 else: - chap_info = get_chapter_title(metadata.toc, zif.filename) - print("{}. {}:\n".format(chap_info[1], chap_info[0])) - print('{}\n'.format(res.group(0))) + chap_info = get_chapter_title(metadata.toc, + zif.filename) + out += "{}. 
{}:\n\n".format(chap_info[1], chap_info[0]) + out += '{}\n\n'.format(res.group(0)) else: printed_title = False for line in inf: decoded_line = line.decode(errors='replace').strip() res = sought_RE.search(decoded_line) if res: - if not printed_booktitle: - print('{}'.format(filename)) - printed_booktitle = True + if not out_title: + out_title = '{}'.format(filename) if counting: count += 1 else: + if not printed_booktitle: + out += out_title + '\n' + printed_booktitle = True if not printed_title: chap_info = get_chapter_title(metadata.toc, zif.filename) if chap_info is not None: - print("{}. {}:\n".format(chap_info[1], chap_info[0])) + out += "{}. {}:\n\n".format(chap_info[1], + chap_info[0]) printed_title = True # https://stackoverflow.com/a/33206814 # print("\033[31;1;4mHello\033[0m") @@ -79,12 +87,14 @@ def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, col found_line = decoded_line.replace( res.group(1), "\033[31;1m" + res.group(1) + "\033[31;0m") - print('{}'.format(found_line)) + out += '{}\n'.format(found_line) else: - print(decoded_line) + out += decoded_line + '\n' if count > 0: - print('Found: {}'.format(count)) + out += '{:02d}:{}'.format(count, out_title) + + return out if __name__ == "__main__": @@ -113,9 +123,19 @@ if __name__ == "__main__": if args.multi_line: search_flags |= re.M | re.S - for filename in args.files: - book_fname = os.path.realpath(filename) + with concurrent.futures.ProcessPoolExecutor() as executor: + fut_to_fname = {executor.submit(grep_book, + os.path.realpath(filename), + args.pattern, search_flags, + args.count, args.color): + filename for filename in args.files} + for future in concurrent.futures.as_completed(fut_to_fname): + fname = fut_to_fname[future] try: - grep_book(book_fname, args.pattern, search_flags, args.count, args.color) - except BrokenPipeError: + data = future.result() + if data: + data = data.rstrip() + if len(data) > 0: + print(data) + except (BrokenPipeError, KeyboardInterrupt): 
sys.exit() @@ -4,7 +4,7 @@ from setuptools import setup, find_packages setup( name="epubgrep", - version="0.3.0", + version="0.4.0", description='Grep through EPub files', author=u'Matěj Cepl', author_email='mcepl@cepl.eu', |