aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Matěj Cepl <mcepl@cepl.eu> 2020-06-18 10:33:02 +0200
committer Matěj Cepl <mcepl@cepl.eu> 2020-06-18 10:34:56 +0200
commit c4a7abd99f96f37ef0b18b1fd8e3660470a98829 (patch)
tree 53dcb0e00cb7cf3c144948b25eb65f7fe79ac5ef
parent b5a7cf08178b587942c63eb119f66a5411503d1a (diff)
download epubgrep-0.4.0.tar.gz
Make epubgrep concurrent for speed improvement. [0.4.0]
Surprisingly, ProcessPoolExecutor is vastly faster and less memory-demanding than ThreadPoolExecutor.
-rwxr-xr-x epubgrep.py 54
-rw-r--r-- setup.py 2
2 files changed, 38 insertions, 18 deletions
diff --git a/epubgrep.py b/epubgrep.py
index 0451c17..cf70bcf 100755
--- a/epubgrep.py
+++ b/epubgrep.py
@@ -1,5 +1,6 @@
#!/usr/bin/env python3
import argparse
+import concurrent.futures
import logging
import os.path
import re
@@ -17,7 +18,8 @@ log = logging.getLogger('epubgrep')
def get_chapter_title(mdata: List[Dict[str, Any]], fname: str) -> Optional[Tuple[str, int]]:
if mdata is not None:
- found_list = [(x['title'], x['index']) for x in mdata if x['src'] == fname]
+ found_list = [(x['title'], x['index'])
+ for x in mdata if x['src'] == fname]
if len(found_list) > 0:
chap_title = found_list[0][0].strip(' \t.0123456789')
return chap_title, found_list[0][1]
@@ -29,12 +31,14 @@ def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, col
assert os.path.isfile(filename), "{} is not EPub file.".format(filename)
sought_RE = re.compile('(' + pattern + ')', flags)
count = 0
+ out_title = ''
+ out = ''
mline = flags & re.M == re.M
try:
metadata = epub_meta.get_epub_metadata(filename)
- except epub_meta.EPubException as ex:
+ except epub_meta.EPubException:
log.exception('Failed to open {}'.format(filename))
book = zipfile.ZipFile(filename)
printed_booktitle = False
@@ -46,31 +50,35 @@ def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, col
res = sought_RE.search(decoded_str)
if res:
if not printed_booktitle:
- print('{}'.format(filename))
+ out += '{}\n'.format(filename)
printed_booktitle = True
if counting:
count += 1
else:
- chap_info = get_chapter_title(metadata.toc, zif.filename)
- print("{}. {}:\n".format(chap_info[1], chap_info[0]))
- print('{}\n'.format(res.group(0)))
+ chap_info = get_chapter_title(metadata.toc,
+ zif.filename)
+ out += "{}. {}:\n\n".format(chap_info[1], chap_info[0])
+ out += '{}\n\n'.format(res.group(0))
else:
printed_title = False
for line in inf:
decoded_line = line.decode(errors='replace').strip()
res = sought_RE.search(decoded_line)
if res:
- if not printed_booktitle:
- print('{}'.format(filename))
- printed_booktitle = True
+ if not out_title:
+ out_title = '{}'.format(filename)
if counting:
count += 1
else:
+ if not printed_booktitle:
+ out += out_title + '\n'
+ printed_booktitle = True
if not printed_title:
chap_info = get_chapter_title(metadata.toc,
zif.filename)
if chap_info is not None:
- print("{}. {}:\n".format(chap_info[1], chap_info[0]))
+ out += "{}. {}:\n\n".format(chap_info[1],
+ chap_info[0])
printed_title = True
# https://stackoverflow.com/a/33206814
# print("\033[31;1;4mHello\033[0m")
@@ -79,12 +87,14 @@ def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, col
found_line = decoded_line.replace(
res.group(1),
"\033[31;1m" + res.group(1) + "\033[31;0m")
- print('{}'.format(found_line))
+ out += '{}\n'.format(found_line)
else:
- print(decoded_line)
+ out += decoded_line + '\n'
if count > 0:
- print('Found: {}'.format(count))
+ out += '{:02d}:{}'.format(count, out_title)
+
+ return out
if __name__ == "__main__":
@@ -113,9 +123,19 @@ if __name__ == "__main__":
if args.multi_line:
search_flags |= re.M | re.S
- for filename in args.files:
- book_fname = os.path.realpath(filename)
+ with concurrent.futures.ProcessPoolExecutor() as executor:
+ fut_to_fname = {executor.submit(grep_book,
+ os.path.realpath(filename),
+ args.pattern, search_flags,
+ args.count, args.color):
+ filename for filename in args.files}
+ for future in concurrent.futures.as_completed(fut_to_fname):
+ fname = fut_to_fname[future]
try:
- grep_book(book_fname, args.pattern, search_flags, args.count, args.color)
- except BrokenPipeError:
+ data = future.result()
+ if data:
+ data = data.rstrip()
+ if len(data) > 0:
+ print(data)
+ except (BrokenPipeError, KeyboardInterrupt):
sys.exit()
diff --git a/setup.py b/setup.py
index e55dd19..6fb4595 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@ from setuptools import setup, find_packages
setup(
name="epubgrep",
- version="0.3.0",
+ version="0.4.0",
description='Grep through EPub files',
author=u'Matěj Cepl',
author_email='mcepl@cepl.eu',