about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Matěj Cepl <mcepl@cepl.eu>, 2020-12-09 17:52:08 +0100
committer: Matěj Cepl <mcepl@cepl.eu>, 2020-12-09 17:52:08 +0100
commit: 55b934da841e3c0eb7b391217280ac3470ca0fce (patch)
tree: 44be49ebb0f013e5c1b5ad2614a38f432c4a44e7
parent: f9544bfe5bc10065526e2a2a81bb30a7a334b472 (diff)
download: epubgrep-55b934da841e3c0eb7b391217280ac3470ca0fce.tar.gz
Add search in descriptions. (0.6.0)
Also, refactor grep_book() function not to be so damn complicated.
-rwxr-xr-x  epubgrep.py  157
-rw-r--r--  setup.py  2
2 files changed, 110 insertions, 49 deletions
diff --git a/epubgrep.py b/epubgrep.py
index 4122906..8a9cfa8 100755
--- a/epubgrep.py
+++ b/epubgrep.py
@@ -16,7 +16,8 @@ logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
log = logging.getLogger('epubgrep')
-def get_chapter_title(mdata: List[Dict[str, Any]], fname: str) -> Optional[Tuple[str, int]]:
+def get_chapter_title(mdata: List[Dict[str, Any]], fname: str) \
+ -> Optional[Tuple[str, int]]:
if mdata is not None:
found_list = [(x['title'], x['index'])
for x in mdata if x['src'] == fname]
@@ -27,69 +28,124 @@ def get_chapter_title(mdata: List[Dict[str, Any]], fname: str) -> Optional[Tuple
return ('Unknown', 0)
-def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, w_counting: bool=False, color: bool=False):
+def _colorize_found(dline: str, res: re.Match, col: bool) -> str:
+ out = ''
+ if col:
+ found_line = dline.replace(
+ res.group(1),
+ "\033[31;1m" + res.group(1) + "\033[31;0m")
+ out += '{}\n'.format(found_line)
+ else:
+ out += dline + '\n'
+ return out
+
+
+def _multiline_search(inf, sought_RE, printed_booktitle, filename,
+ counting, w_counting, metadata, zif) -> Tuple[int, str]:
+ out = ""
+ count = 0
+ decoded_str = inf.read().decode(errors='replace')
+ res = sought_RE.search(decoded_str)
+ if res:
+ if not printed_booktitle:
+ out += '{}\n'.format(filename)
+ printed_booktitle = True
+ if counting or w_counting:
+ count += 1
+ else:
+ chap_info = get_chapter_title(metadata.toc,
+ zif.filename)
+ out += "{}. {}:\n\n".format(chap_info[1], chap_info[0])
+ out += '{}\n\n'.format(res.group(0))
+ return count, out
+
+
+def _singleline_search(inf, sought_RE, out_title, filename, counting,
+ w_counting, printed_booktitle, metadata, zif, color,
+ count) -> Tuple[int, str]:
+ out = ""
+ count = 0
+ printed_title = False
+ for line in inf:
+ decoded_line = line.decode(errors='replace').strip()
+ res = sought_RE.search(decoded_line)
+ if res:
+ if not out_title:
+ out_title = '{}'.format(filename)
+ if counting or w_counting:
+ count += 1
+ else:
+ if not printed_booktitle:
+ out += out_title + '\n'
+ printed_booktitle = True
+ if not printed_title:
+ chap_info = get_chapter_title(metadata.toc,
+ zif.filename)
+ if chap_info is not None:
+ out += "{}. {}:\n\n".format(chap_info[1],
+ chap_info[0])
+ printed_title = True
+ if not (counting or w_counting):
+ out += _colorize_found(decoded_line, res, color)
+ return count, out
+
+
+def _description_search(mdata: dict, sre: re.Pattern, fname: str,
+ col: bool) -> str:
+ out = ''
+ decoded_line = mdata['description']
+ if decoded_line is None:
+ return out
+
+ res = sre.search(decoded_line)
+ if res:
+ title = '{}'.format(fname)
+ out += title + '\n'
+ out += _colorize_found(decoded_line, res, col)
+ return out
+
+
+def grep_book(filename: str, pattern: str, flags: int, desc: bool = False,
+ counting: bool = False, w_counting: bool = False,
+ color: bool = False) -> Optional[str]:
assert os.path.isfile(filename), "{} is not EPub file.".format(filename)
sought_RE = re.compile('(' + pattern + ')', flags)
count = 0
+ icount = 0
out_title = ''
out = ''
+ iout = ''
mline = flags & re.M == re.M
try:
metadata = epub_meta.get_epub_metadata(filename)
- except epub_meta.EPubException:
+ except (epub_meta.EPubException, KeyError, IndexError):
log.exception('Failed to open {}'.format(filename))
+ return None
book = zipfile.ZipFile(filename)
printed_booktitle = False
+ if desc:
+ return _description_search(metadata, sought_RE, filename, color)
+
for zif in book.infolist():
with book.open(zif) as inf:
if mline:
- decoded_str = inf.read().decode(errors='replace')
- res = sought_RE.search(decoded_str)
- if res:
- if not printed_booktitle:
- out += '{}\n'.format(filename)
- printed_booktitle = True
- if counting or w_counting:
- count += 1
- else:
- chap_info = get_chapter_title(metadata.toc,
- zif.filename)
- out += "{}. {}:\n\n".format(chap_info[1], chap_info[0])
- out += '{}\n\n'.format(res.group(0))
+ icount, iout = _multiline_search(inf, sought_RE,
+ printed_booktitle, filename,
+ counting, w_counting,
+ metadata, zif)
+ count += icount
+ out += iout
else:
- printed_title = False
- for line in inf:
- decoded_line = line.decode(errors='replace').strip()
- res = sought_RE.search(decoded_line)
- if res:
- if not out_title:
- out_title = '{}'.format(filename)
- if counting or w_counting:
- count += 1
- else:
- if not printed_booktitle:
- out += out_title + '\n'
- printed_booktitle = True
- if not printed_title:
- chap_info = get_chapter_title(metadata.toc,
- zif.filename)
- if chap_info is not None:
- out += "{}. {}:\n\n".format(chap_info[1],
- chap_info[0])
- printed_title = True
- # https://stackoverflow.com/a/33206814
- # print("\033[31;1;4mHello\033[0m")
- if not (counting or w_counting):
- if color:
- found_line = decoded_line.replace(
- res.group(1),
- "\033[31;1m" + res.group(1) + "\033[31;0m")
- out += '{}\n'.format(found_line)
- else:
- out += decoded_line + '\n'
+ icount, iout = _singleline_search(inf, sought_RE, out_title,
+ filename, counting,
+ w_counting,
+ printed_booktitle, metadata,
+ zif, color, count)
+ count += icount
+ out += iout
if count > 0:
if counting:
@@ -111,6 +167,9 @@ def main():
parser.add_argument('-C', '--weighted-count',
action='store_true',
help="counts of found patterns as a proportion of whole text")
+ parser.add_argument('-d', '--description',
+ action='store_true',
+ help="search just in descriptions")
parser.add_argument('-i', '--ignore-case',
action='store_true',
help="make search case insensitive")
@@ -134,14 +193,16 @@ def main():
fut_to_fname = {executor.submit(grep_book,
os.path.realpath(filename),
args.pattern, search_flags,
- args.count, args.weighted_count, args.color):
+ args.description,
+ args.count, args.weighted_count,
+ args.color):
filename for filename in args.files}
for future in concurrent.futures.as_completed(fut_to_fname):
try:
data = future.result()
if data:
data = data.rstrip()
- if len(data) > 0:
+ if (data is not None) and len(data) > 0:
print(data)
except (BrokenPipeError, KeyboardInterrupt):
sys.exit()
diff --git a/setup.py b/setup.py
index 804ccd4..077de0a 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@ from setuptools import setup, find_packages
setup(
name="epubgrep",
- version="0.5.0",
+ version="0.6.0",
description='Grep through EPub files',
author=u'Matěj Cepl',
author_email='mcepl@cepl.eu',