Add search in descriptions. (tag: 0.6.0)

Also, refactor grep_book() function not to be so damn complicated.
author: Matěj Cepl <mcepl@cepl.eu> 2020-12-09 17:52:08 +0100
committer: Matěj Cepl <mcepl@cepl.eu> 2020-12-09 17:52:08 +0100
commit: 55b934da841e3c0eb7b391217280ac3470ca0fce (patch)
tree: 44be49ebb0f013e5c1b5ad2614a38f432c4a44e7
parent: f9544bfe5bc10065526e2a2a81bb30a7a334b472 (diff)
download: epubgrep-55b934da841e3c0eb7b391217280ac3470ca0fce.tar.gz
2 files changed, 110 insertions, 49 deletions
diff --git a/epubgrep.py b/epubgrep.py
index 4122906..8a9cfa8 100755
--- a/epubgrep.py
+++ b/epubgrep.py
@@ -16,7 +16,8 @@ logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
 log = logging.getLogger('epubgrep')
 
 
-def get_chapter_title(mdata: List[Dict[str, Any]], fname: str) -> Optional[Tuple[str, int]]:
+def get_chapter_title(mdata: List[Dict[str, Any]], fname: str) \
+        -> Optional[Tuple[str, int]]:
     if mdata is not None:
         found_list = [(x['title'], x['index'])
                       for x in mdata if x['src'] == fname]
@@ -27,69 +28,124 @@ def get_chapter_title(mdata: List[Dict[str, Any]], fname: str) -> Optional[Tuple
             return ('Unknown', 0)
 
 
-def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, w_counting: bool=False, color: bool=False):
+def _colorize_found(dline: str, res: re.Match, col: bool) -> str:
+    out = ''
+    if col:
+        found_line = dline.replace(
+            res.group(1),
+            "\033[31;1m" + res.group(1) + "\033[31;0m")
+        out += '{}\n'.format(found_line)
+    else:
+        out += dline + '\n'
+    return out
+
+
+def _multiline_search(inf, sought_RE, printed_booktitle, filename,
+                      counting, w_counting, metadata, zif) -> Tuple[int, str]:
+    out = ""
+    count = 0
+    decoded_str = inf.read().decode(errors='replace')
+    res = sought_RE.search(decoded_str)
+    if res:
+        if not printed_booktitle:
+            out += '{}\n'.format(filename)
+            printed_booktitle = True
+        if counting or w_counting:
+            count += 1
+        else:
+            chap_info = get_chapter_title(metadata.toc,
+                                          zif.filename)
+            out += "{}. {}:\n\n".format(chap_info[1], chap_info[0])
+            out += '{}\n\n'.format(res.group(0))
+    return count, out
+
+
+def _singleline_search(inf, sought_RE, out_title, filename, counting,
+                       w_counting, printed_booktitle, metadata, zif, color,
+                       count) -> Tuple[int, str]:
+    out = ""
+    count = 0
+    printed_title = False
+    for line in inf:
+        decoded_line = line.decode(errors='replace').strip()
+        res = sought_RE.search(decoded_line)
+        if res:
+            if not out_title:
+                out_title = '{}'.format(filename)
+            if counting or w_counting:
+                count += 1
+            else:
+                if not printed_booktitle:
+                    out += out_title + '\n'
+                    printed_booktitle = True
+                if not printed_title:
+                    chap_info = get_chapter_title(metadata.toc,
+                                                  zif.filename)
+                    if chap_info is not None:
+                        out += "{}. {}:\n\n".format(chap_info[1],
+                                                    chap_info[0])
+                        printed_title = True
+            if not (counting or w_counting):
+                out += _colorize_found(decoded_line, res, color)
+    return count, out
+
+
+def _description_search(mdata: dict, sre: re.Pattern, fname: str,
+                        col: bool) -> str:
+    out = ''
+    decoded_line = mdata['description']
+    if decoded_line is None:
+        return out
+
+    res = sre.search(decoded_line)
+    if res:
+        title = '{}'.format(fname)
+        out += title + '\n'
+        out += _colorize_found(decoded_line, res, col)
+    return out
+
+
+def grep_book(filename: str, pattern: str, flags: int, desc: bool = False,
+              counting: bool = False, w_counting: bool = False,
+              color: bool = False) -> Optional[str]:
     assert os.path.isfile(filename), "{} is not EPub file.".format(filename)
     sought_RE = re.compile('(' + pattern + ')', flags)
     count = 0
+    icount = 0
     out_title = ''
     out = ''
+    iout = ''
 
     mline = flags & re.M == re.M
 
     try:
         metadata = epub_meta.get_epub_metadata(filename)
-    except epub_meta.EPubException:
+    except (epub_meta.EPubException, KeyError, IndexError):
         log.exception('Failed to open {}'.format(filename))
+        return None
     book = zipfile.ZipFile(filename)
     printed_booktitle = False
 
+    if desc:
+        return _description_search(metadata, sought_RE, filename, color)
+
     for zif in book.infolist():
         with book.open(zif) as inf:
             if mline:
-                decoded_str = inf.read().decode(errors='replace')
-                res = sought_RE.search(decoded_str)
-                if res:
-                    if not printed_booktitle:
-                        out += '{}\n'.format(filename)
-                        printed_booktitle = True
-                    if counting or w_counting:
-                        count += 1
-                    else:
-                        chap_info = get_chapter_title(metadata.toc,
-                                                      zif.filename)
-                        out += "{}. {}:\n\n".format(chap_info[1], chap_info[0])
-                        out += '{}\n\n'.format(res.group(0))
+                icount, iout = _multiline_search(inf, sought_RE,
+                                                 printed_booktitle, filename,
+                                                 counting, w_counting,
+                                                 metadata, zif)
+                count += icount
+                out += iout
             else:
-                printed_title = False
-                for line in inf:
-                    decoded_line = line.decode(errors='replace').strip()
-                    res = sought_RE.search(decoded_line)
-                    if res:
-                        if not out_title:
-                            out_title = '{}'.format(filename)
-                        if counting or w_counting:
-                            count += 1
-                        else:
-                            if not printed_booktitle:
-                                out += out_title + '\n'
-                                printed_booktitle = True
-                            if not printed_title:
-                                chap_info = get_chapter_title(metadata.toc,
-                                                              zif.filename)
-                                if chap_info is not None:
-                                    out += "{}. {}:\n\n".format(chap_info[1],
-                                                                chap_info[0])
-                                    printed_title = True
-                        # https://stackoverflow.com/a/33206814
-                        # print("\033[31;1;4mHello\033[0m")
-                        if not (counting or w_counting):
-                            if color:
-                                found_line = decoded_line.replace(
-                                    res.group(1),
-                                    "\033[31;1m" + res.group(1) + "\033[31;0m")
-                                out += '{}\n'.format(found_line)
-                            else:
-                                out += decoded_line + '\n'
+                icount, iout = _singleline_search(inf, sought_RE, out_title,
+                                                  filename, counting,
+                                                  w_counting,
+                                                  printed_booktitle, metadata,
+                                                  zif, color, count)
+                count += icount
+                out += iout
 
     if count > 0:
         if counting:
@@ -111,6 +167,9 @@ def main():
     parser.add_argument('-C', '--weighted-count',
                         action='store_true',
                         help="counts of found patterns as a proportion of whole text")
+    parser.add_argument('-d', '--description',
+                        action='store_true',
+                        help="search just in descriptions")
     parser.add_argument('-i', '--ignore-case',
                         action='store_true',
                         help="make search case insensitive")
@@ -134,14 +193,16 @@ def main():
         fut_to_fname = {executor.submit(grep_book,
                                         os.path.realpath(filename),
                                         args.pattern, search_flags,
-                                        args.count, args.weighted_count, args.color):
+                                        args.description,
+                                        args.count, args.weighted_count,
+                                        args.color):
                         filename for filename in args.files}
     for future in concurrent.futures.as_completed(fut_to_fname):
         try:
             data = future.result()
             if data:
                 data = data.rstrip()
-            if len(data) > 0:
+            if (data is not None) and len(data) > 0:
                 print(data)
         except (BrokenPipeError, KeyboardInterrupt):
             sys.exit()
diff --git a/setup.py b/setup.py
index 804ccd4..077de0a 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@ from setuptools import setup, find_packages
 
 setup(
     name="epubgrep",
-    version="0.5.0",
+    version="0.6.0",
     description='Grep through EPub files',
     author=u'Matěj Cepl',
     author_email='mcepl@cepl.eu',
author	Matěj Cepl <mcepl@cepl.eu>	2020-12-09 17:52:08 +0100
committer	Matěj Cepl <mcepl@cepl.eu>	2020-12-09 17:52:08 +0100
commit	55b934da841e3c0eb7b391217280ac3470ca0fce (patch)
tree	44be49ebb0f013e5c1b5ad2614a38f432c4a44e7
parent	f9544bfe5bc10065526e2a2a81bb30a7a334b472 (diff)
download	epubgrep-55b934da841e3c0eb7b391217280ac3470ca0fce.tar.gz