When searching through metadata, search through tags as well.

author: Matěj Cepl <mcepl@cepl.eu> 2021-05-09 10:49:02 +0200
committer: Matěj Cepl <mcepl@cepl.eu> 2021-05-09 10:52:18 +0200
commit: 5f8b8cdbd7c5706d00c86e098660f65586a282c8 (patch)
tree: aa763c6ea074f96871113e647adb2c8d296a3cb0 /epubgrep.py
parent: e34704aada1840fa2b8c8629e15ed99dcd000374 (diff)
download: epubgrep-5f8b8cdbd7c5706d00c86e098660f65586a282c8.tar.gz
1 files changed, 33 insertions, 15 deletions
diff --git a/epubgrep.py b/epubgrep.py
index c66a326..189e8b7 100755
--- a/epubgrep.py
+++ b/epubgrep.py
@@ -12,7 +12,7 @@ from typing import Any, Dict, List, Optional, Tuple
 import epub_meta
 
 logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
-                    level=logging.DEBUG)
+                    level=logging.INFO)
 log = logging.getLogger('epubgrep')
 
 
@@ -85,18 +85,35 @@ def _singleline_search(inf, sought_RE, out_title, filename, counting,
     return count, out
 
 
-def _description_search(mdata: dict, sre: re.Pattern, fname: str,
+def _metadata_search(mdata: dict, sre: re.Pattern, fname: str,
                         col: bool) -> str:
+    """
+    Search through metadata (description and subject tags), not text.
+
+    :param mdata: complete metadata to search through
+    :param sre: re.Pattern to search
+    :param fname: filename of the book
+    :param col: should we colorize the output
+    :return: filename header plus _colorize_found() output per hit, or ''
+    """
     out = ''
-    decoded_line = mdata['description']
-    if decoded_line is None:
-        return out
+    title = ''  # stays empty until the filename header has been emitted
+    decoded_line = mdata.get('description')
+    tags = mdata.get('subject')

-    res = sre.search(decoded_line)
-    if res:
-        title = '\n{}'.format(fname)
-        out += title + '\n'
-        out += _colorize_found(decoded_line, res, col)
+    if decoded_line:
+        res = sre.search(decoded_line)
+        if res:
+            title = f'\n{fname}'
+            out += title + '\n'
+            out += _colorize_found(decoded_line, res, col)
+    for tag in tags or ():
+        res = sre.search(tag)
+        if res:
+            if not title:
+                title = f'\n{fname}'
+                out += f'{title}\n'
+            out += _colorize_found(tag, res, col)
     return out
 
 
@@ -120,8 +137,8 @@ def grep_book(filename: str, opts: argparse.Namespace,
     book = zipfile.ZipFile(filename)
     printed_booktitle = False
 
-    if opts.description:
-        return _description_search(metadata, sought_RE, filename, opts.color)
+    if opts.metadata:
+        return _metadata_search(metadata, sought_RE, filename, opts.color)
 
     for zif in book.infolist():
         with book.open(zif) as inf:
@@ -166,10 +183,11 @@ def main():
                         help="just counts of found patterns")
     parser.add_argument('-C', '--weighted-count',
                         action='store_true',
-                        help="counts of found patterns as a proportion of whole text")
-    parser.add_argument('-d', '--description',
+                        help="counts of found patterns " +
+                             "as a proportion of whole text")
+    parser.add_argument('-d', '--metadata',
                         action='store_true',
-                        help="search just in descriptions")
+                        help="search just in metadata")
     parser.add_argument('-i', '--ignore-case',
                         action='store_true',
                         help="make search case insensitive")
author	Matěj Cepl <mcepl@cepl.eu>	2021-05-09 10:49:02 +0200
committer	Matěj Cepl <mcepl@cepl.eu>	2021-05-09 10:52:18 +0200
commit	5f8b8cdbd7c5706d00c86e098660f65586a282c8 (patch)
tree	aa763c6ea074f96871113e647adb2c8d296a3cb0 /epubgrep.py
parent	e34704aada1840fa2b8c8629e15ed99dcd000374 (diff)
download	epubgrep-5f8b8cdbd7c5706d00c86e098660f65586a282c8.tar.gz