aboutsummaryrefslogblamecommitdiffstats
path: root/epubgrep.py
blob: 78728d7f2b9d7262ce223b9a01adf198b048b18e (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11

                      
              







                                                   





                                                                                            
                                                                               
                                            






                                                             
                                                       
                                                                            

                                          

                                

                                                    
                             


                                   



                                                                 


                                                    
















                                                                                 
 
 






                                                                          


                                                      
                              
                                




                            


                                   

                                                     
#!/usr/bin/env python3
import argparse
import logging
import os.path
import re
import zipfile

from typing import Any, Dict, List, Optional, Tuple

import epub_meta

# Root logging is configured once at import time; messages are prefixed with
# the level name and the name of the emitting function.
logging.basicConfig(
    level=logging.INFO,
    format='%(levelname)s:%(funcName)s:%(message)s',
)
# Logger used by the functions and script code in this file.
log = logging.getLogger('epubgrep')


def get_chapter_title(mdata: List[Dict[str, Any]], fname: str) -> Optional[Tuple[str, int]]:
    """Find the table-of-contents entry whose source file is *fname*.

    Each item of *mdata* is read through its ``'src'``, ``'title'`` and
    ``'index'`` keys.  Only the first item whose ``'src'`` equals *fname*
    is used.

    Returns:
        A ``(title, index)`` tuple.  Spaces, tabs, dots and ASCII digits are
        stripped from both ends of the title.  When no item matches,
        ``('Unknown', 0)`` is returned.
    """
    matches = []
    for entry in mdata:
        if entry['src'] == fname:
            matches.append((entry['title'], entry['index']))
    log.debug('found_list = %s', matches)

    if not matches:
        return ('Unknown', 0)

    raw_title, toc_index = matches[0]
    return raw_title.strip(' \t.0123456789'), toc_index


def grep_book(filename: str, pattern: str, flags: int):
    """Search every member of an EPub archive and print what matches.

    The book is opened as a zip archive and each member is decoded as UTF-8
    (undecodable bytes are replaced) before being searched with *pattern*.

    * When ``re.M`` is set in *flags*, each member is searched as a whole
      and only its first match is printed, followed by a blank line.
    * Otherwise each member is searched line by line and every matching
      line is printed with surrounding whitespace stripped.

    The book's file name is printed once, before the first hit.  A
    ``<index>. <title>:`` heading, looked up in the book's table of
    contents via ``get_chapter_title``, is printed once per member that
    contains a hit.

    Args:
        filename: Path to the EPub file.
        pattern: Regular expression to search for.
        flags: ``re`` module flags used to compile *pattern*.

    Raises:
        AssertionError: If *filename* is not an existing regular file
            (this check is skipped when Python runs with ``-O``).
        re.error: If *pattern* is not a valid regular expression.
    """
    assert os.path.isfile(filename), "{} is not EPub file.".format(filename)
    sought_RE = re.compile(pattern, flags)

    # Parenthesised for readability; `&` already binds tighter than `==`.
    mline = (flags & re.M) == re.M

    metadata = epub_meta.get_epub_metadata(filename)
    printed_booktitle = False

    def print_headings(member_name: str) -> None:
        # Print the book name (first hit in the whole book only) followed by
        # the chapter heading for the archive member that produced the hit.
        nonlocal printed_booktitle
        if not printed_booktitle:
            print('{}'.format(filename))
            printed_booktitle = True
        chap_info = get_chapter_title(metadata.toc, member_name)
        print("{}. {}:\n".format(chap_info[1], chap_info[0]))

    # The context manager closes the archive on every exit path, including
    # when reading or decoding a member raises.
    with zipfile.ZipFile(filename) as book:
        for zif in book.infolist():
            with book.open(zif) as inf:
                if mline:
                    decoded_str = inf.read().decode(errors='replace')
                    res = sought_RE.search(decoded_str)
                    if res:
                        print_headings(zif.filename)
                        print('{}\n'.format(res.group(0)))
                else:
                    # The chapter heading is emitted lazily, on the first
                    # matching line of this member.
                    printed_title = False
                    for line in inf:
                        decoded_line = line.decode(errors='replace').strip()
                        if not sought_RE.search(decoded_line):
                            continue
                        if not printed_title:
                            print_headings(zif.filename)
                            printed_title = True
                        print(decoded_line)


if __name__ == "__main__":
    # Command-line entry point: parse the arguments, translate the switches
    # into `re` flags and grep the requested book.
    cli = argparse.ArgumentParser(description='Grep through EPub book')
    cli.add_argument('pattern')
    cli.add_argument('filename')
    cli.add_argument('-i', '--ignore-case', action='store_true',
                     help="make search case insensitive")
    cli.add_argument('-m', '--multi-line', action='store_true',
                     help="make search multi line")
    args = cli.parse_args()
    log.debug('args = %s', args)

    # -i adds re.I; -m adds both re.M and re.S.
    case_flags = re.I if args.ignore_case else 0
    multiline_flags = (re.M | re.S) if args.multi_line else 0

    grep_book(os.path.realpath(args.filename), args.pattern,
              case_flags | multiline_flags)