diff options
author | benadha <benawiadha@gmail.com> | 2021-01-29 20:07:56 +0700 |
---|---|---|
committer | benadha <benawiadha@gmail.com> | 2021-01-29 20:07:56 +0700 |
commit | 5d523c113a8a5b03eb2a9076ef893212c1c0c3d6 (patch) | |
tree | 79ba6147f0f05013a5f53f48ebc00a24a0af6e1e /epy.py | |
parent | 8f1113c2d2ec193561a49704c4a07a41ec2bc8cb (diff) | |
download | epy-5d523c113a8a5b03eb2a9076ef893212c1c0c3d6.tar.gz |
New scheme for sections parsing
Diffstat (limited to 'epy.py')
-rwxr-xr-x | epy.py | 20 |
1 file changed, 14 insertions, 6 deletions
```diff
@@ -14,7 +14,7 @@ Options:
 """


-__version__ = "2020.11.19"
+__version__ = "2021.1.29"
 __license__ = "GPL-3.0"
 __author__ = "Benawi Adha"
 __email__ = "benawiadha@gmail.com"
@@ -414,6 +414,7 @@ class HTMLtoLines(HTMLParser):
         self.idbull = set()
         self.idpref = set()
         self.sects = sects
+        self.sectsindex = {}

     def handle_starttag(self, tag, attrs):
         if re.match("h[1-6]", tag) is not None:
@@ -439,7 +440,9 @@ class HTMLtoLines(HTMLParser):
         if self.sects != {""}:
             for i in attrs:
                 if i[0] == "id" and i[1] in self.sects:
-                    self.text[-1] += " (#" + i[1] + ") "
+                    # self.text[-1] += " (#" + i[1] + ") "
+                    # self.sectsindex.append([len(self.text), i[1]])
+                    self.sectsindex[len(self.text)-1] = i[1]

     def handle_startendtag(self, tag, attrs):
         if tag == "br":
@@ -456,7 +459,8 @@ class HTMLtoLines(HTMLParser):
         if self.sects != {""}:
             for i in attrs:
                 if i[0] == "id" and i[1] in self.sects:
-                    self.text[-1] += " (#" + i[1] + ") "
+                    # self.text[-1] += " (#" + i[1] + ") "
+                    self.sectsindex[len(self.text)-1] = i[1]

     def handle_endtag(self, tag):
         if re.match("h[1-6]", tag) is not None:
@@ -510,9 +514,13 @@ class HTMLtoLines(HTMLParser):
             return self.text
         for n, i in enumerate(self.text):
             findsect = re.search(r"(?<= \(#).*?(?=\) )", i)
-            if findsect is not None and findsect.group() in self.sects:
-                i = i.replace(" (#" + findsect.group() + ") ", "")
-                sect[findsect.group()] = len(text)
+            # findsect = re.search(r"(?<= \(#).*?(?=\) )", i)
+            # if findsect is not None and findsect.group() in self.sects:
+            # i = i.replace(" (#" + findsect.group() + ") ", "")
+            # # i = i.replace(" (#" + findsect.group() + ") ", " "*(5+len(findsect.group())))
+            # sect[findsect.group()] = len(text)
+            if n in self.sectsindex.keys():
+                sect[self.sectsindex[n]] = len(text)-1
             if n in self.idhead:
                 text += [i.rjust(width//2 + len(i)//2)] + [""]
             elif n in self.idinde:
```