aboutsummaryrefslogtreecommitdiffstats
path: root/tests
diff options
context:
space:
mode:
authorbenadha <benawiadha@gmail.com>2022-01-10 20:38:20 +0700
committerbenadha <benawiadha@gmail.com>2022-01-10 20:38:20 +0700
commite93199843aa9a03e3fdd2ce6ee2d60ac36e43518 (patch)
tree8211c2a4627ce7a60c4a47006175b9a7df386f84 /tests
parentbb20ff2710af248a5e8b2e91f66018ce028a98ee (diff)
downloadepy-e93199843aa9a03e3fdd2ce6ee2d60ac36e43518.tar.gz
Restructured tests
Diffstat (limited to 'tests')
-rw-r--r--tests/test_text_parser.py132
1 files changed, 132 insertions, 0 deletions
diff --git a/tests/test_text_parser.py b/tests/test_text_parser.py
new file mode 100644
index 0000000..3c61657
--- /dev/null
+++ b/tests/test_text_parser.py
@@ -0,0 +1,132 @@
+from collections import namedtuple
+
+from epy import CharPos, TextMark, TextSpan, resolve_path, HTMLtoLines
+
+
+def test_resolve_path():
+ UnresolvedPath = namedtuple("UnresolvedPath", ["current_dir", "relative_path"])
+
+ inputs = [
+ UnresolvedPath("/aaa/bbb/book.html", "../ccc.png"),
+ UnresolvedPath("/aaa/bbb/book.html", "../../ccc.png"),
+ UnresolvedPath("aaa/bbb/book.html", "../../ccc.png"),
+ ]
+
+ expecteds = [
+ "/aaa/ccc.png",
+ "/ccc.png",
+ "ccc.png",
+ ]
+
+ for input, expected in zip(inputs, expecteds):
+ assert resolve_path(input.current_dir, input.relative_path) == expected
+
+
+def test_mark_to_span():
+ text = [
+ "Lorem ipsum dolor sit amet,",
+ "consectetur adipiscing elit.",
+ "Curabitur rutrum massa", # 2
+ "pretium, pulvinar ligula a,", # 3
+ "aliquam est. Proin ut lectus", # 4
+ "ac massa fermentum commodo.", # 5
+ "Duis ac urna a felis mollis",
+ "laoreet. Nullam finibus nibh",
+ "convallis, commodo nisl sit",
+ "amet, vestibulum mauris. Nulla",
+ "lacinia ultrices lacinia. Duis",
+ "auctor nunc non felis",
+ "ultricies, ut egestas tellus",
+ "rhoncus. Aenean ultrices",
+ "efficitur lacinia. Aliquam",
+ "eros lacus, luctus eu lacinia",
+ "in, eleifend nec nunc. Nam",
+ "condimentum malesuada",
+ "facilisis.",
+ ]
+
+ assert HTMLtoLines._mark_to_spans(
+ text, [TextMark(start=CharPos(row=2, col=3), end=CharPos(row=2, col=19))]
+ ) == [TextSpan(start=CharPos(row=2, col=3), n_letters=16)]
+
+ assert HTMLtoLines._mark_to_spans(
+ text,
+ [
+ TextMark(start=CharPos(row=2, col=3), end=CharPos(row=3, col=5)),
+ ],
+ ) == [
+ TextSpan(start=CharPos(row=2, col=3), n_letters=19),
+ TextSpan(start=CharPos(row=3, col=0), n_letters=5),
+ ]
+
+ assert HTMLtoLines._mark_to_spans(
+ text,
+ [
+ TextMark(start=CharPos(row=2, col=3), end=CharPos(row=5, col=3)),
+ ],
+ ) == [
+ TextSpan(start=CharPos(row=2, col=3), n_letters=19),
+ TextSpan(start=CharPos(row=3, col=0), n_letters=27),
+ TextSpan(start=CharPos(row=4, col=0), n_letters=28),
+ TextSpan(start=CharPos(row=5, col=0), n_letters=3),
+ ]
+
+
+def test_span_adjustment():
+ # 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur rutrum massa.'
+
+ text = [
+ "Lorem ipsum dolor",
+ "sit amet,",
+ "consectetur",
+ "adipiscing elit.",
+ "Curabitur rutrum",
+ "massa.",
+ ]
+
+ assert HTMLtoLines._adjust_wrapped_spans(
+ text, TextSpan(start=CharPos(row=0, col=2), n_letters=0)
+ ) == [TextSpan(start=CharPos(row=0, col=2), n_letters=0)]
+
+ assert HTMLtoLines._adjust_wrapped_spans(
+ text, TextSpan(start=CharPos(row=0, col=2), n_letters=5)
+ ) == [TextSpan(start=CharPos(row=0, col=2), n_letters=5)]
+
+ assert HTMLtoLines._adjust_wrapped_spans(
+ text, TextSpan(start=CharPos(row=0, col=15), n_letters=2)
+ ) == [TextSpan(start=CharPos(row=0, col=15), n_letters=2)]
+
+ assert HTMLtoLines._adjust_wrapped_spans(
+ text, TextSpan(start=CharPos(row=0, col=14), n_letters=7)
+ ) == [
+ TextSpan(start=CharPos(row=0, col=14), n_letters=3),
+ TextSpan(start=CharPos(row=1, col=0), n_letters=4),
+ ]
+
+ # assert HTMLtoLines._adjust_wrapped_spans(
+ # text, TextSpan(start=CharPos(row=1, col=7), n_letters=20)
+ # ) == [TextSpan(start=CharPos(row=0, col=14), n_letters=3), TextSpan(start=CharPos(row=1, col=0), n_letters=4)]
+
+
+def test_group_blocks():
+ block_list = [
+ TextSpan(start=CharPos(row=0, col=0), n_letters=4),
+ TextSpan(start=CharPos(row=1, col=0), n_letters=4),
+ TextSpan(start=CharPos(row=3, col=0), n_letters=4),
+ TextSpan(start=CharPos(row=3, col=0), n_letters=4),
+ TextSpan(start=CharPos(row=15, col=0), n_letters=4),
+ TextSpan(start=CharPos(row=15, col=0), n_letters=4),
+ ]
+
+ assert HTMLtoLines._group_spans_by_row(block_list) == {
+ 0: [TextSpan(start=CharPos(row=0, col=0), n_letters=4)],
+ 1: [TextSpan(start=CharPos(row=1, col=0), n_letters=4)],
+ 3: [
+ TextSpan(start=CharPos(row=3, col=0), n_letters=4),
+ TextSpan(start=CharPos(row=3, col=0), n_letters=4),
+ ],
+ 15: [
+ TextSpan(start=CharPos(row=15, col=0), n_letters=4),
+ TextSpan(start=CharPos(row=15, col=0), n_letters=4),
+ ],
+ }