aboutsummaryrefslogtreecommitdiffstats
path: root/filters
diff options
context:
space:
mode:
authorRobin Jarry <robin@jarry.cc>2024-06-24 22:53:53 +0200
committerRobin Jarry <robin@jarry.cc>2024-06-25 21:04:35 +0200
commit7c5a1afbda60ec81e84161306c2cf71d974b341a (patch)
tree2e4e9770e2a28805baa23976a6c0383a24992dd0 /filters
parent4e920d1def515f2c6d7da5168e39fd89fb200f63 (diff)
downloadaerc-7c5a1afbda60ec81e84161306c2cf71d974b341a.tar.gz
wrap: fix cjk prose ratio on macos
Depending on the locale and the libc implementation, iswalpha() may return true or false for CJK symbols. Reuse the same logic as in the split point detection introduced in commit 99bc69918ea7 ("wrap: fix wide CJK characters support"). Include all missing Korean and Japanese specific Unicode blocks. Handle syllabic symbols with a parameter to avoid wrapping in the middle of syllables. Signed-off-by: Robin Jarry <robin@jarry.cc> Tested-by: Gregory Anders <greg@gpanders.com>
Diffstat (limited to 'filters')
-rw-r--r--filters/wrap.c61
1 files changed, 46 insertions, 15 deletions
diff --git a/filters/wrap.c b/filters/wrap.c
index c3109747..1a8d5810 100644
--- a/filters/wrap.c
+++ b/filters/wrap.c
@@ -209,6 +209,49 @@ static size_t list_item_offset(const wchar_t *buf)
return i;
}
+static bool is_cjk(wchar_t c, bool include_syllables) { /*
+ * Report whether c falls inside one of the code point ranges tested
+ * below. The Hiragana, Katakana and Hangul Syllables ranges are only
+ * consulted when include_syllables is true; every other range is
+ * always consulted. Returns false when no range matches.
+ */
+ /* CJK Radicals Supplement and Kangxi Radicals (the range spans both blocks) */
+ if (c >= 0x2e80 && c <= 0x2fd5)
+ return true;
+ /* CJK Compatibility */
+ if (c >= 0x3300 && c <= 0x33ff)
+ return true;
+ /* CJK Unified Ideographs Extension A; NOTE(review): block ends at U+4DBF, bound stops at U+4DB5 -- confirm */
+ if (c >= 0x3400 && c <= 0x4db5)
+ return true;
+ /* CJK Unified Ideographs; NOTE(review): block ends at U+9FFF, bound stops at U+9FCB -- confirm */
+ if (c >= 0x4e00 && c <= 0x9fcb)
+ return true;
+ /* CJK Compatibility Ideographs; NOTE(review): block ends at U+FAFF, bound stops at U+FA6A -- confirm */
+ if (c >= 0xf900 && c <= 0xfa6a)
+ return true;
+ /* Hangul Jamo */
+ if (c >= 0x1100 && c <= 0x11ff)
+ return true;
+ /* Hangul Compatibility Jamo */
+ if (c >= 0x3130 && c <= 0x318f)
+ return true;
+ /* Hangul Jamo Extended-A */
+ if (c >= 0xa960 && c <= 0xa97f)
+ return true;
+ /* Hangul Jamo Extended-B */
+ if (c >= 0xd7b0 && c <= 0xd7ff)
+ return true;
+
+ if (include_syllables) { /* opt-in ranges: parse_line() passes true, is_split_point() passes false */
+ /* Japanese Hiragana */
+ if (c >= 0x3040 && c <= 0x309f)
+ return true;
+ /* Japanese Katakana */
+ if (c >= 0x30a0 && c <= 0x30ff)
+ return true;
+ /* Hangul Syllables */
+ if (c >= 0xac00 && c <= 0xd7af)
+ return true;
+ }
+ return false;
+}
+
static struct paragraph *parse_line(const wchar_t *buf)
{
size_t i, q, t, e, letters, indent_len, text_len;
@@ -251,7 +294,8 @@ static struct paragraph *parse_line(const wchar_t *buf)
e = t;
letters = 0;
while (buf[e] != L'\0') {
- if (iswalpha((wint_t)buf[e++])) {
+ wchar_t c = buf[e++];
+ if (iswalpha((wint_t)c) || is_cjk(c, true)) {
letters++;
}
}
@@ -351,20 +395,7 @@ static bool is_split_point(const wchar_t c)
if (iswspace((wint_t)c))
return true;
- /* CJK Radicals Supplement */
- if (c >= 0x2e80 && c <= 0x2fd5)
- return true;
- /* CJK Compatibility */
- if (c >= 0x3300 && c <= 0x33ff)
- return true;
- /* CJK Unified Ideographs Extension A */
- if (c >= 0x3400 && c <= 0x4db5)
- return true;
- /* CJK Unified Ideographs */
- if (c >= 0x4e00 && c <= 0x9fcb)
- return true;
- /* CJK Compatibility Ideographs */
- if (c >= 0xf900 && c <= 0xfa6a)
+ if (is_cjk(c, false))
return true;
return false;