summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--regress/char/unicode/input.inbin2427 -> 2588 bytes
-rw-r--r--regress/char/unicode/input.out_ascii32
-rw-r--r--regress/char/unicode/input.out_lint46
-rw-r--r--regress/char/unicode/input.out_utf832
4 files changed, 57 insertions, 53 deletions
diff --git a/regress/char/unicode/input.in b/regress/char/unicode/input.in
index b0d9c7fc..cc312340 100644
--- a/regress/char/unicode/input.in
+++ b/regress/char/unicode/input.in
Binary files differ
diff --git a/regress/char/unicode/input.out_ascii b/regress/char/unicode/input.out_ascii
index 410bdc85..66e904de 100644
--- a/regress/char/unicode/input.out_ascii
+++ b/regress/char/unicode/input.out_ascii
@@ -20,10 +20,10 @@ DDEESSCCRRIIPPTTIIOONN
U+0000 0xc080 ?? lowest obfuscated ASCII
U+007f 0xc1bf ?? highest obfuscated ASCII
- 0xc278 ?x ASCII continuation
U+0080 0xc280 <80><80> lowest two-byte
- 0xc2c380 ?`A high continuation
U+07FF 0xdfbf <?><?> highest two-byte
+ 0xc278 ?x ASCII instead of continuation
+ 0xc2c380 ?`A start byte instead of continuation
TThhrreeee--bbyyttee rraannggee
@@ -32,10 +32,10 @@ DDEESSCCRRIIPPTTIIOONN
U+0080 0xe08280 ??? lowest obfuscated two-byte
U+07FF 0xe09fbf ??? highest obfuscated two-byte
U+0800 0xe0a080 <?><?> lowest three-byte
- U+0FFF 0xe0bfbf <?><?> end of first middle byte
- U+1000 0xe18080 <?><?> begin of second middle byte
- U+CFFF 0xecbfbf <?><?> end of last normal middle byte
- U+D000 0xed8080 <?><?> begin of strange middle byte
+ U+0FFF 0xe0bfbf <?><?> end of first start byte
+ U+1000 0xe18080 <?><?> begin of second start byte
+ U+CFFF 0xecbfbf <?><?> end of last normal start byte
+ U+D000 0xed8080 <?><?> begin of last start byte
U+D7FF 0xed9fbf <?><?> highest public three-byte
U+D800 0xeda080 ??? lowest surrogate
U+DFFF 0xedbfbf ??? highest surrogate
@@ -51,17 +51,19 @@ DDEESSCCRRIIPPTTIIOONN
U+0800 0xf080a080 ???? lowest obfuscated three-byte
U+FFFF 0xf08fbfbf ???? highest obfuscated three-byte
U+10000 0xf0908080 <?><?> lowest four-byte
- U+3FFFF 0xf0bfbfbf <?><?> end of first middle byte
- U+40000 0xf1808080 <?><?> second middle byte
- U+FFFFF 0xf3bfbfbf <?><?> last normal middle byte
- U+100000 0xf4808080 <?><?> strange middle byte
- U+10FFFF 0xf48fbfbf <?><?> last valid four-byte
+ U+3FFFF 0xf0bfbfbf <?><?> end of first start byte
+ U+40000 0xf1808080 <?><?> begin of second start byte
+ U+EFFFF 0xf2bfbfbf <?><?> highest public character
+ U+F0000 0xf3808080 <?><?> lowest plane 15 private use
+ U+FFFFF 0xf3bfbfbf <?><?> highest plane 15 private use
+ U+100000 0xf4808080 <?><?> lowest plane 16 private use
+ U+10FFFF 0xf48fbfbf <?><?> highest valid four-byte
U+110000 0xf4908080 ???? lowest beyond Unicode
- U+13FFFF 0xf4bfbfbf ???? end of strange middle byte
- U+140000 0xf5808080 ???? lowest invalid middle byte
- U+1FFFFF 0xf7bfbfbf ???? highest four-byte
+ U+13FFFF 0xf4bfbfbf ???? end of last start byte
+ U+140000 0xf5808080 ???? lowest invalid start byte
+ U+1FFFFF 0xf7bfbfbf ???? highest invalid four-byte
U+200000 0xf888808080 ????? lowest five-byte
-OpenBSD December 19, 2014 CHAR-UNICODE-INPUT(1)
+OpenBSD June 2, 2021 CHAR-UNICODE-INPUT(1)
diff --git a/regress/char/unicode/input.out_lint b/regress/char/unicode/input.out_lint
index e537b4fd..fbd053b2 100644
--- a/regress/char/unicode/input.out_lint
+++ b/regress/char/unicode/input.out_lint
@@ -7,8 +7,8 @@ mandoc: input.in:21:15: ERROR: skipping bad character: 0xc0
mandoc: input.in:21:16: ERROR: skipping bad character: 0x80
mandoc: input.in:22:15: ERROR: skipping bad character: 0xc1
mandoc: input.in:22:16: ERROR: skipping bad character: 0xbf
-mandoc: input.in:23:9: ERROR: skipping bad character: 0xc2
-mandoc: input.in:25:11: ERROR: skipping bad character: 0xc2
+mandoc: input.in:25:9: ERROR: skipping bad character: 0xc2
+mandoc: input.in:26:11: ERROR: skipping bad character: 0xc2
mandoc: input.in:32:17: ERROR: skipping bad character: 0xc0
mandoc: input.in:32:18: ERROR: skipping bad character: 0x80
mandoc: input.in:32:19: ERROR: skipping bad character: 0x80
@@ -53,29 +53,29 @@ mandoc: input.in:56:19: ERROR: skipping bad character: 0xf0
mandoc: input.in:56:20: ERROR: skipping bad character: 0x8f
mandoc: input.in:56:21: ERROR: skipping bad character: 0xbf
mandoc: input.in:56:22: ERROR: skipping bad character: 0xbf
-mandoc: input.in:63:31: ERROR: skipping bad character: 0xf4
-mandoc: input.in:63:32: ERROR: skipping bad character: 0x90
-mandoc: input.in:63:33: ERROR: skipping bad character: 0x80
-mandoc: input.in:63:34: ERROR: skipping bad character: 0x80
-mandoc: input.in:63:21: WARNING: invalid escape sequence: \[u110000]
-mandoc: input.in:64:31: ERROR: skipping bad character: 0xf4
-mandoc: input.in:64:32: ERROR: skipping bad character: 0xbf
-mandoc: input.in:64:33: ERROR: skipping bad character: 0xbf
-mandoc: input.in:64:34: ERROR: skipping bad character: 0xbf
-mandoc: input.in:64:21: WARNING: invalid escape sequence: \[u13FFFF]
-mandoc: input.in:65:31: ERROR: skipping bad character: 0xf5
-mandoc: input.in:65:32: ERROR: skipping bad character: 0x80
+mandoc: input.in:65:31: ERROR: skipping bad character: 0xf4
+mandoc: input.in:65:32: ERROR: skipping bad character: 0x90
mandoc: input.in:65:33: ERROR: skipping bad character: 0x80
mandoc: input.in:65:34: ERROR: skipping bad character: 0x80
-mandoc: input.in:65:21: WARNING: invalid escape sequence: \[u140000]
-mandoc: input.in:66:31: ERROR: skipping bad character: 0xf7
+mandoc: input.in:65:21: WARNING: invalid escape sequence: \[u110000]
+mandoc: input.in:66:31: ERROR: skipping bad character: 0xf4
mandoc: input.in:66:32: ERROR: skipping bad character: 0xbf
mandoc: input.in:66:33: ERROR: skipping bad character: 0xbf
mandoc: input.in:66:34: ERROR: skipping bad character: 0xbf
-mandoc: input.in:66:21: WARNING: invalid escape sequence: \[u1FFFFF]
-mandoc: input.in:67:33: ERROR: skipping bad character: 0xf8
-mandoc: input.in:67:34: ERROR: skipping bad character: 0x88
-mandoc: input.in:67:35: ERROR: skipping bad character: 0x80
-mandoc: input.in:67:36: ERROR: skipping bad character: 0x80
-mandoc: input.in:67:37: ERROR: skipping bad character: 0x80
-mandoc: input.in:67:23: WARNING: invalid escape sequence: \[u200000]
+mandoc: input.in:66:21: WARNING: invalid escape sequence: \[u13FFFF]
+mandoc: input.in:67:31: ERROR: skipping bad character: 0xf5
+mandoc: input.in:67:32: ERROR: skipping bad character: 0x80
+mandoc: input.in:67:33: ERROR: skipping bad character: 0x80
+mandoc: input.in:67:34: ERROR: skipping bad character: 0x80
+mandoc: input.in:67:21: WARNING: invalid escape sequence: \[u140000]
+mandoc: input.in:68:31: ERROR: skipping bad character: 0xf7
+mandoc: input.in:68:32: ERROR: skipping bad character: 0xbf
+mandoc: input.in:68:33: ERROR: skipping bad character: 0xbf
+mandoc: input.in:68:34: ERROR: skipping bad character: 0xbf
+mandoc: input.in:68:21: WARNING: invalid escape sequence: \[u1FFFFF]
+mandoc: input.in:69:33: ERROR: skipping bad character: 0xf8
+mandoc: input.in:69:34: ERROR: skipping bad character: 0x88
+mandoc: input.in:69:35: ERROR: skipping bad character: 0x80
+mandoc: input.in:69:36: ERROR: skipping bad character: 0x80
+mandoc: input.in:69:37: ERROR: skipping bad character: 0x80
+mandoc: input.in:69:23: WARNING: invalid escape sequence: \[u200000]
diff --git a/regress/char/unicode/input.out_utf8 b/regress/char/unicode/input.out_utf8
index 882d14fd..af4645bd 100644
--- a/regress/char/unicode/input.out_utf8
+++ b/regress/char/unicode/input.out_utf8
@@ -20,10 +20,10 @@ DDEESSCCRRIIPPTTIIOONN
U+0000 0xc080 ?? lowest obfuscated ASCII
U+007f 0xc1bf ?? highest obfuscated ASCII
- 0xc278 ?x ASCII continuation
U+0080 0xc280 �� lowest two-byte
- 0xc2c380 ?À high continuation
U+07FF 0xdfbf ߿߿ highest two-byte
+ 0xc278 ?x ASCII instead of continuation
+ 0xc2c380 ?À start byte instead of continuation
TThhrreeee--bbyyttee rraannggee
@@ -32,10 +32,10 @@ DDEESSCCRRIIPPTTIIOONN
U+0080 0xe08280 ??? lowest obfuscated two-byte
U+07FF 0xe09fbf ??? highest obfuscated two-byte
U+0800 0xe0a080 ࠀࠀ lowest three-byte
- U+0FFF 0xe0bfbf ࿿࿿ end of first middle byte
- U+1000 0xe18080 ကက begin of second middle byte
- U+CFFF 0xecbfbf 쿿쿿 end of last normal middle byte
- U+D000 0xed8080 퀀퀀 begin of strange middle byte
+ U+0FFF 0xe0bfbf ࿿࿿ end of first start byte
+ U+1000 0xe18080 ကက begin of second start byte
+ U+CFFF 0xecbfbf 쿿쿿 end of last normal start byte
+ U+D000 0xed8080 퀀퀀 begin of last start byte
U+D7FF 0xed9fbf ퟿퟿ highest public three-byte
U+D800 0xeda080 ??? lowest surrogate
U+DFFF 0xedbfbf ??? highest surrogate
@@ -51,17 +51,19 @@ DDEESSCCRRIIPPTTIIOONN
U+0800 0xf080a080 ???? lowest obfuscated three-byte
U+FFFF 0xf08fbfbf ???? highest obfuscated three-byte
U+10000 0xf0908080 𐀀𐀀 lowest four-byte
- U+3FFFF 0xf0bfbfbf 𿿿𿿿 end of first middle byte
- U+40000 0xf1808080 񀀀񀀀 second middle byte
- U+FFFFF 0xf3bfbfbf 󿿿󿿿 last normal middle byte
- U+100000 0xf4808080 􀀀􀀀 strange middle byte
- U+10FFFF 0xf48fbfbf 􏿿􏿿 last valid four-byte
+ U+3FFFF 0xf0bfbfbf 𿿿𿿿 end of first start byte
+ U+40000 0xf1808080 񀀀񀀀 begin of second start byte
+ U+EFFFF 0xf2bfbfbf 󯿿򿿿 highest public character
+ U+F0000 0xf3808080 󰀀󀀀 lowest plane 15 private use
+ U+FFFFF 0xf3bfbfbf 󿿿󿿿 highest plane 15 private use
+ U+100000 0xf4808080 􀀀􀀀 lowest plane 16 private use
+ U+10FFFF 0xf48fbfbf 􏿿􏿿 highest valid four-byte
U+110000 0xf4908080 ???? lowest beyond Unicode
- U+13FFFF 0xf4bfbfbf ???? end of strange middle byte
- U+140000 0xf5808080 ???? lowest invalid middle byte
- U+1FFFFF 0xf7bfbfbf ???? highest four-byte
+ U+13FFFF 0xf4bfbfbf ???? end of last start byte
+ U+140000 0xf5808080 ???? lowest invalid start byte
+ U+1FFFFF 0xf7bfbfbf ???? highest invalid four-byte
U+200000 0xf888808080 ????? lowest five-byte
-OpenBSD December 19, 2014 CHAR-UNICODE-INPUT(1)
+OpenBSD June 2, 2021 CHAR-UNICODE-INPUT(1)