00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include "swunicod.h"
/**
 * Encode one UTF-32 code value as a UTF-8 byte sequence.
 *
 * The first six bytes of the output buffer are always cleared first, so the
 * caller must supply at least six writable bytes. Sequences of one to six
 * bytes are produced (the original 31-bit UTF-8 scheme). Values of
 * 0x80000000 and above are not encodable and leave the six bytes zeroed.
 *
 * @param utf32 the code value to encode
 * @param utf8  output buffer of at least six bytes
 * @return utf8, the same pointer that was passed in
 */
unsigned char* UTF32to8 (unsigned long utf32, unsigned char * utf8) {
    /* upper_bound[n-1] is the first value that no longer fits in n bytes. */
    static const unsigned long upper_bound[6] = {
        0x80UL, 0x800UL, 0x10000UL, 0x200000UL, 0x4000000UL, 0x80000000UL
    };
    /* lead_tag[n-1] is the marker placed in the high bits of the first byte. */
    static const unsigned char lead_tag[6] = {
        0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
    };
    unsigned int length = 0;
    unsigned int k;

    for (k = 0; k < 6; k++) {
        utf8[k] = 0;
    }

    /* Pick the shortest sequence length able to hold the value. */
    for (k = 0; k < 6; k++) {
        if (utf32 < upper_bound[k]) {
            length = k + 1;
            break;
        }
    }
    if (length == 0) {
        return utf8;    /* out of range: buffer stays all zero */
    }

    /* Emit continuation bytes from last to first, six payload bits each. */
    for (k = length - 1; k > 0; k--) {
        utf8[k] = (unsigned char)(0x80 | (utf32 & 0x3f));
        utf32 >>= 6;
    }

    /* The range check guarantees the remaining bits fit beside the tag. */
    utf8[0] = (unsigned char)(lead_tag[length - 1] | utf32);
    return utf8;
}
00109
/**
 * Decode the UTF-8 sequence starting at utf8 into a single code value.
 *
 * Sequences of one to six bytes are accepted (the original 31-bit UTF-8
 * scheme). No check is made for overlong encodings or surrogate values.
 *
 * @param utf8 pointer to the first byte of the sequence; must not be NULL
 * @return the decoded value, or 0xffff when the sequence is malformed: the
 *         first byte is a stray continuation byte, the first byte is
 *         0xFE/0xFF, or one of the following bytes is not a continuation
 *         byte (which includes a sequence cut short by a NUL terminator)
 */
unsigned long UTF8to32 (unsigned char * utf8) {
    unsigned char lead = utf8[0];
    unsigned int length = 0;
    unsigned int pos;
    unsigned long utf32;

    /*
     * Count the leading 1 bits of the first byte by shifting them out.
     * The count is the total sequence length; the payload bits end up at
     * the top of `lead`, just below the terminating 0 bit.
     */
    while (lead & 0x80) {
        lead = (unsigned char)(lead << 1);
        length++;
    }

    if (length == 0) {
        return utf8[0];         /* plain 7-bit character */
    }
    if (length == 1 || length > 6) {
        return 0xffff;          /* stray continuation byte, or 0xFE/0xFF */
    }

    /*
     * `lead` was shifted left `length` times, so shifting right by the
     * same amount puts the payload bits back at the bottom.
     */
    utf32 = (unsigned long)(lead >> length);

    for (pos = 1; pos < length; pos++) {
        /* Parenthesised on purpose: `!=` binds tighter than `&`. */
        if ((utf8[pos] & 0xc0) != 0x80) {
            return 0xffff;
        }
        utf32 <<= 6;
        utf32 |= (unsigned long)(utf8[pos] & 0x3f);
    }
    return utf32;
}