diff options
Diffstat (limited to 'src/utilfuns/swunicod.cpp')
-rw-r--r-- | src/utilfuns/swunicod.cpp | 139 |
1 files changed, 139 insertions, 0 deletions
diff --git a/src/utilfuns/swunicod.cpp b/src/utilfuns/swunicod.cpp new file mode 100644 index 0000000..f42fd86 --- /dev/null +++ b/src/utilfuns/swunicod.cpp @@ -0,0 +1,139 @@ +/* + * + * Copyright 1998 CrossWire Bible Society (http://www.crosswire.org) + * CrossWire Bible Society + * P. O. Box 2528 + * Tempe, AZ 85280-2528 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +#include "swunicod.h" +unsigned char* UTF32to8 (unsigned long utf32, unsigned char * utf8) { + unsigned int i; + for (i = 0; i < 6; i++) utf8[i] = 0; + + if (utf32 < 0x80) { + utf8[0] = (char)utf32; + } + else if (utf32 < 0x800) { + i = utf32 & 0x3f; + utf8[1] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x1f; + utf8[0] = 0xc0 | i; + } + else if (utf32 < 0x10000) { + i = utf32 & 0x3f; + utf8[2] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x3f; + utf8[1] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x0f; + utf8[0] = 0xe0 | i; + } + else if (utf32 < 0x200000) { + i = utf32 & 0x3f; + utf8[3] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x3f; + utf8[2] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x3f; + utf8[1] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x07; + utf8[0] = 0xf0 | i; + } + else if (utf32 < 0x4000000) { + i = utf32 & 0x3f; + utf8[4] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x3f; + utf8[3] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x3f; + utf8[2] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x3f; + utf8[1] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x03; + utf8[0] = 0xf8 | i; + } + else if (utf32 < 0x80000000) { + i = utf32 & 0x3f; + utf8[5] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x3f; + utf8[4] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x3f; + utf8[3] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x3f; + utf8[2] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x3f; + utf8[1] = 0x80 | i; + utf32 >>= 6; + + i = utf32 & 0x01; + utf8[0] = 0xfc | i; + } + return utf8; +} + +/** Converts a UTF-8 encoded 1-6 byte array into a 32-bit unsigned integer UTF-32 value + * @param utf8 pointer to an array of 6 unsigned chars containing the UTF-8 value, starting in the utf8[0] + * @param utf32 the UTF-32 Unicode code point value + */ +unsigned long UTF8to32 (unsigned char * utf8) { + + unsigned char i = utf8[0]; + unsigned char count; + unsigned long utf32 = 0; + + for (count = 0; i & 0x80; count++) i <<= 1; + if (!count) { + return utf8[0]; + } + else if (count == 1) { + return 0xffff; + } + else { + count--; + utf32 = i >> count; + for (i = 1; i <= count; i++) { + if (0xc0 & utf8[i] != 0x80) { + return 0xffff; + } + utf32 <<= 6; + utf32 |= (utf8[i] & 0x3f); + } + } + return utf32; +} |