Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

utf8utf16.cpp

00001 /******************************************************************************
00002  *
00003  * UTF8UTF16 -  SWFilter descendant to convert UTF-8 to UTF-16
00004  *
00005  */
00006 
00007 #include <stdlib.h>
00008 #include <stdio.h>
00009 
00010 #include <utf8utf16.h>
00011 
00012 UTF8UTF16::UTF8UTF16() {
00013 }
00014 
00015 
00016 char UTF8UTF16::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
00017 {
00018   unsigned char *from;
00019   unsigned short *to;
00020 
00021   int len;
00022   unsigned long uchar;
00023   unsigned char significantFirstBits, subsequent;
00024   unsigned short schar;
00025   
00026   len = strlen(text) + 1;                                               // shift string to right of buffer
00027   if (len < maxlen) {
00028     memmove(&text[maxlen - len], text, len);
00029     from = (unsigned char*)&text[maxlen - len];
00030   }
00031   else
00032     from = (unsigned char*)text;
00033   
00034   
00035   // -------------------------------
00036   
00037   for (to = (unsigned short*)text; *from; from++) {
00038     uchar = 0;
00039     if ((*from & 128) != 128) {
00040       //                if (*from != ' ')
00041       uchar = *from;
00042     }
00043     else if ((*from & 128) && ((*from & 64) != 64)) {
00044       // error, do nothing
00045       continue;
00046     }
00047     else {
00048       *from <<= 1;
00049       for (subsequent = 1; (*from & 128); subsequent++) {
00050         *from <<= 1;
00051         from[subsequent] &= 63;
00052         uchar <<= 6;
00053         uchar |= from[subsequent];
00054       }
00055       subsequent--;
00056       *from <<=1;
00057       significantFirstBits = 8 - (2+subsequent);
00058       
00059       uchar |= (((short)*from) << (((6*subsequent)+significantFirstBits)-8));
00060       from += subsequent;
00061     }
00062 
00063     if (uchar < 0x1ffff) {
00064       *to++ = (unsigned short)uchar;
00065     }
00066     else {
00067       uchar -= 0x10000;
00068       schar = 0xD800 | (uchar & 0x03ff);
00069       uchar >>= 10;
00070       uchar |= 0xDC00;
00071       *to++ = (unsigned short)schar;
00072       *to++ = (unsigned short)uchar;
00073     }
00074   }
00075   *to = (unsigned short)0;
00076 
00077   return 0;
00078 }
00079 

Generated on Thu Jun 20 22:13:01 2002 for The Sword Project by doxygen1.2.15