Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

utf16utf8.cpp

00001 /******************************************************************************
00002  *
00003  * UTF16UTF8 -  SWFilter descendant to convert UTF-16 to UTF-8
00004  *
00005  */
00006 
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include <utf16utf8.h>
00011 
00012 UTF16UTF8::UTF16UTF8() {
00013 }
00014 
00015 
00016 char UTF16UTF8::ProcessText(char *text, int maxlen, const SWKey *key, const SWModule *module)
00017 {
00018   unsigned short *from;
00019   unsigned char *to;
00020 
00021   int len;
00022   unsigned long uchar;
00023   unsigned short schar;
00024 
00025   len = 0;
00026   from = (unsigned short*) text;
00027   while (*from) {
00028         len += 2;
00029         from++;
00030   }
00031 
00032   // shift string to right of buffer
00033   if (len < maxlen) {
00034     memmove(&text[maxlen - len], text, len);
00035     from = (unsigned short*)&text[maxlen - len];
00036   }
00037   else
00038     from = (unsigned short*)text;
00039   
00040 
00041   // -------------------------------
00042 
00043   for (to = (unsigned char*)text; *from; from++) {
00044     uchar = 0;
00045 
00046     if (*from < 0xD800 || *from > 0xDFFF) {
00047       uchar = *from;
00048     }
00049     else if (*from >= 0xD800 && *from <= 0xDBFF) {
00050       uchar = *from;
00051       schar = *(from+1);
00052       if (uchar < 0xDC00 || uchar > 0xDFFF) {
00053         //error, do nothing
00054         continue;
00055       }
00056       uchar &= 0x03ff;
00057       schar &= 0x03ff;
00058       uchar <<= 10;
00059       uchar |= schar;
00060       uchar += 0x10000;
00061       from++;
00062     }
00063     else {
00064       //error, do nothing
00065       continue;
00066     }
00067     
00068     if (uchar < 0x80) { 
00069       *to++ = uchar; 
00070     }
00071     else if (uchar < 0x800) { 
00072       *to++ = 0xc0 | (uchar >> 6); 
00073       *to++ = 0x80 | (uchar & 0x3f);
00074     }
00075     else if (uchar < 0x10000) {
00076       *to++ = 0xe0 | (uchar >> 12); 
00077       *to++ = 0x80 | (uchar >> 6) & 0x3f; 
00078       *to++ = 0x80 | uchar & 0x3f;
00079     }
00080     else if (uchar < 0x200000) {
00081       *to++ = 0xF0 | (uchar >> 18);
00082       *to++ = 0x80 | (uchar >> 12) & 0x3F; 
00083       *to++ = 0x80 | (uchar >> 6) & 0x3F; 
00084       *to++ = 0x80 | uchar & 0x3F;
00085     }
00086   }
00087   *to++ = 0;
00088   *to = 0;
00089   
00090   return 0;
00091 }
00092 
00093 
00094 
00095 

Generated on Thu Jun 20 22:13:01 2002 for The Sword Project by doxygen1.2.15