史蒂夫嘰嘰叫: Microwindow 中文顯示 with pcf fonts

相關連結

Unicode Code Converter
http://rishida.net/scripts/uniview/conversion.php

http://space.flash8.net/space/?638324/action_viewspace_itemid_374138.html

字型使用 fireflyR16.pcf

fonts.dir

1
fireflyR16.pcf -firefly-sung-medium-r-normal--16-150-75-75-p-159-iso10646-1

要顯示firefly字型,需要作UTF-8 to ISO10646-1
也就是 UTF-8 to Unicode (UCS-2)

Microwindow 中相關的檔案

src/config
src/engine/devfont.c
src/engine/font_pcf.c

1.src/config

####################################################################
# PCF font support
# Selecting HAVE_PCFGZ_SUPPORT will allow you to directly read
# .pcf.gz files, but it will add libz to the size of the server
####################################################################
HAVE_PCF_SUPPORT         = Y
HAVE_PCFGZ_SUPPORT       = N
PCF_FONT_DIR             = "/phone/lib/X11/fonts/misc"

不知道為何 gzipped pcf font 在我的系統中讀到的資料是不正確的.
應該跟gzopen, gzread, gzseek, gzclose有關.所以不支援囉.

注意!!!PCF_FONT_DIR要正確設定到PCF字型所在的路徑

2.src/engine/devfont.c

關於轉碼的部份在
int GdConvertEncoding(const void *istr, MWTEXTFLAGS iflags, int cc, void *ostr,
    MWTEXTFLAGS oflags)

看起來是有支援UTF-8 to Unicode, 實際上轉出來的碼是不正確的

3.src/engine/font_pcf.c

參考
http://tw.myblog.yahoo.com/stevegigijoe/article?mid=55&prev=56&next=54

4.UTF-8 to UCS-2

/* Set to 1 to turn bad UTF-8 bytes into ISO-8859-1. If this is set to
   zero they are instead turned into the Unicode REPLACEMENT CHARACTER,
   of value 0xfffd.
   If this is on, utf8decode will correctly map most (perhaps all)
   human-readable text that is in ISO-8859-1. This may allow you
   to completely ignore character sets in your code because virtually
   everything is either ISO-8859-1 or UTF-8.
*/
#define ERRORS_TO_ISO8859_1 1

/* Set to 1 to turn bad UTF-8 bytes in the 0x80-0x9f range into the
   Unicode index for Microsoft's CP1252 character set. You should
   also set ERRORS_TO_ISO8859_1. With this, a huge amount of additional
   text (such as all web pages) is correctly converted to Unicode.
*/
#define ERRORS_TO_CP1252 1

/* A number of Unicode code points are in fact illegal and should not
   be produced by a UTF-8 converter. Turning this on will replace the
   bytes in those encodings with errors. If you do this then converting
   arbitrary 16-bit data to UTF-8 and then back is not an identity,
   which will probably break a lot of software.
*/
#define STRICT_RFC3629 0

#if ERRORS_TO_CP1252
// Unicode code points for bytes 0x80..0x9f of the Microsoft CP1252
// character set, indexed by (byte - 0x80). Entries 0x81, 0x8d, 0x8f,
// 0x90 and 0x9d map to themselves. The table is read-only lookup data,
// hence const.
static const unsigned short cp1252[32] = {
  0x20ac, 0x0081, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
  0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008d, 0x017d, 0x008f,
  0x0090, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
  0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x009d, 0x017e, 0x0178
};
#endif

/*! Decode a single UTF-8 encoded character starting at \e p. The
    resulting Unicode value (in the range 0-0x10ffff) is returned,
    and \e len is set to the number of bytes in the UTF-8 encoding
    (adding \e len to \e p will point at the next character).

    \e end points one past the last readable byte. The byte at \e p
    is always read, so the caller must guarantee \e p < \e end.

    If \e p points at an illegal UTF-8 encoding, including one that
    would go past \e end, or where a code uses more bytes than
    necessary, then \e len is set to 1 and the single byte
    *(unsigned char*)p is translated according to the compile-time
    switches:
    - with ERRORS_TO_CP1252, a byte in 0x80-0x9f is looked up in the
      cp1252[] table;
    - with ERRORS_TO_ISO8859_1, any other bad byte is returned
      unchanged (i.e. interpreted as ISO-8859-1);
    - otherwise 0xfffd (REPLACEMENT CHARACTER) is returned.

    Treating errors this way allows this to decode almost any
    ISO-8859-1 or CP1252 text that has been mistakenly placed where
    UTF-8 is expected.

    If you want errors to be converted to error characters (as the
    standards recommend), adding a test to see if the length is
    unexpectedly 1 will work:

\code
    if (*p & 0x80) { // what should be a multibyte encoding
      code = utf8decode(p,end,&len);
      if (len<2) code = 0xFFFD; // Turn errors into REPLACEMENT CHARACTER
    } else { // handle the 1-byte utf8 encoding:
      code = *p;
      len = 1;
    }
\endcode

    Direct testing for the 1-byte case (as shown above) will also
    speed up the scanning of strings where the majority of characters
    are ASCII.
*/
unsigned utf8decode(const char* p, const char* end, int* len)
{
  /* Work on unsigned bytes so range comparisons do not depend on the
     signedness of plain char. */
  const unsigned char* s = (const unsigned char*)p;
  const unsigned char c = s[0];

  if (c < 0x80) {               /* 1-byte (ASCII) encoding */
    *len = 1;
    return c;
  }
#if ERRORS_TO_CP1252
  if (c < 0xa0) {               /* never a legal lead byte: treat as CP1252 */
    *len = 1;
    return cp1252[c-0x80];
  }
#endif
  /* 0x80-0xbf are stray continuation bytes; 0xc0/0xc1 could only start
     an overlong 2-byte encoding. */
  if (c < 0xc2) goto FAIL;

  /* Every remaining lead byte needs at least one continuation byte. */
  if (p+1 >= end || (s[1]&0xc0) != 0x80) goto FAIL;

  if (c < 0xe0) {               /* 2-byte encoding: U+0080..U+07FF */
    *len = 2;
    return
      ((unsigned)(c    & 0x1f) << 6) |
      ((unsigned)(s[1] & 0x3f));
  }

  if (c < 0xf0) {               /* 3-byte encoding: U+0800..U+FFFF */
    /* 0xe0 followed by less than 0xa0 would be an overlong encoding. */
    if (c == 0xe0 && s[1] < 0xa0) goto FAIL;
#if STRICT_RFC3629
    /* RFC 3629 says surrogate chars are illegal. */
    if (c == 0xed && s[1] >= 0xa0) goto FAIL;
#endif
    if (p+2 >= end || (s[2]&0xc0) != 0x80) goto FAIL;
#if STRICT_RFC3629
    /* 0xfffe and 0xffff are also illegal characters. This test reads
       s[2], so it must come after the bounds check above. */
    if (c == 0xef && s[1] == 0xbf && s[2] >= 0xbe) goto FAIL;
#endif
    *len = 3;
    return
      ((unsigned)(c    & 0x0f) << 12) |
      ((unsigned)(s[1] & 0x3f) << 6) |
      ((unsigned)(s[2] & 0x3f));
  }

  if (c < 0xf5) {               /* 4-byte encoding: U+10000..U+10FFFF */
    /* 0xf0 followed by less than 0x90 would be an overlong encoding. */
    if (c == 0xf0 && s[1] < 0x90) goto FAIL;
    /* 0xf4 followed by more than 0x8f would be after 0x10ffff. */
    if (c == 0xf4 && s[1] > 0x8f) goto FAIL;
    if (p+3 >= end || (s[2]&0xc0) != 0x80 || (s[3]&0xc0) != 0x80) goto FAIL;
#if STRICT_RFC3629
    /* RFC 3629 says all codes ending in fffe or ffff are illegal: */
    if ((s[1]&0xf) == 0xf && s[2] == 0xbf && s[3] >= 0xbe) goto FAIL;
#endif
    *len = 4;
    return
      ((unsigned)(c    & 0x07) << 18) |
      ((unsigned)(s[1] & 0x3f) << 12) |
      ((unsigned)(s[2] & 0x3f) << 6) |
      ((unsigned)(s[3] & 0x3f));
  }

  /* Lead bytes 0xf5..0xff are never legal; fall through to the error path. */
FAIL:
  *len = 1;
#if ERRORS_TO_ISO8859_1
  return c;
#else
  return 0xfffd; /* Unicode REPLACEMENT CHARACTER */
#endif
}

/* One 16-bit character cell kept as two separate bytes.
   utf8to2b() stores the high 8 bits of the code in byte1 and the
   low 8 bits in byte2. */
typedef struct {
    unsigned char byte1, byte2;
} XChar2b;

////////////////////////////////////////////////////////////////
// Things you can do once the font+size has been selected:

// I see no sign of "FontSets" working. Instead this supposedly will
// draw the correct letters if you happen to pick an iso10646-1 font.

// This is similar to utf8towc() but works with the big-endian-only
// structure X seems to want, and does not bother with surrogate
// pairs (codes above 0xffff become '?').
//
// text/n:     the UTF-8 bytes to convert and their count.
// charcount:  receives the number of XChar2b cells produced; it is
//             written only when a buffer is returned.
//
// Returns 0 if all characters are 1 byte or errors (nothing needs
// 16-bit output), or if memory for the result cannot be allocated.
// Otherwise returns a buffer of *charcount cells. The buffer is a
// function-static scratch area: it is owned by this function, must not
// be freed by the caller, and is overwritten by the next call.
static XChar2b* utf8to2b(const char* text, int n, int* charcount) {

  static XChar2b* buffer = 0;   // scratch buffer reused between calls
  static int bufcount = 0;      // capacity of buffer, in cells

  const char* p = text;
  const char* e = text+n;
  int sawutf8 = 0;
  int count = 0;

  // Pass 1: count the characters and find out whether anything needs
  // more than 8 bits.
  while (p < e) {
    const unsigned char c = *(const unsigned char*)p;
    int len = 1;
    if (c >= 0x80 && c < 0xa0) {
      sawutf8 = 1;              // cp1252
    } else if (c >= 0xC2) {     // possible multibyte lead byte
      utf8decode(p, e, &len);
      if (len > 1) sawutf8 = 1;
      else if (len < 1) len = 1; // always make progress
    }                           // else: ascii or other bad code
    p += len;
    count++;
  }
  if (!sawutf8) return 0;

  // Grow the scratch buffer if needed. Allocate the new one before
  // releasing the old one so that a failed allocation leaves buffer
  // and bufcount consistent for later calls.
  if (bufcount < count) {
    XChar2b* grown = malloc(sizeof *grown * count);
    if (!grown) return 0;
    free(buffer);
    buffer = grown;
    bufcount = count;
  }
  *charcount = count;

  // Pass 2: fill in the cells. Every non-ASCII byte goes through
  // utf8decode() so that its error mapping (CP1252 / ISO-8859-1 /
  // REPLACEMENT CHARACTER) is applied, which is what pass 1 assumes
  // when it flags bytes 0x80-0x9f as needing 16-bit output.
  count = 0;
  p = text;
  while (p < e) {
    const unsigned char c = *(const unsigned char*)p;
    if (c < 0x80) {             // ascii letter
      buffer[count].byte1 = 0;
      buffer[count].byte2 = c;
      p++;
    } else {
      int len = 1;
      unsigned code = utf8decode(p, e, &len);
      if (code > 0xffff) code = '?'; // does not fit in 16 bits
      if (len < 1) len = 1;          // always make progress
      p += len;
      buffer[count].byte1 = (unsigned char)(code >> 8);
      buffer[count].byte2 = (unsigned char)code;
    }
    count++;
  }
  return buffer;
}

5.實際測試

int count = 0;
/* Plain char, because strcpy(), strlen() and utf8to2b() all take
   char pointers. */
char buffer[512];

strcpy(buffer, "測試在Microwindow中顯示PCF字型");
/* utf8to2b() returns 0 and leaves count untouched when the text has no
   multi-byte characters; this sample string always has some. */
XChar2b *uc16 = utf8to2b(buffer, (int)strlen(buffer), &count);
GrText(main_wid, gc, 0, 0, uc16, count, GR_TFXCHAR2B|GR_TFBASELINE);

史蒂夫嘰嘰叫

2008年11月16日星期日

Microwindow 中文顯示 with pcf fonts

1 則留言:

2008年11月16日 星期日

Microwindow 中文顯示 with pcf fonts

1 則留言:

2008年11月16日星期日