相關連結
Unicode Code Converter
http://rishida.net/scripts/uniview/conversion.php
http://space.flash8.net/space/?638324/action_viewspace_itemid_374138.html
字型使用 fireflyR16.pcf
fonts.dir
1
fireflyR16.pcf -firefly-sung-medium-r-normal--16-150-75-75-p-159-iso10646-1
要顯示firefly字型,需要作UTF-8 to ISO10646-1
也就是 UTF-8 to Unicode (UCS-2)
Microwindows 中相關的檔案
src/config
src/engine/devfont.c
src/engine/font_pcf.c
1.src/config
####################################################################
# PCF font support
# Selecting HAVE_PCFGZ_SUPPORT will allow you to directly read
# .pcf.gz files, but it will add libz to the size of the server
####################################################################
HAVE_PCF_SUPPORT = Y
HAVE_PCFGZ_SUPPORT = N
PCF_FONT_DIR = "/phone/lib/X11/fonts/misc"
不知道為何 gzipped 的 PCF 字型在我的系統中讀到的資料是不正確的.
應該跟gzopen, gzread, gzseek, gzclose有關.所以不支援囉.
注意!!!PCF_FONT_DIR要正確設定到PCF字型所在的路徑
2.src/engine/devfont.c
關於轉碼的部份在
int GdConvertEncoding(const void *istr, MWTEXTFLAGS iflags, int cc, void *ostr,
MWTEXTFLAGS oflags)
看起來是有支援UTF-8 to Unicode, 實際上轉出來的碼是不正確的
3.src/engine/font_pcf.c
參考
http://tw.myblog.yahoo.com/stevegigijoe/article?mid=55&prev=56&next=54
4.UTF-8 to UCS-2
/* Set to 1 to turn bad UTF-8 bytes into ISO-8859-1. If this is set to
zero they are instead turned into the Unicode REPLACEMENT CHARACTER,
value 0xfffd.
If this is on, utf8decode will correctly map most (perhaps all)
human-readable text that is in ISO-8859-1. This may allow you
to completely ignore character sets in your code because virtually
everything is either ISO-8859-1 or UTF-8.
*/
#define ERRORS_TO_ISO8859_1 1
/* Set to 1 to turn bad UTF-8 bytes in the 0x80-0x9f range into the
Unicode code point of the same byte in Microsoft's CP1252 character
set. You should also set ERRORS_TO_ISO8859_1. With this on, a much
larger amount of existing text (such as most web pages) is correctly
converted to Unicode.
*/
#define ERRORS_TO_CP1252 1
/* A number of Unicode code points are in fact illegal and should not
be produced by a UTF-8 converter. Turning this on will replace the
bytes of those encodings with errors. If you do this then converting
arbitrary 16-bit data to UTF-8 and then back is not an identity,
which will probably break a lot of software.
*/
#define STRICT_RFC3629 0
#if ERRORS_TO_CP1252
// Codes 0x80..0x9f from the Microsoft CP1252 character set, translated
// to Unicode:
/* Read-only lookup table: entry i is the Unicode code point for CP1252
   byte 0x80+i. Declared const so it cannot be modified by accident. */
static const unsigned short cp1252[32] = {
  0x20ac, 0x0081, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
  0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008d, 0x017d, 0x008f,
  0x0090, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
  0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x009d, 0x017e, 0x0178
};
#endif
/*! Decode a single UTF-8 encoded character starting at \e p. The
  resulting Unicode value (in the range 0-0x10ffff) is returned,
  and \e len is set to the number of bytes in the UTF-8 encoding
  (adding \e len to \e p will point at the next character).

  \e p must point at a readable byte (the caller guarantees p < end);
  the bytes after it are only read while they lie before \e end.

  If \e p points at an illegal UTF-8 encoding, including one that
  would go past \e end, or one that uses more bytes than necessary,
  then \e len is set to 1 and the single byte is translated according
  to the compile-time switches:
  - with ERRORS_TO_CP1252, bytes 0x80-0x9f map through the cp1252 table;
  - with ERRORS_TO_ISO8859_1, any other bad byte is returned unchanged;
  - otherwise 0xfffd (REPLACEMENT CHARACTER) is returned.

  Treating errors this way allows this to decode almost any
  ISO-8859-1 or CP1252 text that has been mistakenly placed where
  UTF-8 is expected.

  If you want errors to be converted to error characters (as the
  standards recommend), adding a test to see if the length is
  unexpectedly 1 will work:
  \code
  if (*p & 0x80) { // what should be a multibyte encoding
    code = utf8decode(p,end,&len);
    if (len<2) code = 0xFFFD; // Turn errors into REPLACEMENT CHARACTER
  } else { // handle the 1-byte utf8 encoding:
    code = *p;
    len = 1;
  }
  \endcode
  Direct testing for the 1-byte case (as shown above) will also
  speed up the scanning of strings where the majority of characters
  are ASCII.
*/
unsigned utf8decode(const char* p, const char* end, int* len)
{
  /* Work on unsigned bytes so comparisons and masks never see a
     negative char value. */
  const unsigned char* s = (const unsigned char*)p;
  const unsigned char c = s[0];

  if (c < 0x80) {               /* plain ASCII */
    *len = 1;
    return c;
  }
#if ERRORS_TO_CP1252
  if (c < 0xa0) {               /* never a valid lead byte: treat as CP1252 */
    *len = 1;
    return cp1252[c - 0x80];
  }
#endif
  if (c < 0xc2) goto FAIL;      /* stray continuation byte or overlong lead */

  /* Every remaining form needs at least one continuation byte. */
  if (p + 1 >= end || (s[1] & 0xc0) != 0x80) goto FAIL;

  if (c < 0xe0) {               /* 2-byte sequence: U+0080..U+07FF */
    *len = 2;
    return ((c & 0x1fu) << 6) + (s[1] & 0x3fu);
  }

  if (c < 0xf0) {               /* 3-byte sequence: U+0800..U+FFFF */
    if (c == 0xe0 && s[1] < 0xa0) goto FAIL;    /* overlong encoding */
#if STRICT_RFC3629
    /* RFC 3629 says surrogate chars are illegal. */
    if (c == 0xed && s[1] >= 0xa0) goto FAIL;
#endif
    if (p + 2 >= end || (s[2] & 0xc0) != 0x80) goto FAIL;
#if STRICT_RFC3629
    /* 0xfffe and 0xffff are also illegal characters. This test runs
       after the bounds check above so s[2] is never read past end. */
    if (c == 0xef && s[1] == 0xbf && s[2] >= 0xbe) goto FAIL;
#endif
    *len = 3;
    return ((c & 0x0fu) << 12) +
           ((s[1] & 0x3fu) << 6) +
           (s[2] & 0x3fu);
  }

  if (c <= 0xf4) {              /* 4-byte sequence: U+10000..U+10FFFF */
    if (c == 0xf0 && s[1] < 0x90) goto FAIL;    /* overlong encoding */
    if (c == 0xf4 && s[1] > 0x8f) goto FAIL;    /* after 0x10ffff */
    if (p + 3 >= end ||
        (s[2] & 0xc0) != 0x80 ||
        (s[3] & 0xc0) != 0x80) goto FAIL;
#if STRICT_RFC3629
    /* RFC 3629 says all codes ending in fffe or ffff are illegal: */
    if ((s[1] & 0xf) == 0xf && s[2] == 0xbf && s[3] >= 0xbe) goto FAIL;
#endif
    *len = 4;
    return ((c & 0x07u) << 18) +
           ((s[1] & 0x3fu) << 12) +
           ((s[2] & 0x3fu) << 6) +
           (s[3] & 0x3fu);
  }

  /* Lead bytes 0xf5..0xff fall through to here. */
FAIL:
  *len = 1;
#if ERRORS_TO_ISO8859_1
  return c;
#else
  return 0xfffd; /* Unicode REPLACEMENT CHARACTER */
#endif
}
/* One 16-bit character stored as two separate bytes. utf8to2b() fills
   byte1 with the high 8 bits of the code and byte2 with the low 8 bits. */
typedef struct { /* normal 16 bit characters are two bytes */
unsigned char byte1; /* high-order byte */
unsigned char byte2; /* low-order byte */
} XChar2b;
////////////////////////////////////////////////////////////////
// Things you can do once the font+size has been selected:
// I see no sign of "FontSets" working. Instead this supposedly will
// draw the correct letters if you happen to pick an iso10646-1 font.
//
// Convert the n bytes of UTF-8 at text into an array of XChar2b
// (high byte first). This is similar to utf8towc() but produces the
// two-byte structure and does not bother with surrogate pairs: any
// code above 0xffff is replaced by '?'.
//
// Returns null, leaving *charcount untouched, when the text needs no
// conversion (every byte is ASCII or an undecodable byte in
// 0xa0..0xff) or when the buffer cannot be allocated. Otherwise
// returns the converted characters and stores their number in
// *charcount.
//
// The returned pointer refers to a function-static buffer that is
// reused (and possibly reallocated) by the next call: the caller must
// not free it and must not keep it across calls.
static XChar2b* utf8to2b(const char* text, int n, int* charcount) {
  static XChar2b* buffer = 0;
  static int bufcount = 0;
  const char* const e = text + n;
  const char* p;
  int sawutf8 = 0;
  int count = 0;

  // Pass 1: count the characters and find out whether anything in the
  // text actually needs a 16-bit representation.
  for (p = text; p < e; count++) {
    const unsigned char c = *(const unsigned char*)p;
    if (c < 0x80) {
      p++;                              // ascii
    } else if (c < 0xa0) {
      sawutf8 = 1;                      // cp1252
      p++;
    } else if (c < 0xc2) {
      p++;                              // other bad code
    } else {
      int len = 1;
      utf8decode(p, e, &len);
      if (len > 1) sawutf8 = 1;
      else if (!len) len = 1;
      p += len;
    }
  }
  if (!sawutf8) return 0;

  // Grow the cached buffer if needed. The old buffer and its recorded
  // size are only replaced once the new allocation has succeeded, so a
  // failed malloc never leaves a dangling pointer behind.
  if (bufcount < count) {
    XChar2b* grown = malloc(sizeof *grown * (size_t)count);
    if (!grown) return 0;
    free(buffer);
    buffer = grown;
    bufcount = count;
  }
  *charcount = count;

  // Pass 2: decode. Every non-ASCII byte goes through utf8decode() so
  // that the 0x80..0x9f bytes which triggered the conversion in pass 1
  // receive the same error/CP1252 translation as everywhere else,
  // instead of being stored as raw byte values. utf8decode() consumes
  // exactly one byte for anything below 0xc2, so the character count
  // matches pass 1.
  count = 0;
  for (p = text; p < e; count++) {
    const unsigned char c = *(const unsigned char*)p;
    unsigned code;
    if (c < 0x80) {
      code = c;
      p++;
    } else {
      int len = 1;
      code = utf8decode(p, e, &len);
      if (code > 0xffff) code = '?';    // does not fit in 16 bits
      p += (len > 0) ? len : 1;
    }
    buffer[count].byte1 = (unsigned char)(code >> 8);
    buffer[count].byte2 = (unsigned char)(code & 0xff);
  }
  return buffer;
}
5.實際測試
int count = 0;
unsigned char buffer[512];
strcpy(buffer, "測試在Microwindow中顯示PCF字型");
XChar2b *uc16 = utf8to2b(buffer, strlen(buffer), &count);
GrText(main_wid, gc, 0, 0, uc16, count, GR_TFXCHAR2B|GR_TFBASELINE);
你好...又來打擾你了..這篇的第一點和第四點看不太懂。第一點的PCF_FONT_DIR=路徑是指到X11的fonts下嗎? 還是microwindows的fonts下?要用target board上的路徑還是用host上的路徑呢? 第四點是要修改那一個檔案呢? 再次麻煩你了......感激不盡
回覆刪除[版主回覆10/08/2009 12:09:34]PCF_FONT_DIR=路徑是你的target上字型擺放的位置.
第四點是測試程式的片段可以不用管他