You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
351 lines
8.2 KiB
351 lines
8.2 KiB
/****************************************************************************
|
|
**
|
|
** Implementation of TQUtf{8,16}Codec class
|
|
**
|
|
** Created : 981015
|
|
**
|
|
** Copyright (C) 1998-2008 Trolltech ASA. All rights reserved.
|
|
**
|
|
** This file is part of the tools module of the TQt GUI Toolkit.
|
|
**
|
|
** This file may be used under the terms of the GNU General
|
|
** Public License versions 2.0 or 3.0 as published by the Free
|
|
** Software Foundation and appearing in the files LICENSE.GPL2
|
|
** and LICENSE.GPL3 included in the packaging of this file.
|
|
** Alternatively you may (at your option) use any later version
|
|
** of the GNU General Public License if such license has been
|
|
** publicly approved by Trolltech ASA (or its successors, if any)
|
|
** and the KDE Free TQt Foundation.
|
|
**
|
|
** Please review the following information to ensure GNU General
|
|
** Public Licensing requirements will be met:
|
|
** http://trolltech.com/products/qt/licenses/licensing/opensource/.
|
|
** If you are unsure which license is appropriate for your use, please
|
|
** review the following information:
|
|
** http://trolltech.com/products/qt/licenses/licensing/licensingoverview
|
|
** or contact the sales department at sales@trolltech.com.
|
|
**
|
|
** This file may be used under the terms of the Q Public License as
|
|
** defined by Trolltech ASA and appearing in the file LICENSE.TQPL
|
|
** included in the packaging of this file. Licensees holding valid TQt
|
|
** Commercial licenses may use this file in accordance with the TQt
|
|
** Commercial License Agreement provided with the Software.
|
|
**
|
|
** This file is provided "AS IS" with NO WARRANTY OF ANY KIND,
|
|
** INCLUDING THE WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR
|
|
** A PARTICULAR PURPOSE. Trolltech reserves all rights not granted
|
|
** herein.
|
|
**
|
|
**********************************************************************/
|
|
|
|
#include "ntqutfcodec.h"
|
|
|
|
#ifndef QT_NO_TEXTCODEC
|
|
|
|
int TQUtf8Codec::mibEnum() const
|
|
{
|
|
return 106;
|
|
}
|
|
|
|
/*!
  Encodes \a uc as UTF-8 and returns the bytes as a NUL-terminated TQCString.

  On entry, a positive \a lenInOut limits the number of UTF-16 units of
  \a uc that are converted; a value <= 0 converts the whole string. On
  return \a lenInOut holds the number of bytes written, not counting the
  terminating NUL.

  A high surrogate immediately followed by a low surrogate is combined
  into one code point and written as a four-byte sequence. A surrogate
  that is not part of such a pair is written as a three-byte sequence.
*/
TQCString TQUtf8Codec::fromUnicode(const TQString& uc, int& lenInOut) const
{
    // Number of UTF-16 units to convert.
    int count = uc.length();
    if (lenInOut > 0)
        count = TQMIN(count, lenInOut);

    // No UTF-16 unit needs more than three bytes (a surrogate pair is two
    // units for four bytes); the extra byte holds the terminating NUL.
    int capacity = count*3 + 1;
    TQCString rstr(capacity);
    uchar* const start = (uchar*)rstr.data();
    uchar* out = start;
    const TQChar* const src = uc.unicode();

    int pos = 0;
    while (pos < count) {
        uint cp = src[pos].unicode();
        ++pos;

        // One byte: plain ASCII.
        if ( cp < 0x80 ) {
            *out++ = (uchar)cp;
            continue;
        }

        // Two bytes.
        if ( cp < 0x0800 ) {
            *out++ = (uchar)(0xc0 | (cp >> 6));
            *out++ = (uchar)(0x80 | (cp & 0x3f));
            continue;
        }

        // Merge a high surrogate with the low surrogate that follows it,
        // provided the low half is still inside the converted range.
        if (cp >= 0xd800 && cp < 0xdc00 && pos < count) {
            const unsigned short trail = src[pos].unicode();
            if (trail >= 0xdc00 && trail < 0xe000) {
                ++pos;
                cp = (cp - 0xd800)*0x400 + (trail - 0xdc00) + 0x10000;
            }
        }

        if (cp <= 0xffff) {
            // Three bytes: lead byte here, the two trailers below.
            *out++ = (uchar)(0xe0 | (cp >> 12));
        } else if (cp > 0x10fe00 && cp < 0x10ff00) {
            // see TQString::fromUtf8() and TQString::utf8() for explanations
            // Code points in this window are emitted as one raw byte.
            *out++ = (uchar)(cp - 0x10fe00);
            continue;
        } else {
            // Four bytes: lead byte and first trailer here, the rest below.
            *out++ = (uchar)(0xf0 | (cp >> 18));
            *out++ = (uchar)(0x80 | ((cp >> 12) & 0x3f));
        }
        *out++ = (uchar)(0x80 | ((cp >> 6) & 0x3f));
        *out++ = (uchar)(0x80 | (cp & 0x3f));
    }

    *out = 0;
    lenInOut = out - start;
    // Shrink the array to the bytes actually used plus the NUL.
    ((TQByteArray&)rstr).resize(lenInOut+1);
    return rstr;
}
|
|
|
|
/*!
  Decodes the first \a len bytes of \a chars as UTF-8 by passing them to
  TQString::fromUtf8() and returns the result.

  If the input starts with the UTF-8 byte order mark (bytes EF BB BF),
  the mark is skipped before decoding. This also applies when the mark
  is the entire input.
*/
TQString TQUtf8Codec::toUnicode(const char* chars, int len) const
{
    // The test is len >= 3 rather than len > 3 so that an input made up of
    // nothing but the three mark bytes is stripped as well.
    const bool startsWithMark = len >= 3
                                && (uchar)chars[0] == 0xef
                                && (uchar)chars[1] == 0xbb
                                && (uchar)chars[2] == 0xbf;
    if (startsWithMark) {
        // starts with a byte order mark
        chars += 3;
        len -= 3;
    }
    return TQString::fromUtf8( chars, len );
}
|
|
|
|
|
|
const char* TQUtf8Codec::name() const
|
|
{
|
|
return "UTF-8";
|
|
}
|
|
|
|
int TQUtf8Codec::heuristicContentMatch(const char* chars, int len) const
|
|
{
|
|
int score = 0;
|
|
for (int i=0; i<len; i++) {
|
|
uchar ch = chars[i];
|
|
// No nulls allowed.
|
|
if ( !ch )
|
|
return -1;
|
|
if ( ch < 128 ) {
|
|
// Inconclusive
|
|
score++;
|
|
} else if ( (ch&0xe0) == 0xc0 ) {
|
|
if ( i < len-1 ) {
|
|
uchar c2 = chars[++i];
|
|
if ( (c2&0xc0) != 0x80 )
|
|
return -1;
|
|
score+=3;
|
|
}
|
|
} else if ( (ch&0xf0) == 0xe0 ) {
|
|
if ( i < len-1 ) {
|
|
uchar c2 = chars[++i];
|
|
if ( (c2&0xc0) != 0x80 ) {
|
|
return -1;
|
|
#if 0
|
|
if ( i < len-1 ) {
|
|
uchar c3 = chars[++i];
|
|
if ( (c3&0xc0) != 0x80 )
|
|
return -1;
|
|
score+=3;
|
|
}
|
|
#endif
|
|
}
|
|
score+=2;
|
|
}
|
|
}
|
|
}
|
|
return score;
|
|
}
|
|
|
|
|
|
|
|
|
|
class TQUtf8Decoder : public TQTextDecoder {
|
|
uint uc;
|
|
uint min_uc;
|
|
int need;
|
|
bool headerDone;
|
|
public:
|
|
TQUtf8Decoder() : need(0), headerDone(FALSE)
|
|
{
|
|
}
|
|
|
|
TQString toUnicode(const char* chars, int len)
|
|
{
|
|
TQString result;
|
|
result.setLength( len + 1 ); // worst case
|
|
TQChar *qch = (TQChar *)result.unicode();
|
|
uchar ch;
|
|
int error = -1;
|
|
for (int i=0; i<len; i++) {
|
|
ch = chars[i];
|
|
if (need) {
|
|
if ( (ch&0xc0) == 0x80 ) {
|
|
uc = (uc << 6) | (ch & 0x3f);
|
|
need--;
|
|
if ( !need ) {
|
|
if (uc > 0xffff) {
|
|
// surrogate pair
|
|
uc -= 0x10000;
|
|
unsigned short high = uc/0x400 + 0xd800;
|
|
unsigned short low = uc%0x400 + 0xdc00;
|
|
*qch++ = TQChar(high);
|
|
*qch++ = TQChar(low);
|
|
headerDone = TRUE;
|
|
} else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) {
|
|
*qch++ = TQChar::replacement;
|
|
} else {
|
|
if (headerDone || TQChar(uc) != TQChar::byteOrderMark)
|
|
*qch++ = uc;
|
|
headerDone = TRUE;
|
|
}
|
|
}
|
|
} else {
|
|
// error
|
|
i = error;
|
|
*qch++ = TQChar::replacement;
|
|
need = 0;
|
|
}
|
|
} else {
|
|
if ( ch < 128 ) {
|
|
*qch++ = ch;
|
|
headerDone = TRUE;
|
|
} else if ((ch & 0xe0) == 0xc0) {
|
|
uc = ch & 0x1f;
|
|
need = 1;
|
|
error = i;
|
|
min_uc = 0x80;
|
|
} else if ((ch & 0xf0) == 0xe0) {
|
|
uc = ch & 0x0f;
|
|
need = 2;
|
|
error = i;
|
|
min_uc = 0x800;
|
|
} else if ((ch&0xf8) == 0xf0) {
|
|
uc = ch & 0x07;
|
|
need = 3;
|
|
error = i;
|
|
min_uc = 0x10000;
|
|
} else {
|
|
// error
|
|
*qch++ = TQChar::replacement;
|
|
}
|
|
}
|
|
}
|
|
result.truncate( qch - result.unicode() );
|
|
return result;
|
|
}
|
|
};
|
|
|
|
/*!
  Creates a new TQUtf8Decoder with \c new and returns it. This function
  keeps no reference to the object and does not delete it.
*/
TQTextDecoder* TQUtf8Codec::makeDecoder() const
{
    TQTextDecoder* decoder = new TQUtf8Decoder;
    return decoder;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int TQUtf16Codec::mibEnum() const
|
|
{
|
|
return 1000;
|
|
}
|
|
|
|
const char* TQUtf16Codec::name() const
|
|
{
|
|
return "ISO-10646-UCS-2";
|
|
}
|
|
|
|
int TQUtf16Codec::heuristicContentMatch(const char* chars, int len) const
|
|
{
|
|
uchar* uchars = (uchar*)chars;
|
|
if ( len >= 2 && ((uchars[0] == 0xff && uchars[1] == 0xfe) ||
|
|
(uchars[1] == 0xff && uchars[0] == 0xfe)) )
|
|
return len;
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
|
|
|
|
|
|
class TQUtf16Encoder : public TQTextEncoder {
|
|
bool headerdone;
|
|
public:
|
|
TQUtf16Encoder() : headerdone(FALSE)
|
|
{
|
|
}
|
|
|
|
TQCString fromUnicode(const TQString& uc, int& lenInOut)
|
|
{
|
|
if ( headerdone ) {
|
|
lenInOut = uc.length()*sizeof(TQChar);
|
|
TQCString d(lenInOut);
|
|
memcpy(d.data(),uc.unicode(),lenInOut);
|
|
return d;
|
|
} else {
|
|
headerdone = TRUE;
|
|
lenInOut = (1+uc.length())*sizeof(TQChar);
|
|
TQCString d(lenInOut);
|
|
memcpy(d.data(),&TQChar::byteOrderMark,sizeof(TQChar));
|
|
memcpy(d.data()+sizeof(TQChar),uc.unicode(),uc.length()*sizeof(TQChar));
|
|
return d;
|
|
}
|
|
}
|
|
};
|
|
|
|
class TQUtf16Decoder : public TQTextDecoder {
|
|
uchar buf;
|
|
bool half;
|
|
bool swap;
|
|
bool headerdone;
|
|
|
|
public:
|
|
TQUtf16Decoder() : half(FALSE), swap(FALSE), headerdone(FALSE)
|
|
{
|
|
}
|
|
|
|
TQString toUnicode(const char* chars, int len)
|
|
{
|
|
TQString result;
|
|
result.setLength( len + 1 ); // worst case
|
|
TQChar *qch = (TQChar *)result.unicode();
|
|
TQChar ch;
|
|
while ( len-- ) {
|
|
if ( half ) {
|
|
if ( swap ) {
|
|
ch.setRow( *chars++ );
|
|
ch.setCell( buf );
|
|
} else {
|
|
ch.setRow( buf );
|
|
ch.setCell( *chars++ );
|
|
}
|
|
if ( !headerdone ) {
|
|
if ( ch == TQChar::byteOrderSwapped ) {
|
|
swap = !swap;
|
|
} else if ( ch == TQChar::byteOrderMark ) {
|
|
// Ignore ZWNBSP
|
|
} else {
|
|
*qch++ = ch;
|
|
}
|
|
headerdone = TRUE;
|
|
} else
|
|
*qch++ = ch;
|
|
half = FALSE;
|
|
} else {
|
|
buf = *chars++;
|
|
half = TRUE;
|
|
}
|
|
}
|
|
result.truncate( qch - result.unicode() );
|
|
return result;
|
|
}
|
|
};
|
|
|
|
/*!
  Creates a new TQUtf16Decoder with \c new and returns it. This function
  keeps no reference to the object and does not delete it.
*/
TQTextDecoder* TQUtf16Codec::makeDecoder() const
{
    TQTextDecoder* decoder = new TQUtf16Decoder;
    return decoder;
}
|
|
|
|
/*!
  Creates a new TQUtf16Encoder with \c new and returns it. This function
  keeps no reference to the object and does not delete it.
*/
TQTextEncoder* TQUtf16Codec::makeEncoder() const
{
    TQTextEncoder* encoder = new TQUtf16Encoder;
    return encoder;
}
|
|
|
|
#endif //QT_NO_TEXTCODEC
|