You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
3122 lines
103 KiB
3122 lines
103 KiB
13 years ago
|
/****************************************************************************
|
||
|
**
|
||
|
** Implementation of TQTextCodec class
|
||
|
**
|
||
|
** Created : 981015
|
||
|
**
|
||
|
** Copyright (C) 1998-2008 Trolltech ASA. All rights reserved.
|
||
|
**
|
||
|
** This file is part of the tools module of the TQt GUI Toolkit.
|
||
|
**
|
||
|
** This file may be used under the terms of the GNU General
|
||
|
** Public License versions 2.0 or 3.0 as published by the Free
|
||
|
** Software Foundation and appearing in the files LICENSE.GPL2
|
||
|
** and LICENSE.GPL3 included in the packaging of this file.
|
||
|
** Alternatively you may (at your option) use any later version
|
||
|
** of the GNU General Public License if such license has been
|
||
|
** publicly approved by Trolltech ASA (or its successors, if any)
|
||
|
** and the KDE Free TQt Foundation.
|
||
|
**
|
||
|
** Please review the following information to ensure GNU General
|
||
|
** Public Licensing requirements will be met:
|
||
|
** http://trolltech.com/products/qt/licenses/licensing/opensource/.
|
||
|
** If you are unsure which license is appropriate for your use, please
|
||
|
** review the following information:
|
||
|
** http://trolltech.com/products/qt/licenses/licensing/licensingoverview
|
||
|
** or contact the sales department at sales@trolltech.com.
|
||
|
**
|
||
|
** This file may be used under the terms of the Q Public License as
|
||
|
** defined by Trolltech ASA and appearing in the file LICENSE.TQPL
|
||
|
** included in the packaging of this file. Licensees holding valid TQt
|
||
|
** Commercial licenses may use this file in accordance with the TQt
|
||
|
** Commercial License Agreement provided with the Software.
|
||
|
**
|
||
|
** This file is provided "AS IS" with NO WARRANTY OF ANY KIND,
|
||
|
** INCLUDING THE WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR
|
||
|
** A PARTICULAR PURPOSE. Trolltech reserves all rights not granted
|
||
|
** herein.
|
||
|
**
|
||
|
**********************************************************************/
|
||
|
|
||
|
#include "qplatformdefs.h"
|
||
|
|
||
|
// UNIX Large File Support redefines open -> open64
|
||
|
#if defined(open)
|
||
|
# undef open
|
||
|
#endif
|
||
|
|
||
|
#include "qtextcodec.h"
|
||
|
#ifndef QT_NO_TEXTCODEC
|
||
|
|
||
|
#include "qvaluelist.h"
|
||
|
#include "qtextcodecfactory.h"
|
||
|
#include "qutfcodec.h"
|
||
|
#include "qnamespace.h"
|
||
|
#ifndef QT_NO_CODECS
|
||
|
#include "qrtlcodec.h"
|
||
|
#include "qtsciicodec.h"
|
||
|
#include "qisciicodec_p.h"
|
||
|
#endif // QT_NO_CODECS
|
||
|
#ifndef QT_NO_BIG_CODECS
|
||
|
#include "qbig5codec.h"
|
||
|
#include "qeucjpcodec.h"
|
||
|
#include "qeuckrcodec.h"
|
||
|
#include "qgb18030codec.h"
|
||
|
#include "qjiscodec.h"
|
||
|
#include "qjpunicode.h"
|
||
|
#include "qsjiscodec.h"
|
||
|
#endif // QT_NO_BIG_CODECS
|
||
|
#include "qfile.h"
|
||
|
#include "qstrlist.h"
|
||
|
#include "qstring.h"
|
||
|
#include "../tools/qlocale_p.h"
|
||
|
|
||
|
#if !defined(QT_NO_CODECS) && !defined(QT_NO_BIG_CODECS) && defined(Q_WS_X11)
|
||
|
# include "qfontcodecs_p.h"
|
||
|
#endif
|
||
|
|
||
|
#ifdef QT_THREAD_SUPPORT
|
||
|
# include <private/qmutexpool_p.h>
|
||
|
#endif // QT_THREAD_SUPPORT
|
||
|
|
||
|
#include <stdlib.h>
|
||
|
#include <ctype.h>
|
||
|
#ifndef Q_OS_TEMP
|
||
|
#include <locale.h>
|
||
|
#endif
|
||
|
#if defined(_XOPEN_UNIX) && !defined(Q_OS_QNX6)
|
||
|
#include <langinfo.h>
|
||
|
#endif
|
||
|
|
||
|
static TQValueList<TQTextCodec*> *all = 0;
|
||
|
static bool destroying_is_ok; // starts out as 0
|
||
|
static TQTextCodec * localeMapper = 0;
|
||
|
|
||
|
class TQTextCodecCleanup {
|
||
|
public:
|
||
|
~TQTextCodecCleanup() {
|
||
|
TQTextCodec::deleteAllCodecs();
|
||
|
}
|
||
|
};
|
||
|
static TQTextCodecCleanup qtextcodec_cleanup;
|
||
|
|
||
|
/*!
|
||
|
Deletes all the created codecs.
|
||
|
|
||
|
\warning Do not call this function.
|
||
|
|
||
|
TQApplication calls this function just before exiting to delete
|
||
|
any TQTextCodec objects that may be lying around. Since various
|
||
|
other classes hold pointers to TQTextCodec objects, it is not safe
|
||
|
to call this function earlier.
|
||
|
|
||
|
If you are using the utility classes (like TQString) but not using
|
||
|
TQApplication, calling this function at the very end of your
|
||
|
application may be helpful for chasing down memory leaks by
|
||
|
eliminating any TQTextCodec objects.
|
||
|
*/
|
||
|
|
||
|
void TQTextCodec::deleteAllCodecs()
|
||
|
{
|
||
|
if ( !all )
|
||
|
return;
|
||
|
|
||
|
#ifdef QT_THREAD_SUPPORT
|
||
|
TQMutexLocker locker( qt_global_mutexpool ?
|
||
|
qt_global_mutexpool->get( &all ) : 0 );
|
||
|
if ( !all )
|
||
|
return;
|
||
|
#endif // QT_THREAD_SUPPORT
|
||
|
|
||
|
destroying_is_ok = TRUE;
|
||
|
|
||
|
TQValueList<TQTextCodec*> *ball = all;
|
||
|
all = 0;
|
||
|
TQValueList<TQTextCodec*>::Iterator it;
|
||
|
for ( it = ball->begin(); it != ball->end(); ++it ) {
|
||
|
delete *it;
|
||
|
*it = 0;
|
||
|
}
|
||
|
ball->clear();
|
||
|
delete ball;
|
||
|
|
||
|
destroying_is_ok = FALSE;
|
||
|
}
|
||
|
|
||
|
|
||
|
static void realSetup();
|
||
|
|
||
|
|
||
|
static inline void setup()
|
||
|
{
|
||
|
if ( all ) return;
|
||
|
|
||
|
#ifdef QT_THREAD_SUPPORT
|
||
|
TQMutexLocker locker( qt_global_mutexpool ?
|
||
|
qt_global_mutexpool->get( &all ) : 0 );
|
||
|
if ( all ) return;
|
||
|
#endif // QT_THREAD_SUPPORT
|
||
|
|
||
|
realSetup();
|
||
|
}
|
||
|
|
||
|
|
||
|
class TQTextStatelessEncoder: public TQTextEncoder {
|
||
|
const TQTextCodec* codec;
|
||
|
public:
|
||
|
TQTextStatelessEncoder(const TQTextCodec*);
|
||
|
TQCString fromUnicode(const TQString& uc, int& lenInOut);
|
||
|
};
|
||
|
|
||
|
|
||
|
class TQTextStatelessDecoder : public TQTextDecoder {
|
||
|
const TQTextCodec* codec;
|
||
|
public:
|
||
|
TQTextStatelessDecoder(const TQTextCodec*);
|
||
|
TQString toUnicode(const char* chars, int len);
|
||
|
};
|
||
|
|
||
|
TQTextStatelessEncoder::TQTextStatelessEncoder(const TQTextCodec* c) :
|
||
|
codec(c)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
|
||
|
TQCString TQTextStatelessEncoder::fromUnicode(const TQString& uc, int& lenInOut)
|
||
|
{
|
||
|
return codec->fromUnicode(uc,lenInOut);
|
||
|
}
|
||
|
|
||
|
|
||
|
TQTextStatelessDecoder::TQTextStatelessDecoder(const TQTextCodec* c) :
|
||
|
codec(c)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
|
||
|
TQString TQTextStatelessDecoder::toUnicode(const char* chars, int len)
|
||
|
{
|
||
|
return codec->toUnicode(chars,len);
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
/*!
|
||
|
\class TQTextCodec qtextcodec.h
|
||
|
\brief The TQTextCodec class provides conversion between text encodings.
|
||
|
\reentrant
|
||
|
\ingroup i18n
|
||
|
|
||
|
TQt uses Unicode to store, draw and manipulate strings. In many
|
||
|
situations you may wish to deal with data that uses a different
|
||
|
encoding. For example, most Japanese documents are still stored in
|
||
|
Shift-JIS or ISO2022, while Russian users often have their
|
||
|
documents in KOI8-R or CP1251.
|
||
|
|
||
|
TQt provides a set of TQTextCodec classes to help with converting
|
||
|
non-Unicode formats to and from Unicode. You can also create your
|
||
|
own codec classes (\link #subclassing see later\endlink).
|
||
|
|
||
|
The supported encodings are:
|
||
|
\list
|
||
|
\i Latin1
|
||
|
\i Big5 -- Chinese
|
||
|
\i Big5-HKSCS -- Chinese
|
||
|
\i eucJP -- Japanese
|
||
|
\i eucKR -- Korean
|
||
|
\i GB2312 -- Chinese
|
||
|
\i GBK -- Chinese
|
||
|
\i GB18030 -- Chinese
|
||
|
\i JIS7 -- Japanese
|
||
|
\i Shift-JIS -- Japanese
|
||
|
\i TSCII -- Tamil
|
||
|
\i utf8 -- Unicode, 8-bit
|
||
|
\i utf16 -- Unicode
|
||
|
\i KOI8-R -- Russian
|
||
|
\i KOI8-U -- Ukrainian
|
||
|
\i ISO8859-1 -- Western
|
||
|
\i ISO8859-2 -- Central European
|
||
|
\i ISO8859-3 -- South European
|
||
|
\i ISO8859-4 -- Baltic
|
||
|
\i ISO8859-5 -- Cyrillic
|
||
|
\i ISO8859-6 -- Arabic
|
||
|
\i ISO8859-7 -- Greek
|
||
|
\i ISO8859-8 -- Hebrew, visually ordered
|
||
|
\i ISO8859-8-i -- Hebrew, logically ordered
|
||
|
\i ISO8859-9 -- Turkish
|
||
|
\i ISO8859-10
|
||
|
\i ISO8859-13
|
||
|
\i ISO8859-14
|
||
|
\i ISO8859-15 -- Western
|
||
|
\i IBM 850
|
||
|
\i IBM 866
|
||
|
\i CP874
|
||
|
\i CP1250 -- Central European
|
||
|
\i CP1251 -- Cyrillic
|
||
|
\i CP1252 -- Western
|
||
|
\i CP1253 -- Greek
|
||
|
\i CP1254 -- Turkish
|
||
|
\i CP1255 -- Hebrew
|
||
|
\i CP1256 -- Arabic
|
||
|
\i CP1257 -- Baltic
|
||
|
\i CP1258
|
||
|
\i Apple Roman
|
||
|
\i TIS-620 -- Thai
|
||
|
\endlist
|
||
|
|
||
|
TQTextCodecs can be used as follows to convert some locally encoded
|
||
|
string to Unicode. Suppose you have some string encoded in Russian
|
||
|
KOI8-R encoding, and want to convert it to Unicode. The simple way
|
||
|
to do this is:
|
||
|
|
||
|
\code
|
||
|
TQCString locallyEncoded = "..."; // text to convert
|
||
|
TQTextCodec *codec = TQTextCodec::codecForName("KOI8-R"); // get the codec for KOI8-R
|
||
|
TQString unicodeString = codec->toUnicode( locallyEncoded );
|
||
|
\endcode
|
||
|
|
||
|
After this, \c{unicodeString} holds the text converted to Unicode.
|
||
|
Converting a string from Unicode to the local encoding is just as
|
||
|
easy:
|
||
|
|
||
|
\code
|
||
|
TQString unicodeString = "..."; // any Unicode text
|
||
|
TQTextCodec *codec = TQTextCodec::codecForName("KOI8-R"); // get the codec for KOI8-R
|
||
|
TQCString locallyEncoded = codec->fromUnicode( unicodeString );
|
||
|
\endcode
|
||
|
|
||
|
Some care must be taken when trying to convert the data in chunks,
|
||
|
for example, when receiving it over a network. In such cases it is
|
||
|
possible that a multi-byte character will be split over two
|
||
|
chunks. At best this might result in the loss of a character and
|
||
|
at worst cause the entire conversion to fail.
|
||
|
|
||
|
The approach to use in these situations is to create a TQTextDecoder
|
||
|
object for the codec and use this TQTextDecoder for the whole
|
||
|
decoding process, as shown below:
|
||
|
|
||
|
\code
|
||
|
TQTextCodec *codec = TQTextCodec::codecForName( "Shift-JIS" );
|
||
|
TQTextDecoder *decoder = codec->makeDecoder();
|
||
|
|
||
|
TQString unicodeString;
|
||
|
while( receiving_data ) {
|
||
|
TQByteArray chunk = new_data;
|
||
|
unicodeString += decoder->toUnicode( chunk.data(), chunk.length() );
|
||
|
}
|
||
|
\endcode
|
||
|
|
||
|
The TQTextDecoder object maintains state between chunks and therefore
|
||
|
works correctly even if a multi-byte character is split between
|
||
|
chunks.
|
||
|
|
||
|
\target subclassing
|
||
|
\section1 Creating your own Codec class
|
||
|
|
||
|
Support for new text encodings can be added to TQt by creating
|
||
|
TQTextCodec subclasses.
|
||
|
|
||
|
Built-in codecs can be overridden by custom codecs since more
|
||
|
recently created TQTextCodec objects take precedence over earlier
|
||
|
ones.
|
||
|
|
||
|
You may find it more convenient to make your codec class available
|
||
|
as a plugin; see the \link plugins-howto.html plugin
|
||
|
documentation\endlink for more details.
|
||
|
|
||
|
The abstract virtual functions describe the encoder to the
|
||
|
system and the coder is used as required in the different
|
||
|
text file formats supported by TQTextStream, and under X11, for the
|
||
|
locale-specific character input and output.
|
||
|
|
||
|
To add support for another 8-bit encoding to TQt, make a subclass
|
||
|
of TQTextCodec and implement at least the following methods:
|
||
|
|
||
|
\code
|
||
|
const char* name() const
|
||
|
\endcode
|
||
|
Return the official name for the encoding.
|
||
|
|
||
|
\code
|
||
|
int mibEnum() const
|
||
|
\endcode
|
||
|
Return the MIB enum for the encoding if it is listed in the
|
||
|
\link http://www.iana.org/assignments/character-sets
|
||
|
IANA character-sets encoding file\endlink.
|
||
|
|
||
|
If the encoding is multi-byte then it will have "state"; that is,
|
||
|
the interpretation of some bytes will be dependent on some preceding
|
||
|
bytes. For such encodings, you must implement:
|
||
|
|
||
|
\code
|
||
|
TQTextDecoder* makeDecoder() const
|
||
|
\endcode
|
||
|
Return a TQTextDecoder that remembers incomplete multi-byte sequence
|
||
|
prefixes or other required state.
|
||
|
|
||
|
If the encoding does \e not require state, you should implement:
|
||
|
|
||
|
\code
|
||
|
TQString toUnicode(const char* chars, int len) const
|
||
|
\endcode
|
||
|
Converts \e len characters from \e chars to Unicode.
|
||
|
|
||
|
The base TQTextCodec class has default implementations of the above
|
||
|
two functions, \e{but they are mutually recursive}, so you must
|
||
|
re-implement at least one of them, or both for improved efficiency.
|
||
|
|
||
|
For conversion from Unicode to 8-bit encodings, it is rarely necessary
|
||
|
to maintain state. However, two functions similar to the two above
|
||
|
are used for encoding:
|
||
|
|
||
|
\code
|
||
|
TQTextEncoder* makeEncoder() const
|
||
|
\endcode
|
||
|
Return a TQTextEncoder.
|
||
|
|
||
|
\code
|
||
|
TQCString fromUnicode(const TQString& uc, int& lenInOut ) const
|
||
|
\endcode
|
||
|
Converts \e lenInOut characters (of type TQChar) from the start of
|
||
|
the string \e uc, returning a TQCString result, and also returning
|
||
|
the \link TQCString::length() length\endlink of the result in
|
||
|
\e lenInOut.
|
||
|
|
||
|
Again, these are mutually recursive so only one needs to be implemented,
|
||
|
or both if greater efficiency is possible.
|
||
|
|
||
|
Finally, you must implement:
|
||
|
|
||
|
\code
|
||
|
int heuristicContentMatch(const char* chars, int len) const
|
||
|
\endcode
|
||
|
Gives a value indicating how likely it is that \e len characters
|
||
|
from \e chars are in the encoding.
|
||
|
|
||
|
A good model for this function is the
|
||
|
TQWindowsLocalCodec::heuristicContentMatch function found in the TQt
|
||
|
sources.
|
||
|
|
||
|
A TQTextCodec subclass might have improved performance if you also
|
||
|
re-implement:
|
||
|
|
||
|
\code
|
||
|
bool canEncode( TQChar ) const
|
||
|
\endcode
|
||
|
Test if a Unicode character can be encoded.
|
||
|
|
||
|
\code
|
||
|
bool canEncode( const TQString& ) const
|
||
|
\endcode
|
||
|
Test if a string of Unicode characters can be encoded.
|
||
|
|
||
|
\code
|
||
|
int heuristicNameMatch(const char* hint) const
|
||
|
\endcode
|
||
|
Test if a possibly non-standard name is referring to the codec.
|
||
|
|
||
|
Codecs can also be created as \link plugins-howto.html plugins\endlink.
|
||
|
*/
|
||
|
|
||
|
|
||
|
/*!
|
||
|
\nonreentrant
|
||
|
|
||
|
Constructs a TQTextCodec, and gives it the highest precedence. The
|
||
|
TQTextCodec should always be constructed on the heap (i.e. with \c
|
||
|
new). TQt takes ownership and will delete it when the application
|
||
|
terminates.
|
||
|
*/
|
||
|
TQTextCodec::TQTextCodec()
{
    // Make sure the global codec list exists before registering.
    setup();

    // Insert at the head of the list so the newest codec is found first.
    TQValueList<TQTextCodec*>::Iterator head = all->begin();
    all->insert( head, this );
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
\nonreentrant
|
||
|
|
||
|
Destroys the TQTextCodec. Note that you should not delete codecs
|
||
|
yourself: once created they become TQt's responsibility.
|
||
|
*/
|
||
|
TQTextCodec::~TQTextCodec()
{
    // destroying_is_ok is only raised while deleteAllCodecs() runs; any
    // other deletion is reported.
    if ( !destroying_is_ok ) {
        qWarning("TQTextCodec::~TQTextCodec() called by application");
    }

    // Unregister from the global list if it still exists.
    if ( all ) {
        all->remove( this );
    }
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
Returns a value indicating how likely it is that this decoder is
|
||
|
appropriate for decoding some format that has the given name. The
|
||
|
name is compared with the \a hint.
|
||
|
|
||
|
A good match returns a positive number around the length of the
|
||
|
string. A bad match is negative.
|
||
|
|
||
|
The default implementation calls simpleHeuristicNameMatch() with
|
||
|
the name of the codec.
|
||
|
*/
|
||
|
int TQTextCodec::heuristicNameMatch(const char* hint) const
|
||
|
{
|
||
|
return simpleHeuristicNameMatch(name(),hint);
|
||
|
}
|
||
|
|
||
|
|
||
|
// returns a string containing the letters and numbers from input,
|
||
|
// with a space separating run of a character class. e.g. "iso8859-1"
|
||
|
// becomes "iso 8859 1"
|
||
|
static TQString lettersAndNumbers( const char * input )
|
||
|
{
|
||
|
TQString result;
|
||
|
TQChar c;
|
||
|
|
||
|
while( input && *input ) {
|
||
|
c = *input;
|
||
|
if ( c.isLetter() || c.isNumber() )
|
||
|
result += c.lower();
|
||
|
if ( input[1] ) {
|
||
|
// add space at character class transition, except
|
||
|
// transition from upper-case to lower-case letter
|
||
|
TQChar n( input[1] );
|
||
|
if ( c.isLetter() && n.isLetter() ) {
|
||
|
if ( c == c.lower() && n == n.upper() )
|
||
|
result += ' ';
|
||
|
} else if ( c.category() != n.category() ) {
|
||
|
result += ' ';
|
||
|
}
|
||
|
}
|
||
|
input++;
|
||
|
}
|
||
|
return result.simplifyWhiteSpace();
|
||
|
}
|
||
|
|
||
|
/*!
|
||
|
A simple utility function for heuristicNameMatch(): it does some
|
||
|
very minor character-skipping so that almost-exact matches score
|
||
|
high. \a name is the text we're matching and \a hint is used for
|
||
|
the comparison.
|
||
|
*/
|
||
|
int TQTextCodec::simpleHeuristicNameMatch(const char* name, const char* hint)
|
||
|
{
|
||
|
// if they're the same, return a perfect score.
|
||
|
if ( name && hint && *name && *hint && qstricmp( name, hint ) == 0 )
|
||
|
return qstrlen( hint );
|
||
|
|
||
|
// if the letters and numbers are the same, we have an "almost"
|
||
|
// perfect match.
|
||
|
TQString h( lettersAndNumbers( hint ) );
|
||
|
TQString n( lettersAndNumbers( name ) );
|
||
|
if ( h == n )
|
||
|
return qstrlen( hint )-1;
|
||
|
|
||
|
if ( h.stripWhiteSpace() == n.stripWhiteSpace() )
|
||
|
return qstrlen( hint )-2;
|
||
|
|
||
|
// could do some more here, but I don't think it's worth it
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
Returns the TQTextCodec \a i positions from the most recently
|
||
|
inserted codec, or 0 if there is no such TQTextCodec. Thus,
|
||
|
codecForIndex(0) returns the most recently created TQTextCodec.
|
||
|
*/
|
||
|
TQTextCodec* TQTextCodec::codecForIndex(int i)
{
    setup();

    // The unsigned cast makes a negative index compare as out of range.
    if ( (uint)i >= all->count() )
        return 0;

    return *all->at(i);
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
Returns the TQTextCodec which matches the \link
|
||
|
TQTextCodec::mibEnum() MIBenum\endlink \a mib.
|
||
|
*/
|
||
|
TQTextCodec* TQTextCodec::codecForMib(int mib)
{
    setup();

    // First look through the codecs that are already registered.
    TQValueList<TQTextCodec*>::ConstIterator i;
    for ( i = all->begin(); i != all->end(); ++i ) {
        TQTextCodec* candidate = *i;
        if ( candidate->mibEnum() == mib )
            return candidate;
    }

    // No registered codec matched.  The previous implementation kept the
    // last codec visited by the loop in its result variable and returned
    // it when the factory could not help, i.e. an unrelated codec.  Now
    // the result is either what the factory creates or 0.
#if !defined(QT_NO_COMPONENT) && !defined(QT_LITE_COMPONENT)
    return TQTextCodecFactory::createForMib(mib);
#else
    return 0;
#endif // !QT_NO_COMPONENT !QT_LITE_COMPONENT
}
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
#ifdef Q_OS_WIN32
|
||
|
class TQWindowsLocalCodec: public TQTextCodec
|
||
|
{
|
||
|
public:
|
||
|
TQWindowsLocalCodec();
|
||
|
~TQWindowsLocalCodec();
|
||
|
|
||
|
TQString toUnicode(const char* chars, int len) const;
|
||
|
TQCString fromUnicode(const TQString& uc, int& lenInOut ) const;
|
||
|
|
||
|
const char* name() const;
|
||
|
int mibEnum() const;
|
||
|
|
||
|
int heuristicContentMatch(const char* chars, int len) const;
|
||
|
|
||
|
TQTextDecoder* makeDecoder() const;
|
||
|
};
|
||
|
|
||
|
TQWindowsLocalCodec::TQWindowsLocalCodec()
|
||
|
{
|
||
|
}
|
||
|
|
||
|
TQWindowsLocalCodec::~TQWindowsLocalCodec()
|
||
|
{
|
||
|
}
|
||
|
|
||
|
|
||
|
TQString TQWindowsLocalCodec::toUnicode(const char* chars, int len) const
{
    // Single byte: convert from a small stack buffer instead of
    // building a TQCString.
    if ( chars && len == 1 ) {
        char single[2];
        single[0] = *chars;
        single[1] = 0;
        return qt_winMB2TQString( single, 2 );
    }

    // Negative length: hand the pointer to the helper without a length.
    if ( len < 0 )
        return qt_winMB2TQString( chars );

    // General case: take a copy of the first len bytes and convert that.
    TQCString copy( chars, len+1 );
    return qt_winMB2TQString( copy );
}
|
||
|
|
||
|
TQCString TQWindowsLocalCodec::fromUnicode(const TQString& uc, int& lenInOut ) const
{
    // lenInOut is passed to the conversion helper on the way in and is
    // overwritten with the length of the encoded result on the way out.
    TQCString encoded = qt_winTQString2MB( uc, lenInOut );
    lenInOut = encoded.length();
    return encoded;
}
|
||
|
|
||
|
|
||
|
const char* TQWindowsLocalCodec::name() const
|
||
|
{
|
||
|
return "System";
|
||
|
}
|
||
|
|
||
|
int TQWindowsLocalCodec::mibEnum() const
|
||
|
{
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
int TQWindowsLocalCodec::heuristicContentMatch(const char* chars, int len) const
|
||
|
{
|
||
|
// ### Not a bad default implementation?
|
||
|
TQString t = toUnicode(chars,len);
|
||
|
int l = t.length();
|
||
|
TQCString mb = fromUnicode(t,l);
|
||
|
int i=0;
|
||
|
while ( i < len ) {
|
||
|
if ( chars[i] == mb[i] )
|
||
|
i++;
|
||
|
else
|
||
|
break;
|
||
|
}
|
||
|
return i;
|
||
|
}
|
||
|
|
||
|
class TQWindowsLocalDecoder: public TQTextDecoder
|
||
|
{
|
||
|
const TQWindowsLocalCodec* codec;
|
||
|
int nbuf;
|
||
|
uchar buf[4]; // hopefully this will be enough
|
||
|
public:
|
||
|
TQWindowsLocalDecoder(const TQWindowsLocalCodec *c) : codec(c), nbuf(0)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
TQString toUnicode(const char* chars, int len)
|
||
|
{
|
||
|
if (len != 1 && nbuf == 0)
|
||
|
return codec->toUnicode(chars, len);
|
||
|
if (len == 1) {
|
||
|
char c[sizeof buf + 2];
|
||
|
memcpy(c, buf, nbuf);
|
||
|
c[nbuf] = *chars;
|
||
|
c[nbuf+1] = 0;
|
||
|
|
||
|
// try to decode this:
|
||
|
TQString retval = codec->toUnicode(c, -1);
|
||
|
if ( retval.isEmpty() ) {
|
||
|
// it didn't return anything; we probably stopped mid-way in a multi-byte
|
||
|
// character
|
||
|
buf[nbuf++] = *chars;
|
||
|
if (nbuf + 1 == sizeof buf) {
|
||
|
qWarning("TQWindowsLocalDecoder: exceeded max internal buffer size");
|
||
|
nbuf = 0;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
nbuf = 0; // decoded successfully
|
||
|
|
||
|
return retval;
|
||
|
}
|
||
|
|
||
|
if (len == -1)
|
||
|
len = (int)strlen(chars);
|
||
|
|
||
|
// Ugh! We need to allocate memory
|
||
|
char *s = new char[nbuf + len + 1];
|
||
|
memcpy(s, buf, nbuf);
|
||
|
memcpy(s + nbuf, chars, len);
|
||
|
s[nbuf + len] = 0;
|
||
|
|
||
|
TQString retval = codec->toUnicode(s, -1);
|
||
|
nbuf = 0;
|
||
|
delete[] s;
|
||
|
return retval;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
TQTextDecoder* TQWindowsLocalCodec::makeDecoder() const
{
    // Hand out the buffering decoder bound to this codec.
    TQTextDecoder* decoder = new TQWindowsLocalDecoder(this);
    return decoder;
}
|
||
|
|
||
|
#else
|
||
|
|
||
|
/* locale names mostly copied from XFree86 */
|
||
|
static const char * const iso8859_2locales[] = {
|
||
|
"croatian", "cs", "cs_CS", "cs_CZ","cz", "cz_CZ", "czech", "hr",
|
||
|
"hr_HR", "hu", "hu_HU", "hungarian", "pl", "pl_PL", "polish", "ro",
|
||
|
"ro_RO", "rumanian", "serbocroatian", "sh", "sh_SP", "sh_YU", "sk",
|
||
|
"sk_SK", "sl", "sl_CS", "sl_SI", "slovak", "slovene", "sr_SP", 0 };
|
||
|
|
||
|
static const char * const iso8859_3locales[] = {
|
||
|
"eo", 0 };
|
||
|
|
||
|
static const char * const iso8859_4locales[] = {
|
||
|
"ee", "ee_EE", 0 };
|
||
|
|
||
|
static const char * const iso8859_5locales[] = {
|
||
|
"mk", "mk_MK", "sp", "sp_YU", 0 };
|
||
|
|
||
|
static const char * const cp_1251locales[] = {
|
||
|
"be", "be_BY", "bg", "bg_BG", "bulgarian", 0 };
|
||
|
|
||
|
static const char * const pt_154locales[] = {
|
||
|
"ba_RU", "ky", "ky_KG", "kk", "kk_KZ", 0 };
|
||
|
|
||
|
static const char * const iso8859_6locales[] = {
|
||
|
"ar_AA", "ar_SA", "arabic", 0 };
|
||
|
|
||
|
static const char * const iso8859_7locales[] = {
|
||
|
"el", "el_GR", "greek", 0 };
|
||
|
|
||
|
static const char * const iso8859_8locales[] = {
|
||
|
"hebrew", "he", "he_IL", "iw", "iw_IL", 0 };
|
||
|
|
||
|
static const char * const iso8859_9locales[] = {
|
||
|
"tr", "tr_TR", "turkish", 0 };
|
||
|
|
||
|
static const char * const iso8859_13locales[] = {
|
||
|
"lt", "lt_LT", "lv", "lv_LV", 0 };
|
||
|
|
||
|
static const char * const iso8859_15locales[] = {
|
||
|
"et", "et_EE",
|
||
|
// Euro countries
|
||
|
"br_FR", "ca_ES", "de", "de_AT", "de_BE", "de_DE", "de_LU", "en_IE",
|
||
|
"es", "es_ES", "eu_ES", "fi", "fi_FI", "finnish", "fr", "fr_FR",
|
||
|
"fr_BE", "fr_LU", "french", "ga_IE", "gl_ES", "it", "it_IT", "oc_FR",
|
||
|
"nl", "nl_BE", "nl_NL", "pt", "pt_PT", "sv_FI", "wa_BE",
|
||
|
0 };
|
||
|
|
||
|
static const char * const koi8_ulocales[] = {
|
||
|
"uk", "uk_UA", "ru_UA", "ukrainian", 0 };
|
||
|
|
||
|
static const char * const tis_620locales[] = {
|
||
|
"th", "th_TH", "thai", 0 };
|
||
|
|
||
|
static const char * const tcvnlocales[] = {
|
||
|
"vi", "vi_VN", 0 };
|
||
|
|
||
|
// Returns true if \a lang is equal (strcmp) to one of the entries of
// \a locale.  The table ends at the first null pointer; an empty string
// entry is treated as the end of the table as well.
//
// The earlier version reported a match whenever the scan stopped on an
// empty-string entry, and passed a null \a lang straight to strcmp().
// Both cases now yield false.
static bool try_locale_list( const char * const locale[], const char * lang )
{
    if ( !locale || !lang )
        return false;

    for ( int i = 0; locale[i] && *locale[i]; ++i ) {
        if ( strcmp( locale[i], lang ) == 0 )
            return true;
    }
    return false;
}
|
||
|
|
||
|
// For the probably_koi8_locales we have to look. the standard says
|
||
|
// these are 8859-5, but almost all Russian users use KOI8-R and
|
||
|
// incorrectly set $LANG to ru_RU. We'll check tolower() to see what
|
||
|
// tolower() thinks ru_RU means.
|
||
|
|
||
|
// If you read the history, it seems that many Russians blame ISO and
|
||
|
// Perestroika for the confusion.
|
||
|
//
|
||
|
// The real bug is that some programs break if the user specifies
|
||
|
// ru_RU.KOI8-R.
|
||
|
|
||
|
static const char * const probably_koi8_rlocales[] = {
|
||
|
"ru", "ru_SU", "ru_RU", "russian", 0 };
|
||
|
|
||
|
// Probes how tolower() behaves under the LC_CTYPE locale \a i to guess
// whether a Russian locale really means KOI8-R or ISO 8859-5, and
// returns the corresponding codec (KOI8-R when the probe is
// inconclusive).  The caller's LC_CTYPE setting is restored afterwards.
static TQTextCodec * ru_RU_hack( const char * i ) {
    TQTextCodec * ru_RU_codec = 0;

    // Remember the locale that is active *before* switching.  The
    // previous code stored the return value of setlocale( LC_CTYPE, i ),
    // which names the locale just installed, so the "restore" below
    // never brought back the caller's locale.
    // NOTE(review): relies on TQCString taking its own copy of the
    // string returned by setlocale() -- confirm.
    TQCString origlocale = setlocale( LC_CTYPE, 0 );
    setlocale( LC_CTYPE, i );

    // unicode   koi8r   latin5   name
    // 0x044E    0xC0    0xEE     CYRILLIC SMALL LETTER YU
    // 0x042E    0xE0    0xCE     CYRILLIC CAPITAL LETTER YU
    int latin5 = tolower( 0xCE );
    int koi8r = tolower( 0xE0 );
    if ( koi8r == 0xC0 && latin5 != 0xEE ) {
        ru_RU_codec = TQTextCodec::codecForName( "KOI8-R" );
    } else if ( koi8r != 0xC0 && latin5 == 0xEE ) {
        ru_RU_codec = TQTextCodec::codecForName( "ISO 8859-5" );
    } else {
        // something else again... let's assume... *throws dice*
        ru_RU_codec = TQTextCodec::codecForName( "KOI8-R" );
        qWarning( "TQTextCodec: using KOI8-R, probe failed (%02x %02x %s)",
                  koi8r, latin5, i );
    }

    // Put the original LC_CTYPE locale back.
    setlocale( LC_CTYPE, origlocale.data() );

    return ru_RU_codec;
}
|
||
|
|
||
|
#endif
|
||
|
|
||
|
/*!
|
||
|
Set the codec to \a c; this will be returned by codecForLocale().
|
||
|
This might be needed for some applications that want to use their
|
||
|
own mechanism for setting the locale.
|
||
|
|
||
|
\sa codecForLocale()
|
||
|
*/
|
||
|
void TQTextCodec::setCodecForLocale(TQTextCodec *c)
{
    // Simply remember the codec; codecForLocale() returns it from now on.
    localeMapper = c;
}
|
||
|
|
||
|
/*! Returns a pointer to the codec most suitable for this locale. */
|
||
|
|
||
|
TQTextCodec* TQTextCodec::codecForLocale()
{
    // No locale codec cached yet: run the one-time setup, then return
    // whatever localeMapper holds afterwards.
    if ( !localeMapper )
        setup();

    return localeMapper;
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
Searches all installed TQTextCodec objects and returns the one
|
||
|
which best matches \a name; the match is case-insensitive. Returns
|
||
|
0 if no codec's heuristicNameMatch() reports a match better than
|
||
|
\a accuracy, or if \a name is a null string.
|
||
|
|
||
|
\sa heuristicNameMatch()
|
||
|
*/
|
||
|
|
||
|
TQTextCodec* TQTextCodec::codecForName( const char* name, int accuracy )
{
    // A null or empty name never matches anything.
    if ( !name || !*name )
        return 0;

    setup();

    // Keep the codec with the highest score; it has to beat `accuracy`
    // to be considered at all, and ties keep the earlier list entry.
    TQTextCodec* winner = 0;
    int highScore = accuracy;
    TQValueList<TQTextCodec*>::ConstIterator it;
    for ( it = all->begin(); it != all->end(); ++it ) {
        TQTextCodec* candidate = *it;
        int score = candidate->heuristicNameMatch( name );
        if ( score > highScore ) {
            highScore = score;
            winner = candidate;
        }
    }

#if !defined(QT_NO_COMPONENT) && !defined(QT_LITE_COMPONENT)
    // Nothing registered was good enough: ask the codec factory.
    if ( !winner )
        winner = TQTextCodecFactory::createForName(name);
#endif // !QT_NO_COMPONENT !QT_LITE_COMPONENT

    return winner;
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
Searches all installed TQTextCodec objects, returning the one which
|
||
|
most recognizes the given content. May return 0.
|
||
|
|
||
|
Note that this is often a poor choice, since character encodings
|
||
|
often use most of the available character sequences, and so only
|
||
|
by linguistic analysis could a true match be made.
|
||
|
|
||
|
\a chars contains the string to check, and \a len contains the
|
||
|
number of characters in the string to use.
|
||
|
|
||
|
\sa heuristicContentMatch()
|
||
|
*/
|
||
|
TQTextCodec* TQTextCodec::codecForContent(const char* chars, int len)
{
    setup();

    // Pick the codec with the highest strictly positive content score;
    // ties keep the earlier list entry.
    TQTextCodec* winner = 0;
    int highScore = 0;
    TQValueList<TQTextCodec*>::ConstIterator it;
    for ( it = all->begin(); it != all->end(); ++it ) {
        TQTextCodec* candidate = *it;
        int score = candidate->heuristicContentMatch( chars, len );
        if ( score > highScore ) {
            highScore = score;
            winner = candidate;
        }
    }
    return winner;
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
\fn const char* TQTextCodec::name() const
|
||
|
|
||
|
TQTextCodec subclasses must reimplement this function. It returns
|
||
|
the name of the encoding supported by the subclass. When choosing
|
||
|
a name for an encoding, consider these points:
|
||
|
\list
|
||
|
\i On X11, heuristicNameMatch( const char * hint )
|
||
|
is used to test if the TQTextCodec
|
||
|
can convert between Unicode and the encoding of a font
|
||
|
with encoding \e hint, such as "iso8859-1" for Latin-1 fonts,
|
||
|
"koi8-r" for Russian KOI8 fonts.
|
||
|
The default algorithm of heuristicNameMatch() uses name().
|
||
|
\i Some applications may use this function to present
|
||
|
encodings to the end user.
|
||
|
\endlist
|
||
|
*/
|
||
|
|
||
|
/*!
|
||
|
\fn int TQTextCodec::mibEnum() const
|
||
|
|
||
|
Subclasses of TQTextCodec must reimplement this function. It
|
||
|
returns the MIBenum (see \link
|
||
|
http://www.iana.org/assignments/character-sets the
|
||
|
IANA character-sets encoding file\endlink for more information).
|
||
|
It is important that each TQTextCodec subclass returns the correct
|
||
|
unique value for this function.
|
||
|
*/
|
||
|
|
||
|
|
||
|
/*!
|
||
|
Returns the preferred mime name of the encoding as defined in the
|
||
|
\link http://www.iana.org/assignments/character-sets
|
||
|
IANA character-sets encoding file\endlink.
|
||
|
*/
|
||
|
const char* TQTextCodec::mimeName() const
|
||
|
{
|
||
|
return name();
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
\fn int TQTextCodec::heuristicContentMatch(const char* chars, int len) const
|
||
|
|
||
|
TQTextCodec subclasses must reimplement this function. It examines
|
||
|
the first \a len bytes of \a chars and returns a value indicating
|
||
|
how likely it is that the string is a prefix of text encoded in
|
||
|
the encoding of the subclass. A negative return value indicates
|
||
|
that the text is detectably not in the encoding (e.g. it contains
|
||
|
characters undefined in the encoding). A return value of 0
|
||
|
indicates that the text should be decoded with this codec rather
|
||
|
than as ASCII, but there is no particular evidence. The value
|
||
|
should range up to \a len. Thus, most decoders will return -1, 0,
|
||
|
or -\a len.
|
||
|
|
||
|
The characters are not null terminated.
|
||
|
|
||
|
\sa codecForContent().
|
||
|
*/
|
||
|
|
||
|
|
||
|
/*!
|
||
|
Creates a TQTextDecoder which stores enough state to decode chunks
|
||
|
of char* data to create chunks of Unicode data. The default
|
||
|
implementation creates a stateless decoder, which is only
|
||
|
sufficient for the simplest encodings where each byte corresponds
|
||
|
to exactly one Unicode character.
|
||
|
|
||
|
The caller is responsible for deleting the returned object.
|
||
|
*/
|
||
|
TQTextDecoder* TQTextCodec::makeDecoder() const
{
    // Heap-allocated stateless decoder bound to this codec; the caller
    // takes ownership and must delete it.
    TQTextDecoder* decoder = new TQTextStatelessDecoder(this);
    return decoder;
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
Creates a TQTextEncoder which stores enough state to encode chunks
|
||
|
of Unicode data as char* data. The default implementation creates
|
||
|
a stateless encoder, which is only sufficient for the simplest
|
||
|
encodings where each Unicode character corresponds to exactly one
|
||
|
character.
|
||
|
|
||
|
The caller is responsible for deleting the returned object.
|
||
|
*/
|
||
|
TQTextEncoder* TQTextCodec::makeEncoder() const
{
    // Heap-allocated stateless encoder bound to this codec; the caller
    // takes ownership and must delete it.
    TQTextEncoder* encoder = new TQTextStatelessEncoder(this);
    return encoder;
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
TQTextCodec subclasses must reimplement this function or
|
||
|
makeDecoder(). It converts the first \a len characters of \a chars
|
||
|
to Unicode.
|
||
|
|
||
|
The default implementation makes a decoder with makeDecoder() and
|
||
|
converts the input with that. Note that the default makeDecoder()
|
||
|
implementation makes a decoder that simply calls
|
||
|
this function, hence subclasses \e must reimplement one function or
|
||
|
the other to avoid infinite recursion.
|
||
|
*/
|
||
|
TQString TQTextCodec::toUnicode(const char* chars, int len) const
{
    // A null input pointer yields a null string without creating a decoder.
    if ( !chars )
        return TQString::null;

    // Use a throw-away decoder for this single conversion.
    TQTextDecoder* decoder = makeDecoder();
    TQString decoded = decoder->toUnicode(chars, len);
    delete decoder;
    return decoded;
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
TQTextCodec subclasses must reimplement either this function or
|
||
|
makeEncoder(). It converts the first \a lenInOut characters of \a
|
||
|
uc from Unicode to the encoding of the subclass. If \a lenInOut is
|
||
|
negative or too large, the length of \a uc is used instead.
|
||
|
|
||
|
Converts \a lenInOut characters (not bytes) from \a uc, producing
|
||
|
a TQCString. \a lenInOut will be set to the \link
|
||
|
TQCString::length() length\endlink of the result (in bytes).
|
||
|
|
||
|
The default implementation makes an encoder with makeEncoder() and
|
||
|
converts the input with that. Note that the default makeEncoder()
|
||
|
implementation makes an encoder that simply calls this function,
|
||
|
hence subclasses \e must reimplement one function or the other to
|
||
|
avoid infinite recursion.
|
||
|
*/
|
||
|
|
||
|
TQCString TQTextCodec::fromUnicode(const TQString& uc, int& lenInOut) const
{
    // Use a throw-away encoder for this single conversion; the encoder
    // receives lenInOut by reference and may update it.
    TQTextEncoder* encoder = makeEncoder();
    TQCString encoded = encoder->fromUnicode(uc, lenInOut);
    delete encoder;
    return encoded;
}
|
||
|
|
||
|
/*!
|
||
|
\overload
|
||
|
\internal
|
||
|
*/
|
||
|
TQByteArray TQTextCodec::fromUnicode( const TQString &str, int pos, int len ) const
{
    // A negative length means "everything from pos to the end of str".
    if ( len < 0 )
        len = str.length() - pos;

    TQByteArray a;
    a = fromUnicode( str.mid(pos, len) );

    // Drop one trailing NUL byte, if present, from the encoded data.
    const bool endsWithNul = a.size() > 0 && a[(int)a.size() - 1] == '\0';
    if ( endsWithNul )
        a.resize( a.size() - 1 );
    return a;
}
|
||
|
|
||
|
/*!
|
||
|
\overload
|
||
|
|
||
|
\a uc is the unicode source string.
|
||
|
*/
|
||
|
TQCString TQTextCodec::fromUnicode(const TQString& uc) const
{
    // The two-argument overload takes the length by reference, so it
    // needs a named variable holding the full string length.
    int wholeLength = uc.length();
    return fromUnicode(uc, wholeLength);
}
|
||
|
|
||
|
/*!
|
||
|
\overload
|
||
|
|
||
|
\a a contains the source characters; \a len contains the number of
|
||
|
characters in \a a to use.
|
||
|
*/
|
||
|
TQString TQTextCodec::toUnicode(const TQByteArray& a, int len) const
{
    // Never decode more bytes than the array actually holds.
    const int available = a.size();
    const int count = TQMIN( available, len );
    return toUnicode( a.data(), count );
}
|
||
|
|
||
|
/*!
|
||
|
\overload
|
||
|
|
||
|
\a a contains the source characters.
|
||
|
*/
|
||
|
TQString TQTextCodec::toUnicode(const TQByteArray& a) const
{
    // Decode the whole array: size() bytes starting at data().
    const int byteCount = a.size();
    return toUnicode( a.data(), byteCount );
}
|
||
|
|
||
|
/*!
|
||
|
\overload
|
||
|
|
||
|
\a a contains the source characters; \a len contains the number of
|
||
|
characters in \a a to use.
|
||
|
*/
|
||
|
TQString TQTextCodec::toUnicode(const TQCString& a, int len) const
{
    // Never decode more than the string's length().
    const int available = a.length();
    const int count = TQMIN( available, len );
    return toUnicode( a.data(), count );
}
|
||
|
|
||
|
/*!
|
||
|
\overload
|
||
|
|
||
|
\a a contains the source characters.
|
||
|
*/
|
||
|
TQString TQTextCodec::toUnicode(const TQCString& a) const
{
    // Decode the whole string: length() bytes starting at data().
    const int byteCount = a.length();
    return toUnicode( a.data(), byteCount );
}
|
||
|
|
||
|
/*!
|
||
|
\overload
|
||
|
|
||
|
\a chars contains the source characters.
|
||
|
*/
|
||
|
TQString TQTextCodec::toUnicode(const char* chars) const
{
    // The length is taken from the C string itself via qstrlen().
    const int byteCount = qstrlen(chars);
    return toUnicode(chars, byteCount);
}
|
||
|
|
||
|
/*!
|
||
|
\internal
|
||
|
*/
|
||
|
unsigned short TQTextCodec::characterFromUnicode(const TQString &str, int pos) const
|
||
|
{
|
||
|
TQCString result = TQTextCodec::fromUnicode(TQString(str[pos]));
|
||
|
uchar *ch = (uchar *) result.data();
|
||
|
ushort retval = 0;
|
||
|
if (result.size() > 2) {
|
||
|
retval = (ushort) *ch << 8;
|
||
|
ch++;
|
||
|
}
|
||
|
return retval + *ch;
|
||
|
}
|
||
|
|
||
|
/*!
|
||
|
Returns TRUE if the Unicode character \a ch can be fully encoded
|
||
|
with this codec; otherwise returns FALSE. The default
|
||
|
implementation tests if the result of toUnicode(fromUnicode(ch))
|
||
|
is the original \a ch. Subclasses may be able to improve the
|
||
|
efficiency.
|
||
|
*/
|
||
|
bool TQTextCodec::canEncode( TQChar ch ) const
{
    // Round-trip test: encode the character, decode it again, and check
    // that the original character comes back.
    const TQCString encoded = fromUnicode(ch);
    return toUnicode(encoded) == ch;
}
|
||
|
|
||
|
/*!
|
||
|
\overload
|
||
|
|
||
|
\a s contains the string being tested for encode-ability.
|
||
|
*/
|
||
|
bool TQTextCodec::canEncode( const TQString& s ) const
|
||
|
{
|
||
|
if ( s.isEmpty() )
|
||
|
return TRUE;
|
||
|
return toUnicode(fromUnicode(s)) == s;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
/*!
|
||
|
\class TQTextEncoder qtextcodec.h
|
||
|
\brief The TQTextEncoder class provides a state-based encoder.
|
||
|
\reentrant
|
||
|
\ingroup i18n
|
||
|
|
||
|
The encoder converts Unicode into another format, remembering any
|
||
|
state that is required between calls.
|
||
|
|
||
|
\sa TQTextCodec::makeEncoder()
|
||
|
*/
|
||
|
|
||
|
/*!
|
||
|
Destroys the encoder.
|
||
|
*/
|
||
|
TQTextEncoder::~TQTextEncoder()
{
    // Nothing to release here.
}
|
||
|
|
||
|
/*!
|
||
|
\fn TQCString TQTextEncoder::fromUnicode(const TQString& uc, int& lenInOut)
|
||
|
|
||
|
Converts \a lenInOut characters (not bytes) from \a uc, producing
|
||
|
a TQCString. \a lenInOut will be set to the \link
|
||
|
TQCString::length() length\endlink of the result (in bytes).
|
||
|
|
||
|
The encoder is free to record state to use when subsequent calls
|
||
|
are made to this function (for example, it might change modes with
|
||
|
escape sequences if needed during the encoding of one string, then
|
||
|
assume that mode applies when a subsequent call begins).
|
||
|
*/
|
||
|
|
||
|
/*!
|
||
|
\class TQTextDecoder qtextcodec.h
|
||
|
\brief The TQTextDecoder class provides a state-based decoder.
|
||
|
\reentrant
|
||
|
\ingroup i18n
|
||
|
|
||
|
The decoder converts a text format into Unicode, remembering any
|
||
|
state that is required between calls.
|
||
|
|
||
|
\sa TQTextCodec::makeDecoder()
|
||
|
*/
|
||
|
|
||
|
|
||
|
/*!
|
||
|
Destroys the decoder.
|
||
|
*/
|
||
|
TQTextDecoder::~TQTextDecoder()
{
    // Nothing to release here.
}
|
||
|
|
||
|
/*!
|
||
|
\fn TQString TQTextDecoder::toUnicode(const char* chars, int len)
|
||
|
|
||
|
Converts the first \a len bytes in \a chars to Unicode, returning
|
||
|
the result.
|
||
|
|
||
|
If not all characters are used (e.g. if only part of a multi-byte
|
||
|
encoding is at the end of the characters), the decoder remembers
|
||
|
enough state to continue with the next call to this function.
|
||
|
*/
|
||
|
|
||
|
#define CHAINED 0xffff
|
||
|
|
||
|
// One entry of a byte-indexed decoding table.
struct TQMultiByteUnicodeTable {
    // When multiByte is set, unicode is ignored and the next input
    // character is used as an index into multiByte.
    // A new entry maps to U+FFFD and has no child table.
    TQMultiByteUnicodeTable()
        : unicode(0xfffd),
          multiByte(0)
    {
    }

    ~TQMultiByteUnicodeTable()
    {
        // delete [] on a null pointer is a no-op, so no guard is needed;
        // destroying the array destroys nested child tables in turn.
        delete [] multiByte;
    }

    ushort unicode;
    TQMultiByteUnicodeTable* multiByte;
};
|
||
|
|
||
|
// Parses one escaped byte value of a charmap line.
//
// On entry cursor points at the escape character. The character after it
// selects the radix: 'x' for hexadecimal, 'd' for decimal, anything else
// means the octal digits start immediately after the escape character.
// cursor is advanced past the digits that were consumed (or just past the
// radix/escape character when no digits follow). An empty string leaves
// cursor untouched and yields 0.
//
// Returns the parsed value masked to the range 0..255.
static int getByte(char* &cursor)
{
    int byte = 0;
    if ( *cursor ) {
        if ( cursor[1] == 'x' )
            byte = strtol(cursor+2,&cursor,16);
        else if ( cursor[1] == 'd' )
            byte = strtol(cursor+2,&cursor,10);
        else
            // Octal has no radix letter, so the digits begin right after
            // the escape character; starting at cursor+2 would drop the
            // first digit and could read past the terminating NUL.
            byte = strtol(cursor+1,&cursor,8);
    }
    return byte&0xff;
}
|
||
|
|
||
|
class TQTextCodecFromIOD;
|
||
|
|
||
|
class TQTextCodecFromIODDecoder : public TQTextDecoder {
|
||
|
const TQTextCodecFromIOD* codec;
|
||
|
TQMultiByteUnicodeTable* mb;
|
||
|
public:
|
||
|
TQTextCodecFromIODDecoder(const TQTextCodecFromIOD* c);
|
||
|
TQString toUnicode(const char* chars, int len);
|
||
|
};
|
||
|
|
||
|
class TQTextCodecFromIOD : public TQTextCodec {
|
||
|
friend class TQTextCodecFromIODDecoder;
|
||
|
|
||
|
TQCString n;
|
||
|
|
||
|
// If from_unicode_page[row()][cell()] is 0 and from_unicode_page_multiByte,
|
||
|
// use from_unicode_page_multiByte[row()][cell()] as string.
|
||
|
char** from_unicode_page;
|
||
|
char*** from_unicode_page_multiByte;
|
||
|
char unkn;
|
||
|
|
||
|
// Only one of these is used
|
||
|
ushort* to_unicode;
|
||
|
TQMultiByteUnicodeTable* to_unicode_multiByte;
|
||
|
int max_bytes_per_char;
|
||
|
TQStrList aliases;
|
||
|
|
||
|
bool stateless() const { return !to_unicode_multiByte; }
|
||
|
|
||
|
public:
|
||
|
TQTextCodecFromIOD(TQIODevice* iod)
|
||
|
{
|
||
|
from_unicode_page = 0;
|
||
|
to_unicode_multiByte = 0;
|
||
|
to_unicode = 0;
|
||
|
from_unicode_page_multiByte = 0;
|
||
|
max_bytes_per_char = 1;
|
||
|
|
||
|
const int maxlen=100;
|
||
|
char line[maxlen];
|
||
|
char esc='\\';
|
||
|
char comm='%';
|
||
|
bool incmap = FALSE;
|
||
|
while (iod->readLine(line,maxlen) > 0) {
|
||
|
if (0==qstrnicmp(line,"<code_set_name>",15))
|
||
|
n = line+15;
|
||
|
else if (0==qstrnicmp(line,"<escape_char> ",14))
|
||
|
esc = line[14];
|
||
|
else if (0==qstrnicmp(line,"<comment_char> ",15))
|
||
|
comm = line[15];
|
||
|
else if (line[0]==comm && 0==qstrnicmp(line+1," alias ",7)) {
|
||
|
aliases.append(line+8);
|
||
|
} else if (0==qstrnicmp(line,"CHARMAP",7)) {
|
||
|
if (!from_unicode_page) {
|
||
|
from_unicode_page = new char*[256];
|
||
|
for (int i=0; i<256; i++)
|
||
|
from_unicode_page[i]=0;
|
||
|
}
|
||
|
if (!to_unicode) {
|
||
|
to_unicode = new ushort[256];
|
||
|
}
|
||
|
incmap = TRUE;
|
||
|
} else if (0==qstrnicmp(line,"END CHARMAP",11))
|
||
|
break;
|
||
|
else if (incmap) {
|
||
|
char* cursor = line;
|
||
|
int byte=-1,unicode=-1;
|
||
|
ushort* mb_unicode=0;
|
||
|
const int maxmb=8; // more -> we'll need to improve datastructures
|
||
|
char mb[maxmb+1];
|
||
|
int nmb=0;
|
||
|
|
||
|
while (*cursor) {
|
||
|
if (cursor[0]=='<' && cursor[1]=='U' &&
|
||
|
cursor[2]>='0' && cursor[2]<='9' &&
|
||
|
cursor[3]>='0' && cursor[3]<='9') {
|
||
|
|
||
|
unicode = strtol(cursor+2,&cursor,16);
|
||
|
|
||
|
} else if (*cursor==esc) {
|
||
|
|
||
|
byte = getByte(cursor);
|
||
|
|
||
|
if ( *cursor == esc ) {
|
||
|
if ( !to_unicode_multiByte ) {
|
||
|
to_unicode_multiByte =
|
||
|
new TQMultiByteUnicodeTable[256];
|
||
|
for (int i=0; i<256; i++) {
|
||
|
to_unicode_multiByte[i].unicode =
|
||
|
to_unicode[i];
|
||
|
to_unicode_multiByte[i].multiByte = 0;
|
||
|
}
|
||
|
delete [] to_unicode;
|
||
|
to_unicode = 0;
|
||
|
}
|
||
|
TQMultiByteUnicodeTable* mbut =
|
||
|
to_unicode_multiByte+byte;
|
||
|
mb[nmb++] = byte;
|
||
|
while ( nmb < maxmb && *cursor == esc ) {
|
||
|
// Always at least once
|
||
|
|
||
|
mbut->unicode = CHAINED;
|
||
|
byte = getByte(cursor);
|
||
|
mb[nmb++] = byte;
|
||
|
if (!mbut->multiByte) {
|
||
|
mbut->multiByte =
|
||
|
new TQMultiByteUnicodeTable[256];
|
||
|
}
|
||
|
mbut = mbut->multiByte+byte;
|
||
|
mb_unicode = & mbut->unicode;
|
||
|
}
|
||
|
|
||
|
if ( nmb > max_bytes_per_char )
|
||
|
max_bytes_per_char = nmb;
|
||
|
}
|
||
|
} else {
|
||
|
cursor++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (unicode >= 0 && unicode <= 0xffff)
|
||
|
{
|
||
|
TQChar ch((ushort)unicode);
|
||
|
if (!from_unicode_page[ch.row()]) {
|
||
|
from_unicode_page[ch.row()] = new char[256];
|
||
|
for (int i=0; i<256; i++)
|
||
|
from_unicode_page[ch.row()][i]=0;
|
||
|
}
|
||
|
if ( mb_unicode ) {
|
||
|
from_unicode_page[ch.row()][ch.cell()] = 0;
|
||
|
if (!from_unicode_page_multiByte) {
|
||
|
from_unicode_page_multiByte = new char**[256];
|
||
|
for (int i=0; i<256; i++)
|
||
|
from_unicode_page_multiByte[i]=0;
|
||
|
}
|
||
|
if (!from_unicode_page_multiByte[ch.row()]) {
|
||
|
from_unicode_page_multiByte[ch.row()] = new char*[256];
|
||
|
for (int i=0; i<256; i++)
|
||
|
from_unicode_page_multiByte[ch.row()][i] = 0;
|
||
|
}
|
||
|
mb[nmb++] = 0;
|
||
|
from_unicode_page_multiByte[ch.row()][ch.cell()]
|
||
|
= qstrdup(mb);
|
||
|
*mb_unicode = unicode;
|
||
|
} else {
|
||
|
from_unicode_page[ch.row()][ch.cell()] = (char)byte;
|
||
|
if ( to_unicode )
|
||
|
to_unicode[byte] = unicode;
|
||
|
else
|
||
|
to_unicode_multiByte[byte].unicode = unicode;
|
||
|
}
|
||
|
} else {
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
n = n.stripWhiteSpace();
|
||
|
|
||
|
unkn = '?'; // ##### Might be a bad choice.
|
||
|
}
|
||
|
|
||
|
~TQTextCodecFromIOD()
|
||
|
{
|
||
|
if ( from_unicode_page ) {
|
||
|
for (int i=0; i<256; i++)
|
||
|
if (from_unicode_page[i])
|
||
|
delete [] from_unicode_page[i];
|
||
|
}
|
||
|
if ( from_unicode_page_multiByte ) {
|
||
|
for (int i=0; i<256; i++)
|
||
|
if (from_unicode_page_multiByte[i])
|
||
|
for (int j=0; j<256; j++)
|
||
|
if (from_unicode_page_multiByte[i][j])
|
||
|
delete [] from_unicode_page_multiByte[i][j];
|
||
|
}
|
||
|
if ( to_unicode )
|
||
|
delete [] to_unicode;
|
||
|
if ( to_unicode_multiByte )
|
||
|
delete [] to_unicode_multiByte;
|
||
|
}
|
||
|
|
||
|
bool ok() const
|
||
|
{
|
||
|
return !!from_unicode_page;
|
||
|
}
|
||
|
|
||
|
TQTextDecoder* makeDecoder() const
|
||
|
{
|
||
|
if ( stateless() )
|
||
|
return TQTextCodec::makeDecoder();
|
||
|
else
|
||
|
return new TQTextCodecFromIODDecoder(this);
|
||
|
}
|
||
|
|
||
|
const char* name() const
|
||
|
{
|
||
|
return n;
|
||
|
}
|
||
|
|
||
|
int mibEnum() const
|
||
|
{
|
||
|
return 0; // #### Unknown.
|
||
|
}
|
||
|
|
||
|
int heuristicContentMatch(const char*, int) const
|
||
|
{
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
int heuristicNameMatch(const char* hint) const
|
||
|
{
|
||
|
int bestr = TQTextCodec::heuristicNameMatch(hint);
|
||
|
TQStrListIterator it(aliases);
|
||
|
char* a;
|
||
|
while ((a=it.current())) {
|
||
|
++it;
|
||
|
int r = simpleHeuristicNameMatch(a,hint);
|
||
|
if (r > bestr)
|
||
|
bestr = r;
|
||
|
}
|
||
|
return bestr;
|
||
|
}
|
||
|
|
||
|
TQString toUnicode(const char* chars, int len) const
|
||
|
{
|
||
|
const uchar* uchars = (const uchar*)chars;
|
||
|
TQString result;
|
||
|
TQMultiByteUnicodeTable* multiByte=to_unicode_multiByte;
|
||
|
if ( multiByte ) {
|
||
|
while (len--) {
|
||
|
TQMultiByteUnicodeTable& mb = multiByte[*uchars];
|
||
|
if ( mb.multiByte ) {
|
||
|
// Chained multi-byte
|
||
|
multiByte = mb.multiByte;
|
||
|
} else {
|
||
|
result += TQChar(mb.unicode);
|
||
|
multiByte=to_unicode_multiByte;
|
||
|
}
|
||
|
uchars++;
|
||
|
}
|
||
|
} else {
|
||
|
while (len--)
|
||
|
result += TQChar(to_unicode[*uchars++]);
|
||
|
}
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
#if !defined(Q_NO_USING_KEYWORD)
|
||
|
using TQTextCodec::fromUnicode;
|
||
|
#endif
|
||
|
TQCString fromUnicode(const TQString& uc, int& lenInOut) const
|
||
|
{
|
||
|
if (lenInOut > (int)uc.length())
|
||
|
lenInOut = uc.length();
|
||
|
int rlen = lenInOut*max_bytes_per_char;
|
||
|
TQCString rstr(rlen+1);
|
||
|
char* cursor = rstr.data();
|
||
|
char* s=0;
|
||
|
int l = lenInOut;
|
||
|
int lout = 0;
|
||
|
for (int i=0; i<l; i++) {
|
||
|
TQChar ch = uc[i];
|
||
|
if ( ch == TQChar::null ) {
|
||
|
// special
|
||
|
*cursor++ = 0;
|
||
|
} else if ( from_unicode_page[ch.row()] &&
|
||
|
from_unicode_page[ch.row()][ch.cell()] )
|
||
|
{
|
||
|
*cursor++ = from_unicode_page[ch.row()][ch.cell()];
|
||
|
lout++;
|
||
|
} else if ( from_unicode_page_multiByte &&
|
||
|
from_unicode_page_multiByte[ch.row()] &&
|
||
|
(s=from_unicode_page_multiByte[ch.row()][ch.cell()]) )
|
||
|
{
|
||
|
while (*s) {
|
||
|
*cursor++ = *s++;
|
||
|
lout++;
|
||
|
}
|
||
|
} else {
|
||
|
*cursor++ = unkn;
|
||
|
lout++;
|
||
|
}
|
||
|
}
|
||
|
*cursor = 0;
|
||
|
lenInOut = lout;
|
||
|
return rstr;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
TQTextCodecFromIODDecoder::TQTextCodecFromIODDecoder(const TQTextCodecFromIOD* c)
    : codec(c),
      // Decoding starts at the codec's top-level multi-byte table.
      mb(c->to_unicode_multiByte)
{
}
|
||
|
|
||
|
TQString TQTextCodecFromIODDecoder::toUnicode(const char* chars, int len)
{
    TQString decoded;
    const uchar* in = (const uchar*)chars;

    // mb is a member, so a sequence split across two calls resumes in the
    // child table where the previous call stopped.
    for ( ; len != 0; --len, ++in ) {
        TQMultiByteUnicodeTable& entry = mb[*in];
        if ( !entry.multiByte ) {
            // End of a sequence: emit the character (a value of 0 is
            // skipped) and go back to the codec's top-level table.
            if ( entry.unicode != 0 )
                decoded += TQChar(entry.unicode);
            mb = codec->to_unicode_multiByte;
        } else {
            // Chained multi-byte: descend into the child table.
            mb = entry.multiByte;
        }
    }
    return decoded;
}
|
||
|
|
||
|
#ifndef QT_NO_CODECS
|
||
|
// Cannot use <pre> or \code
|
||
|
/*!
|
||
|
Reads a POSIX2 charmap definition from \a iod.
|
||
|
The parser recognizes the following lines:
|
||
|
|
||
|
<font name="sans">
|
||
|
<code_set_name> <i>name</i></br>
|
||
|
<escape_char> <i>character</i></br>
|
||
|
% alias <i>alias</i></br>
|
||
|
CHARMAP</br>
|
||
|
<<i>token</i>> /x<i>hexbyte</i> <U<i>unicode</i>> ...</br>
|
||
|
<<i>token</i>> /d<i>decbyte</i> <U<i>unicode</i>> ...</br>
|
||
|
<<i>token</i>> /<i>octbyte</i> <U<i>unicode</i>> ...</br>
|
||
|
<<i>token</i>> /<i>any</i>/<i>any</i>... <U<i>unicode</i>> ...</br>
|
||
|
END CHARMAP</br>
|
||
|
</font>
|
||
|
|
||
|
The resulting TQTextCodec is returned (and also added to the global
|
||
|
list of codecs). The name() of the result is taken from the
|
||
|
code_set_name.
|
||
|
|
||
|
Note that a codec constructed in this way uses much more memory
|
||
|
and is slower than a hand-written TQTextCodec subclass, since
|
||
|
tables in code are kept in memory shared by all TQt applications.
|
||
|
|
||
|
\sa loadCharmapFile()
|
||
|
*/
|
||
|
TQTextCodec* TQTextCodec::loadCharmap(TQIODevice* iod)
{
    TQTextCodecFromIOD* codec = new TQTextCodecFromIOD(iod);

    // A codec that did not parse into usable tables is discarded.
    if ( codec->ok() )
        return codec;

    delete codec;
    return 0;
}
|
||
|
|
||
|
/*!
|
||
|
A convenience function for loadCharmap() that loads the charmap
|
||
|
definition from the file \a filename.
|
||
|
*/
|
||
|
TQTextCodec* TQTextCodec::loadCharmapFile(TQString filename)
{
    TQFile f(filename);

    // A file that cannot be opened for reading yields no codec.
    if ( !f.open(IO_ReadOnly) )
        return 0;

    TQTextCodecFromIOD* codec = new TQTextCodecFromIOD(&f);
    if ( codec->ok() )
        return codec;

    // The file did not contain a usable charmap.
    delete codec;
    return 0;
}
|
||
|
|
||
|
#endif //QT_NO_CODECS
|
||
|
|
||
|
/*!
|
||
|
Returns a string representing the current language and
|
||
|
sublanguage, e.g. "pt" for Portuguese, or "pt_br" for Portuguese/Brazil.
|
||
|
*/
|
||
|
|
||
|
const char* TQTextCodec::locale()
|
||
|
{
|
||
|
return TQLocalePrivate::systemLocaleName();
|
||
|
}
|
||
|
|
||
|
#ifndef QT_NO_CODECS
|
||
|
|
||
|
class TQSimpleTextCodec: public TQTextCodec
|
||
|
{
|
||
|
public:
|
||
|
TQSimpleTextCodec( int );
|
||
|
~TQSimpleTextCodec();
|
||
|
|
||
|
TQString toUnicode(const char* chars, int len) const;
|
||
|
#if !defined(Q_NO_USING_KEYWORD)
|
||
|
using TQTextCodec::fromUnicode;
|
||
|
#endif
|
||
|
TQCString fromUnicode(const TQString& uc, int& lenInOut ) const;
|
||
|
unsigned short characterFromUnicode(const TQString &str, int pos) const;
|
||
|
|
||
|
const char* name() const;
|
||
|
const char* mimeName() const;
|
||
|
int mibEnum() const;
|
||
|
|
||
|
int heuristicContentMatch(const char* chars, int len) const;
|
||
|
|
||
|
int heuristicNameMatch(const char* hint) const;
|
||
|
#if !defined(Q_NO_USING_KEYWORD)
|
||
|
using TQTextCodec::canEncode;
|
||
|
#endif
|
||
|
bool canEncode( TQChar ch ) const;
|
||
|
|
||
|
void fromUnicode( const TQChar *in, unsigned short *out, int length ) const;
|
||
|
|
||
|
private:
|
||
|
void buildReverseMap();
|
||
|
|
||
|
int forwardIndex;
|
||
|
#ifndef Q_WS_QWS
|
||
|
TQMemArray<unsigned char> *reverseMap;
|
||
|
#endif
|
||
|
};
|
||
|
|
||
|
#ifdef Q_WS_QWS
// On Q_WS_QWS builds TQSimpleTextCodec has no reverseMap member; a single
// file-scope map is used instead.
// NOTE(review): reverseOwner presumably records which codec the map was
// built for - confirm against buildReverseMap().
static const TQSimpleTextCodec * reverseOwner = 0;
static TQMemArray<unsigned char> * reverseMap = 0;
#endif
|
||
|
|
||
|
#define LAST_MIB 2004
|
||
|
|
||
|
static const struct {
|
||
|
const char *mime;
|
||
|
const char * cs;
|
||
|
int mib;
|
||
|
Q_UINT16 values[128];
|
||
|
} unicodevalues[] = {
|
||
|
// from RFC 1489, ftp://ftp.isi.edu/in-notes/rfc1489.txt
|
||
|
{ "KOI8-R", "KOI8-R", 2084,
|
||
|
{ 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
|
||
|
0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
|
||
|
0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219/**/, 0x221A, 0x2248,
|
||
|
0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
|
||
|
0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,
|
||
|
0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E,
|
||
|
0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,
|
||
|
0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9,
|
||
|
0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
|
||
|
0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
|
||
|
0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
|
||
|
0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
|
||
|
0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
|
||
|
0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
|
||
|
0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
|
||
|
0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } },
|
||
|
// /**/ - The BULLET OPERATOR is confused. Some people think
|
||
|
// it should be 0x2022 (BULLET).
|
||
|
|
||
|
// from RFC 2319, ftp://ftp.isi.edu/in-notes/rfc2319.txt
|
||
|
{ "KOI8-U", "KOI8-U", 2088,
|
||
|
{ 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
|
||
|
0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
|
||
|
0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248,
|
||
|
0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
|
||
|
0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457,
|
||
|
0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E,
|
||
|
0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407,
|
||
|
0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9,
|
||
|
0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
|
||
|
0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
|
||
|
0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
|
||
|
0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
|
||
|
0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
|
||
|
0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
|
||
|
0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
|
||
|
0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } },
|
||
|
|
||
|
// next bits generated from tables on the Unicode 2.0 CD. we can
|
||
|
// use these tables since this is part of the transition to using
|
||
|
// unicode everywhere in qt.
|
||
|
|
||
|
// $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo 0x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; for a in 8859-* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ; cat /tmp/digits ) | sort | uniq -w4 | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done
|
||
|
|
||
|
// then I inserted the files manually.
|
||
|
{ "ISO-8859-2", "ISO 8859-2", 5,
|
||
|
{ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
||
|
0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
||
|
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
||
|
0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
||
|
0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
|
||
|
0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
|
||
|
0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
|
||
|
0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
|
||
|
0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
|
||
|
0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
|
||
|
0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
|
||
|
0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
|
||
|
0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
|
||
|
0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
|
||
|
0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
|
||
|
0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} },
|
||
|
{ "ISO-8859-3", "ISO 8859-3", 6,
|
||
|
{ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
||
|
0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
||
|
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
||
|
0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
||
|
0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7,
|
||
|
0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0xFFFD, 0x017B,
|
||
|
0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7,
|
||
|
0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0xFFFD, 0x017C,
|
||
|
0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7,
|
||
|
0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
|
||
|
0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7,
|
||
|
0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF,
|
||
|
0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7,
|
||
|
0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
|
||
|
0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7,
|
||
|
0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9} },
|
||
|
{ "ISO-8859-4", "ISO 8859-4", 7,
|
||
|
{ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
||
|
0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
||
|
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
||
|
0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
||
|
0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7,
|
||
|
0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF,
|
||
|
0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7,
|
||
|
0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B,
|
||
|
0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
|
||
|
0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A,
|
||
|
0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
|
||
|
0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF,
|
||
|
0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
|
||
|
0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B,
|
||
|
0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
|
||
|
0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9} },
|
||
|
{ "ISO-8859-5", "ISO 8859-5", 8,
|
||
|
{ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
||
|
0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
||
|
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
||
|
0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
||
|
0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
|
||
|
0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F,
|
||
|
0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
|
||
|
0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
|
||
|
0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
|
||
|
0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
|
||
|
0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
|
||
|
0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
|
||
|
0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
|
||
|
0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
|
||
|
0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
|
||
|
0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F} },
|
||
|
{ "ISO-8859-6", "ISO 8859-6", 82,
|
||
|
{ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
||
|
0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
||
|
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
||
|
0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
||
|
0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x060C, 0x00AD, 0xFFFD, 0xFFFD,
|
||
|
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 0xFFFD, 0xFFFD, 0xFFFD, 0x061F,
|
||
|
0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
|
||
|
0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
|
||
|
0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
|
||
|
0x0638, 0x0639, 0x063A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
|
||
|
0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F,
|
||
|
0x0650, 0x0651, 0x0652, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
|
||
|
{ "ISO-8859-7", "ISO 8859-7", 10,
|
||
|
{ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
||
|
0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
||
|
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
||
|
0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
||
|
0x00A0, 0x2018, 0x2019, 0x00A3, 0xFFFD, 0xFFFD, 0x00A6, 0x00A7,
|
||
|
0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0xFFFD, 0x2015,
|
||
|
0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7,
|
||
|
0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
|
||
|
0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
|
||
|
0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
|
||
|
0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
|
||
|
0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
|
||
|
0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
|
||
|
0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
|
||
|
0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
|
||
|
0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} },
|
||
|
{ "ISO-8859-8-I", "ISO 8859-8-I", 85,
|
||
|
{ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
||
|
0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
||
|
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
||
|
0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
||
|
0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
|
||
|
0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x203E,
|
||
|
0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
|
||
|
0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD,
|
||
|
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017,
|
||
|
0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
|
||
|
0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
|
||
|
0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
|
||
|
0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
|
||
|
{ "ISO-8859-9", "ISO 8859-9", 12,
|
||
|
{ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
||
|
0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
||
|
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
||
|
0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
||
|
0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
|
||
|
0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
|
||
|
0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
|
||
|
0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
|
||
|
0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
|
||
|
0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
|
||
|
0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
|
||
|
0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
|
||
|
0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
|
||
|
0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
|
||
|
0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
|
||
|
0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} },
|
||
|
{ "ISO-8859-10", "ISO 8859-10", 13,
|
||
|
{ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
||
|
0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
||
|
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
||
|
0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
||
|
0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7,
|
||
|
0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A,
|
||
|
0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7,
|
||
|
0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B,
|
||
|
0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
|
||
|
0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF,
|
||
|
0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168,
|
||
|
0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
|
||
|
0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
|
||
|
0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF,
|
||
|
0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169,
|
||
|
0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138} },
|
||
|
{ "ISO-8859-13", "ISO 8859-13", 109,
|
||
|
{ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
||
|
0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
||
|
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
||
|
0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
||
|
0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7,
|
||
|
0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
|
||
|
0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7,
|
||
|
0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
|
||
|
0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
|
||
|
0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
|
||
|
0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
|
||
|
0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
|
||
|
0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
|
||
|
0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
|
||
|
0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
|
||
|
0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019} },
|
||
|
{ "ISO-8859-14", "ISO 8859-14", 110,
|
||
|
{ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
||
|
0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
||
|
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
||
|
0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
||
|
0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7,
|
||
|
0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178,
|
||
|
0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56,
|
||
|
0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61,
|
||
|
0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
|
||
|
0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
|
||
|
0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A,
|
||
|
0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF,
|
||
|
0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
|
||
|
0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
|
||
|
0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B,
|
||
|
0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF} },
|
||
|
{ "ISO-8859-16", "ISO 8859-16", 112,
|
||
|
{ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
||
|
0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
||
|
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
||
|
0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
||
|
0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7,
|
||
|
0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B,
|
||
|
0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7,
|
||
|
0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C,
|
||
|
0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7,
|
||
|
0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
|
||
|
0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A,
|
||
|
0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF,
|
||
|
0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7,
|
||
|
0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
|
||
|
0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B,
|
||
|
0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF} },
|
||
|
|
||
|
// The next tables were likewise generated from tables on the Unicode 3.0 CD.
|
||
|
|
||
|
// $ for a in CP* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ) | sort | sed -e 's/#UNDEF.*$/0xFFFD/' | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done
|
||
|
|
||
|
{ "CP 850", "IBM 850", 2009,
|
||
|
{ 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
|
||
|
0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
|
||
|
0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
|
||
|
0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
|
||
|
0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
|
||
|
0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
|
||
|
0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
|
||
|
0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
|
||
|
0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
|
||
|
0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4,
|
||
|
0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
|
||
|
0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
|
||
|
0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
|
||
|
0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
|
||
|
0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
|
||
|
0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0} },
|
||
|
{ "CP 874", "CP 874", 0, //### what is the mib?
|
||
|
{ 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD,
|
||
|
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
||
|
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
|
||
|
0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
|
||
|
0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
|
||
|
0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
|
||
|
0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
|
||
|
0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
|
||
|
0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
|
||
|
0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,
|
||
|
0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
|
||
|
0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
|
||
|
0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
|
||
|
0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
|
||
|
{ "IBM 866", "IBM 866", 2086,
|
||
|
{ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
|
||
|
0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
|
||
|
0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
|
||
|
0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
|
||
|
0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
|
||
|
0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
|
||
|
0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
|
||
|
0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510,
|
||
|
0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F,
|
||
|
0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567,
|
||
|
0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B,
|
||
|
0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580,
|
||
|
0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
|
||
|
0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
|
||
|
0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040E, 0x045E,
|
||
|
0x00B0, 0x2219, 0x00B7, 0x221A, 0x2116, 0x00A4, 0x25A0, 0x00A0} },
|
||
|
|
||
|
{ "windows-1250", "CP 1250", 2250,
|
||
|
{ 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021,
|
||
|
0xFFFD, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179,
|
||
|
0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
||
|
0xFFFD, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A,
|
||
|
0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7,
|
||
|
0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B,
|
||
|
0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
|
||
|
0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C,
|
||
|
0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
|
||
|
0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
|
||
|
0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
|
||
|
0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
|
||
|
0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
|
||
|
0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
|
||
|
0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
|
||
|
0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} },
|
||
|
{ "windows-1251", "CP 1251", 2251,
|
||
|
{ 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021,
|
||
|
0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F,
|
||
|
0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
||
|
0xFFFD, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F,
|
||
|
0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7,
|
||
|
0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407,
|
||
|
0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7,
|
||
|
0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457,
|
||
|
0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
|
||
|
0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
|
||
|
0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
|
||
|
0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
|
||
|
0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
|
||
|
0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
|
||
|
0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
|
||
|
0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F} },
|
||
|
{ "windows-1252", "CP 1252", 2252,
|
||
|
{ 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
|
||
|
0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
|
||
|
0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
||
|
0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178,
|
||
|
0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
|
||
|
0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
|
||
|
0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
|
||
|
0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
|
||
|
0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
|
||
|
0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
|
||
|
0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
|
||
|
0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
|
||
|
0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
|
||
|
0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
|
||
|
0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
|
||
|
0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} },
|
||
|
{ "windows-1253", "CP 1253", 2253,
|
||
|
{ 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
|
||
|
0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
||
|
0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
|
||
|
0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015,
|
||
|
0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7,
|
||
|
0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
|
||
|
0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
|
||
|
0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
|
||
|
0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
|
||
|
0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
|
||
|
0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
|
||
|
0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
|
||
|
0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
|
||
|
0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} },
|
||
|
{ "windows-1254", "CP 1254", 2254,
|
||
|
{ 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
|
||
|
0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
||
|
0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
|
||
|
0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
|
||
|
0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
|
||
|
0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
|
||
|
0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
|
||
|
0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
|
||
|
0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
|
||
|
0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
|
||
|
0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
|
||
|
0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
|
||
|
0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
|
||
|
0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
|
||
|
0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} },
|
||
|
{ "windows-1255", "CP 1255", 2255,
|
||
|
{ 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
|
||
|
0x02C6, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
||
|
0x02DC, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7,
|
||
|
0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
|
||
|
0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
|
||
|
0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
|
||
|
0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7,
|
||
|
0x05B8, 0x05B9, 0xFFFD, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,
|
||
|
0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3,
|
||
|
0x05F4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
|
||
|
0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
|
||
|
0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
|
||
|
0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD} },
|
||
|
{ "windows-1256", "CP 1256", 2256,
|
||
|
{ 0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
|
||
|
0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688,
|
||
|
0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
||
|
0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA,
|
||
|
0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
|
||
|
0x00A8, 0x00A9, 0x06BE, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
|
||
|
0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
|
||
|
0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F,
|
||
|
0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
|
||
|
0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
|
||
|
0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7,
|
||
|
0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643,
|
||
|
0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7,
|
||
|
0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A, 0x00EE, 0x00EF,
|
||
|
0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7,
|
||
|
0x0651, 0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2} },
|
||
|
{ "windows-1257", "CP 1257", 2257,
|
||
|
{ 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021,
|
||
|
0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0x00A8, 0x02C7, 0x00B8,
|
||
|
0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
||
|
0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0x00AF, 0x02DB, 0xFFFD,
|
||
|
0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0xFFFD, 0x00A6, 0x00A7,
|
||
|
0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
|
||
|
0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
|
||
|
0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
|
||
|
0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
|
||
|
0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
|
||
|
0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
|
||
|
0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
|
||
|
0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
|
||
|
0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
|
||
|
0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
|
||
|
0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9} },
|
||
|
{ "windows-1258", "CP 1258", 2258,
|
||
|
{ 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
|
||
|
0x02C6, 0x2030, 0xFFFD, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
||
|
0x02DC, 0x2122, 0xFFFD, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
|
||
|
0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
|
||
|
0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
|
||
|
0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
|
||
|
0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
|
||
|
0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
|
||
|
0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0300, 0x00CD, 0x00CE, 0x00CF,
|
||
|
0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7,
|
||
|
0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x01AF, 0x0303, 0x00DF,
|
||
|
0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
|
||
|
0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0301, 0x00ED, 0x00EE, 0x00EF,
|
||
|
0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7,
|
||
|
0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF} },
|
||
|
|
||
|
{ "Apple Roman", "Apple Roman", 0,
|
||
|
{ 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
|
||
|
0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
|
||
|
0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
|
||
|
0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
|
||
|
0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
|
||
|
0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
|
||
|
0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
|
||
|
0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
|
||
|
0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
|
||
|
0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
|
||
|
0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
|
||
|
0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
|
||
|
0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
|
||
|
0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
|
||
|
0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
|
||
|
0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7} },
|
||
|
|
||
|
|
||
|
|
||
|
// This one is based on the charmap file
|
||
|
// /usr/share/i18n/charmaps/SAMI-WS2.gz, which is manually adapted
|
||
|
// to this format by Børre Gaup <boerre@subdimension.com>
|
||
|
{ "WINSAMI2", "WS2", 0,
|
||
|
{ 0x20AC, 0xFFFD, 0x010C, 0x0192, 0x010D, 0x01B7, 0x0292, 0x01EE,
|
||
|
0x01EF, 0x0110, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
||
|
0x0111, 0x01E6, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
|
||
|
0x00A0, 0x01E7, 0x01E4, 0x00A3, 0x00A4, 0x01E5, 0x00A6, 0x00A7,
|
||
|
0x00A8, 0x00A9, 0x021E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x021F,
|
||
|
0x00B0, 0x00B1, 0x01E8, 0x01E9, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
|
||
|
0x014A, 0x014B, 0x0166, 0x00BB, 0x0167, 0x00BD, 0x017D, 0x017E,
|
||
|
0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
|
||
|
0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
|
||
|
0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
|
||
|
0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
|
||
|
0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
|
||
|
0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
|
||
|
0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
|
||
|
0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} },
|
||
|
|
||
|
|
||
|
// this one is generated from the charmap file located in /usr/share/i18n/charmaps
|
||
|
// on most Linux distributions. The Thai character set TIS-620 is byte-for-byte equivalent
|
||
|
// to iso8859-11, so we name it 8859-11 here, but recognise the name tis620 too.
|
||
|
|
||
|
// $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; ( cut -c25- < TIS-620 ; cat /tmp/digits ) | awk '/^x[89ABCDEF]/{ print $1, $2 }' | sed -e 's/<U/0x/' -e 's/>//' | sort | uniq -w4 | cut -c5- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/tis-620
|
||
|
{ "TIS-620", "ISO 8859-11", 2259, // Thai character set mib enum taken from tis620 (which is byte by byte equivalent)
|
||
|
{ 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD,
|
||
|
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
||
|
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0xFFFD, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
|
||
|
0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
|
||
|
0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
|
||
|
0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
|
||
|
0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
|
||
|
0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
|
||
|
0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
|
||
|
0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,
|
||
|
0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
|
||
|
0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
|
||
|
0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
|
||
|
0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD } },
|
||
|
|
||
|
/*
|
||
|
Name: hp-roman8 [HP-PCL5,RFC1345,KXS2]
|
||
|
MIBenum: 2004
|
||
|
Source: LaserJet IIP Printer User's Manual,
|
||
|
        HP part no 33471-90901, Hewlett-Packard, June 1989.
|
||
|
Alias: roman8
|
||
|
Alias: r8
|
||
|
Alias: csHPRoman8
|
||
|
*/
|
||
|
{ "Roman8", "HP-Roman8", 2004,
|
||
|
{ 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
||
|
0x00A0, 0x00C0, 0x00C2, 0x00C8, 0x00CA, 0x00CB, 0x00CE, 0x00CF,
|
||
|
0x00B4, 0x02CB, 0x02C6, 0x00A8, 0x02DC, 0x00D9, 0x00DB, 0x20A4,
|
||
|
0x00AF, 0x00DD, 0x00FD, 0x00B0, 0x00C7, 0x00E7, 0x00D1, 0x00F1,
|
||
|
0x00A1, 0x00BF, 0x00A4, 0x00A3, 0x00A5, 0x00A7, 0x0192, 0x00A2,
|
||
|
0x00E2, 0x00EA, 0x00F4, 0x00FB, 0x00E1, 0x00E9, 0x00F3, 0x00FA,
|
||
|
0x00E0, 0x00E8, 0x00F2, 0x00F9, 0x00E4, 0x00EB, 0x00F6, 0x00FC,
|
||
|
0x00C5, 0x00EE, 0x00D8, 0x00C6, 0x00E5, 0x00ED, 0x00F8, 0x00E6,
|
||
|
0x00C4, 0x00EC, 0x00D6, 0x00DC, 0x00C9, 0x00EF, 0x00DF, 0x00D4,
|
||
|
0x00C1, 0x00C3, 0x00E3, 0x00D0, 0x00F0, 0x00CD, 0x00CC, 0x00D3,
|
||
|
0x00D2, 0x00D5, 0x00F5, 0x0160, 0x0161, 0x00DA, 0x0178, 0x00FF,
|
||
|
0x00DE, 0x00FE, 0x00B7, 0x00B5, 0x00B6, 0x00BE, 0x2014, 0x00BC,
|
||
|
0x00BD, 0x00AA, 0x00BA, 0x00AB, 0x25A0, 0x00BB, 0x00B1, 0xFFFD } }
|
||
|
|
||
|
    // if you add more character sets at the end, change LAST_MIB above
|
||
|
};
|
||
|
|
||
|
// Creates a codec for entry number i of the unicodevalues table.
// The reverse (Unicode -> byte) table is not built here; outside of
// Q_WS_QWS builds the per-instance pointer simply starts out as 0.
TQSimpleTextCodec::TQSimpleTextCodec( int i )
    : TQTextCodec(),
      forwardIndex( i )
{
#if !defined(Q_WS_QWS)
    reverseMap = 0;
#endif
}
|
||
|
|
||
|
|
||
|
// Releases the reverse map. In Q_WS_QWS builds the map is only freed
// (and reverseMap/reverseOwner reset) when this instance is recorded as
// its current owner; otherwise the instance's own map is deleted.
TQSimpleTextCodec::~TQSimpleTextCodec()
{
#if defined(Q_WS_QWS)
    if ( reverseOwner != this )
        return;
    delete reverseMap;
    reverseMap = 0;
    reverseOwner = 0;
#else
    delete reverseMap;
#endif
}
|
||
|
|
||
|
void TQSimpleTextCodec::buildReverseMap()
|
||
|
{
|
||
|
#ifdef Q_WS_QWS
|
||
|
if ( reverseOwner != this ) {
|
||
|
int m = 0;
|
||
|
int i = 0;
|
||
|
while( i < 128 ) {
|
||
|
if ( unicodevalues[forwardIndex].values[i] > m &&
|
||
|
unicodevalues[forwardIndex].values[i] < 0xfffd )
|
||
|
m = unicodevalues[forwardIndex].values[i];
|
||
|
i++;
|
||
|
}
|
||
|
m++;
|
||
|
if ( !reverseMap )
|
||
|
reverseMap = new TQMemArray<unsigned char>( m );
|
||
|
if ( m > (int)(reverseMap->size()) )
|
||
|
reverseMap->resize( m );
|
||
|
for( i = 0; i < 128 && i < m; i++ )
|
||
|
(*reverseMap)[i] = (char)i;
|
||
|
for( ;i < m; i++ )
|
||
|
(*reverseMap)[i] = 0;
|
||
|
for( i=128; i<256; i++ ) {
|
||
|
int u = unicodevalues[forwardIndex].values[i-128];
|
||
|
if ( u < m )
|
||
|
(*reverseMap)[u] = (char)(unsigned char)(i);
|
||
|
}
|
||
|
reverseOwner = this;
|
||
|
}
|
||
|
#else
|
||
|
if ( !reverseMap ) {
|
||
|
TQMemArray<unsigned char> **map = &((TQSimpleTextCodec *)this)->reverseMap;
|
||
|
int m = 0;
|
||
|
int i = 0;
|
||
|
while( i < 128 ) {
|
||
|
if ( unicodevalues[forwardIndex].values[i] > m &&
|
||
|
unicodevalues[forwardIndex].values[i] < 0xfffd )
|
||
|
m = unicodevalues[forwardIndex].values[i];
|
||
|
i++;
|
||
|
}
|
||
|
m++;
|
||
|
*map = new TQMemArray<unsigned char>( m );
|
||
|
for( i = 0; i < 128 && i < m; i++ )
|
||
|
(**map)[i] = (char)i;
|
||
|
for( ;i < m; i++ )
|
||
|
(**map)[i] = 0;
|
||
|
for( i=128; i<256; i++ ) {
|
||
|
int u = unicodevalues[forwardIndex].values[i-128];
|
||
|
if ( u < m )
|
||
|
(**map)[u] = (char)(unsigned char)(i);
|
||
|
}
|
||
|
}
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
// Converts up to len bytes of chars to Unicode. Conversion stops at the
// first NUL byte. Bytes below 128 are copied unchanged; bytes 128..255
// are looked up in this codec's row of unicodevalues.
// Returns a null string when chars is 0 or len is not positive.
TQString TQSimpleTextCodec::toUnicode(const char* chars, int len) const
{
    if ( chars == 0 || len <= 0 )
        return TQString::null;

    const unsigned char *bytes = (const unsigned char *)chars;

    // Number of bytes before the first NUL (at most len).
    int count = 0;
    while ( count < len && bytes[count] != '\0' )
        ++count;
    len = count;

    TQString result;
    result.setUnicode(0, len);
    TQChar *dest = (TQChar*)result.unicode(); // const_cast

    for ( int k = 0; k < len; ++k ) {
        if ( bytes[k] > 127 )
            dest[k] = unicodevalues[forwardIndex].values[bytes[k]-128];
        else
            dest[k] = bytes[k];
    }
    return result;
}
|
||
|
|
||
|
|
||
|
// Encodes the first len characters of uc (all of it if len is negative
// or too large; len is updated to the count actually used) into a
// NUL-terminated byte string. Characters without a mapping - including
// U+0000 - are written as '?'.
TQCString TQSimpleTextCodec::fromUnicode(const TQString& uc, int& len ) const
{
    // Make sure the reverse table holds this codec's data.
#ifdef Q_WS_QWS
    if ( this != reverseOwner )
#else
    if ( !reverseMap )
#endif
        ((TQSimpleTextCodec *)this)->buildReverseMap();

    if ( len <0 || len > (int)uc.length() )
        len = uc.length();
    TQCString encoded( len+1 );

    const TQChar *src = uc.unicode();
    unsigned char *dst = (unsigned char *)encoded.data();
    unsigned char *table = reverseMap->data();
    const int tableSize = (int) reverseMap->size();

    for ( int remaining = len; remaining--; ++src, ++dst ) {
        const int code = src->unicode();
        unsigned char byte;
        if ( code < 128 )
            byte = code;
        else if ( code < tableSize )
            byte = table[code];
        else
            byte = '?';
        // a 0 entry means "no mapping"
        if ( byte == 0 )
            byte = '?';
        *dst = byte;
    }
    encoded[len] = 0;
    return encoded;
}
|
||
|
|
||
|
void TQSimpleTextCodec::fromUnicode( const TQChar *in, unsigned short *out, int length ) const
|
||
|
{
|
||
|
#ifdef Q_WS_QWS
|
||
|
if ( this != reverseOwner )
|
||
|
#else
|
||
|
if ( !reverseMap )
|
||
|
#endif
|
||
|
((TQSimpleTextCodec *)this)->buildReverseMap();
|
||
|
|
||
|
unsigned char* rmp = reverseMap->data();
|
||
|
int rmsize = (int) reverseMap->size();
|
||
|
while ( length-- ) {
|
||
|
unsigned short u = in->unicode();
|
||
|
*out = u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : 0 );
|
||
|
++in;
|
||
|
++out;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
unsigned short TQSimpleTextCodec::characterFromUnicode(const TQString &str, int pos) const
|
||
|
{
|
||
|
#ifdef Q_WS_QWS
|
||
|
if ( this != reverseOwner )
|
||
|
#else
|
||
|
if ( !reverseMap )
|
||
|
#endif
|
||
|
((TQSimpleTextCodec *)this)->buildReverseMap();
|
||
|
|
||
|
unsigned short u = str[pos].unicode();
|
||
|
unsigned char* rmp = reverseMap->data();
|
||
|
int rmsize = (int) reverseMap->size();
|
||
|
return u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : 0 );
|
||
|
}
|
||
|
|
||
|
// Returns TRUE if ch is below 128 or has a non-zero entry in the
// reverse table; FALSE otherwise.
bool TQSimpleTextCodec::canEncode( TQChar ch ) const
{
    // Make sure the reverse table holds this codec's data.
#ifdef Q_WS_QWS
    if ( this != reverseOwner )
#else
    if ( !reverseMap )
#endif
        ((TQSimpleTextCodec *)this)->buildReverseMap();

    const unsigned short code = ch.unicode();
    unsigned char *table = reverseMap->data();
    const int tableSize = (int) reverseMap->size();

    if ( code < 128 )
        return TRUE;
    if ( code < tableSize )
        return table[code] != 0;
    return FALSE;
}
|
||
|
|
||
|
const char* TQSimpleTextCodec::name() const
|
||
|
{
|
||
|
return unicodevalues[forwardIndex].cs;
|
||
|
}
|
||
|
|
||
|
const char* TQSimpleTextCodec::mimeName() const
|
||
|
{
|
||
|
return unicodevalues[forwardIndex].mime;
|
||
|
}
|
||
|
|
||
|
|
||
|
int TQSimpleTextCodec::mibEnum() const
|
||
|
{
|
||
|
return unicodevalues[forwardIndex].mib;
|
||
|
}
|
||
|
|
||
|
int TQSimpleTextCodec::heuristicNameMatch(const char* hint) const
|
||
|
{
|
||
|
if ( qstricmp( hint, mimeName() ) == 0 )
|
||
|
return 10000; // return a large value
|
||
|
if ( hint[0]=='k' ) {
|
||
|
TQCString lhint = TQCString(hint).lower();
|
||
|
// Help people with messy fonts
|
||
|
if ( lhint == "koi8-1" )
|
||
|
return TQTextCodec::heuristicNameMatch("koi8-r")-1;
|
||
|
if ( lhint == "koi8-ru" )
|
||
|
return TQTextCodec::heuristicNameMatch("koi8-r")-1;
|
||
|
} else if ( hint[0] == 't' && mibEnum() == 2259 /* iso8859-11 */ ) {
|
||
|
// 8859-11 and tis620 are byte by byte equivalent
|
||
|
int i = simpleHeuristicNameMatch("tis620-0", hint);
|
||
|
if( !i )
|
||
|
i = simpleHeuristicNameMatch("tis-620", hint);
|
||
|
if( i ) return i;
|
||
|
} else if ( mibEnum() == 82 /* ISO 8859-6 */ ) {
|
||
|
int i = simpleHeuristicNameMatch("ISO 8859-6-I", hint);
|
||
|
if ( i )
|
||
|
return i;
|
||
|
}
|
||
|
return TQTextCodec::heuristicNameMatch(hint);
|
||
|
}
|
||
|
|
||
|
int TQSimpleTextCodec::heuristicContentMatch(const char* chars, int len) const
|
||
|
{
|
||
|
if ( len<1 || !chars )
|
||
|
return -1;
|
||
|
int i = 0;
|
||
|
const uchar * c = (const unsigned char *)chars;
|
||
|
int r = 0;
|
||
|
while( i<len && c && *c ) {
|
||
|
if ( *c >= 128 ) {
|
||
|
if ( unicodevalues[forwardIndex].values[(*c)-128] == 0xfffd )
|
||
|
return -1;
|
||
|
}
|
||
|
if ( (*c >= ' ' && *c < 127) ||
|
||
|
*c == '\n' || *c == '\t' || *c == '\r' )
|
||
|
r++;
|
||
|
i++;
|
||
|
c++;
|
||
|
}
|
||
|
if ( mibEnum()==4 )
|
||
|
r+=1;
|
||
|
return r;
|
||
|
}
|
||
|
|
||
|
#endif
|
||
|
|
||
|
class TQLatin1Codec : public TQTextCodec
|
||
|
{
|
||
|
public:
|
||
|
#if !defined(Q_NO_USING_KEYWORD)
|
||
|
using TQTextCodec::fromUnicode;
|
||
|
using TQTextCodec::toUnicode;
|
||
|
#endif
|
||
|
TQString toUnicode(const char* chars, int len) const;
|
||
|
TQCString fromUnicode(const TQString& uc, int& lenInOut ) const;
|
||
|
void fromUnicode( const TQChar *in, unsigned short *out, int length ) const;
|
||
|
unsigned short characterFromUnicode(const TQString &str, int pos) const;
|
||
|
|
||
|
const char* name() const;
|
||
|
const char* mimeName() const;
|
||
|
int mibEnum() const;
|
||
|
|
||
|
int heuristicContentMatch(const char* chars, int len) const;
|
||
|
|
||
|
private:
|
||
|
int forwardIndex;
|
||
|
};
|
||
|
|
||
|
|
||
|
// Converts chars via TQString::fromLatin1(chars, len); a null pointer
// yields a null string.
TQString TQLatin1Codec::toUnicode(const char* chars, int len) const
{
    if ( chars != 0 )
        return TQString::fromLatin1(chars, len);

    return TQString::null;
}
|
||
|
|
||
|
|
||
|
// Encodes the first len characters of uc (all of it if len is negative
// or too large; len is updated accordingly) as a NUL-terminated byte
// string. Characters with a non-zero row() become '?'.
TQCString TQLatin1Codec::fromUnicode(const TQString& uc, int& len ) const
{
    if ( len <0 || len > (int)uc.length() )
        len = uc.length();

    TQCString encoded( len+1 );
    char *dst = encoded.data();
    const TQChar *src = uc.unicode();
    for ( int k = 0; k < len; ++k, ++src ) {
        if ( src->row() )
            dst[k] = '?';
        else
            dst[k] = src->cell();
    }
    encoded[len] = 0;
    return encoded;
}
|
||
|
|
||
|
void TQLatin1Codec::fromUnicode( const TQChar *in, unsigned short *out, int length ) const
|
||
|
{
|
||
|
while ( length-- ) {
|
||
|
*out = in->row() ? 0 : in->cell();
|
||
|
++in;
|
||
|
++out;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
unsigned short TQLatin1Codec::characterFromUnicode(const TQString &str, int pos) const
|
||
|
{
|
||
|
const TQChar *ch = str.unicode() + pos;
|
||
|
if (ch->row())
|
||
|
return 0;
|
||
|
return (unsigned short) ch->cell();
|
||
|
}
|
||
|
|
||
|
|
||
|
const char* TQLatin1Codec::name() const
|
||
|
{
|
||
|
return "ISO 8859-1";
|
||
|
}
|
||
|
|
||
|
const char* TQLatin1Codec::mimeName() const
|
||
|
{
|
||
|
return "ISO-8859-1";
|
||
|
}
|
||
|
|
||
|
|
||
|
int TQLatin1Codec::mibEnum() const
|
||
|
{
|
||
|
return 4;
|
||
|
}
|
||
|
|
||
|
int TQLatin1Codec::heuristicContentMatch(const char* chars, int len) const
|
||
|
{
|
||
|
if ( len<1 || !chars )
|
||
|
return -1;
|
||
|
int i = 0;
|
||
|
const uchar * c = (const unsigned char *)chars;
|
||
|
int r = 0;
|
||
|
while( i<len && c && *c ) {
|
||
|
if ( *c >= 0x80 && *c < 0xa0 )
|
||
|
return -1;
|
||
|
if ( (*c >= ' ' && *c < 127) ||
|
||
|
*c == '\n' || *c == '\t' || *c == '\r' )
|
||
|
r++;
|
||
|
i++;
|
||
|
c++;
|
||
|
}
|
||
|
if ( this == (const TQTextCodec *)codecForLocale() )
|
||
|
r += 5;
|
||
|
return r;
|
||
|
}
|
||
|
|
||
|
class TQLatin15Codec: public TQLatin1Codec
|
||
|
{
|
||
|
public:
|
||
|
TQString toUnicode(const char* chars, int len) const;
|
||
|
#if !defined(Q_NO_USING_KEYWORD)
|
||
|
using TQLatin1Codec::fromUnicode;
|
||
|
#endif
|
||
|
TQCString fromUnicode(const TQString& uc, int& lenInOut ) const;
|
||
|
void fromUnicode( const TQChar *in, unsigned short *out, int length ) const;
|
||
|
unsigned short characterFromUnicode(const TQString &str, int pos) const;
|
||
|
|
||
|
const char* name() const;
|
||
|
const char* mimeName() const;
|
||
|
int mibEnum() const;
|
||
|
|
||
|
private:
|
||
|
int forwardIndex;
|
||
|
};
|
||
|
|
||
|
|
||
|
// Converts chars to Unicode: the bytes are first converted as Latin-1,
// then the eight code points that differ in ISO 8859-15 are patched in
// place. A null pointer yields a null string.
TQString TQLatin15Codec::toUnicode(const char* chars, int len) const
{
    if ( chars == 0 )
        return TQString::null;

    TQString str = TQString::fromLatin1(chars, len);
    TQChar *uc = (TQChar *)str.unicode();

    // Walk the characters the conversion actually produced, not the
    // requested len: len may be negative, and the resulting string may
    // be shorter than len, in which case counting down from len would
    // run past the end of the string's buffer.
    int remaining = (int)str.length();
    while( remaining-- ) {
        switch( uc->unicode() ) {
        case 0xa4:
            *uc = 0x20ac;
            break;
        case 0xa6:
            *uc = 0x0160;
            break;
        case 0xa8:
            *uc = 0x0161;
            break;
        case 0xb4:
            *uc = 0x017d;
            break;
        case 0xb8:
            *uc = 0x017e;
            break;
        case 0xbc:
            *uc = 0x0152;
            break;
        case 0xbd:
            *uc = 0x0153;
            break;
        case 0xbe:
            *uc = 0x0178;
            break;
        default:
            break;
        }
        uc++;
    }
    return str;
}
|
||
|
|
||
|
static inline unsigned char
|
||
|
latin15CharFromUnicode( unsigned short uc, bool replacement = TRUE )
|
||
|
{
|
||
|
uchar c;
|
||
|
if ( uc < 0x0100 ) {
|
||
|
if ( uc > 0xa3 && uc < 0xbf ) {
|
||
|
switch( uc ) {
|
||
|
case 0xa4:
|
||
|
case 0xa6:
|
||
|
case 0xa8:
|
||
|
case 0xb4:
|
||
|
case 0xb8:
|
||
|
case 0xbc:
|
||
|
case 0xbd:
|
||
|
case 0xbe:
|
||
|
c = replacement ? '?' : 0;
|
||
|
break;
|
||
|
default:
|
||
|
c = (unsigned char) uc;
|
||
|
break;
|
||
|
}
|
||
|
} else {
|
||
|
c = (unsigned char) uc;
|
||
|
}
|
||
|
} else {
|
||
|
if ( uc == 0x20ac )
|
||
|
c = 0xa4;
|
||
|
else if ( (uc & 0xff00) == 0x0100 ) {
|
||
|
switch( uc ) {
|
||
|
case 0x0160:
|
||
|
c = 0xa6;
|
||
|
break;
|
||
|
case 0x0161:
|
||
|
c = 0xa8;
|
||
|
break;
|
||
|
case 0x017d:
|
||
|
c = 0xb4;
|
||
|
break;
|
||
|
case 0x017e:
|
||
|
c = 0xb8;
|
||
|
break;
|
||
|
case 0x0152:
|
||
|
c = 0xbc;
|
||
|
break;
|
||
|
case 0x0153:
|
||
|
c = 0xbd;
|
||
|
break;
|
||
|
case 0x0178:
|
||
|
c = 0xbe;
|
||
|
break;
|
||
|
default:
|
||
|
c = replacement ? '?' : 0;
|
||
|
}
|
||
|
} else {
|
||
|
c = replacement ? '?' : 0;
|
||
|
}
|
||
|
}
|
||
|
return c;
|
||
|
}
|
||
|
|
||
|
|
||
|
void TQLatin15Codec::fromUnicode( const TQChar *in, unsigned short *out, int length ) const
|
||
|
{
|
||
|
while ( length-- ) {
|
||
|
*out = latin15CharFromUnicode( in->unicode(), FALSE );
|
||
|
++in;
|
||
|
++out;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
// Encodes the first len characters of uc (all of it if len is negative
// or too large; len is updated accordingly) as a NUL-terminated
// ISO 8859-15 byte string, using latin15CharFromUnicode() with its
// default replacement behaviour.
TQCString TQLatin15Codec::fromUnicode(const TQString& uc, int& len ) const
{
    if ( len <0 || len > (int)uc.length() )
        len = uc.length();

    TQCString encoded( len+1 );
    char *dst = encoded.data();
    const TQChar *src = uc.unicode();
    for ( int k = 0; k < len; ++k, ++src )
        dst[k] = latin15CharFromUnicode( src->unicode() );
    encoded[len] = 0;
    return encoded;
}
|
||
|
|
||
|
unsigned short TQLatin15Codec::characterFromUnicode(const TQString &str, int pos) const
|
||
|
{
|
||
|
return latin15CharFromUnicode( str.unicode()[pos].unicode(), FALSE );
|
||
|
}
|
||
|
|
||
|
|
||
|
const char* TQLatin15Codec::name() const
|
||
|
{
|
||
|
return "ISO 8859-15";
|
||
|
}
|
||
|
|
||
|
const char* TQLatin15Codec::mimeName() const
|
||
|
{
|
||
|
return "ISO-8859-15";
|
||
|
}
|
||
|
|
||
|
|
||
|
int TQLatin15Codec::mibEnum() const
|
||
|
{
|
||
|
return 111;
|
||
|
}
|
||
|
|
||
|
// Looks up a codec by name. If nothing is found and the name contains
// an '@', the lookup is retried with only the part before the '@'.
// Returns 0 if neither attempt finds a codec.
static TQTextCodec *checkForCodec(const char *name) {
    TQTextCodec *found = TQTextCodec::codecForName(name);
    if (found)
        return found;

    const char *modifier = strchr(name, '@');
    if (!modifier)
        return found;

    // the size passed here includes room for the terminating NUL
    TQCString stripped(name, modifier - name + 1);
    return TQTextCodec::codecForName(stripped.data());
}
|
||
|
|
||
|
/* the next two functions are implicitly thread safe,
|
||
|
as they are only called by setup() which uses a mutex.
|
||
|
*/
|
||
|
static void setupLocaleMapper()
|
||
|
{
|
||
|
#ifdef Q_OS_WIN32
|
||
|
localeMapper = TQTextCodec::codecForName( "System" );
|
||
|
#else
|
||
|
|
||
|
#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF) && !defined(Q_OS_MAC)
|
||
|
char *charset = nl_langinfo (CODESET);
|
||
|
if ( charset )
|
||
|
localeMapper = TQTextCodec::codecForName( charset );
|
||
|
#endif
|
||
|
|
||
|
if ( !localeMapper ) {
|
||
|
// Very poorly defined and followed standards causes lots of code
|
||
|
// to try to get all the cases...
|
||
|
|
||
|
// Try to determine locale codeset from locale name assigned to
|
||
|
// LC_CTYPE category.
|
||
|
|
||
|
// First part is getting that locale name. First try setlocale() which
|
||
|
// definitely knows it, but since we cannot fully trust it, get ready
|
||
|
// to fall back to environment variables.
|
||
|
char * ctype = qstrdup( setlocale( LC_CTYPE, 0 ) );
|
||
|
|
||
|
// Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG
|
||
|
// environment variables.
|
||
|
char * lang = qstrdup( getenv("LC_ALL") );
|
||
|
if ( !lang || lang[0] == 0 || strcmp( lang, "C" ) == 0 ) {
|
||
|
if ( lang ) delete [] lang;
|
||
|
lang = qstrdup( getenv("LC_CTYPE") );
|
||
|
}
|
||
|
if ( !lang || lang[0] == 0 || strcmp( lang, "C" ) == 0 ) {
|
||
|
if ( lang ) delete [] lang;
|
||
|
lang = qstrdup( getenv("LANG") );
|
||
|
}
|
||
|
|
||
|
// Now try these in order:
|
||
|
// 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
|
||
|
// 2. CODESET from lang if it contains a .CODESET part
|
||
|
// 3. ctype (maybe the locale is named "ISO-8859-1" or something)
|
||
|
// 4. locale (ditto)
|
||
|
// 5. check for "@euro"
|
||
|
// 6. guess locale from ctype unless ctype is "C"
|
||
|
// 7. guess locale from lang
|
||
|
|
||
|
// 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
|
||
|
char * codeset = ctype ? strchr( ctype, '.' ) : 0;
|
||
|
if ( codeset && *codeset == '.' )
|
||
|
localeMapper = checkForCodec( codeset + 1 );
|
||
|
|
||
|
// 2. CODESET from lang if it contains a .CODESET part
|
||
|
codeset = lang ? strchr( lang, '.' ) : 0;
|
||
|
if ( !localeMapper && codeset && *codeset == '.' )
|
||
|
localeMapper = checkForCodec( codeset + 1 );
|
||
|
|
||
|
// 3. ctype (maybe the locale is named "ISO-8859-1" or something)
|
||
|
if ( !localeMapper && ctype && *ctype != 0 && strcmp (ctype, "C") != 0 )
|
||
|
localeMapper = checkForCodec( ctype );
|
||
|
|
||
|
// 4. locale (ditto)
|
||
|
if ( !localeMapper && lang && *lang != 0 )
|
||
|
localeMapper = checkForCodec( lang );
|
||
|
|
||
|
// 5. "@euro"
|
||
|
if ( !localeMapper && ctype && strstr( ctype, "@euro" ) || lang && strstr( lang, "@euro" ) )
|
||
|
localeMapper = TQTextCodec::codecForName( "ISO 8859-15" );
|
||
|
|
||
|
// 6. guess locale from ctype unless ctype is "C"
|
||
|
// 7. guess locale from lang
|
||
|
char * try_by_name = ctype;
|
||
|
if ( ctype && *ctype != 0 && strcmp (ctype, "C") != 0 )
|
||
|
try_by_name = lang;
|
||
|
|
||
|
// Now do the guessing.
|
||
|
if ( lang && *lang && !localeMapper && try_by_name && *try_by_name ) {
|
||
|
if ( try_locale_list( iso8859_15locales, lang ) )
|
||
|
localeMapper = TQTextCodec::codecForName( "ISO 8859-15" );
|
||
|
else if ( try_locale_list( iso8859_2locales, lang ) )
|
||
|
localeMapper = TQTextCodec::codecForName( "ISO 8859-2" );
|
||
|
else if ( try_locale_list( iso8859_3locales, lang ) )
|
||
|
localeMapper = TQTextCodec::codecForName( "ISO 8859-3" );
|
||
|
else if ( try_locale_list( iso8859_4locales, lang ) )
|
||
|
localeMapper = TQTextCodec::codecForName( "ISO 8859-4" );
|
||
|
else if ( try_locale_list( iso8859_5locales, lang ) )
|
||
|
localeMapper = TQTextCodec::codecForName( "ISO 8859-5" );
|
||
|
else if ( try_locale_list( iso8859_6locales, lang ) )
|
||
|
localeMapper = TQTextCodec::codecForName( "ISO 8859-6" );
|
||
|
else if ( try_locale_list( iso8859_7locales, lang ) )
|
||
|
localeMapper = TQTextCodec::codecForName( "ISO 8859-7" );
|
||
|
else if ( try_locale_list( iso8859_8locales, lang ) )
|
||
|
localeMapper = TQTextCodec::codecForName( "ISO 8859-8-I" );
|
||
|
else if ( try_locale_list( iso8859_9locales, lang ) )
|
||
|
localeMapper = TQTextCodec::codecForName( "ISO 8859-9" );
|
||
|
else if ( try_locale_list( iso8859_13locales, lang ) )
|
||
|
localeMapper = TQTextCodec::codecForName( "ISO 8859-13" );
|
||
|
else if ( try_locale_list( tis_620locales, lang ) )
|
||
|
localeMapper = TQTextCodec::codecForName( "ISO 8859-11" );
|
||
|
else if ( try_locale_list( koi8_ulocales, lang ) )
|
||
|
localeMapper = TQTextCodec::codecForName( "KOI8-U" );
|
||
|
else if ( try_locale_list( cp_1251locales, lang ) )
|
||
|
localeMapper = TQTextCodec::codecForName( "CP 1251" );
|
||
|
else if ( try_locale_list( pt_154locales, lang ) )
|
||
|
localeMapper = TQTextCodec::codecForName( "PT 154" );
|
||
|
else if ( try_locale_list( probably_koi8_rlocales, lang ) )
|
||
|
localeMapper = ru_RU_hack( lang );
|
||
|
}
|
||
|
|
||
|
delete [] ctype;
|
||
|
delete [] lang;
|
||
|
}
|
||
|
if ( localeMapper && localeMapper->mibEnum() == 11 )
|
||
|
localeMapper = TQTextCodec::codecForName( "ISO 8859-8-I" );
|
||
|
|
||
|
// If everything failed, we default to 8859-1
|
||
|
// We could perhaps default to 8859-15.
|
||
|
if ( !localeMapper )
|
||
|
localeMapper = TQTextCodec::codecForName( "ISO 8859-1" );
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
|
||
|
static void realSetup()
|
||
|
{
|
||
|
#if defined(QT_CHECK_STATE)
|
||
|
if ( destroying_is_ok )
|
||
|
qWarning( "TQTextCodec: creating new codec during codec cleanup!" );
|
||
|
#endif
|
||
|
all = new TQValueList<TQTextCodec*>;
|
||
|
|
||
|
(void)new TQLatin1Codec;
|
||
|
(void)new TQLatin15Codec;
|
||
|
(void)new TQUtf8Codec;
|
||
|
(void)new TQUtf16Codec;
|
||
|
|
||
|
#ifndef QT_NO_CODECS
|
||
|
int i = 0;
|
||
|
do {
|
||
|
(void)new TQSimpleTextCodec( i );
|
||
|
} while( unicodevalues[i++].mib != LAST_MIB );
|
||
|
|
||
|
(void)new TQTsciiCodec;
|
||
|
|
||
|
for (i = 0; i < 9; ++i) {
|
||
|
(void)new TQIsciiCodec(i);
|
||
|
}
|
||
|
#endif // QT_NO_CODECS
|
||
|
#ifndef QT_NO_CODEC_HEBREW
|
||
|
(void)new TQHebrewCodec;
|
||
|
#endif
|
||
|
#ifndef QT_NO_BIG_CODECS
|
||
|
(void)new TQBig5Codec;
|
||
|
(void)new TQBig5hkscsCodec;
|
||
|
(void)new TQEucJpCodec;
|
||
|
(void)new TQEucKrCodec;
|
||
|
(void)new TQGb2312Codec;
|
||
|
(void)new TQGbkCodec;
|
||
|
(void)new TQGb18030Codec;
|
||
|
(void)new TQJisCodec;
|
||
|
(void)new TQSjisCodec;
|
||
|
#endif // QT_NO_BIG_CODECS
|
||
|
|
||
|
#ifdef Q_OS_WIN32
|
||
|
(void) new TQWindowsLocalCodec;
|
||
|
#endif // Q_OS_WIN32
|
||
|
|
||
|
if ( !localeMapper )
|
||
|
setupLocaleMapper();
|
||
|
}
|
||
|
|
||
|
void TQTextCodec::fromUnicodeInternal( const TQChar *in, unsigned short *out, int length )
|
||
|
{
|
||
|
switch( mibEnum() ) {
|
||
|
#ifndef QT_NO_CODECS
|
||
|
case 2084:
|
||
|
case 2088:
|
||
|
case 5:
|
||
|
case 6:
|
||
|
case 7:
|
||
|
case 8:
|
||
|
case 82:
|
||
|
case 10:
|
||
|
case 85:
|
||
|
case 12:
|
||
|
case 13:
|
||
|
case 109:
|
||
|
case 110:
|
||
|
case 2004:
|
||
|
case 2009:
|
||
|
case 2086:
|
||
|
case 2250:
|
||
|
case 2251:
|
||
|
case 2252:
|
||
|
case 2253:
|
||
|
case 2254:
|
||
|
case 2255:
|
||
|
case 2256:
|
||
|
case 2257:
|
||
|
case 2258:
|
||
|
case 2259:
|
||
|
((TQSimpleTextCodec *)this)->fromUnicode( in, out, length );
|
||
|
break;
|
||
|
|
||
|
#if !defined(QT_NO_BIG_CODECS) && defined(Q_WS_X11)
|
||
|
// the TQFont*Codecs are only used on X11
|
||
|
|
||
|
case 15:
|
||
|
((TQFontJis0201Codec *) this)->fromUnicode( in, out, length );
|
||
|
break;
|
||
|
|
||
|
case 63:
|
||
|
((TQFontJis0208Codec *) this)->fromUnicode( in, out, length );
|
||
|
break;
|
||
|
|
||
|
case 36:
|
||
|
((TQFontKsc5601Codec *) this)->fromUnicode( in, out, length );
|
||
|
break;
|
||
|
|
||
|
case 57:
|
||
|
((TQFontGb2312Codec *) this)->fromUnicode( in, out, length );
|
||
|
break;
|
||
|
|
||
|
case -113:
|
||
|
((TQFontGbkCodec *) this)->fromUnicode( in, out, length );
|
||
|
break;
|
||
|
|
||
|
case -114:
|
||
|
((TQFontGb18030_0Codec *) this)->fromUnicode( in, out, length );
|
||
|
break;
|
||
|
|
||
|
case -2026:
|
||
|
((TQFontBig5Codec *) this)->fromUnicode( in, out, length );
|
||
|
break;
|
||
|
|
||
|
case -2101:
|
||
|
((TQFontBig5hkscsCodec *) this)->fromUnicode( in, out, length );
|
||
|
break;
|
||
|
|
||
|
case -4242:
|
||
|
((TQFontLaoCodec *) this)->fromUnicode( in, out, length );
|
||
|
break;
|
||
|
#endif
|
||
|
#endif // QT_NO_CODECS
|
||
|
|
||
|
case 4:
|
||
|
((TQLatin1Codec *) this)->fromUnicode( in, out, length );
|
||
|
break;
|
||
|
|
||
|
case 111:
|
||
|
((TQLatin15Codec *) this)->fromUnicode( in, out, length );
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
{
|
||
|
TQConstString string( in, length );
|
||
|
TQString str = string.string();
|
||
|
for ( int i = 0; i < length; i++ )
|
||
|
out[i] = characterFromUnicode( str, i );
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/*!
|
||
|
\fn TQTextCodec* TQTextCodec::codecForTr()
|
||
|
|
||
|
Returns the codec used by TQObject::tr() on its argument. If this
|
||
|
function returns 0 (the default), tr() assumes Latin-1.
|
||
|
|
||
|
\sa setCodecForTr()
|
||
|
*/
|
||
|
|
||
|
/*!
|
||
|
\fn void TQTextCodec::setCodecForTr(TQTextCodec *c)
|
||
|
\nonreentrant
|
||
|
|
||
|
Sets the codec used by TQObject::tr() on its argument to \a c. If
|
||
|
\a c is 0 (the default), tr() assumes Latin-1.
|
||
|
|
||
|
If the literal quoted text in the program is not in the Latin-1
|
||
|
encoding, this function can be used to set the appropriate
|
||
|
encoding. For example, software developed by Korean programmers
|
||
|
might use eucKR for all the text in the program, in which case the
|
||
|
main() function might look like this:
|
||
|
|
||
|
\code
|
||
|
int main(int argc, char** argv)
|
||
|
{
|
||
|
TQApplication app(argc, argv);
|
||
|
... install any additional codecs ...
|
||
|
TQTextCodec::setCodecForTr( TQTextCodec::codecForName("eucKR") );
|
||
|
...
|
||
|
}
|
||
|
\endcode
|
||
|
|
||
|
Note that this is not the way to select the encoding that the \e
|
||
|
user has chosen. For example, to convert an application containing
|
||
|
literal English strings to Korean, all that is needed is for the
|
||
|
English strings to be passed through tr() and for translation
|
||
|
files to be loaded. For details of internationalization, see the
|
||
|
\link i18n.html TQt internationalization documentation\endlink.
|
||
|
|
||
|
\sa codecForTr(), setCodecForTr(), setCodecForCStrings()
|
||
|
*/
|
||
|
|
||
|
|
||
|
/*!
|
||
|
\fn TQTextCodec* TQTextCodec::codecForCStrings()
|
||
|
|
||
|
Returns the codec used by TQString to convert to and from const
|
||
|
char* and TQCStrings. If this function returns 0 (the default),
|
||
|
TQString assumes Latin-1.
|
||
|
|
||
|
\sa setCodecForCStrings()
|
||
|
*/
|
||
|
|
||
|
/*!
|
||
|
\fn void TQTextCodec::setCodecForCStrings(TQTextCodec *c)
|
||
|
\nonreentrant
|
||
|
|
||
|
Sets the codec used by TQString to convert to and from const char*
|
||
|
and TQCStrings. If \a c is 0 (the default), TQString assumes Latin-1.
|
||
|
|
||
|
\warning Some codecs do not preserve the characters in the ascii
|
||
|
range (0x00 to 0x7f). For example, the Japanese Shift-JIS
|
||
|
    encoding maps the backslash character (0x5c) to the Yen character.
|
||
|
This leads to unexpected results when using the backslash
|
||
|
character to escape characters in strings used in e.g. regular
|
||
|
expressions. Use TQString::fromLatin1() to preserve characters in
|
||
|
the ascii range when needed.
|
||
|
|
||
|
\sa codecForCStrings(), setCodecForTr(), setCodecForCStrings()
|
||
|
*/
|
||
|
|
||
|
|
||
|
// Static codec pointer, initially 0. Presumably the storage behind
// codecForTr()/setCodecForTr() - confirm against the class declaration.
TQTextCodec *TQTextCodec::cftr = 0;
|
||
|
// Static codec pointer, initially 0. Presumably the storage behind
// codecForCStrings()/setCodecForCStrings() - confirm against the class
// declaration.
TQTextCodec *TQTextCodec::cfcs = 0;
|
||
|
|
||
|
|
||
|
#endif // QT_NO_TEXTCODEC
|