You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
3753 lines
126 KiB
3753 lines
126 KiB
14 years ago
|
/****************************************************************************
|
||
|
**
|
||
|
** Copyright (C) 2003-2008 Trolltech ASA. All rights reserved.
|
||
|
**
|
||
|
** This file is part of the kernel module of the Qt GUI Toolkit.
|
||
|
**
|
||
|
** This file may be used under the terms of the GNU General
|
||
|
** Public License versions 2.0 or 3.0 as published by the Free
|
||
|
** Software Foundation and appearing in the files LICENSE.GPL2
|
||
|
** and LICENSE.GPL3 included in the packaging of this file.
|
||
|
** Alternatively you may (at your option) use any later version
|
||
|
** of the GNU General Public License if such license has been
|
||
|
** publicly approved by Trolltech ASA (or its successors, if any)
|
||
|
** and the KDE Free Qt Foundation.
|
||
|
**
|
||
|
** Please review the following information to ensure GNU General
|
||
|
** Public Licensing requirements will be met:
|
||
|
** http://trolltech.com/products/qt/licenses/licensing/opensource/.
|
||
|
** If you are unsure which license is appropriate for your use, please
|
||
|
** review the following information:
|
||
|
** http://trolltech.com/products/qt/licenses/licensing/licensingoverview
|
||
|
** or contact the sales department at sales@trolltech.com.
|
||
|
**
|
||
|
** This file may be used under the terms of the Q Public License as
|
||
|
** defined by Trolltech ASA and appearing in the file LICENSE.QPL
|
||
|
** included in the packaging of this file. Licensees holding valid Qt
|
||
|
** Commercial licenses may use this file in accordance with the Qt
|
||
|
** Commercial License Agreement provided with the Software.
|
||
|
**
|
||
|
** This file is provided "AS IS" with NO WARRANTY OF ANY KIND,
|
||
|
** INCLUDING THE WARRANTIES OF DESIGN, MERCHANTABILITY AND FITNESS FOR
|
||
|
** A PARTICULAR PURPOSE. Trolltech reserves all rights not granted
|
||
|
** herein.
|
||
|
**
|
||
|
**********************************************************************/
|
||
|
|
||
|
// ------------------------------------------------------------------------------------------------------------------
|
||
|
//
|
||
|
// Continuation of middle eastern languages
|
||
|
//
|
||
|
// ------------------------------------------------------------------------------------------------------------------
|
||
|
|
||
|
// #### still missing: identify invalid character combinations
|
||
|
// Shapes a run of Syriac text described by \a item.
//
// When Xft/FreeType support is compiled in and the font's OpenType object
// reports support for Syriac, arabicSyriacOpenTypeShape() is tried first:
//   - if it succeeds, true is returned;
//   - if it fails and sets ot_ok to true, false is returned (no fallback);
//   - if it fails and leaves ot_ok false, the function falls through.
// In every remaining case the result of basic_shape() is returned.
static bool syriac_shape(QShaperItem *item)
{
    Q_ASSERT(item->script == QFont::Syriac);

#ifndef QT_NO_XFTFREETYPE
    QOpenType *openType = item->font->openType();
    if (openType && openType->supportsScript(QFont::Syriac)) {
        // Start from a defined value so the test below never reads an
        // indeterminate bool.
        // NOTE(review): arabicSyriacOpenTypeShape() is defined outside this
        // view; presumably it always writes *ot_ok -- confirm. With 'false'
        // as the default an unreported status falls back to basic_shape().
        bool ot_ok = false;
        if (arabicSyriacOpenTypeShape(openType, item, &ot_ok))
            return true;
        if (ot_ok)
            return false;
        // fall through to the non OT code
    }
#endif
    return basic_shape(item);
}
|
||
|
|
||
|
|
||
|
// Shapes a run of Thaana text described by \a item.
//
// When Xft/FreeType support is compiled in and the font's OpenType object
// reports support for the item's script, the Thaana script is selected, the
// characters are mapped to glyphs with stringToCMap(), glyph attributes are
// set heuristically, and the OpenType object shapes and positions the run.
// Returns false if the character-to-glyph mapping does not report NoError,
// otherwise the result of positionAndAdd(). Without OpenType support the
// result of basic_shape() is returned.
static bool thaana_shape(QShaperItem *item)
{
    Q_ASSERT(item->script == QFont::Thaana);

#ifndef QT_NO_XFTFREETYPE
    QOpenType *openType = item->font->openType();

    if (openType && openType->supportsScript(item->script)) {
        openType->selectScript(QFont::Thaana);
        if (item->font->stringToCMap(item->string->unicode()+item->from, item->length, item->glyphs, item->advances,
                                     &item->num_glyphs, item->flags & QTextEngine::RightToLeft) != QFontEngine::NoError)
            return false;   // same bool literal as syriac_shape() (was the FALSE macro)
        heuristicSetGlyphAttributes(item);
        openType->shape(item);
        return openType->positionAndAdd(item);
    }
#endif
    return basic_shape(item);
}
|
||
|
|
||
|
// --------------------------------------------------------------------------------------------------------------------------------------------
|
||
|
//
|
||
|
// Indic languages
|
||
|
//
|
||
|
// --------------------------------------------------------------------------------------------------------------------------------------------
|
||
|
|
||
|
// Character classes stored in the indicForms table and returned by form().
// Values are consecutive starting at 0; Unknown is an alias for Invalid.
// The numeric values are stored as unsigned char in indicForms, so the
// order of the enumerators must not change.
enum Form {
    Invalid = 0x0,
    Unknown = Invalid,
    Consonant,          // 1
    Nukta,              // 2
    Halant,             // 3
    Matra,              // 4
    VowelMark,          // 5
    StressMark,         // 6
    IndependentVowel,   // 7
    LengthMark,         // 8
    Control,            // 9
    Other               // 10
};
|
||
|
|
||
|
// Form classification for every code point in U+0900..U+0DFF; form() reads
// entry [uc - 0x900]. Each script section covers 0x80 code points. Each
// group of four rows covers 16 code points and is preceded by a comment
// giving the code point of its first entry.
static const unsigned char indicForms[0xe00-0x900] = {
    // Devanagari
    // U+0900
    Invalid, VowelMark, VowelMark, VowelMark,
    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,

    // U+0910
    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    // U+0920
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    // U+0930
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Unknown, Unknown,
    Nukta, Other, Matra, Matra,

    // U+0940
    Matra, Matra, Matra, Matra,
    Matra, Matra, Matra, Matra,
    Matra, Matra, Matra, Matra,
    Matra, Halant, Unknown, Unknown,

    // U+0950
    Other, StressMark, StressMark, StressMark,
    StressMark, Unknown, Unknown, Unknown,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    // U+0960
    IndependentVowel, IndependentVowel, VowelMark, VowelMark,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // U+0970
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Consonant,
    Consonant, Consonant /* ??? */, Consonant, Consonant,

    // Bengali
    // U+0980
    Invalid, VowelMark, VowelMark, VowelMark,
    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, Invalid, Invalid, IndependentVowel,

    // U+0990
    IndependentVowel, Invalid, Invalid, IndependentVowel,
    IndependentVowel, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    // U+09A0
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Invalid, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    // U+09B0
    Consonant, Invalid, Consonant, Invalid,
    Invalid, Invalid, Consonant, Consonant,
    Consonant, Consonant, Unknown, Unknown,
    Nukta, Other, Matra, Matra,

    // U+09C0
    Matra, Matra, Matra, Matra,
    Matra, Invalid, Invalid, Matra,
    Matra, Invalid, Invalid, Matra,
    Matra, Halant, Consonant, Unknown,

    // U+09D0
    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Invalid, VowelMark,
    Invalid, Invalid, Invalid, Invalid,
    Consonant, Consonant, Invalid, Consonant,

    // U+09E0
    IndependentVowel, IndependentVowel, VowelMark, VowelMark,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // U+09F0
    Consonant, Consonant, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // Gurmukhi
    // U+0A00
    Invalid, VowelMark, VowelMark, VowelMark,
    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
    Invalid, Invalid, Invalid, IndependentVowel,

    // U+0A10
    IndependentVowel, Invalid, Invalid, IndependentVowel,
    IndependentVowel, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    // U+0A20
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Invalid, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    // U+0A30
    Consonant, Invalid, Consonant, Consonant,
    Invalid, Consonant, Consonant, Invalid,
    Consonant, Consonant, Unknown, Unknown,
    Nukta, Other, Matra, Matra,

    // U+0A40
    Matra, Matra, Matra, Invalid,
    Invalid, Invalid, Invalid, Matra,
    Matra, Invalid, Invalid, Matra,
    Matra, Halant, Unknown, Unknown,

    // U+0A50
    Invalid, Invalid, Invalid, Invalid,
    Invalid, Unknown, Unknown, Unknown,
    Invalid, Consonant, Consonant, Consonant,
    Consonant, Invalid, Consonant, Invalid,

    // U+0A60
    Other, Other, Invalid, Invalid,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // U+0A70
    StressMark, StressMark, Consonant, Consonant,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // Gujarati
    // U+0A80
    Invalid, VowelMark, VowelMark, VowelMark,
    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, Invalid, IndependentVowel,

    // U+0A90
    IndependentVowel, IndependentVowel, Invalid, IndependentVowel,
    IndependentVowel, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    // U+0AA0
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Invalid, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    // U+0AB0
    Consonant, Invalid, Consonant, Consonant,
    Invalid, Consonant, Consonant, Consonant,
    Consonant, Consonant, Unknown, Unknown,
    Nukta, Other, Matra, Matra,

    // U+0AC0
    Matra, Matra, Matra, Matra,
    Matra, Matra, Invalid, Matra,
    Matra, Matra, Invalid, Matra,
    Matra, Halant, Unknown, Unknown,

    // U+0AD0
    Other, Unknown, Unknown, Unknown,
    Unknown, Unknown, Unknown, Unknown,
    Unknown, Unknown, Unknown, Unknown,
    Unknown, Unknown, Unknown, Unknown,

    // U+0AE0
    IndependentVowel, IndependentVowel, VowelMark, VowelMark,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // U+0AF0
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // Oriya
    // U+0B00
    Invalid, VowelMark, VowelMark, VowelMark,
    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, Invalid, Invalid, IndependentVowel,

    // U+0B10
    IndependentVowel, Invalid, Invalid, IndependentVowel,
    IndependentVowel, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    // U+0B20
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Invalid, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    // U+0B30
    Consonant, Invalid, Consonant, Consonant,
    Invalid, Consonant, Consonant, Consonant,
    Consonant, Consonant, Unknown, Unknown,
    Nukta, Other, Matra, Matra,

    // U+0B40
    Matra, Matra, Matra, Matra,
    Invalid, Invalid, Invalid, Matra,
    Matra, Invalid, Invalid, Matra,
    Matra, Halant, Unknown, Unknown,

    // U+0B50
    Other, Invalid, Invalid, Invalid,
    Invalid, Unknown, LengthMark, LengthMark,
    Invalid, Invalid, Invalid, Invalid,
    Consonant, Consonant, Invalid, Consonant,

    // U+0B60
    IndependentVowel, IndependentVowel, Invalid, Invalid,
    Invalid, Invalid, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // U+0B70
    Other, Consonant, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // Tamil
    // U+0B80
    Invalid, Invalid, VowelMark, Other,
    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
    Invalid, Invalid, IndependentVowel, IndependentVowel,

    // U+0B90
    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
    IndependentVowel, Consonant, Invalid, Invalid,
    Invalid, Consonant, Consonant, Invalid,
    Consonant, Invalid, Consonant, Consonant,

    // U+0BA0
    Invalid, Invalid, Invalid, Consonant,
    Consonant, Invalid, Invalid, Invalid,
    Consonant, Consonant, Consonant, Invalid,
    Invalid, Invalid, Consonant, Consonant,

    // U+0BB0
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Unknown, Unknown,
    Invalid, Invalid, Matra, Matra,

    // U+0BC0
    Matra, Matra, Matra, Invalid,
    Invalid, Invalid, Matra, Matra,
    Matra, Invalid, Matra, Matra,
    Matra, Halant, Invalid, Invalid,

    // U+0BD0
    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Invalid, LengthMark,
    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Invalid, Invalid,

    // U+0BE0
    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // U+0BF0
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // Telugu
    // U+0C00
    Invalid, VowelMark, VowelMark, VowelMark,
    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,

    // U+0C10
    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
    IndependentVowel, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    // U+0C20
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Invalid, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    // U+0C30
    Consonant, Consonant, Consonant, Consonant,
    Invalid, Consonant, Consonant, Consonant,
    Consonant, Consonant, Unknown, Unknown,
    Invalid, Invalid, Matra, Matra,

    // U+0C40
    Matra, Matra, Matra, Matra,
    Matra, Invalid, Matra, Matra,
    Matra, Invalid, Matra, Matra,
    Matra, Halant, Invalid, Invalid,

    // U+0C50
    Invalid, Invalid, Invalid, Invalid,
    Invalid, LengthMark, Matra, Invalid,
    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Invalid, Invalid,

    // U+0C60
    IndependentVowel, IndependentVowel, Invalid, Invalid,
    Invalid, Invalid, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // U+0C70
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // Kannada
    // U+0C80
    Invalid, Invalid, VowelMark, VowelMark,
    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,

    // U+0C90
    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
    IndependentVowel, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    // U+0CA0
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Invalid, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    // U+0CB0
    Consonant, Consonant, Consonant, Consonant,
    Invalid, Consonant, Consonant, Consonant,
    Consonant, Consonant, Unknown, Unknown,
    Nukta, Other, Matra, Matra,

    // U+0CC0
    Matra, Matra, Matra, Matra,
    Matra, Invalid, Matra, Matra,
    Matra, Invalid, Matra, Matra,
    Matra, Halant, Invalid, Invalid,

    // U+0CD0
    Invalid, Invalid, Invalid, Invalid,
    Invalid, LengthMark, LengthMark, Invalid,
    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Consonant, Invalid,

    // U+0CE0
    IndependentVowel, IndependentVowel, VowelMark, VowelMark,
    Invalid, Invalid, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // U+0CF0
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // Malayalam
    // U+0D00
    Invalid, Invalid, VowelMark, VowelMark,
    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,

    // U+0D10
    IndependentVowel, Invalid, IndependentVowel, IndependentVowel,
    IndependentVowel, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    // U+0D20
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Invalid, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    // U+0D30
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Unknown, Unknown,
    Invalid, Invalid, Matra, Matra,

    // U+0D40
    Matra, Matra, Matra, Matra,
    Invalid, Invalid, Matra, Matra,
    Matra, Invalid, Matra, Matra,
    Matra, Halant, Invalid, Invalid,

    // U+0D50
    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Invalid, LengthMark,
    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Invalid, Invalid,

    // U+0D60
    IndependentVowel, IndependentVowel, Invalid, Invalid,
    Invalid, Invalid, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // U+0D70
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,

    // Sinhala
    // U+0D80
    Invalid, Invalid, VowelMark, VowelMark,
    Invalid, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,

    // U+0D90
    IndependentVowel, IndependentVowel, IndependentVowel, IndependentVowel,
    IndependentVowel, IndependentVowel, IndependentVowel, Invalid,
    Invalid, Invalid, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    // U+0DA0
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,

    // U+0DB0
    Consonant, Consonant, Invalid, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Consonant,
    Invalid, Consonant, Invalid, Invalid,

    // U+0DC0
    Consonant, Consonant, Consonant, Consonant,
    Consonant, Consonant, Consonant, Invalid,
    Invalid, Invalid, Halant, Invalid,
    Invalid, Invalid, Invalid, Matra,

    // U+0DD0
    Matra, Matra, Matra, Matra,
    Matra, Invalid, Matra, Invalid,
    Matra, Matra, Matra, Matra,
    Matra, Matra, Matra, Matra,

    // U+0DE0
    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Invalid, Invalid,
    Invalid, Invalid, Invalid, Invalid,

    // U+0DF0
    Invalid, Invalid, Matra, Matra,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
    Other, Other, Other, Other,
};
|
||
|
|
||
|
// Positional classes stored in the indicPosition table and returned by
// indic_position(). Values are consecutive starting at 0. The numeric
// values are stored as unsigned char in indicPosition, so the order of the
// enumerators must not change.
enum Position {
    None,       // 0
    Pre,        // 1
    Above,      // 2
    Below,      // 3
    Post,       // 4
    Split,      // 5
    Base,       // 6
    Reph,       // 7
    Vattu,      // 8
    Inherit     // 9
};
|
||
|
|
||
|
static const unsigned char indicPosition[0xe00-0x900] = {
|
||
|
// Devanagari
|
||
|
None, Above, Above, Post,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
Below, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, Post, Pre,
|
||
|
|
||
|
Post, Below, Below, Below,
|
||
|
Below, Above, Above, Above,
|
||
|
Above, Post, Post, Post,
|
||
|
Post, None, None, None,
|
||
|
|
||
|
None, Above, Below, Above,
|
||
|
Above, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, Below, Below,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
// Bengali
|
||
|
None, Above, Post, Post,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
Below, None, None, Post,
|
||
|
|
||
|
Below, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
Below, None, Post, Pre,
|
||
|
|
||
|
Post, Below, Below, Below,
|
||
|
Below, None, None, Pre,
|
||
|
Pre, None, None, Split,
|
||
|
Split, Below, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, Post,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, Below, Below,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
// Gurmukhi
|
||
|
None, Above, Above, Post,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, Post,
|
||
|
|
||
|
Below, None, None, None,
|
||
|
None, Below, None, None,
|
||
|
None, Below, None, None,
|
||
|
Below, None, Post, Pre,
|
||
|
|
||
|
Post, Below, Below, None,
|
||
|
None, None, None, Above,
|
||
|
Above, None, None, Above,
|
||
|
Above, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
Above, Above, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
// Gujarati
|
||
|
None, Above, Above, Post,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
Below, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, Post, Pre,
|
||
|
|
||
|
Post, Below, Below, Below,
|
||
|
Below, Above, None, Above,
|
||
|
Above, Post, None, Post,
|
||
|
Post, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, Below, Below,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
// Oriya
|
||
|
None, Above, Post, Post,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
Below, None, None, None,
|
||
|
Below, None, None, None,
|
||
|
Below, Below, Below, Post,
|
||
|
|
||
|
Below, None, Below, Below,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, Post, Above,
|
||
|
|
||
|
Post, Below, Below, Below,
|
||
|
None, None, None, Pre,
|
||
|
Split, None, None, Split,
|
||
|
Split, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, Above, Post,
|
||
|
None, None, None, None,
|
||
|
None, None, None, Post,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, Below, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
// Tamil
|
||
|
None, None, Above, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, Post, Post,
|
||
|
|
||
|
Above, Below, Below, None,
|
||
|
None, None, Pre, Pre,
|
||
|
Pre, None, Split, Split,
|
||
|
Split, Halant, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, Post,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
// Telugu
|
||
|
None, Post, Post, Post,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, Below, Below, Below,
|
||
|
Below, Below, Below, Below,
|
||
|
Below, Below, Below, Below,
|
||
|
|
||
|
Below, Below, Below, Below,
|
||
|
Below, Below, Below, Below,
|
||
|
Below, None, Below, Below,
|
||
|
Below, Below, Below, Below,
|
||
|
|
||
|
Below, None, Below, Below,
|
||
|
None, Below, Below, Below,
|
||
|
Below, Below, None, None,
|
||
|
None, None, Post, Above,
|
||
|
|
||
|
Above, Post, Post, Post,
|
||
|
Post, None, Above, Above,
|
||
|
Split, None, Post, Above,
|
||
|
Above, Halant, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, Above, Below, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
// Kannada
|
||
|
None, None, Post, Post,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, Below, Below, Below,
|
||
|
Below, Below, Below, Below,
|
||
|
Below, Below, Below, Below,
|
||
|
|
||
|
Below, Below, Below, Below,
|
||
|
Below, Below, Below, Below,
|
||
|
Below, Below, Below, Below,
|
||
|
Below, Below, Below, Below,
|
||
|
|
||
|
Below, None, Below, Below,
|
||
|
None, Below, Below, Below,
|
||
|
Below, Below, None, None,
|
||
|
None, None, Post, Above,
|
||
|
|
||
|
Split, Post, Post, Post,
|
||
|
Post, None, Above, Split,
|
||
|
Split, None, Split, Split,
|
||
|
Above, Halant, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, Post, Post, None,
|
||
|
None, None, None, None,
|
||
|
None, None, Below, None,
|
||
|
|
||
|
None, None, Below, Below,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
// Malayalam
|
||
|
None, None, Post, Post,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, Post,
|
||
|
|
||
|
Post, None, Below, None,
|
||
|
None, Post, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, Post, Post,
|
||
|
|
||
|
Post, Post, Post, Post,
|
||
|
None, None, Pre, Pre,
|
||
|
Pre, None, Split, Split,
|
||
|
Split, Halant, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, Post,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
// Sinhala
|
||
|
None, None, Post, Post,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, Post,
|
||
|
|
||
|
Post, Post, Above, Above,
|
||
|
Below, None, Below, None,
|
||
|
Post, Pre, Split, Pre,
|
||
|
Split, Split, Split, Post,
|
||
|
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
|
||
|
None, None, Post, Post,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None,
|
||
|
None, None, None, None
|
||
|
};
|
||
|
|
||
|
// Returns the Form class of the UTF-16 code unit \a uc.
//
// Code units in 0x900..0xdff are looked up in indicForms. Outside that
// range, 0x25cc (DOTTED CIRCLE) is classified as Consonant, 0x200c and
// 0x200d (ZERO WIDTH NON-JOINER / ZERO WIDTH JOINER) as Control, and
// everything else as Other.
static inline Form form(unsigned short uc) {
    if (uc < 0x900 || uc > 0xdff) {
        if (uc == 0x25cc)
            return Consonant;
        if (uc == 0x200c || uc == 0x200d)
            return Control;
        return Other;
    }
    // Table index 0 corresponds to code unit 0x900.
    return (Form)indicForms[uc-0x900];
}
|
||
|
|
||
|
// Returns the placement class of a code unit relative to the base consonant.
// Only 0x900..0xdff is covered by the indicPosition table (indexed from
// 0x900); any other code unit yields None.
static inline Position indic_position(unsigned short uc) {
    const bool inTable = (uc >= 0x900 && uc <= 0xdff);
    return inTable ? (Position) indicPosition[uc-0x900] : None;
}
|
||
|
|
||
|
|
||
|
// Per-script capability bits stored in scriptProperties[].
enum IndicScriptProperties {
    HasReph  = 1 << 0,  // a leading Ra + halant is handled as a reph
    HasSplit = 1 << 1   // flagged for scripts listed with split matras
};
|
||
|
|
||
|
// IndicScriptProperties flags for each supported script, indexed by
// (script - QFont::Devanagari).
const uchar scriptProperties[10] = {
    HasReph,            // Devanagari
    HasReph|HasSplit,   // Bengali
    0,                  // Gurmukhi
    HasReph,            // Gujarati
    HasReph|HasSplit,   // Oriya
    HasSplit,           // Tamil
    HasSplit,           // Telugu
    HasSplit|HasReph,   // Kannada
    HasSplit,           // Malayalam
    HasSplit            // Sinhala
};
|
||
|
|
||
|
struct IndicOrdering {
|
||
|
Form form;
|
||
|
Position position;
|
||
|
};
|
||
|
|
||
|
// Post-base ordering for Devanagari (also used for Gujarati, see
// indic_order). Entries are applied top to bottom; the zero-form entry ends
// the list.
static const IndicOrdering devanagari_order [] = {
    { Consonant,  Below },
    { Matra,      Below },
    { VowelMark,  Below },
    { StressMark, Below },
    { Matra,      Above },
    { Matra,      Post },
    { Consonant,  Reph },
    { VowelMark,  Above },
    { StressMark, Above },
    { VowelMark,  Post },
    { (Form)0,    None }    // terminator
};
|
||
|
|
||
|
// Post-base ordering for Bengali (also used for Oriya, see indic_order).
// Entries are applied top to bottom; the zero-form entry ends the list.
static const IndicOrdering bengali_order [] = {
    { Consonant, Below },
    { Matra,     Below },
    { Matra,     Above },
    { Consonant, Reph },
    { VowelMark, Above },
    { Consonant, Post },
    { Matra,     Post },
    { VowelMark, Post },
    { (Form)0,   None }     // terminator
};
|
||
|
|
||
|
// Post-base ordering for Gurmukhi. Entries are applied top to bottom; the
// zero-form entry ends the list.
static const IndicOrdering gurmukhi_order [] = {
    { Consonant, Below },
    { Matra,     Below },
    { Matra,     Above },
    { Consonant, Post },
    { Matra,     Post },
    { VowelMark, Above },
    { (Form)0,   None }     // terminator
};
|
||
|
|
||
|
// Post-base ordering for Tamil. Entries are applied top to bottom; the
// zero-form entry ends the list.
static const IndicOrdering tamil_order [] = {
    { Matra,     Above },
    { Matra,     Post },
    { VowelMark, Post },
    { (Form)0,   None }     // terminator
};
|
||
|
|
||
|
// Post-base ordering for Telugu. Entries are applied top to bottom; the
// zero-form entry ends the list.
static const IndicOrdering telugu_order [] = {
    { Matra,     Above },
    { Matra,     Below },
    { Matra,     Post },
    { Consonant, Below },
    { Consonant, Post },
    { VowelMark, Post },
    { (Form)0,   None }     // terminator
};
|
||
|
|
||
|
// Post-base ordering for Kannada. Entries are applied top to bottom; the
// zero-form entry ends the list.
static const IndicOrdering kannada_order [] = {
    { Matra,      Above },
    { Matra,      Post },
    { Consonant,  Below },
    { Consonant,  Post },
    { LengthMark, Post },
    { Consonant,  Reph },
    { VowelMark,  Post },
    { (Form)0,    None }    // terminator
};
|
||
|
|
||
|
// Post-base ordering for Malayalam. Entries are applied top to bottom; the
// zero-form entry ends the list.
static const IndicOrdering malayalam_order [] = {
    { Consonant, Below },
    { Matra,     Below },
    { Consonant, Reph },
    { Consonant, Post },
    { Matra,     Post },
    { VowelMark, Post },
    { (Form)0,   None }     // terminator
};
|
||
|
|
||
|
// Post-base ordering for Sinhala. Entries are applied top to bottom; the
// zero-form entry ends the list.
static const IndicOrdering sinhala_order [] = {
    { Matra,     Below },
    { Matra,     Above },
    { Matra,     Post },
    { VowelMark, Post },
    { (Form)0,   None }     // terminator
};
|
||
|
|
||
|
// Reordering recipe per script, indexed by (script - QFont::Devanagari).
// Gujarati shares the Devanagari table and Oriya shares the Bengali one.
static const IndicOrdering * const indic_order[] = {
    devanagari_order,   // Devanagari
    bengali_order,      // Bengali
    gurmukhi_order,     // Gurmukhi
    devanagari_order,   // Gujarati
    bengali_order,      // Oriya
    tamil_order,        // Tamil
    telugu_order,       // Telugu
    kannada_order,      // Kannada
    malayalam_order,    // Malayalam
    sinhala_order       // Sinhala
};
|
||
|
|
||
|
|
||
|
|
||
|
// Vowel matras that have to be split into two parts.
//
// The table is a flat list of triples { matra, first part, second part },
// kept in ascending order of the matra code point and closed by a single
// 0xffff sentinel; splitMatra() relies on both properties for its linear
// search.
static const unsigned short split_matras[] = {
    // Bengali
    0x9cb, 0x9c7, 0x9be,
    0x9cc, 0x9c7, 0x9d7,

    // Oriya
    0xb48, 0xb47, 0xb56,
    0xb4b, 0xb47, 0xb3e,
    0xb4c, 0xb47, 0xb57,

    // Tamil
    0xbca, 0xbc6, 0xbbe,
    0xbcb, 0xbc7, 0xbbe,
    0xbcc, 0xbc6, 0xbd7,

    // Telugu
    0xc48, 0xc46, 0xc56,

    // Kannada
    0xcc0, 0xcbf, 0xcd5,
    0xcc7, 0xcc6, 0xcd5,
    0xcc8, 0xcc6, 0xcd6,
    0xcca, 0xcc6, 0xcc2,
    0xccb, 0xcca, 0xcd5,

    // Malayalam
    0xd4a, 0xd46, 0xd3e,
    0xd4b, 0xd47, 0xd3e,
    0xd4c, 0xd46, 0xd57,

    // Sinhala
    0xdda, 0xdd9, 0xdca,
    0xddc, 0xdd9, 0xdcf,
    0xddd, 0xddc, 0xdca,
    0xdde, 0xdd9, 0xddf,

    // sentinel
    0xffff
};
|
||
|
|
||
|
static inline void splitMatra(unsigned short *reordered, int matra, int &len, int &base)
|
||
|
{
|
||
|
unsigned short matra_uc = reordered[matra];
|
||
|
//qDebug("matra=%d, reordered[matra]=%x", matra, reordered[matra]);
|
||
|
|
||
|
const unsigned short *split = split_matras;
|
||
|
while (split[0] < matra_uc)
|
||
|
split += 3;
|
||
|
|
||
|
assert(*split == matra_uc);
|
||
|
++split;
|
||
|
|
||
|
if (indic_position(*split) == Pre) {
|
||
|
reordered[matra] = split[1];
|
||
|
memmove(reordered + 1, reordered, len*sizeof(unsigned short));
|
||
|
reordered[0] = split[0];
|
||
|
base++;
|
||
|
} else {
|
||
|
memmove(reordered + matra + 1, reordered + matra, (len-matra)*sizeof(unsigned short));
|
||
|
reordered[matra] = split[0];
|
||
|
reordered[matra+1] = split[1];
|
||
|
}
|
||
|
len++;
|
||
|
}
|
||
|
|
||
|
// Feature-selection bits used for the per-glyph OpenType property masks.
// Bits 0 and 1 are taken by constants defined elsewhere:
//   CcmpProperty = 0x1,
//   InitProperty = 0x2,
enum IndicProperties {
    NuktaProperty      = 1 << 2,
    AkhantProperty     = 1 << 3,
    RephProperty       = 1 << 4,
    PreFormProperty    = 1 << 5,
    BelowFormProperty  = 1 << 6,
    AboveFormProperty  = 1 << 7,
    HalfFormProperty   = 1 << 8,
    PostFormProperty   = 1 << 9,
    VattuProperty      = 1 << 10,
    PreSubstProperty   = 1 << 11,
    BelowSubstProperty = 1 << 12,
    AboveSubstProperty = 1 << 13,
    PostSubstProperty  = 1 << 14,
    HalantProperty     = 1 << 15,
    CligProperty       = 1 << 16
};
|
||
|
|
||
|
#ifndef QT_NO_XFTFREETYPE
|
||
|
static const QOpenType::Features indic_features[] = {
|
||
|
{ FT_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty },
|
||
|
{ FT_MAKE_TAG('i', 'n', 'i', 't'), InitProperty },
|
||
|
{ FT_MAKE_TAG('n', 'u', 'k', 't'), NuktaProperty },
|
||
|
{ FT_MAKE_TAG('a', 'k', 'h', 'n'), AkhantProperty },
|
||
|
{ FT_MAKE_TAG('r', 'p', 'h', 'f'), RephProperty },
|
||
|
{ FT_MAKE_TAG('b', 'l', 'w', 'f'), BelowFormProperty },
|
||
|
{ FT_MAKE_TAG('h', 'a', 'l', 'f'), HalfFormProperty },
|
||
|
{ FT_MAKE_TAG('p', 's', 't', 'f'), PostFormProperty },
|
||
|
{ FT_MAKE_TAG('v', 'a', 't', 'u'), VattuProperty },
|
||
|
{ FT_MAKE_TAG('p', 'r', 'e', 's'), PreSubstProperty },
|
||
|
{ FT_MAKE_TAG('b', 'l', 'w', 's'), BelowSubstProperty },
|
||
|
{ FT_MAKE_TAG('a', 'b', 'v', 's'), AboveSubstProperty },
|
||
|
{ FT_MAKE_TAG('p', 's', 't', 's'), PostSubstProperty },
|
||
|
{ FT_MAKE_TAG('h', 'a', 'l', 'n'), HalantProperty },
|
||
|
{ 0, 0 }
|
||
|
};
|
||
|
#endif
|
||
|
|
||
|
// Uncomment the following line to get verbose traces from the Indic shaper.
// #define INDIC_DEBUG
#ifdef INDIC_DEBUG
#define IDEBUG qDebug
#else
// Disabled tracing: the call is still compiled (so its arguments stay
// type-checked) but never executed. `while (0)` is used rather than `if (0)`
// so that an `else` following an unbraced `if (...) IDEBUG(...);` still binds
// to the outer `if` instead of being captured by the macro.
#define IDEBUG while (0) qDebug
#endif
|
||
|
|
||
|
#ifdef INDIC_DEBUG
// Debug helper: renders an OpenType property mask as a space-separated list
// of feature names. The mask is inverted first, so a name is printed for
// every bit that is CLEAR in `properties`.
static QString propertiesToString(int properties)
{
    static const struct {
        int flag;
        const char *label;
    } names[] = {
        { CcmpProperty,       "Ccmp " },
        { InitProperty,       "Init " },
        { NuktaProperty,      "Nukta " },
        { AkhantProperty,     "Akhant " },
        { RephProperty,       "Reph " },
        { PreFormProperty,    "PreForm " },
        { BelowFormProperty,  "BelowForm " },
        { AboveFormProperty,  "AboveForm " },
        { HalfFormProperty,   "HalfForm " },
        { PostFormProperty,   "PostForm " },
        { VattuProperty,      "Vattu " },
        { PreSubstProperty,   "PreSubst " },
        { BelowSubstProperty, "BelowSubst " },
        { AboveSubstProperty, "AboveSubst " },
        { PostSubstProperty,  "PostSubst " },
        { HalantProperty,     "Halant " },
        { CligProperty,       "Clig " }
    };

    const int enabled = ~properties;
    QString res;
    for (unsigned int n = 0; n < sizeof(names)/sizeof(names[0]); ++n) {
        if (enabled & names[n].flag)
            res += names[n].label;
    }
    return res;
}
#endif
|
||
|
|
||
|
// Shapes one Indic syllable.
//
//   openType  OpenType engine of the font, or 0 when none is available
//   item      describes the syllable (string, from, length) and the output
//             arrays (glyphs, advances, offsets, attributes)
//   invalid   TRUE if the syllable does not start with a valid character; a
//             dotted circle (U+25CC) is then prepended as an artificial base
//
// Returns FALSE if the output arrays are too small (item->num_glyphs is then
// set to the required size) or if glyph lookup / positioning fails, TRUE
// otherwise.
//
// The function first reorders the code units of the syllable (rules 1-5
// below), maps them to glyphs, and finally, if an OpenType engine is
// present, runs the OpenType features with per-glyph property masks.
static bool indic_shape_syllable(QOpenType *openType, QShaperItem *item, bool invalid)
{
    Q_UNUSED(openType)
    int script = item->script;
    Q_ASSERT(script >= QFont::Devanagari && script <= QFont::Sinhala);

    // every script block is 0x80 code points wide and laid out alike, so the
    // key characters sit at fixed offsets from the block start
    const unsigned short script_base = 0x0900 + 0x80*(script-QFont::Devanagari);
    const unsigned short ra = script_base + 0x30;
    const unsigned short halant = script_base + 0x4d;
    const unsigned short nukta = script_base + 0x3c;

    int len = item->length;
    IDEBUG(">>>>> indic shape: from=%d, len=%d invalid=%d", item->from, item->length, invalid);

    // reserve room for the dotted circle and for split matras
    if (item->num_glyphs < len+4) {
        item->num_glyphs = len+4;
        return FALSE;
    }

    QVarLengthArray<unsigned short> reordered(len+4);
    QVarLengthArray<unsigned char> position(len+4);

    unsigned char properties = scriptProperties[script-QFont::Devanagari];

    if (invalid) {
        *reordered.data() = 0x25cc;
        memcpy(reordered.data()+1, item->string->unicode() + item->from, len*sizeof(QChar));
        len++;
    } else {
        memcpy(reordered.data(), item->string->unicode() + item->from, len*sizeof(QChar));
    }
    if (reordered[len-1] == 0x200c) // zero width non joiner
        len--;

    int i;
    int base = 0;
    int reph = -1;

#ifdef INDIC_DEBUG
    IDEBUG("original:");
    for (i = 0; i < len; i++) {
        IDEBUG(" %d: %4x", i, reordered[i]);
    }
#endif

    if (len != 1) {
        unsigned short *uc = reordered.data();
        bool beginsWithRa = FALSE;

        // Rule 1: find base consonant
        //
        // The shaping engine finds the base consonant of the
        // syllable, using the following algorithm: starting from the
        // end of the syllable, move backwards until a consonant is
        // found that does not have a below-base or post-base form
        // (post-base forms have to follow below-base forms), or
        // arrive at the first consonant. The consonant stopped at
        // will be the base.
        //
        // * If the syllable starts with Ra + H (in a script that has
        //   'Reph'), Ra is excluded from candidates for base
        //   consonants.
        //
        // * In Kannada and Telugu, the base consonant cannot be
        //   farther than 3 consonants from the end of the syllable.
        // #### replace the HasReph property by testing if the feature exists in the font!
        if (form(*uc) == Consonant || (script == QFont::Bengali && form(*uc) == IndependentVowel)) {
            beginsWithRa = (properties & HasReph) && ((len > 2) && *uc == ra && *(uc+1) == halant);

            if (beginsWithRa && form(*(uc+2)) == Control)
                beginsWithRa = FALSE;

            base = (beginsWithRa ? 2 : 0);
            IDEBUG(" length = %d, beginsWithRa = %d, base=%d", len, beginsWithRa, base);

            int lastConsonant = 0;
            int matra = -1;
            // we remember:
            // * the last consonant since we need it for rule 2
            // * the matras position for rule 3 and 4

            // figure out possible base glyphs
            memset(position.data(), 0, len);
            if (script == QFont::Devanagari || script == QFont::Gujarati) {
                bool vattu = FALSE;
                for (i = base; i < len; ++i) {
                    position[i] = form(uc[i]);
                    if (position[i] == Consonant) {
                        lastConsonant = i;
                        vattu = (!vattu && uc[i] == ra);
                        if (vattu) {
                            IDEBUG("excluding vattu glyph at %d from base candidates", i);
                            position[i] = Vattu;
                        }
                    } else if (position[i] == Matra) {
                        matra = i;
                    }
                }
            } else {
                for (i = base; i < len; ++i) {
                    position[i] = form(uc[i]);
                    if (position[i] == Consonant)
                        lastConsonant = i;
                    else if (matra < 0 && position[i] == Matra)
                        matra = i;
                }
            }
            int skipped = 0;
            Position pos = Post;
            for (i = len-1; i > base; i--) {
                if (position[i] != Consonant && (position[i] != Control || script == QFont::Kannada))
                    continue;

                Position charPosition = indic_position(uc[i]);
                if (pos == Post && charPosition == Post) {
                    pos = Post;
                } else if ((pos == Post || pos == Below) && charPosition == Below) {
                    if (script == QFont::Devanagari || script == QFont::Gujarati)
                        base = i;
                    pos = Below;
                } else {
                    base = i;
                    break;
                }
                if (skipped == 2 && (script == QFont::Kannada || script == QFont::Telugu)) {
                    base = i;
                    break;
                }
                ++skipped;
            }

            IDEBUG(" base consonant at %d skipped=%d, lastConsonant=%d", base, skipped, lastConsonant);

            // Rule 2:
            //
            // If the base consonant is not the last one, Uniscribe
            // moves the halant from the base consonant to the last
            // one.
            if (lastConsonant > base) {
                int halantPos = 0;
                if (uc[base+1] == halant)
                    halantPos = base + 1;
                else if (uc[base+1] == nukta && uc[base+2] == halant)
                    halantPos = base + 2;
                if (halantPos > 0) {
                    IDEBUG(" moving halant from %d to %d!", base+1, lastConsonant);
                    for (i = halantPos; i < lastConsonant; i++)
                        uc[i] = uc[i+1];
                    uc[lastConsonant] = halant;
                }
            }

            // Rule 3:
            //
            // If the syllable starts with Ra + H, Uniscribe moves
            // this combination so that it follows either:

            // * the post-base 'matra' (if any) or the base consonant
            //   (in scripts that show similarity to Devanagari, i.e.,
            //   Devanagari, Gujarati, Bengali)
            // * the base consonant (other scripts)
            // * the end of the syllable (Kannada)

            Position matra_position = None;
            if (matra > 0)
                matra_position = indic_position(uc[matra]);
            IDEBUG(" matra at %d with form %d, base=%d", matra, matra_position, base);

            if (beginsWithRa && base != 0) {
                int toPos = base+1;
                if (toPos < len && uc[toPos] == nukta)
                    toPos++;
                if (toPos < len && uc[toPos] == halant)
                    toPos++;
                if (toPos < len && uc[toPos] == 0x200d)
                    toPos++;
                if (toPos < len-1 && uc[toPos] == ra && uc[toPos+1] == halant)
                    toPos += 2;
                if (script == QFont::Devanagari || script == QFont::Gujarati || script == QFont::Bengali) {
                    if (matra_position == Post || matra_position == Split) {
                        toPos = matra+1;
                        matra -= 2;
                    }
                } else if (script == QFont::Kannada) {
                    toPos = len;
                    matra -= 2;
                }

                IDEBUG("moving leading ra+halant to position %d", toPos);
                for (i = 2; i < toPos; i++)
                    uc[i-2] = uc[i];
                uc[toPos-2] = ra;
                uc[toPos-1] = halant;
                base -= 2;
                if (properties & HasReph)
                    reph = toPos-2;
            }

            // Rule 4:

            // Uniscribe splits two- or three-part matras into their
            // parts. This splitting is a character-to-character
            // operation.
            //
            // Uniscribe describes some moving operations for these
            // matras here. For shaping however all pre matras need
            // to be at the beginning of the syllable, so we just move
            // them there now.
            if (matra_position == Split) {
                splitMatra(uc, matra, len, base);
                // Handle three-part matras (0xccb in Kannada)
                matra_position = indic_position(uc[matra]);
                if (matra_position == Split)
                    splitMatra(uc, matra, len, base);
            } else if (matra_position == Pre) {
                unsigned short m = uc[matra];
                while (matra--)
                    uc[matra+1] = uc[matra];
                uc[0] = m;
                base++;
            }
        }

        // Rule 5:
        //
        // Uniscribe classifies consonants and 'matra' parts as
        // pre-base, above-base (Reph), below-base or post-base. This
        // classification exists on the character code level and is
        // language-dependent, not font-dependent.
        for (i = 0; i < base; ++i)
            position[i] = Pre;
        position[base] = Base;
        for (i = base+1; i < len; ++i) {
            position[i] = indic_position(uc[i]);
            // #### replace by adjusting table
            if (uc[i] == nukta || uc[i] == halant)
                position[i] = Inherit;
        }
        if (reph > 0) {
            // recalculate reph, it might have changed.
            for (i = base+1; i < len; ++i)
                if (uc[i] == ra)
                    reph = i;
            position[reph] = Reph;
            position[reph+1] = Inherit;
        }

        // all reordering happens now to the chars after the base
        int fixed = base+1;
        if (fixed < len && uc[fixed] == nukta)
            fixed++;
        if (fixed < len && uc[fixed] == halant)
            fixed++;
        if (fixed < len && uc[fixed] == 0x200d)
            fixed++;

#ifdef INDIC_DEBUG
        for (i = fixed; i < len; ++i)
            IDEBUG("position[%d] = %d, form=%d", i, position[i], form(uc[i]));
#endif
        // we continuously position the matras and vowel marks and increase the fixed
        // until we reached the end.
        const IndicOrdering *finalOrder = indic_order[script-QFont::Devanagari];

        IDEBUG(" reordering pass:");
        //IDEBUG(" base=%d fixed=%d", base, fixed);
        int toMove = 0;
        while (finalOrder[toMove].form && fixed < len-1) {
            //IDEBUG(" fixed = %d, moving form %d with pos %d", fixed, finalOrder[toMove].form, finalOrder[toMove].position);
            for (i = fixed; i < len; i++) {
                if (form(uc[i]) == finalOrder[toMove].form &&
                    position[i] == finalOrder[toMove].position) {
                    // need to move this glyph
                    int to = fixed;
                    if (i < len-1 && position[i+1] == Inherit) {
                        // the following nukta/halant travels with its owner
                        IDEBUG(" moving two chars from %d to %d", i, to);
                        unsigned short ch = uc[i];
                        unsigned short ch2 = uc[i+1];
                        unsigned char pos = position[i];
                        for (int j = i+1; j > to+1; j--) {
                            uc[j] = uc[j-2];
                            position[j] = position[j-2];
                        }
                        uc[to] = ch;
                        uc[to+1] = ch2;
                        position[to] = pos;
                        position[to+1] = pos;
                        fixed += 2;
                    } else {
                        IDEBUG(" moving one char from %d to %d", i, to);
                        unsigned short ch = uc[i];
                        unsigned char pos = position[i];
                        for (int j = i; j > to; j--) {
                            uc[j] = uc[j-1];
                            position[j] = position[j-1];
                        }
                        uc[to] = ch;
                        position[to] = pos;
                        fixed++;
                    }
                }
            }
            toMove++;
        }

    }

    if (reph > 0) {
        // recalculate reph, it might have changed.
        for (i = base+1; i < len; ++i)
            if (reordered[i] == ra)
                reph = i;
    }

    if (item->font->stringToCMap((const QChar *)reordered.data(), len, item->glyphs, item->advances,
                                 &item->num_glyphs, item->flags & QTextEngine::RightToLeft) != QFontEngine::NoError)
        return FALSE;


    IDEBUG(" base=%d, reph=%d", base, reph);
    IDEBUG("reordered:");
    for (i = 0; i < len; i++) {
        item->attributes[i].mark = FALSE;
        item->attributes[i].clusterStart = FALSE;
        item->attributes[i].justification = 0;
        item->attributes[i].zeroWidth = FALSE;
        IDEBUG(" %d: %4x", i, reordered[i]);
    }

    // now we have the syllable in the right order, and can start running it through open type.

    bool control = FALSE;
    for (i = 0; i < len; ++i)
        control |= (form(reordered[i]) == Control);

#ifndef QT_NO_XFTFREETYPE
    if (openType) {

        // we need to keep track of where the base glyph is for some
        // scripts and use the cluster feature for this. This
        // also means we have to correct the logCluster output from
        // the open type engine manually afterwards. for indic this
        // is rather simple, as all chars just point to the first
        // glyph in the syllable.
        QVarLengthArray<unsigned short> clusters(len);
        // NOTE: from here on `properties` is the per-glyph OpenType mask
        // array and hides the script property byte declared above.
        QVarLengthArray<unsigned int> properties(len);

        for (i = 0; i < len; ++i)
            clusters[i] = i;

        // features we should always apply
        for (i = 0; i < len; ++i)
            properties[i] = ~(CcmpProperty
                              | NuktaProperty
                              | VattuProperty
                              | PreSubstProperty
                              | BelowSubstProperty
                              | AboveSubstProperty
                              | HalantProperty
                              | PositioningProperties);

        // Ccmp always applies
        // Init
        if (item->from == 0
            || !(item->string->unicode()[item->from-1].isLetter() || item->string->unicode()[item->from-1].isMark()))
            properties[0] &= ~InitProperty;

        // Nukta always applies
        // Akhant
        for (i = 0; i <= base; ++i)
            properties[i] &= ~AkhantProperty;
        // Reph
        if (reph >= 0) {
            properties[reph] &= ~RephProperty;
            properties[reph+1] &= ~RephProperty;
        }
        // BelowForm
        for (i = base+1; i < len; ++i)
            properties[i] &= ~BelowFormProperty;

        if (script == QFont::Devanagari || script == QFont::Gujarati) {
            // vattu glyphs need this as well
            bool vattu = FALSE;
            for (i = base-2; i > 1; --i) {
                if (form(reordered[i]) == Consonant) {
                    vattu = (!vattu && reordered[i] == ra);
                    if (vattu) {
                        IDEBUG("forming vattu ligature at %d", i);
                        properties[i] &= ~BelowFormProperty;
                        properties[i+1] &= ~BelowFormProperty;
                    }
                }
            }
        }
        // HalfFormProperty
        for (i = 0; i < base; ++i)
            properties[i] &= ~HalfFormProperty;
        if (control) {
            for (i = 2; i < len; ++i) {
                if (reordered[i] == 0x200d /* ZWJ */) {
                    properties[i-1] &= ~HalfFormProperty;
                    properties[i-2] &= ~HalfFormProperty;
                } else if (reordered[i] == 0x200c /* ZWNJ */) {
                    properties[i-1] &= ~HalfFormProperty;
                    properties[i-2] &= ~HalfFormProperty;
                }
            }
        }
        // PostFormProperty
        for (i = base+1; i < len; ++i)
            properties[i] &= ~PostFormProperty;
        // vattu always applies
        // pres always applies
        // blws always applies
        // abvs always applies

        // psts
        // ### this looks slightly different from before, but I believe it's correct
        if (reordered[len-1] != halant || base != len-2)
            properties[base] &= ~PostSubstProperty;
        for (i = base+1; i < len; ++i)
            properties[i] &= ~PostSubstProperty;

        // halant always applies

#ifdef INDIC_DEBUG
        {
            IDEBUG("OT properties:");
            for (int i = 0; i < len; ++i)
                qDebug(" i: %s", ::propertiesToString(properties[i]).toLatin1().data());
        }
#endif

        // initialize
        item->log_clusters = clusters.data();
        openType->shape(item, properties.data());

        int newLen = openType->len();
        OTL_GlyphItem otl_glyphs = openType->glyphs();

        // move the left matra back to its correct position in malayalam and tamil
        if ((script == QFont::Malayalam || script == QFont::Tamil) && (form(reordered[0]) == Matra)) {
            // qDebug("reordering matra, len=%d", newLen);
            // need to find the base in the shaped string and move the matra there
            int basePos = 0;
            while (basePos < newLen && (int)otl_glyphs[basePos].cluster <= base)
                basePos++;
            --basePos;
            if (basePos < newLen && basePos > 1) {
                // qDebug("moving prebase matra to position %d in syllable newlen=%d", basePos, newLen);
                OTL_GlyphItemRec m = otl_glyphs[0];
                --basePos;
                for (i = 0; i < basePos; ++i)
                    otl_glyphs[i] = otl_glyphs[i+1];
                otl_glyphs[basePos] = m;
            }
        }

        if (!openType->positionAndAdd(item, FALSE))
            return FALSE;

        if (control) {
            IDEBUG("found a control char in the syllable");
            // Drop the glyphs that stem from ZWJ/ZWNJ. The per-glyph arrays
            // are parallel, so advances, offsets and attributes have to be
            // compacted together with the glyph indices; otherwise every
            // glyph after a removed control character would be drawn with
            // its neighbour's metrics.
            int src = 0, dst = 0;
            while (src < item->num_glyphs) {
                if (form(reordered[otl_glyphs[src].cluster]) == Control) {
                    ++src;
                    if (src >= item->num_glyphs)
                        break;
                }
                item->glyphs[dst] = item->glyphs[src];
                item->advances[dst] = item->advances[src];
                item->offsets[dst] = item->offsets[src];
                item->attributes[dst] = item->attributes[src];
                ++src;
                ++dst;
            }
            item->num_glyphs = dst;
        }

    }
#endif

    item->attributes[0].clusterStart = TRUE;
    IDEBUG("<<<<<<");
    return TRUE;
}
|
||
|
|
||
|
|
||
|
/* syllables are of the form:
|
||
|
|
||
|
(Consonant Nukta? Halant)* Consonant Matra? VowelMark? StressMark?
|
||
|
(Consonant Nukta? Halant)* Consonant Halant
|
||
|
IndependentVowel VowelMark? StressMark?
|
||
|
|
||
|
We return syllable boundaries on invalid combinations aswell
|
||
|
*/
|
||
|
static int indic_nextSyllableBoundary(int script, const QString &s, int start, int end, bool *invalid)
|
||
|
{
|
||
|
*invalid = FALSE;
|
||
|
IDEBUG("indic_nextSyllableBoundary: start=%d, end=%d", start, end);
|
||
|
const QChar *uc = s.unicode()+start;
|
||
|
|
||
|
int pos = 0;
|
||
|
Form state = form(uc[pos].unicode());
|
||
|
IDEBUG("state[%d]=%d (uc=%4x)", pos, state, uc[pos].unicode());
|
||
|
pos++;
|
||
|
|
||
|
if (state != Consonant && state != IndependentVowel) {
|
||
|
if (state != Other)
|
||
|
*invalid = TRUE;
|
||
|
goto finish;
|
||
|
}
|
||
|
|
||
|
while (pos < end - start) {
|
||
|
Form newState = form(uc[pos].unicode());
|
||
|
IDEBUG("state[%d]=%d (uc=%4x)", pos, newState, uc[pos].unicode());
|
||
|
switch(newState) {
|
||
|
case Control:
|
||
|
newState = state;
|
||
|
if (state == Halant && uc[pos].unicode() == 0x200d /* ZWJ */)
|
||
|
break;
|
||
|
// the control character should be the last char in the item
|
||
|
++pos;
|
||
|
goto finish;
|
||
|
case Consonant:
|
||
|
if (state == Halant && (script != QFont::Sinhala || uc[pos-1].unicode() == 0x200d /* ZWJ */))
|
||
|
break;
|
||
|
goto finish;
|
||
|
case Halant:
|
||
|
if (state == Nukta || state == Consonant)
|
||
|
break;
|
||
|
// Bengali has a special exception allowing the combination Vowel_A/E + Halant + Ya
|
||
|
if (script == QFont::Bengali && pos == 1 &&
|
||
|
(uc[0].unicode() == 0x0985 || uc[0].unicode() == 0x098f))
|
||
|
break;
|
||
|
goto finish;
|
||
|
case Nukta:
|
||
|
if (state == Consonant)
|
||
|
break;
|
||
|
goto finish;
|
||
|
case StressMark:
|
||
|
if (state == VowelMark)
|
||
|
break;
|
||
|
// fall through
|
||
|
case VowelMark:
|
||
|
if (state == Matra || state == IndependentVowel)
|
||
|
break;
|
||
|
// fall through
|
||
|
case Matra:
|
||
|
if (state == Consonant || state == Nukta)
|
||
|
break;
|
||
|
// ### not sure if this is correct. If it is, does it apply only to Bengali or should
|
||
|
// it work for all Indic languages?
|
||
|
// the combination Independent_A + Vowel Sign AA is allowed.
|
||
|
if (script == QFont::Bengali && uc[pos].unicode() == 0x9be && uc[pos-1].unicode() == 0x985)
|
||
|
break;
|
||
|
if (script == QFont::Tamil && state == Matra) {
|
||
|
if (uc[pos-1].unicode() == 0x0bc6 &&
|
||
|
(uc[pos].unicode() == 0xbbe || uc[pos].unicode() == 0xbd7))
|
||
|
break;
|
||
|
if (uc[pos-1].unicode() == 0x0bc7 && uc[pos].unicode() == 0xbbe)
|
||
|
break;
|
||
|
}
|
||
|
goto finish;
|
||
|
|
||
|
case LengthMark:
|
||
|
case IndependentVowel:
|
||
|
case Invalid:
|
||
|
case Other:
|
||
|
goto finish;
|
||
|
}
|
||
|
state = newState;
|
||
|
pos++;
|
||
|
}
|
||
|
finish:
|
||
|
return pos+start;
|
||
|
}
|
||
|
|
||
|
// Shapes a run of Indic text by cutting it into syllables and shaping each
// one with indic_shape_syllable().
//
// On success item->num_glyphs holds the total number of glyphs produced and
// item->log_clusters maps every character to the first glyph of its
// syllable. On failure FALSE is returned and item->num_glyphs has been
// increased by the glyph count requested by the failing syllable.
static bool indic_shape(QShaperItem *item)
{
    Q_ASSERT(item->script >= QFont::Devanagari && item->script <= QFont::Sinhala);

#ifndef QT_NO_XFTFREETYPE
    QOpenType *openType = item->font->openType();
    if (openType)
        openType->selectScript(item->script, indic_features);
#else
    QOpenType *openType = 0;
#endif
    // remember the caller's array: the per-syllable item gets its own
    unsigned short *logClusters = item->log_clusters;

    QShaperItem syllable = *item;
    int glyphsDone = 0;

    int syllableStart = item->from;
    const int end = syllableStart + item->length;
    IDEBUG("indic_shape: from %d length %d", item->from, item->length);
    while (syllableStart < end) {
        bool invalid;
        const int syllableEnd = indic_nextSyllableBoundary(item->script, *item->string,
                                                           syllableStart, end, &invalid);
        IDEBUG("syllable from %d, length %d, invalid=%s", syllableStart, syllableEnd-syllableStart,
               invalid ? "TRUE" : "FALSE");

        // point the per-syllable item at the unused tail of the output arrays
        syllable.from = syllableStart;
        syllable.length = syllableEnd-syllableStart;
        syllable.glyphs = item->glyphs + glyphsDone;
        syllable.offsets = item->offsets + glyphsDone;
        syllable.advances = item->advances + glyphsDone;
        syllable.attributes = item->attributes + glyphsDone;
        syllable.num_glyphs = item->num_glyphs - glyphsDone;

        if (!indic_shape_syllable(openType, &syllable, invalid)) {
            IDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable.num_glyphs);
            item->num_glyphs += syllable.num_glyphs;
            return FALSE;
        }
        item->has_positioning |= syllable.has_positioning;

        // fix logcluster array
        IDEBUG("syllable:");
        int k;
        for (k = glyphsDone; k < glyphsDone + syllable.num_glyphs; ++k)
            IDEBUG(" %d -> glyph %x", k, item->glyphs[k]);
        IDEBUG(" logclusters:");
        for (k = syllableStart; k < syllableEnd; ++k) {
            IDEBUG(" %d -> glyph %d", k, glyphsDone);
            logClusters[k-item->from] = glyphsDone;
        }

        syllableStart = syllableEnd;
        glyphsDone += syllable.num_glyphs;
    }
    item->num_glyphs = glyphsDone;
    return TRUE;
}
|
||
|
|
||
|
|
||
|
static void indic_attributes(int script, const QString &text, int from, int len, QCharAttributes *attributes)
|
||
|
{
|
||
|
int end = from + len;
|
||
|
const QChar *uc = text.unicode() + from;
|
||
|
attributes += from;
|
||
|
int i = 0;
|
||
|
while (i < len) {
|
||
|
bool invalid;
|
||
|
int boundary = indic_nextSyllableBoundary(script, text, from+i, end, &invalid) - from;
|
||
|
attributes[i].charStop = TRUE;
|
||
|
|
||
|
if (boundary > len-1) boundary = len;
|
||
|
i++;
|
||
|
while (i < boundary) {
|
||
|
attributes[i].charStop = FALSE;
|
||
|
++uc;
|
||
|
++i;
|
||
|
}
|
||
|
assert(i == boundary);
|
||
|
}
|
||
|
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
// --------------------------------------------------------------------------------------------------------------------------------------------
|
||
|
//
|
||
|
// Thai and Lao
|
||
|
//
|
||
|
// --------------------------------------------------------------------------------------------------------------------------------------------
|
||
|
|
||
|
#include <qtextcodec.h>
|
||
|
#include <qlibrary.h>
|
||
|
|
||
|
|
||
|
static void thaiWordBreaks(const QChar *string, const int len, QCharAttributes *attributes)
|
||
|
{
|
||
|
#ifndef QT_NO_TEXTCODEC
|
||
|
typedef int (*th_brk_def)(const char*, int[], int);
|
||
|
static QTextCodec *thaiCodec = QTextCodec::codecForMib(2259);
|
||
|
static th_brk_def th_brk = 0;
|
||
|
|
||
|
#ifndef QT_NO_LIBRARY
|
||
|
/* load libthai dynamically */
|
||
|
if (!th_brk && thaiCodec) {
|
||
|
th_brk = (th_brk_def)QLibrary::resolve("thai", "th_brk");
|
||
|
if (!th_brk)
|
||
|
thaiCodec = 0;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
if (!th_brk)
|
||
|
return;
|
||
|
|
||
|
QCString cstr = thaiCodec->fromUnicode(QConstString(string, len).string());
|
||
|
|
||
|
int brp[128];
|
||
|
int *break_positions = brp;
|
||
|
int numbreaks = th_brk(cstr.data(), break_positions, 128);
|
||
|
if (numbreaks > 128) {
|
||
|
break_positions = new int[numbreaks];
|
||
|
numbreaks = th_brk(cstr.data(),break_positions, numbreaks);
|
||
|
}
|
||
|
|
||
|
attributes[0].softBreak = TRUE;
|
||
|
int i;
|
||
|
for (i = 1; i < len; ++i)
|
||
|
attributes[i].softBreak = FALSE;
|
||
|
|
||
|
for (i = 0; i < numbreaks; ++i)
|
||
|
attributes[break_positions[i]].softBreak = TRUE;
|
||
|
|
||
|
if (break_positions != brp)
|
||
|
delete [] break_positions;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
|
||
|
static void thai_attributes( int script, const QString &text, int from, int len, QCharAttributes *attributes )
|
||
|
{
|
||
|
Q_UNUSED(script);
|
||
|
Q_ASSERT(script == QFont::Thai);
|
||
|
thaiWordBreaks(text.unicode() + from, len, attributes);
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
// --------------------------------------------------------------------------------------------------------------------------------------------
|
||
|
//
|
||
|
// Tibetan
|
||
|
//
|
||
|
// --------------------------------------------------------------------------------------------------------------------------------------------
|
||
|
|
||
|
// tibetan syllables are of the form:
|
||
|
// head position consonant
|
||
|
// first sub-joined consonant
|
||
|
// ....intermediate sub-joined consonants (if any)
|
||
|
// last sub-joined consonant
|
||
|
// sub-joined vowel (a-chung U+0F71)
|
||
|
// standard or compound vowel sign (or 'virama' for devanagari transliteration)
|
||
|
|
||
|
// Role a Tibetan code point plays inside a syllable. The numeric values are
// what the tibetanForm lookup table stores.
enum TibetanForm {
    TibetanOther              = 0,
    TibetanHeadConsonant      = 1,
    TibetanSubjoinedConsonant = 2,
    TibetanSubjoinedVowel     = 3,
    TibetanVowel              = 4
};
|
||
|
|
||
|
// this table starts at U+0f40
|
||
|
// One TibetanForm value per code point, indexed by (code point - 0x0f40).
static const unsigned char tibetanForm[0x80] = {
    // U+0F40 - U+0F4F
    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,

    // U+0F50 - U+0F5F
    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,

    // U+0F60 - U+0F6F
    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
    TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant, TibetanHeadConsonant,
    TibetanOther, TibetanOther, TibetanOther, TibetanOther,

    // U+0F70 - U+0F7F
    TibetanOther, TibetanVowel, TibetanVowel, TibetanVowel,
    TibetanVowel, TibetanVowel, TibetanVowel, TibetanVowel,
    TibetanVowel, TibetanVowel, TibetanVowel, TibetanVowel,
    TibetanVowel, TibetanVowel, TibetanVowel, TibetanVowel,

    // U+0F80 - U+0F8F
    TibetanVowel, TibetanVowel, TibetanVowel, TibetanVowel,
    TibetanVowel, TibetanVowel, TibetanVowel, TibetanVowel,
    TibetanOther, TibetanOther, TibetanOther, TibetanOther,
    TibetanOther, TibetanOther, TibetanOther, TibetanOther,

    // U+0F90 - U+0F9F
    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,

    // U+0FA0 - U+0FAF
    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,

    // U+0FB0 - U+0FBF
    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
    TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant, TibetanSubjoinedConsonant,
    TibetanSubjoinedConsonant, TibetanOther, TibetanOther, TibetanOther
};
|
||
|
|
||
|
|
||
|
/*
  Returns the syllable role of c.

  The tibetanForm table covers 0x80 code points starting at U+0F40. Anything
  outside that window is reported as TibetanOther instead of being used as a
  table index, which would read outside the array.
*/
static inline TibetanForm tibetan_form(const QChar &c)
{
    const unsigned short code = c.unicode();
    if (code < 0x0f40 || code >= 0x0f40 + 0x80)
        return TibetanOther;
    return (TibetanForm)tibetanForm[code - 0x0f40];
}
|
||
|
|
||
|
#ifndef QT_NO_XFTFREETYPE
|
||
|
static const QOpenType::Features tibetan_features[] = {
|
||
|
{ FT_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty },
|
||
|
{ FT_MAKE_TAG('a', 'b', 'v', 's'), AboveSubstProperty },
|
||
|
{ FT_MAKE_TAG('b', 'l', 'w', 's'), BelowSubstProperty },
|
||
|
{0, 0}
|
||
|
};
|
||
|
#endif
|
||
|
|
||
|
/*
  Shapes one Tibetan syllable described by item (from/length select the
  characters, glyphs/advances/attributes receive the result).

  openType  OpenType handle to run the syllable through, or 0
  invalid   when TRUE a dotted circle (U+25CC) is put in front of the
            syllable's characters before they are mapped to glyphs

  Returns FALSE if the glyph buffers are too small (item->num_glyphs is then
  set to the required size), or if glyph mapping or OpenType positioning
  fails; TRUE otherwise.
*/
static bool tibetan_shape_syllable(QOpenType *openType, QShaperItem *item, bool invalid)
{
    Q_UNUSED(openType)
    int len = item->length;

    // ask the caller for more room before touching any output buffer
    if (item->num_glyphs < item->length + 4) {
        item->num_glyphs = item->length + 4;
        return FALSE;
    }

    int i;
    QVarLengthArray<unsigned short> reordered(len+4);

    const QChar *str = item->string->unicode() + item->from;
    if (invalid) {
        // dotted circle first, then the original characters
        *reordered.data() = 0x25cc;
        memcpy(reordered.data()+1, str, len*sizeof(QChar));
        len++;
        str = (QChar *)reordered.data();
    }

    if (item->font->stringToCMap(str, len, item->glyphs, item->advances,
                                 &item->num_glyphs, item->flags & QTextEngine::RightToLeft) != QFontEngine::NoError)
        return FALSE;

    // Reset one attribute entry per mapped character. This has to run to len
    // rather than item->length so that the extra dotted-circle entry of an
    // invalid syllable is reset as well.
    for (i = 0; i < len; i++) {
        item->attributes[i].mark = FALSE;
        item->attributes[i].clusterStart = FALSE;
        item->attributes[i].justification = 0;
        item->attributes[i].zeroWidth = FALSE;
        IDEBUG("    %d: %4x", i, str[i].unicode());
    }

    // now we have the syllable in the right order, and can start running it through open type.

#ifndef QT_NO_XFTFREETYPE
    if (openType && openType->supportsScript(QFont::Tibetan)) {
        openType->selectScript(QFont::Tibetan, tibetan_features);

        openType->shape(item);
        if (!openType->positionAndAdd(item, FALSE))
            return FALSE;
    }
#endif

    // the whole syllable forms a single cluster
    item->attributes[0].clusterStart = TRUE;
    return TRUE;
}
|
||
|
|
||
|
|
||
|
static int tibetan_nextSyllableBoundary(const QString &s, int start, int end, bool *invalid)
|
||
|
{
|
||
|
const QChar *uc = s.unicode() + start;
|
||
|
|
||
|
int pos = 0;
|
||
|
TibetanForm state = tibetan_form(*uc);
|
||
|
|
||
|
// qDebug("state[%d]=%d (uc=%4x)", pos, state, uc[pos].unicode());
|
||
|
pos++;
|
||
|
|
||
|
if (state != TibetanHeadConsonant) {
|
||
|
if (state != TibetanOther)
|
||
|
*invalid = TRUE;
|
||
|
goto finish;
|
||
|
}
|
||
|
|
||
|
while (pos < end - start) {
|
||
|
TibetanForm newState = tibetan_form(uc[pos]);
|
||
|
switch(newState) {
|
||
|
case TibetanSubjoinedConsonant:
|
||
|
case TibetanSubjoinedVowel:
|
||
|
if (state != TibetanHeadConsonant &&
|
||
|
state != TibetanSubjoinedConsonant)
|
||
|
goto finish;
|
||
|
state = newState;
|
||
|
break;
|
||
|
case TibetanVowel:
|
||
|
if (state != TibetanHeadConsonant &&
|
||
|
state != TibetanSubjoinedConsonant &&
|
||
|
state != TibetanSubjoinedVowel)
|
||
|
goto finish;
|
||
|
break;
|
||
|
case TibetanOther:
|
||
|
case TibetanHeadConsonant:
|
||
|
goto finish;
|
||
|
}
|
||
|
pos++;
|
||
|
}
|
||
|
|
||
|
finish:
|
||
|
*invalid = FALSE;
|
||
|
return start+pos;
|
||
|
}
|
||
|
|
||
|
/*
  Shapes a Tibetan run syllable by syllable.

  Each syllable found by tibetan_nextSyllableBoundary() is shaped into the
  next free slice of item's glyph buffers, and the log-cluster entry of every
  character of the syllable is pointed at the syllable's first glyph.

  Returns FALSE as soon as a syllable fails to shape; in that case
  item->num_glyphs is increased by the count reported by the failing
  syllable. On success item->num_glyphs holds the total glyph count.
*/
static bool tibetan_shape(QShaperItem *item)
{
    Q_ASSERT(item->script == QFont::Tibetan);

#ifndef QT_NO_XFTFREETYPE
    // only keep the OpenType handle if the font really covers this script
    QOpenType *openType = item->font->openType();
    if (openType && !openType->supportsScript(item->script))
        openType = 0;
#else
    QOpenType *openType = 0;
#endif
    unsigned short *clusterMap = item->log_clusters;

    // working copy whose window is moved from syllable to syllable
    QShaperItem part = *item;
    int glyphBase = 0;

    const int runEnd = item->from + item->length;
    for (int cur = item->from; cur < runEnd; ) {
        bool invalid;
        const int next = tibetan_nextSyllableBoundary(*(item->string), cur, runEnd, &invalid);
        IDEBUG("syllable from %d, length %d, invalid=%s", cur, next-cur,
               invalid ? "TRUE" : "FALSE");

        part.from = cur;
        part.length = next - cur;
        part.glyphs = item->glyphs + glyphBase;
        part.offsets = item->offsets + glyphBase;
        part.advances = item->advances + glyphBase;
        part.attributes = item->attributes + glyphBase;
        part.num_glyphs = item->num_glyphs - glyphBase;

        if (!tibetan_shape_syllable(openType, &part, invalid)) {
            item->num_glyphs += part.num_glyphs;
            return FALSE;
        }
        item->has_positioning |= part.has_positioning;

        // fix logcluster array
        for (int k = cur - item->from; k < next - item->from; ++k)
            clusterMap[k] = glyphBase;

        glyphBase += part.num_glyphs;
        cur = next;
    }

    item->num_glyphs = glyphBase;
    return TRUE;
}
|
||
|
|
||
|
static void tibetan_attributes(int script, const QString &text, int from, int len, QCharAttributes *attributes)
|
||
|
{
|
||
|
Q_UNUSED(script);
|
||
|
|
||
|
int end = from + len;
|
||
|
const QChar *uc = text.unicode() + from;
|
||
|
attributes += from;
|
||
|
int i = 0;
|
||
|
while (i < len) {
|
||
|
bool invalid;
|
||
|
int boundary = tibetan_nextSyllableBoundary(text, from+i, end, &invalid) - from;
|
||
|
|
||
|
attributes[i].charStop = TRUE;
|
||
|
|
||
|
if (boundary > len-1) boundary = len;
|
||
|
i++;
|
||
|
while (i < boundary) {
|
||
|
attributes[i].charStop = FALSE;
|
||
|
++uc;
|
||
|
++i;
|
||
|
}
|
||
|
assert(i == boundary);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// --------------------------------------------------------------------------------------------------------------------------------------------
|
||
|
//
|
||
|
// Khmer
|
||
|
//
|
||
|
// --------------------------------------------------------------------------------------------------------------------------------------------
|
||
|
|
||
|
|
||
|
// Vocabulary
|
||
|
// Base -> A consonant or an independent vowel in its full (not subscript) form. It is the
|
||
|
// center of the syllable, it can be surrounded by coeng (subscript) consonants, vowels,
|
||
|
// split vowels, signs... but there is only one base in a syllable, it has to be coded as
|
||
|
// the first character of the syllable.
|
||
|
// split vowel --> vowel that has two parts placed separately (e.g. Before and after the consonant).
|
||
|
// Khmer language has five of them. Khmer split vowels either have one part before the
|
||
|
// base and one after the base or they have a part before the base and a part above the base.
|
||
|
// The first part of all Khmer split vowels is the same character, identical to
|
||
|
// the glyph of Khmer dependent vowel SRA EI
|
||
|
// coeng --> modifier used in Khmer to construct coeng (subscript) consonants
|
||
|
// Differently than indian languages, the coeng modifies the consonant that follows it,
|
||
|
// not the one preceding it Each consonant has two forms, the base form and the subscript form
|
||
|
// the base form is the normal one (using the consonants code-point), the subscript form is
|
||
|
// displayed when the combination coeng + consonant is encountered.
|
||
|
// Consonant of type 1 -> A consonant which has subscript for that only occupies space under a base consonant
|
||
|
// Consonant of type 2.-> Its subscript form occupies space under and before the base (only one, RO)
|
||
|
// Consonant of Type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
|
||
|
// Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds
|
||
|
// if it is attached to a consonant of the first series or a consonant of the second series
|
||
|
// Most consonants have an equivalent in the other series, but some of them exist only in
|
||
|
// one series (for example SA). If we want to use the consonant SA with a vowel sound that
|
||
|
// can only be done with a vowel sound that corresponds to a vowel accompanying a consonant
|
||
|
// of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN
|
||
|
// x17C9 y x17CA. TRIISAP changes a first series consonant to second series sound and
|
||
|
// MUSIKATOAN a second series consonant to have a first series vowel sound.
|
||
|
// Consonant shifters are both normally superscript marks, but, when they are followed by a
|
||
|
// superscript, they change shape and take the form of subscript dependent vowel SRA U.
|
||
|
// If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they
|
||
|
// should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should
|
||
|
// be placed after the coeng consonant.
|
||
|
// Dependent vowel -> In khmer dependent vowels can be placed above, below, before or after the base
|
||
|
// Each vowel has its own position. Only one vowel per syllable is allowed.
|
||
|
// Signs -> Khmer has above signs and post signs. Only one above sign and/or one post sign are
|
||
|
// Allowed in a syllable.
|
||
|
//
|
||
|
//
|
||
|
// order is important here! This order must be the same that is found in each horizontal
|
||
|
// line in the statetable for Khmer (see khmerStateTable) .
|
||
|
//
|
||
|
// Character class numbers. Each value is the column index used for that
// class in khmerStateTable, so the numbering must not change.
enum KhmerCharClassValues {
    CC_RESERVED           = 0,
    CC_CONSONANT          = 1,   // consonant of type 1, or independent vowel
    CC_CONSONANT2         = 2,   // consonant of type 2
    CC_CONSONANT3         = 3,   // consonant of type 3
    CC_ZERO_WIDTH_NJ_MARK = 4,   // zero width non-joiner (0x200C)
    CC_CONSONANT_SHIFTER  = 5,
    CC_ROBAT              = 6,   // Khmer special diacritic accent, has its own column in the state table
    CC_COENG              = 7,   // subscript consonant combining character
    CC_DEPENDENT_VOWEL    = 8,
    CC_SIGN_ABOVE         = 9,
    CC_SIGN_AFTER         = 10,
    CC_ZERO_WIDTH_J_MARK  = 11,  // zero width joiner
    CC_COUNT              = 12   // number of character classes
};
|
||
|
|
||
|
|
||
|
// Flag bits that are ORed onto a character class number. The low 16 bits
// hold the class number, bits 16-19 the position, bits 24-29 the
// classification flags.
enum KhmerCharClassFlags {
    CF_CLASS_MASK    = 0xFFFF,

    // position flags
    CF_POS_AFTER     = 1 << 16,
    CF_POS_ABOVE     = 1 << 17,
    CF_POS_BELOW     = 1 << 18,
    CF_POS_BEFORE    = 1 << 19,
    CF_POS_MASK      = 0xF << 16,

    CF_CONSONANT     = 1 << 24,  // flag to speed up comparing
    CF_SPLIT_VOWEL   = 1 << 25,  // split vowel: its first part is added in front of the syllable
    CF_DOTTED_CIRCLE = 1 << 26,  // add a dotted circle if such a character is first in a syllable
    CF_COENG         = 1 << 27,  // flag to speed up comparing
    CF_SHIFTER       = 1 << 28,  // flag to speed up comparing
    CF_ABOVE_VOWEL   = 1 << 29   // flag to speed up comparing
};
|
||
|
|
||
|
|
||
|
// Characters that get referred to by name
|
||
|
// Code points the Khmer shaper refers to by name, listed in code point order.
enum KhmerChar {
    C_RO            = 0x179A,
    C_VOWEL_AA      = 0x17B6,
    C_VOWEL_E       = 0x17C1,
    C_SIGN_NIKAHIT  = 0x17C6,
    C_COENG         = 0x17D2,
    C_SIGN_ZWNJ     = 0x200C,
    C_SIGN_ZWJ      = 0x200D,
    C_DOTTED_CIRCLE = 0x25CC
};
|
||
|
|
||
|
|
||
|
// simple classes, they are used in the statetable (in this file) to control the length of a syllable
|
||
|
// they are also used to know where a character should be placed (location in reference to the base character)
|
||
|
// and also to know if a character, when independently displayed, should be displayed with a dotted-circle to
|
||
|
// indicate error in syllable construction
|
||
|
//
|
||
|
enum {
|
||
|
_xx = CC_RESERVED,
|
||
|
_sa = CC_SIGN_ABOVE | CF_DOTTED_CIRCLE | CF_POS_ABOVE,
|
||
|
_sp = CC_SIGN_AFTER | CF_DOTTED_CIRCLE| CF_POS_AFTER,
|
||
|
_c1 = CC_CONSONANT | CF_CONSONANT,
|
||
|
_c2 = CC_CONSONANT2 | CF_CONSONANT,
|
||
|
_c3 = CC_CONSONANT3 | CF_CONSONANT,
|
||
|
_rb = CC_ROBAT | CF_POS_ABOVE | CF_DOTTED_CIRCLE,
|
||
|
_cs = CC_CONSONANT_SHIFTER | CF_DOTTED_CIRCLE | CF_SHIFTER,
|
||
|
_dl = CC_DEPENDENT_VOWEL | CF_POS_BEFORE | CF_DOTTED_CIRCLE,
|
||
|
_db = CC_DEPENDENT_VOWEL | CF_POS_BELOW | CF_DOTTED_CIRCLE,
|
||
|
_da = CC_DEPENDENT_VOWEL | CF_POS_ABOVE | CF_DOTTED_CIRCLE | CF_ABOVE_VOWEL,
|
||
|
_dr = CC_DEPENDENT_VOWEL | CF_POS_AFTER | CF_DOTTED_CIRCLE,
|
||
|
_co = CC_COENG | CF_COENG | CF_DOTTED_CIRCLE,
|
||
|
|
||
|
// split vowel
|
||
|
_va = _da | CF_SPLIT_VOWEL,
|
||
|
_vr = _dr | CF_SPLIT_VOWEL
|
||
|
};
|
||
|
|
||
|
|
||
|
// Character class: a character class value
|
||
|
// ORed with character class flags.
|
||
|
//
|
||
|
typedef unsigned long KhmerCharClass; // must hold the CF_* flag bits, which reach up to 0x20000000
|
||
|
|
||
|
|
||
|
// Character class tables
|
||
|
// _xx character does not combine into syllable, such as numbers, punctuation marks, non-Khmer signs...
|
||
|
// _sa Sign placed above the base
|
||
|
// _sp Sign placed after the base
|
||
|
// _c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants)
|
||
|
// _c2 Consonant of type 2 (only RO)
|
||
|
// _c3 Consonant of type 3
|
||
|
// _rb Khmer sign robat u17CC. combining mark for subscript consonants
|
||
|
// _cs Consonant-shifter
|
||
|
// _dl Dependent vowel placed before the base (left of the base)
|
||
|
// _db Dependent vowel placed below the base
|
||
|
// _da Dependent vowel placed above the base
|
||
|
// _dr Dependent vowel placed behind the base (right of the base)
|
||
|
// _co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following
|
||
|
// it to create a subscript consonant or independent vowel
|
||
|
// _va Khmer split vowel in which the first part is before the base and the second one above the base
|
||
|
// _vr Khmer split vowel in which the first part is before the base and the second one behind (right of) the base
|
||
|
//
|
||
|
// Character class (number plus flags) of every code point from U+1780 to
// U+17DF, sixteen code points per row.
static const KhmerCharClass khmerCharClasses[] = {
    //  0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
    _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1,  // 1780 - 178F
    _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c2, _c1, _c1, _c1, _c3, _c3,  // 1790 - 179F
    _c1, _c3, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1,  // 17A0 - 17AF
    _c1, _c1, _c1, _c1, _dr, _dr, _dr, _da, _da, _da, _da, _db, _db, _db, _va, _vr,  // 17B0 - 17BF
    _vr, _dl, _dl, _dl, _vr, _vr, _sa, _sp, _sp, _cs, _cs, _sa, _rb, _sa, _sa, _sa,  // 17C0 - 17CF
    _sa, _sa, _co, _sa, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _sa, _xx, _xx   // 17D0 - 17DF
};
|
||
|
|
||
|
// this enum must reflect the range of khmerCharClasses
|
||
|
// First and last code point that have an entry in khmerCharClasses.
enum KhmerCharClassesRange {
    KhmerFirstChar = 0x1780,
    KhmerLastChar  = 0x17DF
};
|
||
|
|
||
|
// Below we define how a character in the input string is either in the khmerCharClasses table
|
||
|
// (in which case we get its type back), a ZWJ or ZWNJ (two characters that may appear
|
||
|
// within the syllable, but are not in the table) we also get their type back, or an unknown object
|
||
|
// in which case we get _xx (CC_RESERVED) back
|
||
|
//
|
||
|
/*
  Returns the character class of uc:
   - the joiner / non-joiner class for ZWJ and ZWNJ, which are not part of
     the khmerCharClasses table,
   - the table entry for code points from KhmerFirstChar to KhmerLastChar,
   - CC_RESERVED for everything else.
*/
static inline KhmerCharClass getKhmerCharClass(const QChar &uc)
{
    const unsigned short code = uc.unicode();

    switch (code) {
    case C_SIGN_ZWJ:
        return CC_ZERO_WIDTH_J_MARK;
    case C_SIGN_ZWNJ:
        return CC_ZERO_WIDTH_NJ_MARK;
    default:
        break;
    }

    if (code >= KhmerFirstChar && code <= KhmerLastChar)
        return khmerCharClasses[code - KhmerFirstChar];

    return CC_RESERVED;
}
|
||
|
|
||
|
|
||
|
// The stateTable is used to calculate the end (the length) of a well
|
||
|
// formed Khmer Syllable.
|
||
|
//
|
||
|
// Each horizontal line is ordered exactly the same way as the values in KhmerClassTable
|
||
|
// CharClassValues. This coincidence of values allows the follow up of the table.
|
||
|
//
|
||
|
// Each line corresponds to a state, which does not necessarily need to be a type
|
||
|
// of component... for example, state 2 is a base, which is always a first character
|
||
|
// in the syllable, but the state could be produced a consonant of any type when
|
||
|
// it is the first character that is analysed (in ground state).
|
||
|
//
|
||
|
// Differentiating 3 types of consonants is necessary in order to
|
||
|
// forbid the use of certain combinations, such as having a second
|
||
|
// coeng after a coeng RO,
|
||
|
// The inexistent possibility of having a type 3 after another type 3 is permitted,
|
||
|
// eliminating it would very much complicate the table, and it does not create typing
|
||
|
// problems, as the case above.
|
||
|
//
|
||
|
// The table is quite complex, in order to limit the number of coeng consonants
|
||
|
// to 2 (by means of the table).
|
||
|
//
|
||
|
// There is a peculiarity, as far as Unicode is concerned:
|
||
|
// - The consonant-shifter is considered in two possible different
|
||
|
// locations, the one considered in Unicode 3.0 and the one considered in
|
||
|
// Unicode 4.0. (there is a backwards compatibility problem in this standard).
|
||
|
//
|
||
|
//
|
||
|
// xx independent character, such as a number, punctuation sign or non-khmer char
|
||
|
//
|
||
|
// c1 Khmer consonant of type 1 or an independent vowel
|
||
|
// that is, a letter in which the subscript for is only under the
|
||
|
// base, not taking any space to the right or to the left
|
||
|
//
|
||
|
// c2 Khmer consonant of type 2, the coeng form takes space under
|
||
|
// and to the left of the base (only RO is of this type)
|
||
|
//
|
||
|
// c3 Khmer consonant of type 3. Its subscript form takes space under
|
||
|
// and to the right of the base.
|
||
|
//
|
||
|
// cs Khmer consonant shifter
|
||
|
//
|
||
|
// rb Khmer robat
|
||
|
//
|
||
|
// co coeng character (u17D2)
|
||
|
//
|
||
|
// dv dependent vowel (including split vowels, they are treated in the same way).
|
||
|
// even if dv is not defined above, the component that is really tested for is
|
||
|
// KhmerClassTable::CC_DEPENDENT_VOWEL, which is common to all dependent vowels
|
||
|
//
|
||
|
// zwj Zero Width joiner
|
||
|
//
|
||
|
// zwnj Zero width non joiner
|
||
|
//
|
||
|
// sa above sign
|
||
|
//
|
||
|
// sp post sign
|
||
|
//
|
||
|
// there are lines with equal content but for an easier understanding
|
||
|
// (and maybe change in the future) we did not join them
|
||
|
//
|
||
|
// Syllable state machine: khmerStateTable[state][class] is the state entered
// when a character of that class is read in that state. Columns follow the
// numbering of KhmerCharClassValues; a negative entry ends the syllable.
static const signed char khmerStateTable[][CC_COUNT] =
{
    // xx  c1  c2  c3 zwnj cs  rb  co  dv  sa  sp zwj
    {   1,  2,  2,  2,  1,  1,  1,  6,  1,  1,  1,  2 },  //  0 - ground state
    {  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 },  //  1 - exit state (or sign to the right of the syllable)
    {  -1, -1, -1, -1,  3,  4,  5,  6, 16, 17,  1, -1 },  //  2 - base consonant
    {  -1, -1, -1, -1, -1,  4, -1, -1, 16, -1, -1, -1 },  //  3 - first ZWNJ before a register shifter; only a shifter or a vowel may follow
    {  -1, -1, -1, -1, 15, -1, -1,  6, 16, 17,  1, 14 },  //  4 - first register shifter
    {  -1, -1, -1, -1, -1, -1, -1, -1, 20, -1,  1, -1 },  //  5 - robat
    {  -1,  7,  8,  9, -1, -1, -1, -1, -1, -1, -1, -1 },  //  6 - first coeng
    {  -1, -1, -1, -1, 12, 13, -1, 10, 16, 17,  1, 14 },  //  7 - first consonant of type 1 after coeng
    {  -1, -1, -1, -1, 12, 13, -1, -1, 16, 17,  1, 14 },  //  8 - first consonant of type 2 after coeng
    {  -1, -1, -1, -1, 12, 13, -1, 10, 16, 17,  1, 14 },  //  9 - first consonant of type 3 after coeng
    {  -1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1 },  // 10 - second coeng (no register shifter before)
    {  -1, -1, -1, -1, 15, -1, -1, -1, 16, 17,  1, 14 },  // 11 - second coeng consonant (or ind. vowel), no register shifter before
    {  -1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1 },  // 12 - second ZWNJ before a register shifter
    {  -1, -1, -1, -1, 15, -1, -1, -1, 16, 17,  1, 14 },  // 13 - second register shifter
    {  -1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1 },  // 14 - ZWJ before vowel
    {  -1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1 },  // 15 - ZWNJ before vowel
    {  -1, -1, -1, -1, -1, -1, -1, -1, -1, 17,  1, 18 },  // 16 - dependent vowel
    {  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, 18 },  // 17 - sign above
    {  -1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1 },  // 18 - ZWJ after vowel
    {  -1,  1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1 },  // 19 - third coeng
    {  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1 },  // 20 - dependent vowel after a robat
};
|
||
|
|
||
|
|
||
|
// Uncomment to route KHDEBUG output to qDebug; otherwise the calls sit
// behind if(0).
// #define KHMER_DEBUG
#if defined(KHMER_DEBUG)
#  define KHDEBUG qDebug
#else
#  define KHDEBUG if(0) qDebug
#endif
|
||
|
|
||
|
// Given an input string of characters and a location in which to start looking
|
||
|
// calculate, using the state table, which one is the last character of the syllable
|
||
|
// that starts in the starting position.
|
||
|
//
|
||
|
static inline int khmer_nextSyllableBoundary(const QString &s, int start, int end, bool *invalid)
|
||
|
{
|
||
|
*invalid = FALSE;
|
||
|
const QChar *uc = s.unicode() + start;
|
||
|
int state = 0;
|
||
|
int pos = start;
|
||
|
|
||
|
while (pos < end) {
|
||
|
KhmerCharClass charClass = getKhmerCharClass(*uc);
|
||
|
if (pos == start) {
|
||
|
*invalid = (charClass > 0) && ! (charClass & CF_CONSONANT);
|
||
|
}
|
||
|
state = khmerStateTable[state][charClass & CF_CLASS_MASK];
|
||
|
|
||
|
KHDEBUG("state[%d]=%d class=%8lx (uc=%4x)", pos - start, state,
|
||
|
charClass, uc->unicode() );
|
||
|
|
||
|
if (state < 0) {
|
||
|
break;
|
||
|
}
|
||
|
++uc;
|
||
|
++pos;
|
||
|
}
|
||
|
return pos;
|
||
|
}
|
||
|
|
||
|
|
||
|
#ifndef QT_NO_XFTFREETYPE
|
||
|
static const QOpenType::Features khmer_features[] = {
|
||
|
{ FT_MAKE_TAG( 'p', 'r', 'e', 'f' ), PreFormProperty },
|
||
|
{ FT_MAKE_TAG( 'b', 'l', 'w', 'f' ), BelowFormProperty },
|
||
|
{ FT_MAKE_TAG( 'a', 'b', 'v', 'f' ), AboveFormProperty },
|
||
|
{ FT_MAKE_TAG( 'p', 's', 't', 'f' ), PostFormProperty },
|
||
|
{ FT_MAKE_TAG( 'p', 'r', 'e', 's' ), PreSubstProperty },
|
||
|
{ FT_MAKE_TAG( 'b', 'l', 'w', 's' ), BelowSubstProperty },
|
||
|
{ FT_MAKE_TAG( 'a', 'b', 'v', 's' ), AboveSubstProperty },
|
||
|
{ FT_MAKE_TAG( 'p', 's', 't', 's' ), PostSubstProperty },
|
||
|
{ FT_MAKE_TAG( 'c', 'l', 'i', 'g' ), CligProperty },
|
||
|
{ 0, 0 }
|
||
|
};
|
||
|
#endif
|
||
|
|
||
|
|
||
|
static bool khmer_shape_syllable(QOpenType *openType, QShaperItem *item)
|
||
|
{
|
||
|
#ifndef QT_NO_XFTFREETYPE
|
||
|
if (openType)
|
||
|
openType->selectScript(QFont::Khmer, khmer_features);
|
||
|
#endif
|
||
|
// according to the specs this is the max length one can get
|
||
|
// ### the real value should be smaller
|
||
|
assert(item->length < 13);
|
||
|
|
||
|
KHDEBUG("syllable from %d len %d, str='%s'", item->from, item->length,
|
||
|
item->string->mid(item->from, item->length).utf8().data());
|
||
|
|
||
|
int len = 0;
|
||
|
int syllableEnd = item->from + item->length;
|
||
|
unsigned short reordered[16];
|
||
|
unsigned char properties[16];
|
||
|
enum {
|
||
|
AboveForm = 0x01,
|
||
|
PreForm = 0x02,
|
||
|
PostForm = 0x04,
|
||
|
BelowForm = 0x08
|
||
|
};
|
||
|
memset(properties, 0, 16*sizeof(unsigned char));
|
||
|
|
||
|
#ifdef KHMER_DEBUG
|
||
|
qDebug("original:");
|
||
|
for (int i = from; i < syllableEnd; i++) {
|
||
|
qDebug(" %d: %4x", i, string[i].unicode());
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
// write a pre vowel or the pre part of a split vowel first
|
||
|
// and look out for coeng + ro. RO is the only vowel of type 2, and
|
||
|
// therefore the only one that requires saving space before the base.
|
||
|
//
|
||
|
int coengRo = -1; // There is no Coeng Ro, if found this value will change
|
||
|
int i;
|
||
|
for (i = item->from; i < syllableEnd; i += 1) {
|
||
|
KhmerCharClass charClass = getKhmerCharClass(item->string->at(i));
|
||
|
|
||
|
// if a split vowel, write the pre part. In Khmer the pre part
|
||
|
// is the same for all split vowels, same glyph as pre vowel C_VOWEL_E
|
||
|
if (charClass & CF_SPLIT_VOWEL) {
|
||
|
reordered[len] = C_VOWEL_E;
|
||
|
properties[len] = PreForm;
|
||
|
++len;
|
||
|
break; // there can be only one vowel
|
||
|
}
|
||
|
// if a vowel with pos before write it out
|
||
|
if (charClass & CF_POS_BEFORE) {
|
||
|
reordered[len] = item->string->at(i).unicode();
|
||
|
properties[len] = PreForm;
|
||
|
++len;
|
||
|
break; // there can be only one vowel
|
||
|
}
|
||
|
// look for coeng + ro and remember position
|
||
|
// works because coeng + ro is always in front of a vowel (if there is a vowel)
|
||
|
// and because CC_CONSONANT2 is enough to identify it, as it is the only consonant
|
||
|
// with this flag
|
||
|
if ( (charClass & CF_COENG) && (i + 1 < syllableEnd) &&
|
||
|
( (getKhmerCharClass(item->string->at(i+1)) & CF_CLASS_MASK) == CC_CONSONANT2) ) {
|
||
|
coengRo = i;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// write coeng + ro if found
|
||
|
if (coengRo > -1) {
|
||
|
reordered[len] = C_COENG;
|
||
|
properties[len] = PreForm;
|
||
|
++len;
|
||
|
reordered[len] = C_RO;
|
||
|
properties[len] = PreForm;
|
||
|
++len;
|
||
|
}
|
||
|
|
||
|
// shall we add a dotted circle?
|
||
|
// If in the position in which the base should be (first char in the string) there is
|
||
|
// a character that has the Dotted circle flag (a character that cannot be a base)
|
||
|
// then write a dotted circle
|
||
|
if (getKhmerCharClass(item->string->at(item->from)) & CF_DOTTED_CIRCLE) {
|
||
|
reordered[len] = C_DOTTED_CIRCLE;
|
||
|
++len;
|
||
|
}
|
||
|
|
||
|
// copy what is left to the output, skipping before vowels and
|
||
|
// coeng Ro if they are present
|
||
|
for (i = item->from; i < syllableEnd; i += 1) {
|
||
|
QChar uc = item->string->at(i);
|
||
|
KhmerCharClass charClass = getKhmerCharClass(uc);
|
||
|
|
||
|
// skip a before vowel, it was already processed
|
||
|
if (charClass & CF_POS_BEFORE) {
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
// skip coeng + ro, it was already processed
|
||
|
if (i == coengRo) {
|
||
|
i += 1;
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
switch (charClass & CF_POS_MASK)
|
||
|
{
|
||
|
case CF_POS_ABOVE :
|
||
|
reordered[len] = uc.unicode();
|
||
|
properties[len] = AboveForm;
|
||
|
++len;
|
||
|
break;
|
||
|
|
||
|
case CF_POS_AFTER :
|
||
|
reordered[len] = uc.unicode();
|
||
|
properties[len] = PostForm;
|
||
|
++len;
|
||
|
break;
|
||
|
|
||
|
case CF_POS_BELOW :
|
||
|
reordered[len] = uc.unicode();
|
||
|
properties[len] = BelowForm;
|
||
|
++len;
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
// assign the correct flags to a coeng consonant
|
||
|
// Consonants of type 3 are taged as Post forms and those type 1 as below forms
|
||
|
if ( (charClass & CF_COENG) && i + 1 < syllableEnd ) {
|
||
|
unsigned char property = (getKhmerCharClass(item->string->at(i+1)) & CF_CLASS_MASK) == CC_CONSONANT3 ?
|
||
|
PostForm : BelowForm;
|
||
|
reordered[len] = uc.unicode();
|
||
|
properties[len] = property;
|
||
|
++len;
|
||
|
i += 1;
|
||
|
reordered[len] = item->string->at(i).unicode();
|
||
|
properties[len] = property;
|
||
|
++len;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
// if a shifter is followed by an above vowel change the shifter to below form,
|
||
|
// an above vowel can have two possible positions i + 1 or i + 3
|
||
|
// (position i+1 corresponds to unicode 3, position i+3 to Unicode 4)
|
||
|
// and there is an extra rule for C_VOWEL_AA + C_SIGN_NIKAHIT also for two
|
||
|
// different positions, right after the shifter or after a vowel (Unicode 4)
|
||
|
if ( (charClass & CF_SHIFTER) && (i + 1 < syllableEnd) ) {
|
||
|
if (getKhmerCharClass(item->string->at(i+1)) & CF_ABOVE_VOWEL ) {
|
||
|
reordered[len] = uc.unicode();
|
||
|
properties[len] = BelowForm;
|
||
|
++len;
|
||
|
break;
|
||
|
}
|
||
|
if (i + 2 < syllableEnd &&
|
||
|
(item->string->at(i+1).unicode() == C_VOWEL_AA) &&
|
||
|
(item->string->at(i+2).unicode() == C_SIGN_NIKAHIT) )
|
||
|
{
|
||
|
reordered[len] = uc.unicode();
|
||
|
properties[len] = BelowForm;
|
||
|
++len;
|
||
|
break;
|
||
|
}
|
||
|
if (i + 3 < syllableEnd && (getKhmerCharClass(item->string->at(i+3)) & CF_ABOVE_VOWEL) ) {
|
||
|
reordered[len] = uc.unicode();
|
||
|
properties[len] = BelowForm;
|
||
|
++len;
|
||
|
break;
|
||
|
}
|
||
|
if (i + 4 < syllableEnd &&
|
||
|
(item->string->at(i+3).unicode() == C_VOWEL_AA) &&
|
||
|
(item->string->at(i+4).unicode() == C_SIGN_NIKAHIT) )
|
||
|
{
|
||
|
reordered[len] = uc.unicode();
|
||
|
properties[len] = BelowForm;
|
||
|
++len;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// default - any other characters
|
||
|
reordered[len] = uc.unicode();
|
||
|
++len;
|
||
|
break;
|
||
|
} // switch
|
||
|
} // for
|
||
|
|
||
|
if (item->font->stringToCMap((const QChar *)reordered, len, item->glyphs, item->advances,
|
||
|
&item->num_glyphs, item->flags & QTextEngine::RightToLeft) != QFontEngine::NoError)
|
||
|
return FALSE;
|
||
|
|
||
|
KHDEBUG("after shaping: len=%d", len);
|
||
|
for (i = 0; i < len; i++) {
|
||
|
item->attributes[i].mark = FALSE;
|
||
|
item->attributes[i].clusterStart = FALSE;
|
||
|
item->attributes[i].justification = 0;
|
||
|
item->attributes[i].zeroWidth = FALSE;
|
||
|
KHDEBUG(" %d: %4x property=%x", i, reordered[i], properties[i]);
|
||
|
}
|
||
|
|
||
|
// now we have the syllable in the right order, and can start running it through open type.
|
||
|
|
||
|
#ifndef QT_NO_XFTFREETYPE
|
||
|
if (openType) {
|
||
|
unsigned short logClusters[16];
|
||
|
for (int i = 0; i < len; ++i)
|
||
|
logClusters[i] = i;
|
||
|
|
||
|
uint where[16];
|
||
|
|
||
|
for (int i = 0; i < len; ++i) {
|
||
|
where[i] = ~(PreSubstProperty
|
||
|
| BelowSubstProperty
|
||
|
| AboveSubstProperty
|
||
|
| PostSubstProperty
|
||
|
| CligProperty
|
||
|
| PositioningProperties);
|
||
|
if (properties[i] == PreForm)
|
||
|
where[i] &= ~PreFormProperty;
|
||
|
else if (properties[i] == BelowForm)
|
||
|
where[i] &= ~BelowFormProperty;
|
||
|
else if (properties[i] == AboveForm)
|
||
|
where[i] &= ~AboveFormProperty;
|
||
|
else if (properties[i] == PostForm)
|
||
|
where[i] &= ~PostFormProperty;
|
||
|
}
|
||
|
|
||
|
openType->shape(item, where);
|
||
|
if (!openType->positionAndAdd(item, FALSE))
|
||
|
return FALSE;
|
||
|
} else
|
||
|
#endif
|
||
|
{
|
||
|
KHDEBUG("Not using openType");
|
||
|
Q_UNUSED(openType);
|
||
|
}
|
||
|
|
||
|
item->attributes[0].clusterStart = TRUE;
|
||
|
return TRUE;
|
||
|
}
|
||
|
|
||
|
// Shapes a Khmer text run by splitting it into syllables and shaping each
// syllable on its own.
//
// Every syllable is handed to khmer_shape_syllable() through a copy of the
// item whose from/length and glyph, offset, advance and attribute pointers
// are moved to the syllable's slice. All characters of a syllable are mapped
// to the syllable's first glyph in the log cluster array.
//
// Returns FALSE as soon as one syllable fails to shape; item->num_glyphs is
// then increased by the glyph count the failing syllable reported. On success
// item->num_glyphs is the total number of glyphs produced.
static bool khmer_shape(QShaperItem *item)
{
    assert(item->script == QFont::Khmer);

#ifndef QT_NO_XFTFREETYPE
    // Use OpenType only when the font supports this script.
    QOpenType *openType = item->font->openType();
    if (openType && !openType->supportsScript(item->script))
        openType = 0;
#else
    QOpenType *openType = 0;
#endif
    unsigned short *clusterMap = item->log_clusters;

    QShaperItem part = *item;
    int glyphBase = 0;

    const int runEnd = item->from + item->length;
    KHDEBUG("khmer_shape: from %d length %d", item->from, item->length);
    for (int cursor = item->from; cursor < runEnd; ) {
        bool invalid;
        const int boundary = khmer_nextSyllableBoundary(*item->string, cursor, runEnd, &invalid);
        KHDEBUG("syllable from %d, length %d, invalid=%s", cursor, boundary-cursor,
                invalid ? "TRUE" : "FALSE");

        // Point the working copy at this syllable's slice of the output arrays.
        part.from = cursor;
        part.length = boundary - cursor;
        part.glyphs = item->glyphs + glyphBase;
        part.offsets = item->offsets + glyphBase;
        part.advances = item->advances + glyphBase;
        part.attributes = item->attributes + glyphBase;
        part.num_glyphs = item->num_glyphs - glyphBase;

        if (!khmer_shape_syllable(openType, &part)) {
            KHDEBUG("syllable shaping failed, syllable requests %d glyphs", part.num_glyphs);
            item->num_glyphs += part.num_glyphs;
            return FALSE;
        }
        item->has_positioning |= part.has_positioning;

        // fix logcluster array
        KHDEBUG("syllable:");
        for (int g = glyphBase; g < glyphBase + part.num_glyphs; ++g) {
            KHDEBUG(" %d -> glyph %x", g, item->glyphs[g]);
        }
        KHDEBUG(" logclusters:");
        for (int c = cursor; c < boundary; ++c) {
            KHDEBUG(" %d -> glyph %d", c, glyphBase);
            clusterMap[c - item->from] = glyphBase;
        }

        cursor = boundary;
        glyphBase += part.num_glyphs;
    }
    item->num_glyphs = glyphBase;
    return TRUE;
}
|
||
|
|
||
|
static void khmer_attributes( int script, const QString &text, int from, int len, QCharAttributes *attributes )
|
||
|
{
|
||
|
Q_UNUSED(script);
|
||
|
|
||
|
int end = from + len;
|
||
|
const QChar *uc = text.unicode() + from;
|
||
|
attributes += from;
|
||
|
int i = 0;
|
||
|
while ( i < len ) {
|
||
|
bool invalid;
|
||
|
int boundary = khmer_nextSyllableBoundary( text, from+i, end, &invalid ) - from;
|
||
|
|
||
|
attributes[i].charStop = TRUE;
|
||
|
|
||
|
if ( boundary > len-1 ) boundary = len;
|
||
|
i++;
|
||
|
while ( i < boundary ) {
|
||
|
attributes[i].charStop = FALSE;
|
||
|
++uc;
|
||
|
++i;
|
||
|
}
|
||
|
assert( i == boundary );
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// --------------------------------------------------------------------------------------------------------------------------------------------
|
||
|
//
|
||
|
// Myanmar
|
||
|
//
|
||
|
// --------------------------------------------------------------------------------------------------------------------------------------------
|
||
|
|
||
|
/* Character classes of the Myanmar syllable state machine. The value is the
 * column index into mymrStateTable and is stored in the low bits of a
 * MymrCharClass (see Mymr_CF_CLASS_MASK). */
enum MymrCharClassValues
{
    Mymr_CC_RESERVED           =  0,
    Mymr_CC_CONSONANT          =  1, /* Consonant of type 1, that has subscript form */
    Mymr_CC_CONSONANT2         =  2, /* Consonant of type 2, that has no subscript form */
    Mymr_CC_NGA                =  3, /* Consonant NGA */
    Mymr_CC_YA                 =  4, /* Consonant YA */
    Mymr_CC_RA                 =  5, /* Consonant RA */
    Mymr_CC_WA                 =  6, /* Consonant WA */
    Mymr_CC_HA                 =  7, /* Consonant HA */
    Mymr_CC_IND_VOWEL          =  8, /* Independent vowel */
    Mymr_CC_ZERO_WIDTH_NJ_MARK =  9, /* Zero width non joiner character (0x200C) */
    Mymr_CC_VIRAMA             = 10, /* Subscript consonant combining character */
    Mymr_CC_PRE_VOWEL          = 11, /* Dependent vowel, before the base (Vowel e) */
    Mymr_CC_BELOW_VOWEL        = 12, /* Dependent vowel, below the base (Vowel u, uu) */
    Mymr_CC_ABOVE_VOWEL        = 13, /* Dependent vowel, above the base (Vowel i, ii, ai) */
    Mymr_CC_POST_VOWEL         = 14, /* Dependent vowel, after the base (Vowel aa) */
    Mymr_CC_SIGN_ABOVE         = 15,
    Mymr_CC_SIGN_BELOW         = 16,
    Mymr_CC_SIGN_AFTER         = 17,
    Mymr_CC_ZERO_WIDTH_J_MARK  = 18, /* Zero width joiner character */
    Mymr_CC_COUNT              = 19  /* This is the number of character classes */
};
|
||
|
|
||
|
/* Flag bits that are OR-ed on top of a MymrCharClassValues value to form a
 * MymrCharClass. The low 16 bits hold the class, the bits above hold flags. */
enum MymrCharClassFlags
{
    Mymr_CF_CLASS_MASK    = 0xFFFF,  /* extracts the MymrCharClassValues part */

    /* position flags */
    Mymr_CF_POS_AFTER     = 1 << 16,
    Mymr_CF_POS_ABOVE     = 1 << 17,
    Mymr_CF_POS_BELOW     = 1 << 18,
    Mymr_CF_POS_BEFORE    = 1 << 19,
    Mymr_CF_POS_MASK      = Mymr_CF_POS_AFTER | Mymr_CF_POS_ABOVE
                          | Mymr_CF_POS_BELOW | Mymr_CF_POS_BEFORE,

    Mymr_CF_AFTER_KINZI   = 1 << 20,

    Mymr_CF_CONSONANT     = 1 << 24, /* flag to speed up comparing */
    Mymr_CF_MEDIAL        = 1 << 25, /* flag to speed up comparing */
    Mymr_CF_IND_VOWEL     = 1 << 26, /* flag to speed up comparing */
    Mymr_CF_DEP_VOWEL     = 1 << 27, /* flag to speed up comparing */
    Mymr_CF_DOTTED_CIRCLE = 1 << 28, /* add a dotted circle if a character with this flag is the first in a syllable */
    Mymr_CF_VIRAMA        = 1 << 29  /* flag to speed up comparing */
};
|
||
|
|
||
|
/* Characters that get referred to by name (listed by code point) */
enum MymrChar
{
    Mymr_C_NGA           = 0x1004,
    Mymr_C_YA            = 0x101A,
    Mymr_C_RA            = 0x101B,
    Mymr_C_VOWEL_E       = 0x1031,
    Mymr_C_VIRAMA        = 0x1039,
    Mymr_C_SIGN_ZWNJ     = 0x200C,
    Mymr_C_SIGN_ZWJ      = 0x200D,
    Mymr_C_DOTTED_CIRCLE = 0x25CC
};
|
||
|
|
||
|
enum
|
||
|
{
|
||
|
Mymr_xx = Mymr_CC_RESERVED,
|
||
|
Mymr_c1 = Mymr_CC_CONSONANT | Mymr_CF_CONSONANT | Mymr_CF_POS_BELOW,
|
||
|
Mymr_c2 = Mymr_CC_CONSONANT2 | Mymr_CF_CONSONANT,
|
||
|
Mymr_ng = Mymr_CC_NGA | Mymr_CF_CONSONANT | Mymr_CF_POS_ABOVE,
|
||
|
Mymr_ya = Mymr_CC_YA | Mymr_CF_CONSONANT | Mymr_CF_MEDIAL | Mymr_CF_POS_AFTER | Mymr_CF_AFTER_KINZI,
|
||
|
Mymr_ra = Mymr_CC_RA | Mymr_CF_CONSONANT | Mymr_CF_MEDIAL | Mymr_CF_POS_BEFORE,
|
||
|
Mymr_wa = Mymr_CC_WA | Mymr_CF_CONSONANT | Mymr_CF_MEDIAL | Mymr_CF_POS_BELOW,
|
||
|
Mymr_ha = Mymr_CC_HA | Mymr_CF_CONSONANT | Mymr_CF_MEDIAL | Mymr_CF_POS_BELOW,
|
||
|
Mymr_id = Mymr_CC_IND_VOWEL | Mymr_CF_IND_VOWEL,
|
||
|
Mymr_vi = Mymr_CC_VIRAMA | Mymr_CF_VIRAMA | Mymr_CF_POS_ABOVE | Mymr_CF_DOTTED_CIRCLE,
|
||
|
Mymr_dl = Mymr_CC_PRE_VOWEL | Mymr_CF_DEP_VOWEL | Mymr_CF_POS_BEFORE | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI,
|
||
|
Mymr_db = Mymr_CC_BELOW_VOWEL | Mymr_CF_DEP_VOWEL | Mymr_CF_POS_BELOW | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI,
|
||
|
Mymr_da = Mymr_CC_ABOVE_VOWEL | Mymr_CF_DEP_VOWEL | Mymr_CF_POS_ABOVE | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI,
|
||
|
Mymr_dr = Mymr_CC_POST_VOWEL | Mymr_CF_DEP_VOWEL | Mymr_CF_POS_AFTER | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI,
|
||
|
Mymr_sa = Mymr_CC_SIGN_ABOVE | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_POS_ABOVE | Mymr_CF_AFTER_KINZI,
|
||
|
Mymr_sb = Mymr_CC_SIGN_BELOW | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_POS_BELOW | Mymr_CF_AFTER_KINZI,
|
||
|
Mymr_sp = Mymr_CC_SIGN_AFTER | Mymr_CF_DOTTED_CIRCLE | Mymr_CF_AFTER_KINZI
|
||
|
};
|
||
|
|
||
|
|
||
|
typedef int MymrCharClass;
|
||
|
|
||
|
|
||
|
static const MymrCharClass mymrCharClasses[] =
|
||
|
{
|
||
|
Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_ng, Mymr_c1, Mymr_c1, Mymr_c1,
|
||
|
Mymr_c1, Mymr_c1, Mymr_c2, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, /* 1000 - 100F */
|
||
|
Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1, Mymr_c1,
|
||
|
Mymr_c1, Mymr_c1, Mymr_ya, Mymr_ra, Mymr_c1, Mymr_wa, Mymr_c1, Mymr_ha, /* 1010 - 101F */
|
||
|
Mymr_c2, Mymr_c2, Mymr_xx, Mymr_id, Mymr_id, Mymr_id, Mymr_id, Mymr_id,
|
||
|
Mymr_xx, Mymr_id, Mymr_id, Mymr_xx, Mymr_dr, Mymr_da, Mymr_da, Mymr_db, /* 1020 - 102F */
|
||
|
Mymr_db, Mymr_dl, Mymr_da, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_sa, Mymr_sb,
|
||
|
Mymr_sp, Mymr_vi, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, /* 1030 - 103F */
|
||
|
Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx,
|
||
|
Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, /* 1040 - 104F */
|
||
|
Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx,
|
||
|
Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, Mymr_xx, /* 1050 - 105F */
|
||
|
};
|
||
|
|
||
|
// Returns the character class (class index plus flags) of ch.
//
// The zero width joiner and non-joiner get their own classes, code points in
// U+1000..U+105F are looked up in mymrCharClasses, and everything else is
// Mymr_CC_RESERVED.
static MymrCharClass
getMyanmarCharClass (const QChar &ch)
{
    const ushort code = ch.unicode();

    switch (code) {
    case Mymr_C_SIGN_ZWJ:
        return Mymr_CC_ZERO_WIDTH_J_MARK;
    case Mymr_C_SIGN_ZWNJ:
        return Mymr_CC_ZERO_WIDTH_NJ_MARK;
    default:
        break;
    }

    const bool inTable = (code >= 0x1000 && code <= 0x105f);
    return inTable ? mymrCharClasses[code - 0x1000] : Mymr_CC_RESERVED;
}
|
||
|
|
||
|
// Transition table of the Myanmar syllable state machine.
//
// Rows are states, columns are character classes (MymrCharClassValues); the
// entry is the next state. Negative entries end the syllable:
//   -1  the current character starts the next syllable
//   -2  invalid order of medials or combination of invalids with virama; the
//       preceding virama is treated as the start of the next syllable as well
static const signed char mymrStateTable[][Mymr_CC_COUNT] =
{
//   xx  c1  c2  ng  ya  ra  wa  ha  id zwnj vi  dl  db  da  dr  sa  sb  sp zwj
    { 1,  4,  4,  2,  4,  4,  4,  4, 24,  1, 27, 17, 18, 19, 20, 21,  1,  1,  4}, //  0 - ground state
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, //  1 - exit state (or sp to the right of the syllable)
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  3, 17, 18, 19, 20, 21, -1, -1,  4}, //  2 - NGA
    {-1,  4,  4,  4,  4,  4,  4,  4, -1, 23, -1, -1, -1, -1, -1, -1, -1, -1, -1}, //  3 - Virama after NGA
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  5, 17, 18, 19, 20, 21,  1,  1, -1}, //  4 - Base consonant
    {-2,  6, -2, -2,  7,  8,  9, 10, -2, 23, -2, -2, -2, -2, -2, -2, -2, -2, -2}, //  5 - First virama
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 25, 17, 18, 19, 20, 21, -1, -1, -1}, //  6 - c1 after virama
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 17, 18, 19, 20, 21, -1, -1, -1}, //  7 - ya after virama
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 17, 18, 19, 20, 21, -1, -1, -1}, //  8 - ra after virama
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 17, 18, 19, 20, 21, -1, -1, -1}, //  9 - wa after virama
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 18, 19, 20, 21, -1, -1, -1}, // 10 - ha after virama
    {-1, -1, -1, -1,  7,  8,  9, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 11 - Virama after NGA+zwj
    {-2, -2, -2, -2, -2, -2, 13, 14, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2}, // 12 - Second virama
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, 17, 18, 19, 20, 21, -1, -1, -1}, // 13 - wa after virama
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 18, 19, 20, 21, -1, -1, -1}, // 14 - ha after virama
    {-2, -2, -2, -2, -2, -2, -2, 16, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2}, // 15 - Third virama
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 18, 19, 20, 21, -1, -1, -1}, // 16 - ha after virama
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 20, 21,  1,  1, -1}, // 17 - dl, Dependent vowel e
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, -1, 21,  1,  1, -1}, // 18 - db, Dependent vowel u,uu
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1,  1,  1, -1}, // 19 - da, Dependent vowel i,ii,ai
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 22, -1, -1, -1, -1, -1,  1,  1, -1}, // 20 - dr, Dependent vowel aa
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1,  1, -1}, // 21 - sa, Sign anusvara
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, 23, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 22 - atha
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1,  1, -1}, // 23 - zwnj for atha
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1}, // 24 - Independent vowel
    {-2, -2, -2, -2, 26, 26, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2}, // 25 - Virama after subscript consonant
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 17, 18, 19, 20, 21, -1,  1, -1}, // 26 - ra/ya after subscript consonant + virama
    {-1,  6, -1, -1,  7,  8,  9, 10, -1, 23, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 27 - Virama after ground state
};
|
||
|
|
||
|
|
||
|
|
||
|
// Debug output of the Myanmar shaper. Uncomment the define below to route
// MMDEBUG(...) to qDebug(...); otherwise the call sits behind "if(0)".
// #define MYANMAR_DEBUG
#ifdef MYANMAR_DEBUG
#define MMDEBUG qDebug
#else
#define MMDEBUG if(0) qDebug
#endif
|
||
|
|
||
|
// Given an input string of characters and a location in which to start looking
|
||
|
// calculate, using the state table, which one is the last character of the syllable
|
||
|
// that starts in the starting position.
|
||
|
//
|
||
|
static inline int myanmar_nextSyllableBoundary(const QString &s, int start, int end, bool *invalid)
|
||
|
{
|
||
|
*invalid = FALSE;
|
||
|
const QChar *uc = s.unicode() + start;
|
||
|
int state = 0;
|
||
|
int pos = start;
|
||
|
|
||
|
while (pos < end) {
|
||
|
MymrCharClass charClass = getMyanmarCharClass(*uc);
|
||
|
state = mymrStateTable[state][charClass & Mymr_CF_CLASS_MASK];
|
||
|
if (pos == start)
|
||
|
*invalid = charClass & Mymr_CF_DOTTED_CIRCLE;
|
||
|
|
||
|
MMDEBUG("state[%d]=%d class=%8x (uc=%4x)", pos - start, state, charClass, uc->unicode() );
|
||
|
|
||
|
if (state < 0) {
|
||
|
if (state < -1)
|
||
|
--pos;
|
||
|
break;
|
||
|
}
|
||
|
++uc;
|
||
|
++pos;
|
||
|
}
|
||
|
return pos;
|
||
|
}
|
||
|
|
||
|
|
||
|
#ifndef QT_NO_XFTFREETYPE
|
||
|
// ###### might have to change order of above and below forms and substitutions,
|
||
|
// but according to Unicode below comes before above
|
||
|
static const QOpenType::Features myanmar_features[] = {
|
||
|
{ FT_MAKE_TAG( 'p', 'r', 'e', 'f' ), PreFormProperty },
|
||
|
{ FT_MAKE_TAG( 'b', 'l', 'w', 'f' ), BelowFormProperty },
|
||
|
{ FT_MAKE_TAG( 'a', 'b', 'v', 'f' ), AboveFormProperty },
|
||
|
{ FT_MAKE_TAG( 'p', 's', 't', 'f' ), PostFormProperty },
|
||
|
{ FT_MAKE_TAG( 'p', 'r', 'e', 's' ), PreSubstProperty },
|
||
|
{ FT_MAKE_TAG( 'b', 'l', 'w', 's' ), BelowSubstProperty },
|
||
|
{ FT_MAKE_TAG( 'a', 'b', 'v', 's' ), AboveSubstProperty },
|
||
|
{ FT_MAKE_TAG( 'p', 's', 't', 's' ), PostSubstProperty },
|
||
|
{ FT_MAKE_TAG( 'r', 'l', 'i', 'g' ), CligProperty }, // Myanmar1 uses this instead of the other features
|
||
|
{ 0, 0 }
|
||
|
};
|
||
|
#endif
|
||
|
|
||
|
|
||
|
// Visual order before shaping should be:
|
||
|
//
|
||
|
// [Vowel Mark E]
|
||
|
// [Virama + Medial Ra]
|
||
|
// [Base]
|
||
|
// [Virama + Consonant]
|
||
|
// [Nga + Virama] (Kinzi) ### should probably come before post forms (medial ya)
|
||
|
// [Vowels]
|
||
|
// [Marks]
|
||
|
//
|
||
|
// This means that we can keep the logical order apart from having to
|
||
|
// move the pre vowel, medial ra and kinzi
|
||
|
|
||
|
// Shapes one Myanmar syllable.
//
// The syllable item->string[item->from, item->from + item->length) is
// reordered into visual order (vowel E, medial ra, optional dotted circle,
// remaining characters with the kinzi moved behind the base), mapped to
// glyphs with stringToCMap() and, when openType is non-null, run through the
// OpenType features selected for Myanmar.
//
// openType  OpenType engine to use, or 0 to skip OpenType shaping.
// item      syllable to shape; its glyph, advance and attribute arrays are
//           filled in and num_glyphs is updated by stringToCMap().
// invalid   TRUE when the syllable starts with a character that cannot be a
//           base; a dotted circle is then inserted as placeholder base.
//
// Returns FALSE when glyph mapping or OpenType positioning fails.
static bool myanmar_shape_syllable(QOpenType *openType, QShaperItem *item, bool invalid)
{
#ifndef QT_NO_XFTFREETYPE
    if (openType)
        openType->selectScript(QFont::Myanmar, myanmar_features);
#endif
    // according to the table the max length of a syllable should be around 14 chars
    assert(item->length < 32);

    MMDEBUG("\nsyllable from %d len %d, str='%s'", item->from, item->length,
            item->string->mid(item->from, item->length).utf8().data());

    const QChar *uc = item->string->unicode() + item->from;
#ifdef MYANMAR_DEBUG
    qDebug("original:");
    for (int i = 0; i < item->length; i++) {
        qDebug(" %d: %4x", i, uc[i].unicode());
    }
#endif

    // First pass: locate the pieces that have to move.
    int vowel_e = -1;
    int kinzi = -1;
    int medial_ra = -1;
    int base = -1;
    int i;
    for (i = 0; i < item->length; ++i) {
        ushort chr = uc[i].unicode();

        if (chr == Mymr_C_VOWEL_E) {
            vowel_e = i;
            continue;
        }
        // kinzi: NGA + VIRAMA at the very start, followed by a consonant
        if (i == 0
            && chr == Mymr_C_NGA
            && i + 2 < item->length
            && uc[i+1].unicode() == Mymr_C_VIRAMA) {
            int mc = getMyanmarCharClass(uc[i+2]);
            //MMDEBUG("maybe kinzi: mc=%x", mc);
            if ((mc & Mymr_CF_CONSONANT) == Mymr_CF_CONSONANT) {
                kinzi = i;
                continue;
            }
        }
        // medial ra: VIRAMA + RA somewhere after the base
        if (base >= 0
            && chr == Mymr_C_VIRAMA
            && i + 1 < item->length
            && uc[i+1].unicode() == Mymr_C_RA) {
            medial_ra = i;
            continue;
        }
        if (base < 0)
            base = i;
    }

    MMDEBUG("\n base=%d, vowel_e=%d, kinzi=%d, medial_ra=%d", base, vowel_e, kinzi, medial_ra);

    int len = 0;
    unsigned short reordered[32];
    unsigned char properties[32];
    enum {
        AboveForm = 0x01,
        PreForm = 0x02,
        PostForm = 0x04,
        BelowForm = 0x08
    };
    memset(properties, 0, sizeof(properties));

    // write vowel_e if found
    if (vowel_e >= 0) {
        reordered[0] = Mymr_C_VOWEL_E;
        len = 1;
    }
    // write medial_ra
    if (medial_ra >= 0) {
        reordered[len] = Mymr_C_VIRAMA;
        reordered[len+1] = Mymr_C_RA;
        properties[len] = PreForm;
        properties[len+1] = PreForm;
        len += 2;
    }

    // shall we add a dotted circle?
    // If in the position in which the base should be (first char in the string) there is
    // a character that has the Dotted circle flag (a character that cannot be a base)
    // then write a dotted circle
    if (invalid) {
        reordered[len] = Mymr_C_DOTTED_CIRCLE;
        ++len;
    }

    bool lastWasVirama = FALSE;
    int basePos = -1;
    // copy the rest of the syllable to the output, inserting the kinzi
    // at the correct place
    for (i = 0; i < item->length; ++i) {
        if (i == vowel_e)
            continue;
        if (i == medial_ra || i == kinzi) {
            // both are two characters long and were handled separately
            ++i;
            continue;
        }

        ushort chr = uc[i].unicode();
        MymrCharClass cc = getMyanmarCharClass(uc[i]);
        if (kinzi >= 0 && i > base && (cc & Mymr_CF_AFTER_KINZI)) {
            reordered[len] = Mymr_C_NGA;
            reordered[len+1] = Mymr_C_VIRAMA;
            // Nothing may have been written yet (e.g. NGA + VIRAMA + YA), in
            // which case there is no preceding slot to tag.
            if (len > 0)
                properties[len-1] = AboveForm;
            properties[len] = AboveForm;
            len += 2;
            kinzi = -1;
        }

        if (lastWasVirama) {
            // the form of a "virama + character" pair follows the character's position flag
            int prop = 0;
            switch (cc & Mymr_CF_POS_MASK) {
            case Mymr_CF_POS_BEFORE:
                prop = PreForm;
                break;
            case Mymr_CF_POS_BELOW:
                prop = BelowForm;
                break;
            case Mymr_CF_POS_ABOVE:
                prop = AboveForm;
                break;
            case Mymr_CF_POS_AFTER:
                prop = PostForm;
                break;
            default:
                break;
            }
            properties[len-1] = prop;
            properties[len] = prop;
            if (basePos >= 0 && basePos == len-2)
                properties[len-2] = prop;
        }
        lastWasVirama = (chr == Mymr_C_VIRAMA);
        if (i == base)
            basePos = len;

        // joiners are dropped unless they would be the first output character
        if ((chr != Mymr_C_SIGN_ZWNJ && chr != Mymr_C_SIGN_ZWJ) || !len) {
            reordered[len] = chr;
            ++len;
        }
    }
    // kinzi that found no character to go in front of: append it
    if (kinzi >= 0) {
        reordered[len] = Mymr_C_NGA;
        reordered[len+1] = Mymr_C_VIRAMA;
        properties[len] = AboveForm;
        properties[len+1] = AboveForm;
        len += 2;
    }

    if (item->font->stringToCMap((const QChar *)reordered, len, item->glyphs, item->advances,
                                 &item->num_glyphs, item->flags & QTextEngine::RightToLeft) != QFontEngine::NoError)
        return FALSE;

    MMDEBUG("after shaping: len=%d", len);
    for (i = 0; i < len; i++) {
        item->attributes[i].mark = FALSE;
        item->attributes[i].clusterStart = FALSE;
        item->attributes[i].justification = 0;
        item->attributes[i].zeroWidth = FALSE;
        MMDEBUG(" %d: %4x property=%x", i, reordered[i], properties[i]);
    }

    // now we have the syllable in the right order, and can start running it through open type.

#ifndef QT_NO_XFTFREETYPE
    if (openType) {
        uint where[32];

        for (int i = 0; i < len; ++i) {
            where[i] = ~(PreSubstProperty
                         | BelowSubstProperty
                         | AboveSubstProperty
                         | PostSubstProperty
                         | CligProperty
                         | PositioningProperties);
            if (properties[i] == PreForm)
                where[i] &= ~PreFormProperty;
            else if (properties[i] == BelowForm)
                where[i] &= ~BelowFormProperty;
            else if (properties[i] == AboveForm)
                where[i] &= ~AboveFormProperty;
            else if (properties[i] == PostForm)
                where[i] &= ~PostFormProperty;
        }

        openType->shape(item, where);
        if (!openType->positionAndAdd(item, FALSE))
            return FALSE;
    } else
#endif
    {
        MMDEBUG("Not using openType");
        Q_UNUSED(openType);
    }

    item->attributes[0].clusterStart = TRUE;
    return TRUE;
}
|
||
|
|
||
|
// Shapes a Myanmar text run by splitting it into syllables and shaping each
// syllable on its own.
//
// Every syllable is handed to myanmar_shape_syllable() through a copy of the
// item whose from/length and glyph, offset, advance and attribute pointers
// are moved to the syllable's slice. All characters of a syllable are mapped
// to the syllable's first glyph in the log cluster array.
//
// Returns FALSE as soon as one syllable fails to shape; item->num_glyphs is
// then increased by the glyph count the failing syllable reported. On success
// item->num_glyphs is the total number of glyphs produced.
static bool myanmar_shape(QShaperItem *item)
{
    assert(item->script == QFont::Myanmar);

#ifndef QT_NO_XFTFREETYPE
    // Use OpenType only when the font supports this script.
    QOpenType *openType = item->font->openType();
    if (openType && !openType->supportsScript(item->script))
        openType = 0;
#else
    QOpenType *openType = 0;
#endif
    unsigned short *clusterMap = item->log_clusters;

    QShaperItem part = *item;
    int glyphBase = 0;

    const int runEnd = item->from + item->length;
    MMDEBUG("myanmar_shape: from %d length %d", item->from, item->length);
    for (int cursor = item->from; cursor < runEnd; ) {
        bool invalid;
        const int boundary = myanmar_nextSyllableBoundary(*item->string, cursor, runEnd, &invalid);
        MMDEBUG("syllable from %d, length %d, invalid=%s", cursor, boundary-cursor,
                invalid ? "TRUE" : "FALSE");

        // Point the working copy at this syllable's slice of the output arrays.
        part.from = cursor;
        part.length = boundary - cursor;
        part.glyphs = item->glyphs + glyphBase;
        part.offsets = item->offsets + glyphBase;
        part.advances = item->advances + glyphBase;
        part.attributes = item->attributes + glyphBase;
        part.num_glyphs = item->num_glyphs - glyphBase;

        if (!myanmar_shape_syllable(openType, &part, invalid)) {
            MMDEBUG("syllable shaping failed, syllable requests %d glyphs", part.num_glyphs);
            item->num_glyphs += part.num_glyphs;
            return FALSE;
        }
        item->has_positioning |= part.has_positioning;

        // fix logcluster array
        MMDEBUG("syllable:");
        for (int g = glyphBase; g < glyphBase + part.num_glyphs; ++g) {
            MMDEBUG(" %d -> glyph %x", g, item->glyphs[g]);
        }
        MMDEBUG(" logclusters:");
        for (int c = cursor; c < boundary; ++c) {
            MMDEBUG(" %d -> glyph %d", c, glyphBase);
            clusterMap[c - item->from] = glyphBase;
        }

        cursor = boundary;
        glyphBase += part.num_glyphs;
    }
    item->num_glyphs = glyphBase;
    return TRUE;
}
|
||
|
|
||
|
static void myanmar_attributes( int script, const QString &text, int from, int len, QCharAttributes *attributes )
|
||
|
{
|
||
|
Q_UNUSED(script);
|
||
|
|
||
|
int end = from + len;
|
||
|
const QChar *uc = text.unicode() + from;
|
||
|
attributes += from;
|
||
|
int i = 0;
|
||
|
while ( i < len ) {
|
||
|
bool invalid;
|
||
|
int boundary = myanmar_nextSyllableBoundary( text, from+i, end, &invalid ) - from;
|
||
|
|
||
|
attributes[i].charStop = TRUE;
|
||
|
attributes[i].softBreak = TRUE;
|
||
|
|
||
|
if ( boundary > len-1 ) boundary = len;
|
||
|
i++;
|
||
|
while ( i < boundary ) {
|
||
|
attributes[i].charStop = FALSE;
|
||
|
attributes[i].softBreak = FALSE;
|
||
|
++uc;
|
||
|
++i;
|
||
|
}
|
||
|
assert( i == boundary );
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// --------------------------------------------------------------------------------------------------------------------------------------------
|
||
|
//
|
||
|
// Hangul
|
||
|
//
|
||
|
// --------------------------------------------------------------------------------------------------------------------------------------------
|
||
|
|
||
|
// Hangul is a syllable based script. Unicode reserves a large range
|
||
|
// for precomposed hangul, where syllables are already precomposed to
|
||
|
// their final glyph shape. In addition, a so called jamo range is
|
||
|
// defined, that can be used to express old Hangul. Modern hangul
|
||
|
// syllables can also be expressed as jamo, and should be composed
|
||
|
// into syllables. The operation is rather simple and mathematical.
|
||
|
|
||
|
// Every hangul jamo is classified as being either a Leading consonant
|
||
|
// (L), an intermediate Vowel (V) or a trailing consonant (T). Modern
|
||
|
// hangul syllables (the ones in the precomposed area) can be of type
|
||
|
// LV or LVT.
|
||
|
//
|
||
|
// Syllable breaks do _not_ occur between:
|
||
|
//
|
||
|
// L L, V or precomposed
|
||
|
// V, LV V, T
|
||
|
// LVT, T T
|
||
|
//
|
||
|
// A standard syllable is of the form L+V+T*. The above rules allow
|
||
|
// nonstandard syllables L*V*T*. To transform them into standard
|
||
|
// syllables fill characters L_f and V_f can be inserted.
|
||
|
|
||
|
// Constants of the Hangul syllable arithmetic: first code point of the
// precomposed syllables (S) and of the leading (L), vowel (V) and trailing (T)
// jamo, followed by the number of items in each group.
enum {
    Hangul_SBase = 0xac00,
    Hangul_LBase = 0x1100,
    Hangul_VBase = 0x1161,
    Hangul_TBase = 0x11a7,
    Hangul_SCount = 11172,
    Hangul_LCount = 19,
    Hangul_VCount = 21,
    Hangul_TCount = 28,
    Hangul_NCount = 21*28
};

// Returns true when uc lies in the precomposed syllable range
// [Hangul_SBase, Hangul_SBase + Hangul_SCount).
static inline bool hangul_isPrecomposed(unsigned short uc) {
    return (uc >= Hangul_SBase && uc < Hangul_SBase + Hangul_SCount);
}

// Returns true when the offset of uc from Hangul_SBase is a multiple of
// Hangul_TCount, i.e. a precomposed syllable without trailing consonant.
// Only meaningful for code points accepted by hangul_isPrecomposed().
static inline bool hangul_isLV(unsigned short uc) {
    return ((uc - Hangul_SBase) % Hangul_TCount == 0);
}

// Classification of a code point for syllable segmentation. The order of
// L, V and T matters: hangul_nextSyllableBoundary() compares them with '>'.
enum HangulType {
    L,      // leading consonant jamo
    V,      // vowel jamo
    T,      // trailing consonant jamo
    LV,     // precomposed syllable without trailing consonant
    LVT,    // precomposed syllable with trailing consonant
    X       // anything else
};

// Classifies uc as jamo (L, V, T), precomposed syllable (LV, LVT) or other (X).
//
// Jamo are the code points Hangul_LBase..0x11ff, split at Hangul_VBase and
// Hangul_TBase. The precomposed range includes Hangul_SBase itself, which is
// an LV syllable.
static inline HangulType hangul_type(unsigned short uc) {
    if (hangul_isPrecomposed(uc))
        return hangul_isLV(uc) ? LV : LVT;
    if (uc < Hangul_LBase || uc > 0x11ff)
        return X;
    if (uc < Hangul_VBase)
        return L;
    if (uc < Hangul_TBase)
        return V;
    return T;
}
|
||
|
|
||
|
static int hangul_nextSyllableBoundary(const QString &s, int start, int end)
|
||
|
{
|
||
|
const QChar *uc = s.unicode() + start;
|
||
|
|
||
|
HangulType state = hangul_type(uc->unicode());
|
||
|
int pos = 1;
|
||
|
|
||
|
while (pos < end - start) {
|
||
|
HangulType newState = hangul_type(uc[pos].unicode());
|
||
|
switch(newState) {
|
||
|
case X:
|
||
|
goto finish;
|
||
|
case L:
|
||
|
case V:
|
||
|
case T:
|
||
|
if (state > newState)
|
||
|
goto finish;
|
||
|
state = newState;
|
||
|
break;
|
||
|
case LV:
|
||
|
if (state > L)
|
||
|
goto finish;
|
||
|
state = V;
|
||
|
break;
|
||
|
case LVT:
|
||
|
if (state > L)
|
||
|
goto finish;
|
||
|
state = T;
|
||
|
}
|
||
|
++pos;
|
||
|
}
|
||
|
|
||
|
finish:
|
||
|
return start+pos;
|
||
|
}
|
||
|
|
||
|
#ifndef QT_NO_XFTFREETYPE
|
||
|
static const QOpenType::Features hangul_features [] = {
|
||
|
{ FT_MAKE_TAG('c', 'c', 'm', 'p'), CcmpProperty },
|
||
|
{ FT_MAKE_TAG('l', 'j', 'm', 'o'), CcmpProperty },
|
||
|
{ FT_MAKE_TAG('j', 'j', 'm', 'o'), CcmpProperty },
|
||
|
{ FT_MAKE_TAG('t', 'j', 'm', 'o'), CcmpProperty },
|
||
|
{ 0, 0 }
|
||
|
};
|
||
|
#endif
|
||
|
|
||
|
// Shapes one Hangul syllable.
//
// A two character L+V or three character L+V+T jamo sequence made of modern
// jamo is composed arithmetically into a single precomposed syllable, and
// that one character is mapped to a glyph. Every other syllable is mapped
// character by character and, when openType is non-null, run through the
// OpenType features.
//
// openType  OpenType engine to use for non-composable syllables, or 0.
// item      syllable to shape; its glyph, advance and attribute arrays are
//           filled in and num_glyphs is updated by stringToCMap().
//
// Returns FALSE when glyph mapping or OpenType positioning fails.
static bool hangul_shape_syllable(QOpenType *openType, QShaperItem *item)
{
    Q_UNUSED(openType)
    const QChar *ch = item->string->unicode() + item->from;

    int i;
    unsigned short composed = 0;
    // see if we can compose the syllable into a modern hangul
    if (item->length == 2 || item->length == 3) {
        int LIndex = ch[0].unicode() - Hangul_LBase;
        int VIndex = ch[1].unicode() - Hangul_VBase;
        // a two character syllable has no trailing consonant (index 0)
        int TIndex = (item->length == 3) ? ch[2].unicode() - Hangul_TBase : 0;
        if (LIndex >= 0 && LIndex < Hangul_LCount &&
            VIndex >= 0 && VIndex < Hangul_VCount &&
            TIndex >= 0 && TIndex < Hangul_TCount)
            composed = (LIndex * Hangul_VCount + VIndex) * Hangul_TCount + TIndex + Hangul_SBase;
    }

    int len = item->length;
    QChar c(composed);
    const QChar *chars = ch;

    // if we have a modern hangul use the composed form
    if (composed) {
        chars = &c;
        len = 1;
    }

    // Map 'chars', not 'ch': for a composed syllable 'ch' still points at the
    // leading jamo, and mapping it with len == 1 would drop vowel and trailing
    // consonant.
    if (item->font->stringToCMap(chars, len, item->glyphs, item->advances,
                                 &item->num_glyphs, item->flags & QTextEngine::RightToLeft) != QFontEngine::NoError)
        return FALSE;
    for (i = 0; i < len; i++) {
        item->attributes[i].mark = FALSE;
        item->attributes[i].clusterStart = FALSE;
        item->attributes[i].justification = 0;
        item->attributes[i].zeroWidth = FALSE;
        IDEBUG(" %d: %4x", i, chars[i].unicode());
    }

#ifndef QT_NO_XFTFREETYPE
    if (openType && !composed) {

        QVarLengthArray<unsigned short> logClusters(len);
        for (i = 0; i < len; ++i)
            logClusters[i] = i;
        item->log_clusters = logClusters.data();

        openType->shape(item);
        if (!openType->positionAndAdd(item, FALSE))
            return FALSE;

    }
#endif

    item->attributes[0].clusterStart = TRUE;
    return TRUE;
}
|
||
|
|
||
|
/*
  Shaping entry point for items whose script is QFont::Hangul.

  When every character of the item is a precomposed syllable (as reported
  by hangul_isPrecomposed()) the item is handed to basic_shape() unchanged.
  Otherwise the text is cut into syllables with
  hangul_nextSyllableBoundary() and each syllable is shaped on its own by
  hangul_shape_syllable(), writing into consecutive slices of the item's
  glyph, offset, advance and attribute arrays.  All characters of a
  syllable are mapped to the syllable's first glyph in item->log_clusters.

  Returns FALSE as soon as one syllable fails to shape; in that case the
  failing syllable's num_glyphs is added to item->num_glyphs before
  returning.  On success item->num_glyphs holds the total glyph count.
*/
static bool hangul_shape(QShaperItem *item)
{
    Q_ASSERT(item->script == QFont::Hangul);

    const QChar *text = item->string->unicode() + item->from;

    // Skip over the leading run of precomposed syllables; if that run
    // covers the whole item there is nothing Hangul-specific left to do.
    int pos = 0;
    while (pos < item->length && hangul_isPrecomposed(text[pos].unicode()))
        ++pos;
    if (pos >= item->length)
        return basic_shape(item);

    // Pick the OpenType engine, if the font has one that knows the script.
    QOpenType *openType = 0;
#ifndef QT_NO_XFTFREETYPE
    openType = item->font->openType();
    if (openType) {
        if (openType->supportsScript(item->script))
            openType->selectScript(QFont::Hangul, hangul_features);
        else
            openType = 0;
    }
#endif

    unsigned short *clusterMap = item->log_clusters;

    // Working copy that is re-pointed at one syllable per iteration.
    QShaperItem syllable = *item;
    int glyphBase = 0;

    const int stop = item->from + item->length;
    for (int start = item->from; start < stop; ) {
        const int next = hangul_nextSyllableBoundary(*(item->string), start, stop);

        syllable.from = start;
        syllable.length = next - start;
        syllable.glyphs = item->glyphs + glyphBase;
        syllable.offsets = item->offsets + glyphBase;
        syllable.advances = item->advances + glyphBase;
        syllable.attributes = item->attributes + glyphBase;
        syllable.num_glyphs = item->num_glyphs - glyphBase;

        if (!hangul_shape_syllable(openType, &syllable)) {
            item->num_glyphs += syllable.num_glyphs;
            return FALSE;
        }
        item->has_positioning |= syllable.has_positioning;

        // fix logcluster array: every character of the syllable maps to
        // the syllable's first glyph
        for (int k = start - item->from; k < next - item->from; ++k)
            clusterMap[k] = glyphBase;

        start = next;
        glyphBase += syllable.num_glyphs;
    }

    item->num_glyphs = glyphBase;
    return TRUE;
}
|
||
|
|
||
|
static void hangul_attributes(int script, const QString &text, int from, int len, QCharAttributes *attributes)
|
||
|
{
|
||
|
Q_UNUSED(script);
|
||
|
|
||
|
int end = from + len;
|
||
|
const QChar *uc = text.unicode() + from;
|
||
|
attributes += from;
|
||
|
int i = 0;
|
||
|
while (i < len) {
|
||
|
int boundary = hangul_nextSyllableBoundary(text, from+i, end) - from;
|
||
|
|
||
|
attributes[i].charStop = TRUE;
|
||
|
|
||
|
if (boundary > len-1) boundary = len;
|
||
|
i++;
|
||
|
while (i < boundary) {
|
||
|
attributes[i].charStop = FALSE;
|
||
|
++uc;
|
||
|
++i;
|
||
|
}
|
||
|
assert(i == boundary);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// -----------------------------------------------------------------------------------------------
|
||
|
//
|
||
|
// The script engine jump table
|
||
|
//
|
||
|
// -----------------------------------------------------------------------------------------------
|
||
|
|
||
|
/*
  One entry per script, in the order given by the trailing comments.
  Each entry pairs a shaping function with an optional attributes function;
  0 in the second slot means the script has no attributes function.
*/
const q_scriptEngine scriptEngines[] = {
    { basic_shape, 0 },                         // Latin
    { basic_shape, 0 },                         // Greek
    { basic_shape, 0 },                         // Cyrillic
    { basic_shape, 0 },                         // Armenian
    { basic_shape, 0 },                         // Georgian
    { basic_shape, 0 },                         // Runic
    { basic_shape, 0 },                         // Ogham
    { basic_shape, 0 },                         // SpacingModifiers
    { basic_shape, 0 },                         // CombiningMarks

    // Middle Eastern Scripts
    { hebrew_shape, 0 },                        // Hebrew
    { arabic_shape, 0 },                        // Arabic
    { syriac_shape, 0 },                        // Syriac
    { thaana_shape, 0 },                        // Thaana

    // South and Southeast Asian Scripts
    { indic_shape, indic_attributes },          // Devanagari
    { indic_shape, indic_attributes },          // Bengali
    { indic_shape, indic_attributes },          // Gurmukhi
    { indic_shape, indic_attributes },          // Gujarati
    { indic_shape, indic_attributes },          // Oriya
    { indic_shape, indic_attributes },          // Tamil
    { indic_shape, indic_attributes },          // Telugu
    { indic_shape, indic_attributes },          // Kannada
    { indic_shape, indic_attributes },          // Malayalam
    { indic_shape, indic_attributes },          // Sinhala
    { basic_shape, thai_attributes },           // Thai
    { basic_shape, thai_attributes },           // Lao
    { tibetan_shape, tibetan_attributes },      // Tibetan
    { myanmar_shape, myanmar_attributes },      // Myanmar
    { khmer_shape, khmer_attributes },          // Khmer

    // East Asian Scripts
    { basic_shape, 0 },                         // Han
    { basic_shape, 0 },                         // Hiragana
    { basic_shape, 0 },                         // Katakana
    { hangul_shape, hangul_attributes },        // Hangul
    { basic_shape, 0 },                         // Bopomofo
    { basic_shape, 0 },                         // Yi

    // Additional Scripts
    { basic_shape, 0 },                         // Ethiopic
    { basic_shape, 0 },                         // Cherokee
    { basic_shape, 0 },                         // CanadianAboriginal
    { basic_shape, 0 },                         // Mongolian

    // Symbols
    { basic_shape, 0 },                         // CurrencySymbols
    { basic_shape, 0 },                         // LetterlikeSymbols
    { basic_shape, 0 },                         // NumberForms
    { basic_shape, 0 },                         // MathematicalOperators
    { basic_shape, 0 },                         // TechnicalSymbols
    { basic_shape, 0 },                         // GeometricSymbols
    { basic_shape, 0 },                         // MiscellaneousSymbols
    { basic_shape, 0 },                         // EnclosedAndSquare
    { basic_shape, 0 },                         // Braille

    { basic_shape, 0 },                         // Unicode
    { basic_shape, 0 },                         // Tagalog
    { basic_shape, 0 },                         // Hanunoo
    { basic_shape, 0 },                         // Buhid
    { basic_shape, 0 },                         // Tagbanwa
    { basic_shape, 0 },                         // KatakanaHalfWidth
    { basic_shape, 0 },                         // Limbu
    { basic_shape, 0 }                          // TaiLe
};
|