You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
164 lines
5.7 KiB
164 lines
5.7 KiB
#ifndef BREAK_LINES_H
|
|
#define BREAK_LINES_H
|
|
|
|
#include <tqstring.h>
|
|
|
|
namespace khtml {
|
|
|
|
/*
|
|
array of unicode codes where breaking shouldn't occur.
|
|
(in sorted order because of using with binary search)
|
|
these are currently for Japanese, though simply adding
|
|
Korean, Chinese ones should work as well
|
|
*/
|
|
/*
|
|
dontbreakbefore[] contains characters not covered by TQChar::Punctuation_Close that shouldn't be broken before.
|
|
Characters included in TQChar::Punctuation_Close are listed below (see UAX #14):
|
|
- 3001 ideographic comma
|
|
- 3002 ideographic full stop
|
|
- FE50 small comma
|
|
- FE52 small full stop
|
|
- FF0C fullwidth comma
|
|
- FF0E fullwidth full stop
|
|
- FF61 halfwidth ideographic full stop
|
|
- FF64 halfwidth ideographic comma
|
|
These characters are commented out in the array below.
|
|
*/
|
|
// Code units that must not start a line, i.e. no break is allowed before them.
// Kept in ascending order because the table is binary-searched.
// Commented-out entries are the ones listed as belonging to
// TQChar::Punctuation_Close; they are kept only for reference.
const ushort dontbreakbefore[] = {
    // --- CJK symbols and punctuation ---
    // 0x3001,  ideographic comma
    // 0x3002,  ideographic full stop
    0x3005,  // ideographic iteration mark
    0x3009,  // right angle bracket
    0x300B,  // right double angle bracket
    0x300D,  // right corner bracket
    0x300F,  // right white corner bracket
    0x3011,  // right black lenticular bracket
    0x3015,  // right tortoise shell bracket

    // --- hiragana ---
    0x3041,  // small a hiragana
    0x3043,  // small i hiragana
    0x3045,  // small u hiragana
    0x3047,  // small e hiragana
    0x3049,  // small o hiragana
    0x3063,  // small tsu hiragana
    0x3083,  // small ya hiragana
    0x3085,  // small yu hiragana
    0x3087,  // small yo hiragana
    0x308E,  // small wa hiragana
    0x309B,  // jap voiced sound mark
    0x309C,  // jap semi-voiced sound mark
    0x309D,  // jap iteration mark hiragana
    0x309E,  // jap voiced iteration mark hiragana

    // --- katakana ---
    0x30A1,  // small a katakana
    0x30A3,  // small i katakana
    0x30A5,  // small u katakana
    0x30A7,  // small e katakana
    0x30A9,  // small o katakana
    0x30C3,  // small tsu katakana
    0x30E3,  // small ya katakana
    0x30E5,  // small yu katakana
    0x30E7,  // small yo katakana
    0x30EE,  // small wa katakana
    0x30F5,  // small ka katakana
    0x30F6,  // small ke katakana
    0x30FC,  // jap prolonged sound mark
    0x30FD,  // jap iteration mark katakana
    0x30FE,  // jap voiced iteration mark katakana

    // --- small form variants ---
    // 0xFE50,  small comma
    // 0xFE52,  small full stop

    // --- fullwidth forms ---
    0xFF01,  // fullwidth exclamation mark
    0xFF09,  // fullwidth right parenthesis
    // 0xFF0C,  fullwidth comma
    0xFF0D,  // fullwidth hyphen-minus
    // 0xFF0E,  fullwidth full stop
    0xFF1F,  // fullwidth question mark
    0xFF3D,  // fullwidth right square bracket
    0xFF5D,  // fullwidth right curly bracket

    // --- halfwidth forms ---
    // 0xFF61,  halfwidth ideographic full stop
    0xFF63,  // halfwidth right corner bracket
    // 0xFF64,  halfwidth ideographic comma
    0xFF67,  // halfwidth katakana letter small a
    0xFF68,  // halfwidth katakana letter small i
    0xFF69,  // halfwidth katakana letter small u
    0xFF6A,  // halfwidth katakana letter small e
    0xFF6B,  // halfwidth katakana letter small o
    0xFF6C,  // halfwidth katakana letter small ya
    0xFF6D,  // halfwidth katakana letter small yu
    0xFF6E,  // halfwidth katakana letter small yo
    0xFF6F,  // halfwidth katakana letter small tu
    0xFF70   // halfwidth katakana-hiragana prolonged sound mark
};
|
|
|
|
// Code units that must not end a line, i.e. no break is allowed after them.
// Only characters not already covered by TQChar::Punctuation_Open are listed.
// The entries are in ascending order.
const ushort dontbreakafter[] = {
    0x3012,  // postal mark
    0xFF03,  // fullwidth pound (number) sign
    0xFF04,  // fullwidth dollar sign
    0xFF20,  // fullwidth @
    0xFFE1,  // fullwidth British pound sign
    0xFFE5   // fullwidth yen sign
};
|
|
|
|
/**
 * Binary search over a sorted table of 16-bit code units.
 *
 * The table is taken by reference-to-array so that its element count N is
 * deduced at compile time. Receiving it as a plain `const ushort*` and
 * computing `sizeof(arr) / sizeof(ushort)` measures the pointer, not the
 * table, so only the first few entries would ever be examined.
 *
 * Call sites that pass an array by name need no change. A caller that only
 * holds a pointer cannot use this function, because the length is unknown.
 *
 * @param arr  table sorted in ascending order
 * @param val  code unit to look up
 * @return false if @p val is present in @p arr, true if it is absent
 */
template <int N>
inline bool break_bsearch( const ushort (&arr)[N], const ushort val ) {
    int left = 0;
    int right = N - 1;

    // Invariant: if val is in the table it lies within arr[left..right].
    while ( left < right ) {
        const int mid = left + ((right - left) >> 1);
        if ( val == arr[mid] )
            return false;
        if ( val < arr[mid] )
            right = mid;       // mid < right here, so the range always shrinks
        else
            left = mid + 1;
    }

    // A single candidate remains.
    return val != arr[left];
}
|
|
|
|
bool isBreakableThai( const TQChar *string, const int pos, const int len);
|
|
void cleanup_thaibreaks();
|
|
|
|
inline bool isBreakable( const TQChar *str, const int pos, int len )
|
|
{
|
|
const TQChar *c = str+pos;
|
|
unsigned short ch = c->unicode();
|
|
if ( ch > 0xff ) {
|
|
// not latin1, need to do more sophisticated checks for asian fonts
|
|
unsigned char row = c->row();
|
|
if ( row == 0x0e ) {
|
|
// 0e00 - 0e7f == Thai
|
|
if ( c->cell() < 0x80 ) {
|
|
// consult libthai
|
|
return isBreakableThai(str, pos, len);
|
|
} else
|
|
return false;
|
|
}
|
|
if ( row > 0x2d && row < 0xfb || row == 0x11 ) {
|
|
/* asian line breaking. */
|
|
if ( pos == 0 )
|
|
return false; // never break before first character
|
|
|
|
// check for simple punctuation cases
|
|
TQChar::Category cat = c->category();
|
|
if ( cat == TQChar::Punctuation_Close ||
|
|
cat == TQChar::Punctuation_Other ||
|
|
(str+(pos-1))->category() == TQChar::Punctuation_Open )
|
|
return false;
|
|
|
|
// do binary search in dontbreak[]
|
|
return break_bsearch(dontbreakbefore, c->unicode()) &&
|
|
break_bsearch(dontbreakafter, (str+(pos-1))->unicode());
|
|
} else // no asian font
|
|
return c->isSpace();
|
|
} else {
|
|
if ( ch == ' ' || ch == '\n' )
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|