tdelibs/khtml/rendering/break_lines.h

#ifndef BREAK_LINES_H
#define BREAK_LINES_H

#include <tqstring.h>

namespace khtml {

    /*
      Arrays of Unicode code points where line breaking shouldn't occur.
      (They are kept in sorted order because they are looked up with a
      binary search.)
      These are currently for Japanese, though simply adding
      Korean and Chinese ones should work as well.
    */
    /*
      dontbreakbefore[] contains characters not covered by TQChar::Punctuation_Close
      that shouldn't be broken before.
      Characters included in TQChar::Punctuation_Close are listed below (see UAX #14):
         - 3001 ideographic comma
         - 3002 ideographic full stop
         - FE50 small comma
         - FE52 small full stop
         - FF0C fullwidth comma
         - FF0E fullwidth full stop
         - FF61 halfwidth ideographic full stop
         - FF64 halfwidth ideographic comma
      These characters are commented out in the array.
    */
    /*
      Code points that a line must not start with, i.e. no break is allowed
      directly in front of them.  The table MUST stay in ascending order: it
      is looked up with a binary search.
      Entries that are commented out are kept for reference only; according
      to the original note they are already covered by
      TQChar::Punctuation_Close.
    */
    const ushort dontbreakbefore[] = {
        /* ---- CJK symbols and punctuation ---- */
        // 0x3001        ideographic comma
        // 0x3002        ideographic full stop
        0x3005,       // ideographic iteration mark
        0x3009,       // right angle bracket
        0x300B,       // right double angle bracket
        0x300D,       // right corner bracket
        0x300F,       // right white corner bracket
        0x3011,       // right black lenticular bracket
        0x3015,       // right tortoise shell bracket

        /* ---- Hiragana ---- */
        0x3041,       // small a
        0x3043,       // small i
        0x3045,       // small u
        0x3047,       // small e
        0x3049,       // small o
        0x3063,       // small tsu
        0x3083,       // small ya
        0x3085,       // small yu
        0x3087,       // small yo
        0x308E,       // small wa
        0x309B,       // voiced sound mark
        0x309C,       // semi-voiced sound mark
        0x309D,       // hiragana iteration mark
        0x309E,       // hiragana voiced iteration mark

        /* ---- Katakana ---- */
        0x30A1,       // small a
        0x30A3,       // small i
        0x30A5,       // small u
        0x30A7,       // small e
        0x30A9,       // small o
        0x30C3,       // small tsu
        0x30E3,       // small ya
        0x30E5,       // small yu
        0x30E7,       // small yo
        0x30EE,       // small wa
        0x30F5,       // small ka
        0x30F6,       // small ke
        0x30FC,       // prolonged sound mark
        0x30FD,       // katakana iteration mark
        0x30FE,       // katakana voiced iteration mark

        /* ---- Small form variants ---- */
        // 0xFE50        small comma
        // 0xFE52        small full stop

        /* ---- Fullwidth forms ---- */
        0xFF01,       // fullwidth exclamation mark
        0xFF09,       // fullwidth right parenthesis
        // 0xFF0C        fullwidth comma
        0xFF0D,       // fullwidth hyphen-minus
        // 0xFF0E        fullwidth full stop
        0xFF1F,       // fullwidth question mark
        0xFF3D,       // fullwidth right square bracket
        0xFF5D,       // fullwidth right curly bracket

        /* ---- Halfwidth forms ---- */
        // 0xFF61        halfwidth ideographic full stop
        0xFF63,       // halfwidth right corner bracket
        // 0xFF64        halfwidth ideographic comma
        0xFF67,       // halfwidth katakana letter small a
        0xFF68,       // halfwidth katakana letter small i
        0xFF69,       // halfwidth katakana letter small u
        0xFF6A,       // halfwidth katakana letter small e
        0xFF6B,       // halfwidth katakana letter small o
        0xFF6C,       // halfwidth katakana letter small ya
        0xFF6D,       // halfwidth katakana letter small yu
        0xFF6E,       // halfwidth katakana letter small yo
        0xFF6F,       // halfwidth katakana letter small tu
        0xFF70        // halfwidth katakana-hiragana prolonged sound mark
    };

    /*
      Code points that a line must not end with, i.e. no break is allowed
      directly behind them.  These are the ones that aren't covered by
      TQChar::Punctuation_Open.  The table MUST stay in ascending order: it
      is looked up with a binary search.
    */
    const ushort dontbreakafter[] = {
        0x3012,       // postal mark
        0xFF03,       // fullwidth number sign (#)
        0xFF04,       // fullwidth dollar sign
        0xFF20,       // fullwidth commercial at (@)
        0xFFE1,       // fullwidth pound sign
        0xFFE5        // fullwidth yen sign
    };

    /*
      Binary search for val in a sorted table of code points.

      arr  - table to search; must be sorted in ascending order and must be
             passed as the array itself (not as a pointer) so that its
             length N can be deduced.
      val  - code point to look for.

      Returns false when val is contained in the table (breaking is
      forbidden) and true when it is not.

      The table is taken by array reference on purpose: with a plain
      pointer parameter, sizeof(arr) is the size of the pointer rather than
      of the table, which restricts the search to the first few entries.
    */
    template <int N>
    inline bool break_bsearch( const ushort (&arr)[N], const ushort val ) {
        int left = 0;
        int right = N - 1;

        // Narrow [left, right] down to a single candidate.
        while ( left < right ) {
            const int mid = (left + right) >> 1;
            if ( val == arr[mid] )
                return false;
            if ( val < arr[mid] )
                right = mid;
            else
                left = mid + 1;
        }

        return val != arr[left];
    }
    
    // Thai-specific break test, defined outside this header.  isBreakable()
    // delegates to it for characters in the 0e00 - 0e7f range; the call site
    // notes that it consults libthai.
    bool isBreakableThai( const TQChar *string, const int pos, const int len);
    // NOTE(review): presumably releases whatever state the Thai break code
    // keeps around -- not visible from this header, confirm in the
    // implementation.
    void cleanup_thaibreaks();

    inline bool isBreakable( const TQChar *str, const int pos, int len )
    {
	const TQChar *c = str+pos;
	unsigned short ch = c->tqunicode();
	if ( ch > 0xff ) {
	    // not latin1, need to do more sophisticated checks for asian fonts
	    unsigned char row = c->row();
	    if ( row == 0x0e ) {
		// 0e00 - 0e7f == Thai
		if ( c->cell() < 0x80 ) {
		    // consult libthai
		    return isBreakableThai(str, pos, len);
		} else
		    return false;
	    }
	    if ( row > 0x2d && row < 0xfb || row == 0x11 ) {
                /* asian line breaking. */
                if ( pos == 0 )
                    return false; // never break before first character

                // check for simple punctuation cases
                TQChar::Category cat = c->category();
                if ( cat == TQChar::Punctuation_Close ||
                     cat == TQChar::Punctuation_Other ||
                     (str+(pos-1))->category() == TQChar::Punctuation_Open )
                    return false;

                // do binary search in dontbreak[]
                return break_bsearch(dontbreakbefore, c->tqunicode()) &&
                       break_bsearch(dontbreakafter, (str+(pos-1))->tqunicode());
            } else // no asian font
		return c->isSpace();
	} else {
	    if ( ch == ' ' || ch == '\n' )
		return true;
	}
	return false;
    }

}

#endif
Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features. BUG:215923 git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 15 years ago			`#ifndef BREAK_LINES_H`
			`#define BREAK_LINES_H`

Trinity Qt initial conversion git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1157647 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 14 years ago			`#include <tqstring.h>`
Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features. BUG:215923 git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 15 years ago
			`namespace khtml {`

			`/*`
Initial conversion for TQt for Qt4 3.4.0 TP2 This will also compile with TQt for Qt3, and should not cause any problems with dependent modules such as kdebase. If it does then it needs to be fixed! git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1214149 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 14 years ago			`array of tqunicode codes where breaking shouldn't occur.`
Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features. BUG:215923 git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 15 years ago			`(in sorted order because of using with binary search)`
			`these are currently for Japanese, though simply adding`
			`Korean, Chinese ones should work as well`
			`*/`
			`/*`
Revert automated changes Sorry guys, they are just not ready for prime time Work will continue as always git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1212479 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 14 years ago			`dontbreakbefore[] contains characters not covered by TQChar::Punctuation_Close that shouldn't be broken before.`
Trinity Qt initial conversion git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1157647 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 14 years ago			`chars included in TQChar::Punctuation_Close are listed below.(look at UAX #14)`
Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features. BUG:215923 git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 15 years ago			`- 3001 ideographic comma`
			`- 3002 ideographic full stop`
			`- FE50 small comma`
			`- FF52 small full stop`
			`- FF0C fullwidth comma`
			`- FF0E fullwidth full stop`
			`- FF61 halfwidth ideographic full stop`
			`- FF64 halfwidth ideographic comma`
			`these character is commented out.`
			`*/`
			`const ushort dontbreakbefore[] = {`
			`//0x3001, //ideographic comma`
			`//0x3002, //ideographic full stop`
			`0x3005, //ideographic iteration mark`
			`0x3009, //right angle bracket`
			`0x300b, //right double angle bracket`
			`0x300d, //right corner bracket`
			`0x300f, //right white corner bracket`
			`0x3011, //right black lenticular bracket`
			`0x3015, //right tortoise shell bracket`
			`0x3041, //small a hiragana`
			`0x3043, //small i hiragana`
			`0x3045, //small u hiragana`
			`0x3047, //small e hiragana`
			`0x3049, //small o hiragana`
			`0x3063, //small tsu hiragana`
			`0x3083, //small ya hiragana`
			`0x3085, //small yu hiragana`
			`0x3087, //small yo hiragana`
			`0x308E, //small wa hiragana`
			`0x309B, //jap voiced sound mark`
			`0x309C, //jap semi-voiced sound mark`
			`0x309D, //jap iteration mark hiragana`
			`0x309E, //jap voiced iteration mark hiragana`
			`0x30A1, //small a katakana`
			`0x30A3, //small i katakana`
			`0x30A5, //small u katakana`
			`0x30A7, //small e katakana`
			`0x30A9, //small o katakana`
			`0x30C3, //small tsu katakana`
			`0x30E3, //small ya katakana`
			`0x30E5, //small yu katakana`
			`0x30E7, //small yo katakana`
			`0x30EE, //small wa katakana`
			`0x30F5, //small ka katakana`
			`0x30F6, //small ke katakana`
			`0x30FC, //jap prolonged sound mark`
			`0x30FD, //jap iteration mark katakana`
			`0x30FE, //jap voiced iteration mark katakana`
			`//0xFE50, //small comma`
			`//0xFF52, //small full stop`
			`0xFF01, //fullwidth exclamation mark`
			`0xFF09, //fullwidth right parenthesis`
			`//0xFF0C, //fullwidth comma`
			`0xFF0D, //fullwidth hypen-minus`
			`//0xFF0E, //fullwidth full stop`
			`0xFF1F, //fullwidth question mark`
			`0xFF3D, //fullwidth right square bracket`
			`0xFF5D, //fullwidth right curly bracket`
			`//0xFF61, //halfwidth ideographic full stop`
			`0xFF63, //halfwidth right corner bracket`
			`//0xFF64, //halfwidth ideographic comma`
			`0xFF67, //halfwidth katakana letter small a`
			`0xFF68, //halfwidth katakana letter small i`
			`0xFF69, //halfwidth katakana letter small u`
			`0xFF6a, //halfwidth katakana letter small e`
			`0xFF6b, //halfwidth katakana letter small o`
			`0xFF6c, //halfwidth katakana letter small ya`
			`0xFF6d, //halfwidth katakana letter small yu`
			`0xFF6e, //halfwidth katakana letter small yo`
			`0xFF6f, //halfwidth katakana letter small tu`
			`0xFF70 //halfwidth katakana-hiragana prolonged sound mark`
			`};`

Trinity Qt initial conversion git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1157647 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 14 years ago			`// characters that aren't covered by TQChar::Punctuation_Open`
Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features. BUG:215923 git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 15 years ago			`const ushort dontbreakafter[] = {`
			`0x3012, //postal mark`
			`0xFF03, //full width pound mark`
			`0xFF04, //full width dollar sign`
			`0xFF20, //full width @`
			`0xFFE1, //full width british pound sign`
			`0xFFE5 //full width yen sign`
			`};`

			`inline bool break_bsearch( const ushort* arr, const ushort val ) {`
			`int left = 0;`
			`int right = (sizeof(arr) / sizeof(ushort)) - 1;`

			`while (1) {`
			`if (left == right)`
			`return val != arr[left];`

			`int i = (left + right) >> 1;`
			`if ( val == arr[i] )`
			`return false;`
			`if ( val < arr[i] )`
			`right = i;`
			`else`
			`left = i + 1;`
			`}`
			`}`

Trinity Qt initial conversion git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1157647 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 14 years ago			`bool isBreakableThai( const TQChar *string, const int pos, const int len);`
Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features. BUG:215923 git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 15 years ago			`void cleanup_thaibreaks();`

Trinity Qt initial conversion git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1157647 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 14 years ago			`inline bool isBreakable( const TQChar *str, const int pos, int len )`
Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features. BUG:215923 git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 15 years ago			`{`
Trinity Qt initial conversion git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1157647 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 14 years ago			`const TQChar *c = str+pos;`
Initial conversion for TQt for Qt4 3.4.0 TP2 This will also compile with TQt for Qt3, and should not cause any problems with dependent modules such as kdebase. If it does then it needs to be fixed! git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1214149 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 14 years ago			`unsigned short ch = c->tqunicode();`
Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features. BUG:215923 git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 15 years ago			`if ( ch > 0xff ) {`
			`// not latin1, need to do more sophisticated checks for asian fonts`
			`unsigned char row = c->row();`
			`if ( row == 0x0e ) {`
			`// 0e00 - 0e7f == Thai`
			`if ( c->cell() < 0x80 ) {`
			`// consult libthai`
			`return isBreakableThai(str, pos, len);`
			`} else`
			`return false;`
			`}`
			`if ( row > 0x2d && row < 0xfb \|\| row == 0x11 ) {`
			`/* asian line breaking. */`
			`if ( pos == 0 )`
			`return false; // never break before first character`

			`// check for simple punctuation cases`
Trinity Qt initial conversion git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1157647 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 14 years ago			`TQChar::Category cat = c->category();`
			`if ( cat == TQChar::Punctuation_Close \|\|`
			`cat == TQChar::Punctuation_Other \|\|`
			`(str+(pos-1))->category() == TQChar::Punctuation_Open )`
Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features. BUG:215923 git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 15 years ago			`return false;`

			`// do binary search in dontbreak[]`
Initial conversion for TQt for Qt4 3.4.0 TP2 This will also compile with TQt for Qt3, and should not cause any problems with dependent modules such as kdebase. If it does then it needs to be fixed! git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1214149 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 14 years ago			`return break_bsearch(dontbreakbefore, c->tqunicode()) &&`
			`break_bsearch(dontbreakafter, (str+(pos-1))->tqunicode());`
Copy the KDE 3.5 branch to branches/trinity for new KDE 3.5 features. BUG:215923 git-svn-id: svn://anonsvn.kde.org/home/kde/branches/trinity/kdelibs@1054174 283d02a7-25f6-0310-bc7c-ecb5cbfe19da 15 years ago			`} else // no asian font`
			`return c->isSpace();`
			`} else {`
			`if ( ch == ' ' \|\| ch == '\n' )`
			`return true;`
			`}`
			`return false;`
			`}`

			`}`

			`#endif`