You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
877 lines
28 KiB
877 lines
28 KiB
/***************************************************************************
|
|
* Copyright (C) 2004-2009 by Thomas Fischer *
|
|
* fischer@unix-ag.uni-kl.de *
|
|
* *
|
|
* This program is free software; you can redistribute it and/or modify *
|
|
* it under the terms of the GNU General Public License as published by *
|
|
* the Free Software Foundation; either version 2 of the License, or *
|
|
* (at your option) any later version. *
|
|
* *
|
|
* This program is distributed in the hope that it will be useful, *
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
|
* GNU General Public License for more details. *
|
|
* *
|
|
* You should have received a copy of the GNU General Public License *
|
|
* along with this program; if not, write to the *
|
|
* Free Software Foundation, Inc., *
|
|
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
|
|
***************************************************************************/
|
|
#include <tqstring.h>
|
|
#include <tqapplication.h>
|
|
#include <tqregexp.h>
|
|
|
|
#include "encoderlatex.h"
|
|
|
|
namespace BibTeX
|
|
{
|
|
/** Shared instance; created on demand by currentEncoderLaTeX() and released by deleteCurrentEncoderLaTeX(). */
EncoderLaTeX *EncoderLaTeX::encoderLaTeX = NULL;
|
|
|
|
/**
 * Maps a Unicode combining diacritical mark to the LaTeX accent command
 * (written without the leading backslash) that produces the same accent,
 * e.g. U+0301 (combining acute accent) to "'" as used in \'{e}.
 *
 * Only marks with a LaTeX counterpart are listed; the remaining code
 * points between U+0300 and U+0361 (overline, hook above, double grave,
 * inverted breve, comma above, ...) are intentionally left unmapped.
 */
static struct Decomposition
{
    const char *latexCommand;
    unsigned int unicode;
}
decompositions[] =
{
    {"`", 0x0300}, /* grave accent */
    {"'", 0x0301}, /* acute accent */
    {"^", 0x0302}, /* circumflex */
    {"~", 0x0303}, /* tilde */
    {"=", 0x0304}, /* macron */
    {"u", 0x0306}, /* breve */
    {".", 0x0307}, /* dot above */
    {"r", 0x030a}, /* ring above */
    {"H", 0x030b}, /* double acute accent */
    {"v", 0x030c}, /* caron */
    {"d", 0x0323}, /* dot below */
    {"c", 0x0327}, /* cedilla: LaTeX uses \c, not \d (which is dot below) */
    {"k", 0x0328}, /* ogonek */
    {"b", 0x0331}, /* macron below */
    {"t", 0x0361}  /* double inverted breve (tie) */
};

static const int decompositionscount = sizeof( decompositions ) / sizeof( decompositions[ 0 ] );
|
|
|
|
/**
 * Maps the name of a LaTeX command (without the leading backslash) to the
 * single Unicode character it stands for, e.g. "ss" to U+00DF.
 * buildCharMapping() combines every entry with each pattern in
 * expansionsCmd to create the actual replacement rules.
 */
static const struct EncoderLaTeXCommandMapping
{
    const char *letters;
    unsigned int unicode;
}
commandmappingdatalatex[] =
{
    {"AA", 0x00C5},
    {"AE", 0x00C6},
    {"ss", 0x00DF},
    {"aa", 0x00E5},
    {"ae", 0x00E6},
    {"OE", 0x0152},
    {"oe", 0x0153},
    {"ldots", 0x2026},
    {"L", 0x0141},
    {"l", 0x0142},
    {"grqq", 0x201C},
    {"glqq", 0x201E},
    {"frqq", 0x00BB},
    {"flqq", 0x00AB},

    // Cyrillic commands, generated with:
    // awk -F '[{}\\\\]+' '/DeclareUnicodeCharacter/ { print "{\""$4"\", 0x"$3"},"}' /usr/share/texmf-dist/tex/latex/base/t2aenc.dfu | grep '0x04' | sort -r -f
    {"cyrzhdsc", 0x0497}, {"CYRZHDSC", 0x0496},
    {"cyrzh", 0x0436}, {"CYRZH", 0x0416},
    {"cyrzdsc", 0x0499}, {"CYRZDSC", 0x0498},
    {"cyrz", 0x0437}, {"CYRZ", 0x0417},
    {"cyryu", 0x044E}, {"CYRYU", 0x042E},
    {"cyryo", 0x0451}, {"CYRYO", 0x0401},
    {"cyryi", 0x0457}, {"CYRYI", 0x0407},
    {"cyryhcrs", 0x04B1}, {"CYRYHCRS", 0x04B0},
    {"cyrya", 0x044F}, {"CYRYA", 0x042F},
    {"cyry", 0x04AF}, {"CYRY", 0x04AE},
    {"cyrv", 0x0432}, {"CYRV", 0x0412},
    {"cyrushrt", 0x045E}, {"CYRUSHRT", 0x040E},
    {"cyru", 0x0443}, {"CYRU", 0x0423},
    {"cyrtshe", 0x045B}, {"CYRTSHE", 0x040B},
    {"cyrtdsc", 0x04AD}, {"CYRTDSC", 0x04AC},
    {"cyrt", 0x0442}, {"CYRT", 0x0422},
    {"cyrshha", 0x04BB}, {"CYRSHHA", 0x04BA},
    {"cyrshch", 0x0449}, {"CYRSHCH", 0x0429},
    {"cyrsh", 0x0448}, {"CYRSH", 0x0428},
    {"cyrsftsn", 0x044C}, {"CYRSFTSN", 0x042C},
    {"cyrsdsc", 0x04AB}, {"CYRSDSC", 0x04AA},
    {"cyrschwa", 0x04D9}, {"CYRSCHWA", 0x04D8},
    {"cyrs", 0x0441}, {"CYRS", 0x0421},
    {"cyrr", 0x0440}, {"CYRR", 0x0420},
    {"CYRpalochka", 0x04C0},
    {"cyrp", 0x043F}, {"CYRP", 0x041F},
    {"cyrotld", 0x04E9}, {"CYROTLD", 0x04E8},
    {"cyro", 0x043E}, {"CYRO", 0x041E},
    {"cyrnje", 0x045A}, {"CYRNJE", 0x040A},
    {"cyrng", 0x04A5}, {"CYRNG", 0x04A4},
    {"cyrndsc", 0x04A3}, {"CYRNDSC", 0x04A2},
    {"cyrn", 0x043D}, {"CYRN", 0x041D},
    {"cyrm", 0x043C}, {"CYRM", 0x041C},
    {"cyrlje", 0x0459}, {"CYRLJE", 0x0409},
    {"cyrl", 0x043B}, {"CYRL", 0x041B},
    {"cyrkvcrs", 0x049D}, {"CYRKVCRS", 0x049C},
    {"cyrkdsc", 0x049B}, {"CYRKDSC", 0x049A},
    {"cyrk", 0x043A}, {"CYRK", 0x041A},
    {"cyrje", 0x0458}, {"CYRJE", 0x0408},
    {"cyrishrt", 0x0439}, {"CYRISHRT", 0x0419},
    {"cyrii", 0x0456}, {"CYRII", 0x0406},
    {"cyrie", 0x0454}, {"CYRIE", 0x0404},
    {"cyri", 0x0438}, {"CYRI", 0x0418},
    {"cyrhrdsn", 0x044A}, {"CYRHRDSN", 0x042A},
    {"cyrhdsc", 0x04B3}, {"CYRHDSC", 0x04B2},
    {"cyrh", 0x0445}, {"CYRH", 0x0425},
    {"cyrgup", 0x0491}, {"CYRGUP", 0x0490},
    {"cyrghcrs", 0x0493}, {"CYRGHCRS", 0x0492},
    {"cyrg", 0x0433}, {"CYRG", 0x0413},
    {"cyrf", 0x0444}, {"CYRF", 0x0424},
    {"cyrery", 0x044B}, {"CYRERY", 0x042B},
    {"cyrerev", 0x044D}, {"CYREREV", 0x042D},
    {"cyre", 0x0435}, {"CYRE", 0x0415},
    {"cyrdzhe", 0x045F}, {"CYRDZHE", 0x040F},
    {"cyrdze", 0x0455}, {"CYRDZE", 0x0405},
    {"cyrdje", 0x0452}, {"CYRDJE", 0x0402},
    {"cyrd", 0x0434}, {"CYRD", 0x0414},
    {"cyrchvcrs", 0x04B9}, {"CYRCHVCRS", 0x04B8},
    {"cyrchrdsc", 0x04B7}, {"CYRCHRDSC", 0x04B6},
    {"cyrch", 0x0447}, {"CYRCH", 0x0427},
    {"cyrc", 0x0446}, {"CYRC", 0x0426},
    {"cyrb", 0x0431}, {"CYRB", 0x0411},
    {"cyrae", 0x04D5}, {"CYRAE", 0x04D4},
    {"cyra", 0x0430}, {"CYRA", 0x0410}
};

static const int commandmappingdatalatexcount = sizeof( commandmappingdatalatex ) / sizeof( commandmappingdatalatex[ 0 ] );
|
|
|
|
/**
 * Regular-expression templates describing how a LaTeX command may be
 * written in the input; "%1" is replaced by the command name taken from
 * commandmappingdatalatex:
 *  (1) {\cmd}   - the command enclosed in braces,
 *  (2) \cmd{}   - the command terminated by an empty group,
 *  (3) \cmd followed by a line feed, a carriage return, a backslash
 *      (i.e. the next command) or a closing brace; this terminator is
 *      captured so that buildCharMapping() can re-emit it via "\1",
 *  (4) \cmd followed by a whitespace character (matched but not captured).
 **/
const char *expansionsCmd[] =
{
    "\\{\\\\%1\\}",
    "\\\\%1\\{\\}",
    "\\\\%1(\\n|\\r|\\\\|\\})",
    "\\\\%1\\s"
};
static const int expansionscmdcount = sizeof( expansionsCmd ) / sizeof( expansionsCmd[0] );
|
|
|
|
/**
 * Maps a LaTeX accent command ("modifier") applied to a base letter to the
 * resulting precomposed Unicode character, e.g. \"a to U+00E4.
 *
 * Both strings are written as regular-expression fragments, so a literal
 * backslash appears as "\\\\" and a caret as "\\^" in the C++ source.
 * Several entries may share one code point: decoding accepts all of them,
 * while encoding uses the first entry found in table order.
 * Code points without an entry are simply not handled by this table.
 */
static const struct EncoderLaTeXModCharMapping
{
    const char *modifier;
    const char *letter;
    unsigned int unicode;
}
modcharmappingdatalatex[] =
{
    /** Latin-1 Supplement (0x00C6, 0x00D0, 0x00D5, 0x00D7 and 0x00DE have no entry) */
    {"\\\\`", "A", 0x00C0},
    {"\\\\'", "A", 0x00C1},
    {"\\\\\\^", "A", 0x00C2},
    {"\\\\~", "A", 0x00C3},
    {"\\\\\"", "A", 0x00C4},
    {"\\\\r", "A", 0x00C5},
    {"\\\\c", "C", 0x00C7},
    {"\\\\`", "E", 0x00C8},
    {"\\\\'", "E", 0x00C9},
    {"\\\\\\^", "E", 0x00CA},
    {"\\\\\"", "E", 0x00CB},
    {"\\\\`", "I", 0x00CC},
    {"\\\\'", "I", 0x00CD},
    {"\\\\\\^", "I", 0x00CE},
    {"\\\\\"", "I", 0x00CF},
    {"\\\\~", "N", 0x00D1},
    {"\\\\`", "O", 0x00D2},
    {"\\\\'", "O", 0x00D3},
    {"\\\\\\^", "O", 0x00D4},
    {"\\\\\"", "O", 0x00D6},
    {"\\\\", "O", 0x00D8}, /* bare backslash: \O */
    {"\\\\`", "U", 0x00D9},
    {"\\\\'", "U", 0x00DA},
    {"\\\\\\^", "U", 0x00DB},
    {"\\\\\"", "U", 0x00DC},
    {"\\\\'", "Y", 0x00DD},
    {"\\\\\"", "s", 0x00DF},
    /** (0x00E6, 0x00EF, 0x00F0, 0x00F5 and 0x00F7 have no entry) */
    {"\\\\`", "a", 0x00E0},
    {"\\\\'", "a", 0x00E1},
    {"\\\\\\^", "a", 0x00E2},
    {"\\\\~", "a", 0x00E3},
    {"\\\\\"", "a", 0x00E4},
    {"\\\\r", "a", 0x00E5},
    {"\\\\c", "c", 0x00E7},
    {"\\\\`", "e", 0x00E8},
    {"\\\\'", "e", 0x00E9},
    {"\\\\\\^", "e", 0x00EA},
    {"\\\\\"", "e", 0x00EB},
    {"\\\\`", "i", 0x00EC},
    {"\\\\'", "i", 0x00ED},
    {"\\\\'", "\\\\i", 0x00ED}, /* acute accent on the dotless i command \i */
    {"\\\\\\^", "i", 0x00EE},
    {"\\\\~", "n", 0x00F1},
    {"\\\\`", "o", 0x00F2},
    {"\\\\'", "o", 0x00F3},
    {"\\\\\\^", "o", 0x00F4},
    {"\\\\\"", "o", 0x00F6},
    {"\\\\", "o", 0x00F8}, /* bare backslash: \o */
    {"\\\\`", "u", 0x00F9},
    {"\\\\'", "u", 0x00FA},
    {"\\\\\\^", "u", 0x00FB},
    {"\\\\\"", "u", 0x00FC},
    {"\\\\'", "y", 0x00FD},

    /** Latin Extended-A (only the code points listed are handled) */
    {"\\\\u", "A", 0x0102}, {"\\\\u", "a", 0x0103},
    {"\\\\'", "C", 0x0106}, {"\\\\'", "c", 0x0107},
    {"\\\\v", "C", 0x010C}, {"\\\\v", "c", 0x010D},
    {"\\\\v", "D", 0x010E},
    /* U+0118/U+0119 carry an ogonek, which LaTeX writes as \k. These
     * entries come first so that encoding emits \k{E} and \k{e}. */
    {"\\\\k", "E", 0x0118}, {"\\\\k", "e", 0x0119},
    /* Legacy spelling: earlier versions wrote the ogonek letters with \c
     * (cedilla). Kept so that such files still decode as before. */
    {"\\\\c", "E", 0x0118}, {"\\\\c", "e", 0x0119},
    {"\\\\v", "E", 0x011A}, {"\\\\v", "e", 0x011B},
    {"\\\\u", "G", 0x011E}, {"\\\\u", "g", 0x011F},
    {"\\\\u", "I", 0x012C}, {"\\\\u", "i", 0x012D},
    {"\\\\'", "L", 0x0139}, {"\\\\'", "l", 0x013A},
    {"\\\\'", "N", 0x0143}, {"\\\\'", "n", 0x0144},
    {"\\\\v", "N", 0x0147}, {"\\\\v", "n", 0x0148},
    {"\\\\u", "O", 0x014E}, {"\\\\u", "o", 0x014F},
    {"\\\\H", "O", 0x0150}, {"\\\\H", "o", 0x0151},
    {"\\\\'", "R", 0x0154}, {"\\\\'", "r", 0x0155},
    {"\\\\v", "R", 0x0158}, {"\\\\v", "r", 0x0159},
    {"\\\\'", "S", 0x015A}, {"\\\\'", "s", 0x015B},
    {"\\\\c", "S", 0x015E}, {"\\\\c", "s", 0x015F},
    {"\\\\v", "S", 0x0160}, {"\\\\v", "s", 0x0161},
    {"\\\\v", "T", 0x0164},
    {"\\\\u", "U", 0x016C}, {"\\\\u", "u", 0x016D},
    {"\\\\r", "U", 0x016E}, {"\\\\r", "u", 0x016F},
    {"\\\\\"", "Y", 0x0178},
    {"\\\\'", "Z", 0x0179}, {"\\\\'", "z", 0x017A},
    {"\\\\v", "Z", 0x017D}, {"\\\\v", "z", 0x017E},

    /** Latin Extended-B */
    {"\\\\v", "A", 0x01CD}, {"\\\\v", "a", 0x01CE},
    {"\\\\v", "G", 0x01E6}, {"\\\\v", "g", 0x01E7}
};

/**
 * Spellings in which modifier (%1) and letter (%2) are kept apart by braces
 * or a space: {\v{g}}, {\v g} and \v{g}. These work for every modifier.
 */
const char *expansionsMod1[] = {"\\{%1\\{%2\\}\\}", "\\{%1 %2\\}", "%1\\{%2\\}"};
static const int expansionsmod1count = sizeof( expansionsMod1 ) / sizeof( expansionsMod1[0] );

/**
 * Spellings in which the letter follows the modifier directly: {\"a},
 * \"a{} and \"a. buildCharMapping() uses them only for modifiers that do
 * not end in a letter.
 */
const char *expansionsMod2[] = {"\\{%1%2\\}", "%1%2\\{\\}", "%1%2"};
static const int expansionsmod2count = sizeof( expansionsMod2 ) / sizeof( expansionsMod2[0] );

static const int modcharmappingdatalatexcount = sizeof( modcharmappingdatalatex ) / sizeof( modcharmappingdatalatex[ 0 ] );
|
|
|
|
/**
 * Direct mappings between a LaTeX character sequence and one Unicode
 * character. "regexp" is the pattern searched for when decoding, "latex"
 * is the literal text written when encoding.
 */
static const struct EncoderLaTeXCharMapping
{
    const char *regexp;
    unsigned int unicode;
    const char *latex;
}
charmappingdatalatex[] =
{
    {"\\\\#", 0x0023, "\\#"},
    {"\\\\&", 0x0026, "\\&"},
    {"\\\\_", 0x005F, "\\_"},
    {"!`", 0x00A1, "!`"},
    {"\"<", 0x00AB, "\"<"},
    {"\">", 0x00BB, "\">"},
    {"[?]`", 0x00BF, "?`"}, /* '?' is a regexp metacharacter, hence the character class */
    {"--", 0x2013, "--"}
};

static const int charmappingdatalatexcount = sizeof( charmappingdatalatex ) / sizeof( charmappingdatalatex[ 0 ] );
|
|
|
|
/**
 * Sets up the encoder by filling the character mapping list and the
 * combining-mark mapping list from the static tables in this file.
 */
EncoderLaTeX::EncoderLaTeX()
{
    buildCharMapping();
    buildCombinedMapping();
}
|
|
|
|
/** Nothing to release explicitly in this destructor. */
EncoderLaTeX::~EncoderLaTeX()
{
    // nothing
}
|
|
|
|
/**
 * Translates LaTeX notation in @p text into Unicode characters.
 *
 * The text is split at '$' into alternating plain-text and inline-math
 * fragments. Only the plain-text fragments are run through the decoding
 * rules in m_charMapping; math fragments are put back unchanged. URLs
 * found in the input are recorded first and written back at the end, so
 * that replacements made inside a URL are undone.
 *
 * @param text LaTeX-encoded input
 * @return the decoded text
 */
TQString EncoderLaTeX::decode( const TQString & text )
{
    const TQString splitMarker = "|KBIBTEX|";

    /** start-stop marker ensures that each text starts and stops
      * with plain text and not with an inline math environment.
      * This invariant is exploited implicitly in the code below. */
    const TQString startStopMarker = "|STARTSTOP|";
    TQString result = startStopMarker + text + startStopMarker;

    /** Collect the URLs contained in the text and store them in urls.
      * The replacements below may alter character sequences inside a
      * URL and thereby break it, so every URL is later restored to its
      * original form. */
    TQStringList urls;
    TQRegExp httpRegExp( "(ht|f)tp://[^\"} ]+" );
    httpRegExp.setMinimal( false );
    int pos = 0;
    while (( pos = httpRegExp.search( result, pos ) ) >= 0 )
    {
        urls << httpRegExp.cap( 0 );
        ++pos;
    }

    decomposedUTF8toLaTeX( result );

    /** split text into math and non-math regions */
    TQStringList intermediate = TQStringList::split( '$', result, true );
    TQStringList::Iterator it = intermediate.begin();
    while ( it != intermediate.end() )
    {
        /**
         * A fragment ending in a backslash means the split happened at
         * an escaped dollar sign ("\$"), which does not start math mode.
         * Re-attach the dollar sign and the following fragment, drop
         * that fragment from the list and examine the merged fragment
         * again. The start-stop marker guarantees that the last fragment
         * never ends in a backslash; the check on "next" merely keeps
         * the code safe should that invariant ever be broken.
         */
        TQStringList::Iterator next = it;
        ++next;
        if (( *it ).endsWith( "\\" ) && next != intermediate.end() )
        {
            ( *it ).append( '$' ).append( *next );
            intermediate.remove( next );
        }
        else
            ++it;
    }

    tqApp->processEvents();

    /** Join the plain-text fragments (every other list item) with the
      * split marker so the rules can be applied in a single pass. */
    result = "";
    for ( TQStringList::Iterator part = intermediate.begin(); part != intermediate.end(); ++part )
    {
        if ( !result.isEmpty() ) result.append( splitMarker );
        result.append( *part );

        /** the item following a plain-text fragment is a math fragment */
        ++part;
        if ( part == intermediate.end() )
            break;

        if (( *part ).length() > 256 )
            tqDebug( "Very long math equation using $ found, maybe due to broken inline math: %s", ( *part ).left( 48 ).latin1() );
    }

    tqApp->processEvents();

    for ( TQValueList<CharMappingItem>::ConstIterator cmit = m_charMapping.begin(); cmit != m_charMapping.end(); ++cmit )
        result.replace(( *cmit ).regExp, ( *cmit ).unicode );

    tqApp->processEvents();

    TQStringList transformed = TQStringList::split( splitMarker, result, true );

    tqApp->processEvents();

    /** Interleave the decoded plain-text fragments with the untouched
      * math fragments taken from the original list. */
    result = "";
    for ( TQStringList::Iterator itt = transformed.begin(), iti = intermediate.begin(); itt != transformed.end() && iti != intermediate.end(); ++itt, ++iti )
    {
        result.append( *itt );

        ++iti;
        if ( iti == intermediate.end() )
            break;

        result.append( "$" ).append( *iti ).append( "$" );
    }

    tqApp->processEvents();

    /** Reinsert the original URLs as explained above. The loop stops
      * once all recorded URLs are used, so a text that now contains
      * more URL-like matches than were recorded cannot cause an access
      * beyond the end of the list. */
    pos = 0;
    TQStringList::Iterator urlIt = urls.begin();
    while ( urlIt != urls.end() && ( pos = httpRegExp.search( result, pos ) ) >= 0 )
    {
        const int len = httpRegExp.cap( 0 ).length();
        result = result.left( pos ).append( *urlIt ).append( result.mid( pos + len ) );
        ++urlIt;
        ++pos;
    }

    return result.replace( startStopMarker, "" );
}
|
|
|
|
/**
 * Translates Unicode characters in @p text into LaTeX notation.
 *
 * The text is split at '$' into alternating plain-text and inline-math
 * fragments. Only the plain-text fragments are run through the encoding
 * rules in m_charMapping; math fragments are put back unchanged. URLs
 * found in the input are recorded first and written back at the end, so
 * that replacements made inside a URL are undone.
 *
 * @param text Unicode input
 * @return the LaTeX-encoded text
 */
TQString EncoderLaTeX::encode( const TQString & text )
{
    const TQString splitMarker = "|KBIBTEX|";

    /** start-stop marker ensures that each text starts and stops
      * with plain text and not with an inline math environment.
      * This invariant is exploited implicitly in the code below. */
    const TQString startStopMarker = "|STARTSTOP|";
    TQString result = startStopMarker + text + startStopMarker;

    /** Collect the URLs contained in the text and store them in urls.
      * The replacements below may alter character sequences inside a
      * URL and thereby break it, so every URL is later restored to its
      * original form. */
    TQStringList urls;
    TQRegExp httpRegExp( "(ht|f)tp://[^\"} ]+" );
    httpRegExp.setMinimal( false );
    int pos = 0;
    while (( pos = httpRegExp.search( result, pos ) ) >= 0 )
    {
        urls << httpRegExp.cap( 0 );
        ++pos;
    }

    /** split text into math and non-math regions */
    TQStringList intermediate = TQStringList::split( '$', result, true );
    TQStringList::Iterator it = intermediate.begin();
    while ( it != intermediate.end() )
    {
        /**
         * A fragment ending in a backslash means the split happened at
         * an escaped dollar sign ("\$"), which does not start math mode.
         * Re-attach the dollar sign and the following fragment, drop
         * that fragment from the list and examine the merged fragment
         * again. The start-stop marker guarantees that the last fragment
         * never ends in a backslash; the check on "next" merely keeps
         * the code safe should that invariant ever be broken.
         */
        TQStringList::Iterator next = it;
        ++next;
        if (( *it ).endsWith( "\\" ) && next != intermediate.end() )
        {
            ( *it ).append( '$' ).append( *next );
            intermediate.remove( next );
        }
        else
            ++it;
    }

    tqApp->processEvents();

    /** Join the plain-text fragments (every other list item) with the
      * split marker so the rules can be applied in a single pass. */
    result = "";
    for ( TQStringList::Iterator part = intermediate.begin(); part != intermediate.end(); ++part )
    {
        if ( !result.isEmpty() ) result.append( splitMarker );
        result.append( *part );

        /** the item following a plain-text fragment is a math fragment */
        ++part;
        if ( part == intermediate.end() )
            break;

        if (( *part ).length() > 256 )
            tqDebug( "Very long math equation using $ found, maybe due to broken inline math: %s", ( *part ).left( 48 ).latin1() );
    }

    tqApp->processEvents();

    for ( TQValueList<CharMappingItem>::ConstIterator cmit = m_charMapping.begin(); cmit != m_charMapping.end(); ++cmit )
        result.replace(( *cmit ).unicode, ( *cmit ).latex );

    tqApp->processEvents();

    TQStringList transformed = TQStringList::split( splitMarker, result, true );

    tqApp->processEvents();

    /** Interleave the encoded plain-text fragments with the untouched
      * math fragments taken from the original list. */
    result = "";
    for ( TQStringList::Iterator itt = transformed.begin(), iti = intermediate.begin(); itt != transformed.end() && iti != intermediate.end(); ++itt, ++iti )
    {
        result.append( *itt );

        ++iti;
        if ( iti == intermediate.end() )
            break;

        result.append( "$" ).append( *iti ).append( "$" );
    }

    tqApp->processEvents();

    /** \url accepts unquoted & and _ (and # and --), so the escaping is
      * reverted when the text contains a \url command. Note that this
      * acts on the whole text, not only on the \url argument, and may
      * therefore introduce new problems. */
    if ( result.contains( "\\url{" ) )
        result.replace( "\\&", "&" ).replace( "\\_", "_" ).replace( TQChar( 0x2013 ), "--" ).replace( "\\#", "#" );

    decomposedUTF8toLaTeX( result );

    /** Reinsert the original URLs as explained above. The loop stops
      * once all recorded URLs are used, so a text that now contains
      * more URL-like matches than were recorded cannot cause an access
      * beyond the end of the list. */
    pos = 0;
    TQStringList::Iterator urlIt = urls.begin();
    while ( urlIt != urls.end() && ( pos = httpRegExp.search( result, pos ) ) >= 0 )
    {
        const int len = httpRegExp.cap( 0 ).length();
        result = result.left( pos ).append( *urlIt ).append( result.mid( pos + len ) );
        ++urlIt;
        ++pos;
    }

    return result.replace( startStopMarker, "" );
}
|
|
|
|
/**
 * Encodes one specific character only: every occurrence of @p replace in
 * @p text is substituted by the LaTeX form of each mapping item whose
 * Unicode side equals @p replace. All other characters stay as they are.
 *
 * @param text    input text
 * @param replace the character to be encoded
 * @return a copy of @p text with @p replace encoded
 */
TQString EncoderLaTeX::encode( const TQString &text, const TQChar &replace )
{
    TQString result = text;
    for ( TQValueList<CharMappingItem>::ConstIterator it = m_charMapping.begin(); it != m_charMapping.end(); ++it )
    {
        if (( *it ).unicode == replace )
            result.replace(( *it ).unicode, ( *it ).latex );
    }
    return result;
}
|
|
|
|
/**
 * Encodes @p text like encode() and then applies adjustments that depend
 * on the kind of field the text belongs to:
 *  - page fields: an en dash (U+2013) is written as "--",
 *  - URL fields: the escaping of &, _ and # is removed and an en dash is
 *    written as "--".
 * All other field types get the plain encode() result.
 *
 * @param text      Unicode input
 * @param fieldType type of the field @p text belongs to
 * @return the LaTeX-encoded text
 */
TQString EncoderLaTeX::encodeSpecialized( const TQString & text, const EntryField::FieldType fieldType )
{
    TQString result = encode( text );

    switch ( fieldType )
    {
    case EntryField::ftPages:
        result.replace( TQChar( 0x2013 ), "--" );
        break;

    case EntryField::ftURL:
        result.replace( "\\&", "&" ).replace( "\\_", "_" ).replace( TQChar( 0x2013 ), "--" ).replace( "\\#", "#" );
        break;

    default:
        break;
    }

    return result;
}
|
|
|
|
/**
 * Rewrites every "base character + combining mark" pair in @p text as the
 * LaTeX accent command applied to the base character, e.g. "e" followed
 * by U+0301 becomes \'{e}. The marks handled are those listed in
 * m_combinedMapping.
 *
 * @param text text to be modified in place
 * @return reference to @p text
 */
TQString& EncoderLaTeX::decomposedUTF8toLaTeX( TQString &text )
{
    for ( TQValueList<CombinedMappingItem>::Iterator it = m_combinedMapping.begin(); it != m_combinedMapping.end(); ++it )
    {
        /** the pattern matches exactly two characters: any character
          * (captured) followed by this item's combining mark */
        for ( int i = ( *it ).regExp.search( text ); i >= 0; i = ( *it ).regExp.search( text, i + 1 ) )
        {
            const TQString base = ( *it ).regExp.cap( 1 );
            /** replace the two matched characters by \<command>{<base>} */
            text = text.left( i ) + "\\" + ( *it ).latex + "{" + base + "}" + text.mid( i + 2 );
        }
    }

    return text;
}
|
|
|
|
void EncoderLaTeX::buildCombinedMapping()
|
|
{
|
|
for ( int i = 0; i < decompositionscount; i++ )
|
|
{
|
|
CombinedMappingItem item;
|
|
item.regExp = TQRegExp( "(.)" + TQString( TQChar( decompositions[i].unicode ) ) );
|
|
item.latex = decompositions[i].latexCommand;
|
|
m_combinedMapping.append( item );
|
|
}
|
|
}
|
|
|
|
void EncoderLaTeX::buildCharMapping()
|
|
{
|
|
/** encoding and decoding for digraphs such as -- or ?` */
|
|
for ( int i = 0; i < charmappingdatalatexcount; i++ )
|
|
{
|
|
CharMappingItem charMappingItem;
|
|
charMappingItem.regExp = TQRegExp( charmappingdatalatex[ i ].regexp );
|
|
charMappingItem.unicode = TQChar( charmappingdatalatex[ i ].unicode );
|
|
charMappingItem.latex = TQString( charmappingdatalatex[ i ].latex );
|
|
m_charMapping.append( charMappingItem );
|
|
}
|
|
|
|
/** encoding and decoding for commands such as \AA or \ss */
|
|
for ( int i = 0; i < commandmappingdatalatexcount; ++i )
|
|
{
|
|
/** different types of writing such as {\AA} or \AA{} possible */
|
|
for ( int j = 0; j < expansionscmdcount; ++j )
|
|
{
|
|
CharMappingItem charMappingItem;
|
|
charMappingItem.regExp = TQRegExp( TQString( expansionsCmd[j] ).arg( commandmappingdatalatex[i].letters ) );
|
|
charMappingItem.unicode = TQChar( commandmappingdatalatex[i].unicode );
|
|
if ( charMappingItem.regExp.numCaptures() > 0 )
|
|
charMappingItem.unicode += TQString( "\\1" );
|
|
charMappingItem.latex = TQString( "{\\%1}" ).arg( commandmappingdatalatex[i].letters );
|
|
m_charMapping.append( charMappingItem );
|
|
}
|
|
}
|
|
|
|
/** encoding and decoding for letters such as \"a */
|
|
for ( int i = 0; i < modcharmappingdatalatexcount; ++i )
|
|
{
|
|
TQString modifierRegExp = TQString( modcharmappingdatalatex[i].modifier );
|
|
TQString modifier = modifierRegExp;
|
|
modifier.replace( "\\^", "^" ).replace( "\\\\", "\\" );
|
|
|
|
/** first batch of replacement rules, where no separator is required between modifier and character (e.g. \"a) */
|
|
if ( !modifierRegExp.at( modifierRegExp.length() - 1 ).isLetter() )
|
|
for ( int j = 0; j < expansionsmod2count; ++j )
|
|
{
|
|
CharMappingItem charMappingItem;
|
|
charMappingItem.regExp = TQRegExp( TQString( expansionsMod2[j] ).arg( modifierRegExp ).arg( modcharmappingdatalatex[i].letter ) );
|
|
charMappingItem.unicode = TQChar( modcharmappingdatalatex[i].unicode );
|
|
charMappingItem.latex = TQString( "{%1%2}" ).arg( modifier ).arg( modcharmappingdatalatex[i].letter );
|
|
m_charMapping.append( charMappingItem );
|
|
}
|
|
|
|
/** second batch of replacement rules, where a separator is required between modifier and character (e.g. \v{g}) */
|
|
for ( int j = 0; j < expansionsmod1count; ++j )
|
|
{
|
|
CharMappingItem charMappingItem;
|
|
charMappingItem.regExp = TQRegExp( TQString( expansionsMod1[j] ).arg( modifierRegExp ).arg( modcharmappingdatalatex[i].letter ) );
|
|
charMappingItem.unicode = TQChar( modcharmappingdatalatex[i].unicode );
|
|
charMappingItem.latex = TQString( "%1{%2}" ).arg( modifier ).arg( modcharmappingdatalatex[i].letter );
|
|
m_charMapping.append( charMappingItem );
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
 * Returns the shared encoder instance, creating it on first use.
 * The instance stays alive until deleteCurrentEncoderLaTeX() is called.
 *
 * @return pointer to the shared encoder
 */
EncoderLaTeX* EncoderLaTeX::currentEncoderLaTeX()
{
    if ( encoderLaTeX == NULL )
        encoderLaTeX = new EncoderLaTeX();

    return encoderLaTeX;
}
|
|
|
|
void EncoderLaTeX::deleteCurrentEncoderLaTeX()
|
|
{
|
|
if ( encoderLaTeX != NULL )
|
|
{
|
|
delete encoderLaTeX;
|
|
encoderLaTeX = NULL;
|
|
}
|
|
}
|
|
|
|
char EncoderLaTeX::unicodeToASCII( unsigned int unicode )
|
|
{
|
|
if ( unicode < 128 ) return ( char )unicode;
|
|
for ( int i = 0; i < modcharmappingdatalatexcount; ++i )
|
|
if ( modcharmappingdatalatex[i].unicode == unicode )
|
|
return *modcharmappingdatalatex[i].letter;
|
|
return '?';
|
|
}
|
|
|
|
}
|