You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tdesdk/poxml/parser.cpp

1009 lines
32 KiB

// #define POXML_DEBUG
#include "parser.h"
#include <iostream>
#include <stdlib.h>
#include <assert.h>
#include <tqregexp.h>
using namespace std;
// Elements that are written into the collected message as "<tag .../>"
// (see startElement) and for which endElement emits no closing tag.
// The table is terminated by a null pointer.
static const char *singletags[] = {
    "beginpage",
    "imagedata",
    "colspec",
    "spanspec",
    "anchor",
    "xref",
    "area",
    "footnoteref",
    "void",
    "inlinegraphic",
    "glosssee",
    "graphic",
    "xi:include",
    0
};
// Elements whose start opens (and whose end closes) a translatable message:
// startElement/endElement use isCuttingTag() to track the nesting depth, and
// removeEmptyTags() walks this table to drop empty elements.
// The table is terminated by a null pointer.
//
// Each name is listed exactly once ("step", "remark" and "glossentry" used to
// appear twice, which only caused redundant lookups).
//
// NOTE(review): "blockingquote" and "refsysnopsisdivinfo" look like
// misspellings of the DocBook elements "blockquote" and "refsynopsisdivinfo";
// they are kept unchanged here because correcting them would change which
// elements are cut -- confirm against the DocBook DTD before touching them.
static const char *cuttingtags[] = {
    "bridgehead", "trans_comment", "para", "title", "term",
    "entry", "contrib", "keyword", "example",
    "note", "footnote", "caution",
    "informalexample", "remark", "comment",
    "imageobject", "varlistentry", "thead",
    "tbody", "tgroup", "row", "screenshot", "screeninfo",
    "variablelist", "step", "procedure",
    "holder", "listitem", "important",
    "author", "itemizedlist", "orderedlist",
    "caption", "textobject", "mediaobject",
    "tip", "glossdef", "inlinemediaobject",
    "simplelist", "member", "glossentry",
    "areaspec", "corpauthor", "indexterm",
    "calloutlist", "callout", "subtitle",
    "table", "part", "xi:fallback", "primary",
    "secondary", "chapter", "sect1", "sect2",
    "figure", "abstract", "sect3", "sect", "sect4",
    "warning", "preface", "authorgroup", "keywordset",
    "informaltable", "qandaentry", "question", "answer",
    "othercredit", "affiliation", "qandaset",
    "cmdsynopsis", "funcsynopsis", "funcsynopsisinfo",
    "epigraph", "attribution", "glossary", "chapterinfo",
    "glossdiv", "blockingquote", "simplesect", "section",
    "qandadiv", "refsect1", "refmeta", "formalpara",
    "refentry", "refnamediv", "refpurpose", "refentrytitle",
    "refmiscinfo", "refsect2", "refsect3", "refsect1info",
    "refsect2info", "refsect3info", "refsection", "refsectioninfo",
    "refsynopsisdiv", "refsysnopsisdivinfo",
    "revdescription", "partinfo",
    "segmentedlist", "segtitle", "seg", "seglistitem", "screenco",
    0
};
// Elements whose content is kept verbatim: cleanupTags() escapes their
// content and descape() leaves whitespace inside them untouched.
// The table is terminated by a null pointer.
static const char *literaltags[] = {
    "literallayout",
    "synopsis",
    "screen",
    "programlisting",
    0
};
// Prints the parse error (message, line number, column number) to stderr
// and returns false.
bool StructureParser::fatalError(const TQXmlParseException &e)
{
    cerr << "fatalError " << e.message().latin1();
    cerr << " " << e.lineNumber();
    cerr << " " << e.columnNumber() << endl;
    return false;
}
// Resets the per-document state: no message is being collected, the nesting
// depth is zero, and the two helper expressions (position attributes and the
// do-not-split condition) are (re)built. Always returns true.
bool StructureParser::startDocument()
{
    inside = 0;
    message = "";
    do_not_split_reg = TQRegExp("\\s*condition=\"do-not-split\"");
    infos_reg = TQRegExp("\\s*poxml_line=\"(\\d+)\" poxml_col=\"(\\d+)\"");
    return true;
}
bool StructureParser::isCuttingTag(const TQString &qName)
{
int index = 0;
while (cuttingtags[index]) {
if (cuttingtags[index] == qName)
return true;
index++;
}
return isLiteralTag(qName);
}
// Returns true when qName is listed in singletags.
bool StructureParser::isSingleTag(const TQString &qName)
{
    bool found = false;
    for (int i = 0; !found && singletags[i]; i++)
        found = (singletags[i] == qName);
    return found;
}
bool StructureParser::isLiteralTag(const TQString &qName)
{
int index = 0;
while (literaltags[index]) {
if (literaltags[index] == qName)
return true;
index++;
}
return false;
}
// While a message is being collected, re-emits the skipped entity as
// "&name;" into the message text. Always returns true.
bool StructureParser::skippedEntity(const TQString &name)
{
    if (!inside)
        return true;

    message += TQString("&%1;").arg(name);
    return true;
}
bool StructureParser::startElement( const TQString& , const TQString& ,
const TQString& qName,
const TQXmlAttributes & attr )
{
TQString tname = qName.lower();
bool first = false;
if (isCuttingTag(tname)) {
if (!inside) {
message = TQString();
list.pc.increasePara();
startline = locator->lineNumber();
startcol = locator->columnNumber();
first = true;
}
inside++;
}
if (inside)
{
TQString tmp = "<" + tname;
for (int i = 0; i < attr.length(); i++) {
tmp += TQString(" %1=\"%2\"").arg(attr.qName(i)).arg(attr.value(i));
}
tmp += TQString(" poxml_line=\"%1\"").arg(locator->lineNumber());
tmp += TQString(" poxml_col=\"%1\"").arg(locator->columnNumber());
if (isSingleTag(qName))
tmp += "/>";
else
tmp += ">";
message += tmp;
if (first)
startcol -= message.length();
}
if (tname == "anchor" || tname.left(4) == "sect" || tname == "chapter")
if (!attr.value("id").isEmpty()) list.pc.addAnchor(attr.value("id"));
return true;
}
// Appends the CDATA opening marker to the message being collected, if any.
// Always returns true.
bool StructureParser::startCDATA()
{
    if (!inside)
        return true;

    message += "<![CDATA[";
    return true;
}
// Appends the CDATA closing marker to the message being collected, if any.
// Always returns true.
bool StructureParser::endCDATA()
{
    if (!inside)
        return true;

    message += "]]>";
    return true;
}
bool StructureParser::isClosure(const TQString &message)
{
assert(message.at(0) == '<');
int endindex = 1;
while (!message.at(endindex).isSpace() && message.at(endindex) != '>')
endindex++;
TQString tag = message.mid(1, endindex - 1);
return closureTag(message, tag);
}
bool StructureParser::closureTag(const TQString& message, const TQString &tag)
{
#ifdef POXML_DEBUG
qDebug("closureTag %s %s", message.latin1(), tag.latin1());
#endif
int inside = 0;
uint index = 0;
while (true)
{
int nextclose = message.find(TQRegExp(TQString::fromLatin1("</%1[\\s>]").arg(tag)), index);
int nextstart = message.find(TQRegExp(TQString::fromLatin1("<%1[>\\s]").arg(tag)), index);
// qDebug("finding %d %d %d %d", nextstart, nextclose, index, inside);
if (nextclose == -1) {
#ifdef POXML_DEBUG
qDebug("ending on no close anymore %d %d %d %d", (!inside && index >= message.length()), inside, index, message.length());
#endif
return !inside && index >= message.length();
}
if (nextstart == -1)
nextstart = message.length() + 1;
if (nextstart < nextclose) {
inside++;
index = nextstart + 1;
while (message.at(index) != '>')
index++;
index++;
} else {
inside--;
index = nextclose + 1;
while (message.at(index) != '>')
index++;
index++;
if (!inside) {
#ifdef POXML_DEBUG
qDebug("ending on exit %d", index >= message.length());
#endif
return index >= message.length();
}
}
}
}
// Normalises the whitespace of message in place.
//
// After trimming (stripWhiteSpace), every newline, tab and carriage return
// outside of literal tags becomes a space, and each whitespace character
// that directly follows another one is marked with '\010' and removed at
// the end. Content inside literal tags (see isLiteralTag) is not modified.
void StructureParser::descape(TQString &message)
{
    stripWhiteSpace( message );

    int literalDepth = 0;      // > 0 while inside a literal tag
    bool prevWasSpace = false; // previous character was whitespace

    for (uint pos = 0; pos < message.length(); pos++) {
        const char c = message.at(pos).latin1();

        if (c == '\n' || c == '\t' || c == '\r' || c == ' ') {
            if (!literalDepth) {
                if (c != ' ')
                    message[pos] = ' ';
                // second and later whitespace of a run: mark for removal
                if (prevWasSpace)
                    message[pos] = '\010';
            }
            prevWasSpace = true;
        } else if (c == '<') {
            // read the tag name to keep track of literal sections;
            // prevWasSpace is deliberately left as it is here
            uint nameEnd = pos + 1;
            while (nameEnd < message.length() && !message.at(nameEnd).isSpace() &&
                   message.at(nameEnd) != '>')
                nameEnd++;
            TQString tag = message.mid(pos + 1, nameEnd - pos - 1);
            if (tag.at(0) == '/') {
                if (isLiteralTag(tag.mid(1)))
                    literalDepth--;
            } else if (isLiteralTag(tag)) {
                literalDepth++;
            }
        } else {
            prevWasSpace = false;
        }
    }

    message.replace(TQRegExp("\010"), "");
}
// Strips markup that merely surrounds the text of msg.msgid, in place:
//  - leading single tags (entries of singletags),
//  - trailing self-closing elements ("<.../>"),
//  - an enclosing start/end tag pair of the same name, as long as
//    closureTag() confirms that the pair spans the whole message.
// The name of a removed enclosing tag is stored in msg.tag, and the position
// of the first line entry is updated from the poxml_line/poxml_col attributes
// of the removed tags. The number of characters removed at the front is
// accumulated into msg.lines.first().offset.
// If anything was removed and splitting is still allowed, the function calls
// itself again on the shortened message (the nested result is ignored).
//
// Returns true when msg.msgid is empty or msg.do_not_split is set (the
// "abort" indication used by splitMessage), false otherwise.
bool StructureParser::formatMessage(MsgBlock &msg) const
{
#ifdef POXML_DEBUG
qDebug("formatMessage %s", msg.msgid.latin1());
#endif
int offset = 0;
bool changed = false;
bool recurse = true;
if (msg.msgid.isEmpty())
return true;
// count the leading blanks before they are trimmed away
for (int index = 0; msg.msgid.at(index) == ' '; index++, offset++);
stripWhiteSpace( msg.msgid );
// removing starting single tags
for (int index = 0; singletags[index]; index++)
{
int slen = strlen(singletags[index]);
// the name must match completely: the next character may not continue it
if (msg.msgid.left(slen + 1) == TQString::fromLatin1("<%1").arg(singletags[index]) &&
!msg.msgid.at( slen + 1 ).isLetterOrNumber() )
{
#ifdef POXML_DEBUG
qDebug("removing single tag %s", singletags[index]);
#endif
int strindex = strlen(singletags[index]) + 1;
// NOTE(review): this scan (like the other scans for '>' and '<' below)
// has no bound and relies on the tag being terminated
while (msg.msgid.at(strindex) != '>')
strindex++;
msg.msgid = msg.msgid.mid(strindex + 1);
changed = true;
offset += strindex + 1;
for (int index = 0; msg.msgid.at(index) == ' '; index++, offset++) ;
stripWhiteSpace( msg.msgid );
}
}
// removing trailing self-closing elements
while (msg.msgid.right(2) == "/>")
{
int strindex = msg.msgid.length() - 2;
while (msg.msgid.at(strindex) != '<')
strindex--;
msg.msgid = msg.msgid.left(strindex);
stripWhiteSpace( msg.msgid ); // only removed space at the end
changed = true;
}
for (int index = 0; msg.msgid.at(index) == ' '; index++, offset++) ;
stripWhiteSpace( msg.msgid );
// peel off enclosing start/end tag pairs, one pair per iteration
while (true) {
if (msg.msgid.at(0) != '<')
break;
if (msg.msgid.at(msg.msgid.length() - 1) != '>')
break;
// name of the tag the message starts with
int strindex = 1;
while (msg.msgid.at(strindex) != ' ' && msg.msgid.at(strindex) != '>')
strindex++;
TQString starttag = msg.msgid.mid(1, strindex - 1);
// walk backwards to the start of the last tag of the message
// NOTE(review): the condition joins the two tests with '&&', so the scan
// stops at the first '<' OR at the first position followed by '/';
// splitMessage uses '||' for the equivalent scan -- confirm which is intended
int endindex = msg.msgid.length() - 2;
while (msg.msgid.at(endindex) != '<' && msg.msgid.at(endindex + 1) != '/')
endindex--;
#ifdef POXML_DEBUG
qDebug("endIndex %d", endindex);
#endif
strindex = endindex;
TQString orig = msg.msgid;
// text between "</" and the final '>': tag name plus poxml_* attributes
TQString endtag = msg.msgid.mid(endindex + 2, msg.msgid.length() - (endindex + 2) - 1);
TQString endtag_attr = endtag.mid(endtag.find(' '), endtag.length());
endtag.replace(infos_reg, "");
if (endtag == starttag) {
if (!closureTag(msg.msgid, starttag))
break;
// removing start/end tags
msg.msgid = msg.msgid.left(endindex);
strindex = 0;
while (msg.msgid.at(strindex) != '>')
strindex++;
// the removed start tag (without '>'), including its attributes
TQString attr = msg.msgid.left(strindex);
msg.msgid = msg.msgid.mid(strindex + 1);
offset += strindex + 1;
for (int index = 0; msg.msgid.at(index) == ' '; index++, offset++) ;
stripWhiteSpace( msg.msgid );
msg.tag = starttag;
if (infos_reg.search(attr) >= 0) {
// the start tag carries an exact position: use it and drop the
// offset accumulated so far
msg.lines.first().start_line = infos_reg.cap(1).toInt();
msg.lines.first().start_col = infos_reg.cap(2).toInt();
#ifdef POXML_DEBUG
qDebug("col %s %s %d", attr.latin1(), msg.msgid.latin1(), msg.lines.first().start_col);
#endif
offset = 0;
if (infos_reg.search(endtag_attr) >= 0) {
msg.lines.first().end_line = infos_reg.cap(1).toInt();
msg.lines.first().end_col = infos_reg.cap(2).toInt() + 1;
}
}
// condition="do-not-split" on the removed tag stops any further processing
if (do_not_split_reg.search(attr) >= 0) {
msg.do_not_split = true;
break;
}
changed = true;
} else
break;
}
#ifdef POXML_DEBUG
qDebug("formatMessage result %s %d %d", msg.msgid.latin1(), changed && recurse, msg.lines.first().start_col);
#endif
msg.lines.first().offset += offset;
if (msg.do_not_split)
recurse = false;
// something was stripped: run again on the shortened message
if (changed && recurse)
formatMessage(msg);
return !recurse; // indicates an abort
}
// Splits the message block mb at cutting-tag boundaries and returns the
// resulting blocks.
//
// Two situations lead to a split:
//  1. the message starts with a cutting tag that is closed before the end of
//     the message: the message is cut right behind that closing tag;
//  2. the message ends with the closing tag of a cutting tag that was opened
//     somewhere inside the message: the message is cut in front of the
//     matching opening tag.
// Both halves are run through formatMessage() and then split recursively
// (in case 1 the recursion is skipped when both formatMessage() calls report
// an abort). If neither situation applies, or the leading tag encloses the
// whole message, the result contains just mb itself.
MsgList StructureParser::splitMessage(const MsgBlock &mb)
{
MsgList result;
MsgBlock msg1 = mb;
MsgBlock msg2 = mb;
TQString message = mb.msgid;
#ifdef POXML_DEBUG
qDebug("splitMessage %s", message.latin1());
#endif
if (message.at(0) == '<') {
// name of the tag the message starts with
int endindex = 1;
while (!message.at(endindex).isSpace() && message.at(endindex) != '>')
endindex++;
TQString tag = message.mid(1, endindex - 1);
// the leading tag spans the whole message: nothing to split
if (closureTag(message, tag))
goto error;
if (isCuttingTag(tag))
{
// if the message starts with a cutting tag, this tag has to
// end in between. We split both messages and format them
int strindex = endindex;
strindex++;
// nesting depth of tag; the leading tag itself is already open
int inside = 1;
while (true) {
#ifdef POXML_DEBUG
qDebug("inside %s %d", message.mid(strindex, 35).latin1(), inside);
#endif
// the exception for poxml_* attributes is made in the closing tag
int closing_index = message.find(TQRegExp(TQString::fromLatin1("</%1[\\s>]").arg(tag)),
strindex);
int starting_index = message.find(TQRegExp(TQString::fromLatin1("<%1[\\s>]").arg(tag)),
strindex);
#ifdef POXML_DEBUG
qDebug("index1 %d %d %d", closing_index, starting_index, strindex);
#endif
// when a new start was found, we set the start_index after the next match
// (and set strindex to it later - increasing inside)
if (starting_index != -1) {
starting_index += tag.length() + 1;
while (message.at(starting_index) != '>')
starting_index++;
starting_index++;
}
#ifdef POXML_DEBUG
qDebug("index %d %d %d", closing_index, starting_index, strindex);
#endif
// a closing tag must exist, otherwise the message is malformed
assert(closing_index != -1);
// move closing_index right behind the '>' of the closing tag
closing_index += 3 + tag.length();
while (message.at(closing_index - 1) != '>')
closing_index++;
if (starting_index == -1) {
// only closing tags are left
strindex = closing_index;
#ifdef POXML_DEBUG
qDebug("set strindex %d", strindex);
#endif
inside--;
if (!inside)
break;
continue;
}
// continue behind whichever tag comes first
if (closing_index < starting_index)
{
strindex = closing_index;
inside--;
} else {
strindex = starting_index;
inside++;
}
if (!inside)
break;
}
#ifdef POXML_DEBUG
qDebug("split into %s -AAAAAANNNNNNDDDDDD- %s", message.left(strindex).latin1(), message.mid(strindex).latin1());
#endif
// strindex now points right behind the closing tag of the leading element
msg1.msgid = message.left(strindex);
bool leave = formatMessage(msg1);
msg2.msgid = message.mid(strindex);
msg2.lines.first().offset += strindex;
leave = leave & formatMessage(msg2);
// the second part may not start before the first part ends
if (msg1.lines.first().end_line > msg2.lines.first().start_line ||
(msg1.lines.first().end_line == msg2.lines.first().start_line &&
msg1.lines.first().end_col > msg2.lines.first().start_col))
{
msg2.lines.first().start_line = msg1.lines.first().end_line;
msg2.lines.first().start_col = msg1.lines.first().end_col;
}
#ifdef POXML_DEBUG
qDebug("splited %d-%d(%s) and %d-%d(%s)", msg1.lines.first().end_line,msg1.lines.first().end_col,
msg1.msgid.latin1(),
msg2.lines.first().start_line,msg2.lines.first().start_col, msg2.msgid.latin1());
#endif
// both halves reported an abort: keep them as they are
if (leave) {
result.append(msg1);
result.append(msg2);
return result;
}
result = splitMessage(msg1);
result += splitMessage(msg2);
return result;
}
}
if (message.at(message.length() - 1 ) == '>')
{
// walk backwards to the "</" that starts the last closing tag
int endindex = message.length() - 1;
while (endindex >= 0 && (message.at(endindex) != '<' || message.at(endindex + 1) != '/'))
endindex--;
// tag name, with any poxml_* attributes cut off at the first blank
TQString tag = message.mid(endindex + 2, message.length() - endindex - 3);
if (tag.find(' ') > 0 ) {
tag = tag.left(tag.find(' '));
}
#ifdef POXML_DEBUG
qDebug("behind tag %s", tag.latin1());
#endif
if (isCuttingTag(tag))
{
// if the message ends with a cutting tag, this tag has to
// start in between. We split both messages and format them
int strindex = endindex;
// nesting depth of tag, seen from the end of the message
int inside = 1;
while (true) {
#ifdef POXML_DEBUG
qDebug("inside %s %d", message.mid(strindex, 35).latin1(), inside);
#endif
int closing_index = message.findRev(TQRegExp(TQString::fromLatin1("</%1[\\s>]").arg(tag)),
strindex - 1);
int starting_index = message.findRev(TQRegExp(TQString::fromLatin1("<%1[\\s>]").arg(tag)),
strindex - 1);
#ifdef POXML_DEBUG
qDebug("index1 %d %d %d", closing_index, starting_index, strindex);
#endif
if (starting_index == -1) {
assert(inside == 1);
break;
}
// going backwards, a closing tag opens another level and an
// opening tag closes one
if (closing_index > starting_index)
{
strindex = closing_index;
inside++;
} else {
strindex = starting_index;
inside--;
}
if (!inside)
break;
}
#ifdef POXML_DEBUG
qDebug("split2 into \"%s\" -AAAAAANNNNNNNNNDDDDDDDDDDD- \"%s\"", message.left(strindex).latin1(), message.mid(strindex).latin1());
#endif
// strindex now points at the opening tag matching the trailing closing tag
msg1.msgid = message.left(strindex);
formatMessage(msg1);
msg2.msgid = message.mid(strindex);
msg2.lines.first().offset += strindex;
formatMessage(msg2);
// the first part may not end behind the start of the second part
if (msg1.lines.first().end_line > msg2.lines.first().start_line ||
(msg1.lines.first().end_line == msg2.lines.first().start_line &&
msg1.lines.first().end_col > msg2.lines.first().start_col))
{
msg1.lines.first().end_line = msg2.lines.first().start_line;
msg1.lines.first().end_col = msg2.lines.first().start_col - 1;
}
#ifdef POXML_DEBUG
qDebug("splited %d-%d(%s) and %d-%d(%s)", msg1.lines.first().end_line,msg1.lines.first().end_col,
msg1.msgid.latin1(),
msg2.lines.first().start_line,msg2.lines.first().start_col, msg2.msgid.latin1());
#endif
result = splitMessage(msg1);
result += splitMessage(msg2);
return result;
}
}
// nothing to split (also reached via goto): return the block unchanged
error:
result.append(mb);
return result;
}
// Handles a closing element.
//
// While inside a message, the closing tag (with poxml_line/poxml_col
// attributes for the current locator position) is appended to the message
// text, except for single tags. When the outermost cutting tag closes, the
// collected text is normalised (descape), wrapped into a MsgBlock, formatted
// and split, and every resulting non-empty message is appended to the list.
// Always returns true.
bool StructureParser::endElement(const TQString &, const TQString &, const TQString &qName)
{
    TQString tname = qName.lower();
    // qDebug("endElement %s - %s %d", tname.latin1(), message.latin1(), inside);

    if (inside && !isSingleTag(qName)) {
        message += TQString("</%1").arg(tname);
        message += TQString(" poxml_line=\"%1\"").arg(locator->lineNumber());
        message += TQString(" poxml_col=\"%1\"").arg(locator->columnNumber());
        message += ">";
    }

    if (!isCuttingTag(tname))
        return true;

    inside--;
    if (inside)
        return true; // still nested inside an outer cutting tag

    // the outermost cutting tag was closed: the message is complete
    MsgBlock m;
    descape(message);
    m.msgid = message;

    BlockInfo bi;
    bi.start_line = startline;
    bi.start_col = startcol;
    bi.end_line = locator->lineNumber();
    bi.end_col = locator->columnNumber() + 1;
    // NOTE(review): nothing has been appended to m.lines at this point, so
    // this looks like first() on an empty list -- confirm that this is
    // well-defined for the list type used by MsgBlock
    bi.offset = m.lines.first().offset;
    m.lines.append(bi);

    formatMessage(m);
    MsgList messages = splitMessage(m);

    MsgList::Iterator it = messages.begin();
    while (it != messages.end())
    {
#ifdef POXML_DEBUG
        qDebug("parser '%s' %d '%s' %d:%d", (*it).msgid.latin1(), (*it).lines.first().offset, message.mid((*it).lines.first().offset, 15).latin1(), (*it).lines.first().start_line, (*it).lines.first().start_col);
#endif
        // if the remaining text still starts with a tag, the poxml_ info
        // is most probably more correct
        const bool enclosed = (*it).msgid.at(0) == '<' && isClosure((*it).msgid);
        if (enclosed && infos_reg.search((*it).msgid) >= 0) {
            (*it).lines.first().start_line = infos_reg.cap(1).toInt();
            (*it).lines.first().start_col = infos_reg.cap(2).toInt();
            (*it).lines.first().offset = 0;
        }

        // the synthetic position attributes must not end up in the msgid
        (*it).msgid.replace(infos_reg, TQString());
        if (!(*it).msgid.isEmpty())
            list.append(*it);

        ++it;
    }
    return true;
}
// Comments starting with " TRANS:" are not expected to reach the parser
// (cleanupTags rewrites them into <trans_comment> elements), so seeing one
// trips an assertion. Always returns true.
bool StructureParser::comment(const TQString &c)
{
    if (c.left(7) == " TRANS:") {
        assert(false);
    }
    return true;
}
// Returns a copy of _contents in which line feeds, '<', '>' and spaces are
// replaced by POXML_* placeholder entities; tabs are turned into a space
// first and therefore end up as space placeholders as well.
TQString StructureParser::escapeLiterals(const TQString &_contents)
{
    TQString escaped(_contents);
    // the order matters: tabs must become spaces before spaces are escaped
    escaped.replace(TQRegExp("\n"), "&POXML_LINEFEED;")
           .replace(TQRegExp("<"), "&POXML_LT;")
           .replace(TQRegExp(">"), "&POXML_GT;")
           .replace(TQRegExp("\t"), " ")
           .replace(TQRegExp(" "), "&POXML_SPACE;");
    return escaped;
}
// Returns a copy of _contents in which the POXML_* placeholders for line
// feed, '<', '>' and space, and the "!POXML_AMP!" marker for '&', are
// replaced by the characters they stand for.
TQString StructureParser::descapeLiterals(const TQString &_contents)
{
    TQString plain(_contents);
    plain.replace(TQRegExp("&POXML_LINEFEED;"), "\n")
         .replace(TQRegExp("&POXML_LT;"), "<")
         .replace(TQRegExp("&POXML_GT;"), ">")
         .replace(TQRegExp("&POXML_SPACE;"), " ")
         .replace(TQRegExp("!POXML_AMP!"), "&");
    return plain;
}
// Trims contents in place: ordinary whitespace is removed from both ends
// once, then leading and trailing line-feed/space placeholders are removed
// repeatedly until none is left at either end.
void StructureParser::stripWhiteSpace(TQString &contents)
{
    static const char *const linefeed = "&POXML_LINEFEED;";
    static const char *const space = "&POXML_SPACE;";

    contents = contents.stripWhiteSpace();

    for (bool changed = true; changed; ) {
        changed = false;
        if (contents.startsWith(linefeed)) {
            contents = contents.mid(strlen(linefeed), contents.length());
            changed = true;
        }
        if (contents.startsWith(space)) {
            contents = contents.mid(strlen(space), contents.length());
            changed = true;
        }
        if (contents.endsWith(linefeed)) {
            contents = contents.left(contents.length() - strlen(linefeed));
            changed = true;
        }
        if (contents.endsWith(space)) {
            contents = contents.left(contents.length() - strlen(space));
            changed = true;
        }
    }
}
// Rewrites the raw document text in contents, in place, before it is handed
// to the XML reader:
//  1. every '&' becomes "!POXML_AMP!";
//  2. the content of each literal tag is escaped with escapeLiterals();
//  3. whitespace in front of the '>' of closing tags is removed;
//  4. whitespace in front of the '>' or '/>' of other tags is removed;
//  5. self-closing elements with attributes that are not single tags are
//     expanded to an explicit start/end tag pair;
//  6. "<!-- TRANS:... -->" comments become <trans_comment> elements.
void StructureParser::cleanupTags( TQString &contents )
{
contents.replace(TQRegExp("&"), "!POXML_AMP!");
// escape the content of literal tags
for (int index = 0; literaltags[index]; index++) {
TQRegExp start(TQString("<%1[\\s>]").arg(literaltags[index]));
TQRegExp end(TQString("</%1[\\s>]").arg(literaltags[index]));
int strindex = 0;
while (true) {
strindex = contents.find(start, strindex);
if (strindex < 0)
break;
// move behind the '>' of the start tag
while (contents.at(strindex) != '>')
strindex++;
strindex++; // one more
// NOTE(review): the result of find() is not checked; if the end tag is
// missing, endindex is -1 and the length passed to mid() is negative --
// confirm how mid() treats that
int endindex = contents.find(end, strindex);
TQString part = contents.mid(strindex, endindex - strindex);
TQString newpart = escapeLiterals(part);
contents.replace(strindex, part.length(), newpart);
// this assumes that literal tags to not overlap
strindex = strindex + newpart.length();
}
}
// "</tag   >" -> "</tag>"
TQRegExp unclosed("</(\\w*)\\s\\s*>");
int index = -1;
while (true) {
index = unclosed.search(contents, index + 1);
if (index < 0)
break;
TQString tag = unclosed.cap(1);
contents.replace(index, unclosed.matchedLength(), TQString("</%1>").arg(tag));
}
// "<tag attr   >" -> "<tag attr>" and "<tag attr   />" -> "<tag attr/>"
TQRegExp start("<((\\s*[^<>\\s])*)\\s\\s*(/*)>");
start.setMinimal(true);
index = -1;
while (true) {
index = start.search(contents, index + 1);
if (index < 0)
break;
TQString tag = start.cap(1);
// the last capture holds the optional '/' in front of '>'
TQString cut = start.capturedTexts().last();
// qDebug("UNCLO %s %d -%s- -%s-", start.cap(0).latin1(), index, tag.latin1(), cut.latin1());
contents.replace(index, start.matchedLength(), TQString("<%1%2>").arg(tag).arg(cut));
}
// "<tag attrs/>" -> "<tag attrs></tag>" unless tag is a single tag
TQRegExp singletag("<(\\w*)\\s([^><]*)/>");
index = -1;
while (true) {
index = singletag.search(contents, index + 1);
if (index < 0)
break;
TQString tag = singletag.cap(1);
if (!StructureParser::isSingleTag(tag)) {
contents.replace(index, singletag.matchedLength(), TQString("<%1 %2></%3>").arg(tag).arg(singletag.cap(2)).arg(tag));
}
}
// translator comments become elements so that they reach the parser
TQRegExp trans_comment("<!-- TRANS:([^<>]*)-->");
index = -1;
while (true) {
index = trans_comment.search(contents, index + 1);
if (index < 0)
break;
TQString msgid = trans_comment.cap(1);
contents.replace(index, trans_comment.matchedLength(), TQString("<trans_comment>%1</trans_comment>").arg(msgid));
}
#ifdef POXML_DEBUG
qDebug("final %s", contents.latin1());
#endif
}
// Replaces the first element named tag whose content consists of whitespace
// only by a single space. Returns true when such an element was found and
// replaced, false when contents was left untouched.
static bool removeEmptyTag( TQString &contents, const TQString & tag)
{
    // qDebug("cont %s %s", contents.latin1(), tag.latin1());
    TQRegExp empty(TQString("<%1[^>]*>[\\s\n][\\s\n]*</%2\\s*>").arg(tag).arg(tag));

    const int pos = contents.find(empty, 0);
    if (pos < 0)
        return false;

    qDebug("found empty tag %s", tag.latin1());
    contents.replace(pos, empty.matchedLength(), " ");
    return true;
}
// Removes whitespace-only elements from contents, one at a time, until a
// full pass over all cutting tags and "glossterm" removes nothing more.
void StructureParser::removeEmptyTags( TQString &contents )
{
    for (;;) {
        bool removed = false;

        // stop at the first cutting tag for which something was removed
        for (const char **tag = cuttingtags; *tag && !removed; ++tag)
            removed = removeEmptyTag(contents, *tag);

        // as glossterm has two different semantics, it's likely
        // to break something when it's cuttingtag
        if (removeEmptyTag(contents, "glossterm"))
            removed = true;

        if (!removed)
            break;
    }
}
// Appends character data to the message being collected, if any.
// Always returns true.
bool StructureParser::characters(const TQString &ch)
{
    if (!inside || ch.isEmpty())
        return true;

    message += ch;
    return true;
}
// Returns message with every backslash doubled and every double quote
// preceded by a backslash. Backslashes are handled first so that the
// backslashes added for the quotes are not doubled again.
TQString escape(TQString message)
{
    return message.replace(TQRegExp("\\\\"), "\\\\")
                  .replace(TQRegExp("\""), "\\\"");
}
// Writes message to stdout as a quoted entry introduced by prefix.
//
// A message consisting of one line is written on the prefix line itself.
// Otherwise the prefix line carries an empty string and every line of the
// message follows as a quoted string of its own; all but the last of them
// end with an escaped line feed. Line contents are passed through escape().
void outputMsg(const char *prefix, const TQString &message)
{
    TQStringList list = TQStringList::split('\n', message, true);

    if (list.count() == 1) {
        const TQString line = list.first();
        cout << prefix << " \"";
        if (!line.isEmpty())
            cout << escape(line).utf8().data();
        cout << "\"\n";
        return;
    }

    cout << prefix << " \"\"\n";
    for (TQStringList::ConstIterator it = list.begin(); it != list.end(); it++) {
        const TQString line = *it;
        const bool isLast = (it == list.fromLast());

        cout << " \"";
        if (!line.isEmpty())
            cout << escape(line).utf8().data();
        if (isLast)
            cout << "\"\n";
        else
            cout << "\\n\"\n";
    }
}
// Converts the escape sequences of a PO string into the characters they
// stand for and returns the result.
//
// The sequences are handled in four passes (line feed, quote, tab,
// backslash). Sequences that are themselves preceded by a backslash are
// parked as placeholder entities during the first two passes and resolved
// at the very end.
TQString escapePO(TQString msgid)
{
    int pos;

    // pass 1: backslash + 'n'
    for (pos = msgid.find("\\n", 0); pos != -1; pos = msgid.find("\\n", pos)) {
        if (pos >= 1 && msgid.at(pos - 1) == '\\' && msgid.at(pos - 2) != '\\') {
            // preceded by a single backslash: not a line feed, park it
            msgid.replace(pos - 1, 3, "&POXML_LITERALLINEFEED;");
            pos += 3;
        } else {
            msgid.replace(pos, 2, "\n");
        }
    }

    // pass 2: backslash + double quote
    for (pos = msgid.find("\\\"", 0); pos != -1; pos = msgid.find("\\\"", pos)) {
        if (pos > 1 && msgid.at(pos - 1) == '\\' && msgid.at(pos - 2) != '\\')
            msgid.replace(pos - 1, 3, "&POXML_LITERALTQUOTE;");
        else
            msgid.replace(pos, 2, "\"");
    }

    // pass 3: backslash + 't'
    for (pos = msgid.find("\\t", 0); pos != -1; pos = msgid.find("\\t", pos)) {
        if (msgid.at(pos - 1) == '\\')
            msgid.replace(pos - 1, 3, "\\t");
        else
            msgid.replace(pos, 2, "\t");
    }

    // pass 4: doubled backslash; step over the backslash just written
    for (pos = msgid.find("\\\\", 0); pos != -1; pos = msgid.find("\\\\", pos)) {
        msgid.replace(pos, 2, "\\");
        pos += 1;
    }

    // resolve the placeholders parked in passes 1 and 2
    msgid.replace(TQRegExp("&POXML_LITERALLINEFEED;"), "\\n");
    msgid.replace(TQRegExp("&POXML_LITERALTQUOTE;"), "\\");
    return msgid;
}
// Reads the XML document stored in filename and returns the list of
// messages extracted from it.
//
// The file content is decoded as UTF-8, cleaned up with
// StructureParser::cleanupTags(), stripped of its <!ENTITY ...> declarations
// (line feeds inside them are kept so that line numbers stay the same) and
// then parsed with a StructureParser as content, lexical and DTD handler.
// Afterwards message ids that are shorter than four characters, or that are
// shared by messages with different tags, are wrapped into their tag to make
// them distinguishable.
//
// If the file cannot be opened, a diagnostic is written to stderr and the
// (unfilled) list of the handler is returned.
MsgList parseXML(const char *filename)
{
    StructureParser handler;

    TQFile xmlFile( filename );
    if (!xmlFile.open(IO_ReadOnly)) {
        cerr << "parseXML: cannot open " << (filename ? filename : "(null)") << endl;
        return handler.getList();
    }

    // Copy exactly the bytes that were read (not the size reported by the
    // file) into a zero-terminated buffer.
    const TQByteArray raw = xmlFile.readAll();
    xmlFile.close();
    TQCString ccontents(raw.data(), raw.size() + 1);

    TQString contents = TQString::fromUtf8( ccontents );
    StructureParser::cleanupTags(contents);

    // remove all entity declarations, keeping only their line feeds
    while (true) {
        int index = contents.find("<!ENTITY");
        if (index < 0)
            break;
        const int length = contents.length();
        int inside = 0; // depth of nested '<' ... '>' inside the declaration
        int endindex = index + 1;
        TQString replacement = "";
        // bounded by the text length so that an unterminated declaration
        // cannot keep the scan running forever
        while (endindex < length && (contents.at(endindex) != '>' || inside))
        {
            switch (contents.at(endindex).latin1()) {
            case '<':
                inside++; break;
            case '>':
                inside--; break;
            case '\n':
                replacement += '\n';
                break;
            default:
                break;
            }
            endindex++;
        }
        if (endindex < length)
            endindex++; // include the closing '>'
        contents.replace(index, endindex - index, replacement);
    }

    TQTextStream ts(contents.utf8(), IO_ReadOnly);
    TQXmlInputSource source( ts );
    TQXmlSimpleReader reader;
    reader.setFeature( "http://trolltech.com/xml/features/report-start-end-entity", true);
    reader.setContentHandler( &handler );
    reader.setLexicalHandler( &handler );
    reader.setDTDHandler( &handler );
    // reader.setErrorHandler( &handler );
    reader.parse( source );

    MsgList english = handler.getList();

    // make ambiguous message ids unique; start over after every change
    bool changed = false;
    do {
        changed = false;
        TQMap<TQString, TQString> msgids; // msgid -> tag it was first seen with
        for (MsgList::Iterator it = english.begin();
             it != english.end(); it++)
        {
            TQMap<TQString,TQString>::Iterator found = msgids.find((*it).msgid);
            // very short ids always get their tag wrapped around them
            if ((*it).msgid.length() < 4) {
                (*it).msgid = TQString("<%1>").arg((*it).tag) + (*it).msgid +
                              TQString("</%1>").arg((*it).tag);
                changed = true;
                break;
            }
            if (found != msgids.end()) {
                if (found.data() != (*it).tag) {
#ifdef POXML_DEBUG
                    qDebug("same msgid for '%s' and '%s'", found.data().latin1(), (*it).tag.latin1());
#endif
                    // same text under different tags: wrap every message
                    // with this id into its own tag
                    changed = true;
                    TQString msgid = (*it).msgid;
                    for (MsgList::Iterator it2 = english.begin();
                         it2 != english.end(); it2++)
                    {
                        if ((*it2).msgid == msgid)
                            (*it2).msgid = TQString("<%1>").arg((*it2).tag) + msgid + TQString("</%1>").arg((*it2).tag);
                    }
                    break;
                }
            } else {
                msgids.insert((*it).msgid, (*it).tag);
            }
        }
    } while (changed);

    return english;
}