You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1009 lines
32 KiB
1009 lines
32 KiB
// #define POXML_DEBUG
|
|
|
|
#include "parser.h"
|
|
#include <iostream>
|
|
#include <stdlib.h>
|
|
#include <assert.h>
|
|
#include <tqregexp.h>
|
|
|
|
using namespace std;
|
|
|
|
// Null-terminated list of element names that isSingleTag() recognises.
// startElement() serialises these as "<name .../>" and endElement() emits no
// closing tag for them.
static const char *singletags[] = {
    "beginpage",
    "imagedata",
    "colspec",
    "spanspec",
    "anchor",
    "xref",
    "area",
    "footnoteref",
    "void",
    "inlinegraphic",
    "glosssee",
    "graphic",
    "xi:include",
    0
};
|
|
// Null-terminated list of element names that isCuttingTag() recognises (in
// addition to the literal tags). removeEmptyTags() also walks this list in
// order, so the order of first occurrence is kept as it was.
//
// Each name is listed once; earlier revisions repeated "step", "remark" and
// "glossentry", which only cost extra comparisons.
//
// NOTE(review): "blockingquote", "refsysnopsisdivinfo" and "sect" do not look
// like DocBook element names ("blockquote" / "refsynopsisdivinfo" seem more
// likely). They are left untouched because changing the set changes which
// messages are extracted - confirm before correcting.
static const char *cuttingtags[] = {
    "bridgehead", "trans_comment", "para", "title", "term",
    "entry", "contrib", "keyword", "example",
    "note", "footnote", "caution",
    "informalexample", "remark", "comment",
    "imageobject", "varlistentry", "thead",
    "tbody", "tgroup", "row", "screenshot", "screeninfo",
    "variablelist", "step", "procedure",
    "holder", "listitem", "important",
    "author", "itemizedlist", "orderedlist",
    "caption", "textobject", "mediaobject",
    "tip", "glossdef", "inlinemediaobject",
    "simplelist", "member", "glossentry",
    "areaspec", "corpauthor", "indexterm",
    "calloutlist", "callout", "subtitle",
    "table", "part", "xi:fallback", "primary",
    "secondary", "chapter", "sect1", "sect2",
    "figure", "abstract", "sect3", "sect", "sect4",
    "warning", "preface", "authorgroup", "keywordset",
    "informaltable", "qandaentry", "question", "answer",
    "othercredit", "affiliation", "qandaset",
    "cmdsynopsis", "funcsynopsis", "funcsynopsisinfo",
    "epigraph", "attribution", "glossary", "chapterinfo",
    "glossdiv", "blockingquote", "simplesect", "section",
    "qandadiv", "refsect1", "refmeta", "formalpara",
    "refentry", "refnamediv", "refpurpose", "refentrytitle",
    "refmiscinfo", "refsect2", "refsect3", "refsect1info",
    "refsect2info", "refsect3info", "refsection", "refsectioninfo",
    "refsynopsisdiv", "refsysnopsisdivinfo",
    "revdescription", "partinfo",
    "segmentedlist", "segtitle", "seg", "seglistitem", "screenco",
    0
};
|
|
// Null-terminated list of element names that isLiteralTag() recognises.
// descape() leaves whitespace inside these elements untouched and
// cleanupTags() escapes their content before parsing.
static const char *literaltags[] = {
    "literallayout",
    "synopsis",
    "screen",
    "programlisting",
    0
};
|
|
|
|
bool StructureParser::fatalError ( const TQXmlParseException &e )
|
|
{
|
|
cerr << "fatalError " << e.message().latin1() << " " << e.lineNumber() << " "
|
|
<< e.columnNumber() << endl;
|
|
return false;
|
|
}
|
|
|
|
bool StructureParser::startDocument()
|
|
{
|
|
infos_reg = TQRegExp("\\s*poxml_line=\"(\\d+)\" poxml_col=\"(\\d+)\"");
|
|
do_not_split_reg = TQRegExp("\\s*condition=\"do-not-split\"");
|
|
message = "";
|
|
inside = 0;
|
|
return true;
|
|
}
|
|
|
|
bool StructureParser::isCuttingTag(const TQString &qName)
|
|
{
|
|
int index = 0;
|
|
while (cuttingtags[index]) {
|
|
if (cuttingtags[index] == qName)
|
|
return true;
|
|
index++;
|
|
}
|
|
return isLiteralTag(qName);
|
|
}
|
|
|
|
bool StructureParser::isSingleTag(const TQString &qName)
|
|
{
|
|
int index = 0;
|
|
while (singletags[index]) {
|
|
if (singletags[index] == qName)
|
|
return true;
|
|
index++;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool StructureParser::isLiteralTag(const TQString &qName)
|
|
{
|
|
int index = 0;
|
|
while (literaltags[index]) {
|
|
if (literaltags[index] == qName)
|
|
return true;
|
|
index++;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool StructureParser::skippedEntity ( const TQString & name )
|
|
{
|
|
if (inside)
|
|
message += TQString("&%1;").arg(name);
|
|
return true;
|
|
}
|
|
|
|
bool StructureParser::startElement( const TQString& , const TQString& ,
|
|
const TQString& qName,
|
|
const TQXmlAttributes & attr )
|
|
{
|
|
TQString tname = qName.lower();
|
|
|
|
bool first = false;
|
|
|
|
if (isCuttingTag(tname)) {
|
|
if (!inside) {
|
|
message = TQString();
|
|
list.pc.increasePara();
|
|
startline = locator->lineNumber();
|
|
startcol = locator->columnNumber();
|
|
first = true;
|
|
}
|
|
inside++;
|
|
}
|
|
|
|
if (inside)
|
|
{
|
|
TQString tmp = "<" + tname;
|
|
for (int i = 0; i < attr.length(); i++) {
|
|
tmp += TQString(" %1=\"%2\"").arg(attr.qName(i)).arg(attr.value(i));
|
|
}
|
|
tmp += TQString(" poxml_line=\"%1\"").arg(locator->lineNumber());
|
|
tmp += TQString(" poxml_col=\"%1\"").arg(locator->columnNumber());
|
|
|
|
if (isSingleTag(qName))
|
|
tmp += "/>";
|
|
else
|
|
tmp += ">";
|
|
message += tmp;
|
|
if (first)
|
|
startcol -= message.length();
|
|
}
|
|
|
|
if (tname == "anchor" || tname.left(4) == "sect" || tname == "chapter")
|
|
if (!attr.value("id").isEmpty()) list.pc.addAnchor(attr.value("id"));
|
|
|
|
return true;
|
|
}
|
|
|
|
bool StructureParser::startCDATA()
|
|
{
|
|
if ( inside )
|
|
message += "<![CDATA[";
|
|
return true;
|
|
}
|
|
|
|
bool StructureParser::endCDATA()
|
|
{
|
|
if ( inside )
|
|
message += "]]>";
|
|
return true;
|
|
}
|
|
|
|
bool StructureParser::isClosure(const TQString &message)
|
|
{
|
|
assert(message.at(0) == '<');
|
|
int endindex = 1;
|
|
while (!message.at(endindex).isSpace() && message.at(endindex) != '>')
|
|
endindex++;
|
|
TQString tag = message.mid(1, endindex - 1);
|
|
return closureTag(message, tag);
|
|
}
|
|
|
|
bool StructureParser::closureTag(const TQString& message, const TQString &tag)
|
|
{
|
|
#ifdef POXML_DEBUG
|
|
qDebug("closureTag %s %s", message.latin1(), tag.latin1());
|
|
#endif
|
|
|
|
int inside = 0;
|
|
uint index = 0;
|
|
while (true)
|
|
{
|
|
int nextclose = message.find(TQRegExp(TQString::fromLatin1("</%1[\\s>]").arg(tag)), index);
|
|
int nextstart = message.find(TQRegExp(TQString::fromLatin1("<%1[>\\s]").arg(tag)), index);
|
|
// qDebug("finding %d %d %d %d", nextstart, nextclose, index, inside);
|
|
if (nextclose == -1) {
|
|
#ifdef POXML_DEBUG
|
|
qDebug("ending on no close anymore %d %d %d %d", (!inside && index >= message.length()), inside, index, message.length());
|
|
#endif
|
|
return !inside && index >= message.length();
|
|
}
|
|
if (nextstart == -1)
|
|
nextstart = message.length() + 1;
|
|
|
|
if (nextstart < nextclose) {
|
|
inside++;
|
|
index = nextstart + 1;
|
|
while (message.at(index) != '>')
|
|
index++;
|
|
index++;
|
|
} else {
|
|
inside--;
|
|
index = nextclose + 1;
|
|
while (message.at(index) != '>')
|
|
index++;
|
|
index++;
|
|
if (!inside) {
|
|
#ifdef POXML_DEBUG
|
|
qDebug("ending on exit %d", index >= message.length());
|
|
#endif
|
|
return index >= message.length();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Normalises whitespace in a collected message, in place.
//
// Outside of literal tags, newline, tab and carriage return become a space and
// every whitespace character that directly follows another one is dropped
// (it is first marked with the character \010 and all marks are removed at the
// end). Inside literal tags the characters are left as they are, but they
// still count as "previous whitespace".
void StructureParser::descape(TQString &message)
{
    stripWhiteSpace( message );

    int literalDepth = 0;      // nesting level of literal tags
    bool prevWasSpace = false; // previous character was whitespace

    for (uint pos = 0; pos < message.length(); ++pos) {
        switch (message.at(pos).latin1()) {
        case '\n':
        case '\t':
        case '\r':
            if (!literalDepth)
                message[pos] = ' ';
            // deliberate fall through: handled like a blank from here on
        case ' ':
            if (!literalDepth && prevWasSpace)
                message[pos] = '\010';
            prevWasSpace = true;
            break;
        case '<': {
            // Read the tag name to keep track of literal sections.
            uint nameEnd = pos + 1;
            while (nameEnd < message.length() && !message.at(nameEnd).isSpace() &&
                   message.at(nameEnd) != '>')
                nameEnd++;
            TQString tag = message.mid(pos + 1, nameEnd - pos - 1);
            if (tag.at(0) == '/') {
                if (isLiteralTag(tag.mid(1)))
                    literalDepth--;
            } else if (isLiteralTag(tag)) {
                literalDepth++;
            }
            break;
        }
        default:
            prevWasSpace = false;
        }
    }

    message.replace(TQRegExp("\010"), "");
}
|
|
|
|
// Strips decoration from msg.msgid in place and keeps msg's first BlockInfo in
// step with what was removed.
//
// Steps, in order:
//  1. leading blanks and POXML whitespace entities are removed;
//  2. a leading single tag (see singletags) is removed;
//  3. trailing "<.../>" tags are removed;
//  4. as long as the message is wrapped in one matching start/end tag pair
//     (checked with closureTag), the pair is removed, msg.tag is set to its
//     name and the poxml_line/poxml_col markers of the pair replace the
//     start/end position. A wrapper carrying condition="do-not-split" sets
//     msg.do_not_split and stops the stripping.
// The number of characters removed from the front is added to the first
// BlockInfo's offset. If anything changed and splitting is still allowed the
// function runs again on the result.
//
// Returns true for an empty msgid or when msg.do_not_split is set at the end
// of this call, false otherwise. NOTE(review): the result of the nested call
// is ignored, so a do-not-split wrapper found only there is not reflected in
// the return value - confirm whether that is intended.
//
// All scans for a tag delimiter are bounded by the string length so that a
// msgid with an unterminated tag cannot cause an endless loop.
bool StructureParser::formatMessage(MsgBlock &msg) const
{
#ifdef POXML_DEBUG
    qDebug("formatMessage %s", msg.msgid.latin1());
#endif

    int offset = 0;
    bool changed = false;
    bool recurse = true;

    if (msg.msgid.isEmpty())
        return true;

    // Count the leading blanks before they are stripped.
    for (int index = 0; msg.msgid.at(index) == ' '; index++, offset++);
    stripWhiteSpace( msg.msgid );

    // removing starting single tags
    for (int index = 0; singletags[index]; index++)
    {
        int slen = strlen(singletags[index]);

        if (msg.msgid.left(slen + 1) == TQString::fromLatin1("<%1").arg(singletags[index]) &&
            !msg.msgid.at( slen + 1 ).isLetterOrNumber() )
        {
#ifdef POXML_DEBUG
            qDebug("removing single tag %s", singletags[index]);
#endif
            int strindex = slen + 1;
            const int msglen = msg.msgid.length();
            while (strindex < msglen && msg.msgid.at(strindex) != '>')
                strindex++;
            msg.msgid = msg.msgid.mid(strindex + 1);
            changed = true;
            offset += strindex + 1;
            for (int blank = 0; msg.msgid.at(blank) == ' '; blank++, offset++) ;
            stripWhiteSpace( msg.msgid );
        }
    }

    // removing trailing single tags
    while (msg.msgid.right(2) == "/>")
    {
        int strindex = msg.msgid.length() - 2;
        while (strindex >= 0 && msg.msgid.at(strindex) != '<')
            strindex--;
        if (strindex < 0)
            break; // "/>" without any '<' before it: nothing to cut
        msg.msgid = msg.msgid.left(strindex);
        stripWhiteSpace( msg.msgid ); // only removed space at the end
        changed = true;
    }

    for (int index = 0; msg.msgid.at(index) == ' '; index++, offset++) ;
    stripWhiteSpace( msg.msgid );

    while (true) {
        if (msg.msgid.at(0) != '<')
            break;
        if (msg.msgid.at(msg.msgid.length() - 1) != '>')
            break;

        // Name of the leading tag; the trailing '>' checked above bounds this scan.
        int strindex = 1;
        while (msg.msgid.at(strindex) != ' ' && msg.msgid.at(strindex) != '>')
            strindex++;
        TQString starttag = msg.msgid.mid(1, strindex - 1);

        // Walk back to the start of the last tag; the leading '<' checked
        // above bounds this scan.
        // NOTE(review): the condition stops at a '<' or at any character
        // followed by '/', not only at "</" - splitMessage() uses '||' for
        // the same search. Left as is; closureTag() below rejects mismatches.
        int endindex = msg.msgid.length() - 2;
        while (msg.msgid.at(endindex) != '<' && msg.msgid.at(endindex + 1) != '/')
            endindex--;
#ifdef POXML_DEBUG
        qDebug("endIndex %d", endindex);
#endif

        TQString endtag = msg.msgid.mid(endindex + 2, msg.msgid.length() - (endindex + 2) - 1);
        TQString endtag_attr = endtag.mid(endtag.find(' '), endtag.length());
        endtag.replace(infos_reg, "");
        if (endtag == starttag) {
            if (!closureTag(msg.msgid, starttag))
                break;

            // removing start/end tags
            msg.msgid = msg.msgid.left(endindex);
            strindex = 0;
            const int msglen = msg.msgid.length();
            while (strindex < msglen && msg.msgid.at(strindex) != '>')
                strindex++;
            TQString attr = msg.msgid.left(strindex);
            msg.msgid = msg.msgid.mid(strindex + 1);
            offset += strindex + 1;
            for (int index = 0; msg.msgid.at(index) == ' '; index++, offset++) ;
            stripWhiteSpace( msg.msgid );
            msg.tag = starttag;

            if (infos_reg.search(attr) >= 0) {
                msg.lines.first().start_line = infos_reg.cap(1).toInt();
                msg.lines.first().start_col = infos_reg.cap(2).toInt();
#ifdef POXML_DEBUG
                qDebug("col %s %s %d", attr.latin1(), msg.msgid.latin1(), msg.lines.first().start_col);
#endif
                // The position now comes from the marker, so the characters
                // counted so far no longer apply.
                offset = 0;

                if (infos_reg.search(endtag_attr) >= 0) {
                    msg.lines.first().end_line = infos_reg.cap(1).toInt();
                    msg.lines.first().end_col = infos_reg.cap(2).toInt() + 1;
                }
            }
            if (do_not_split_reg.search(attr) >= 0) {
                msg.do_not_split = true;
                break;
            }

            changed = true;
        } else
            break;
    }

#ifdef POXML_DEBUG
    qDebug("formatMessage result %s %d %d", msg.msgid.latin1(), changed && recurse, msg.lines.first().start_col);
#endif

    msg.lines.first().offset += offset;
    if (msg.do_not_split)
        recurse = false;

    if (changed && recurse)
        formatMessage(msg);

    return !recurse; // indicates an abort
}
|
|
|
|
// Splits a message at cutting-tag boundaries and returns the resulting pieces.
//
// * If the message starts with a cutting tag that does not enclose the whole
//   message (closureTag() is false), it is cut behind the matching closing
//   tag. Both halves are run through formatMessage(); if both report an abort
//   they are returned as they are, otherwise each half is split further.
// * Otherwise, if the message ends with the closing tag of a cutting tag, it
//   is cut in front of the matching opening tag and both halves are formatted
//   and split further.
// * In every other case the message is returned unchanged as the only entry.
//
// After a cut, overlapping line/column ranges of the two halves are adjusted.
//
// The tag regular expressions are built once per search instead of once per
// loop iteration, and the forward scans for a tag delimiter are bounded by the
// message length so an unterminated tag cannot cause an endless loop.
MsgList StructureParser::splitMessage(const MsgBlock &mb)
{
    MsgList result;

    MsgBlock msg1 = mb;
    MsgBlock msg2 = mb;

    TQString message = mb.msgid;
    const int msglen = message.length();

#ifdef POXML_DEBUG
    qDebug("splitMessage %s", message.latin1());
#endif

    if (message.at(0) == '<') {
        int endindex = 1;
        while (endindex < msglen &&
               !message.at(endindex).isSpace() && message.at(endindex) != '>')
            endindex++;
        TQString tag = message.mid(1, endindex - 1);

        // The leading tag wraps the whole message: nothing to split.
        if (closureTag(message, tag)) {
            result.append(mb);
            return result;
        }

        if (isCuttingTag(tag))
        {
            // if the message starts with a cutting tag, this tag has to
            // end in between. We split both messages and format them
            const TQRegExp closeReg(TQString::fromLatin1("</%1[\\s>]").arg(tag));
            const TQRegExp openReg(TQString::fromLatin1("<%1[\\s>]").arg(tag));

            int strindex = endindex;
            strindex++;

            int inside = 1;
            while (true) {
#ifdef POXML_DEBUG
                qDebug("inside %s %d", message.mid(strindex, 35).latin1(), inside);
#endif

                // the exception for poxml_* attributes is made in the closing tag
                int closing_index = message.find(closeReg, strindex);
                int starting_index = message.find(openReg, strindex);

#ifdef POXML_DEBUG
                qDebug("index1 %d %d %d", closing_index, starting_index, strindex);
#endif

                // when a new start was found, we set the start_index after the next match
                // (and set strindex to it later - increasing inside)
                if (starting_index != -1) {
                    starting_index += tag.length() + 1;
                    while (starting_index < msglen && message.at(starting_index) != '>')
                        starting_index++;
                    starting_index++;
                }

#ifdef POXML_DEBUG
                qDebug("index %d %d %d", closing_index, starting_index, strindex);
#endif

                assert(closing_index != -1);
                // Move behind the '>' of the closing tag.
                closing_index += 3 + tag.length();
                while (closing_index - 1 < msglen && message.at(closing_index - 1) != '>')
                    closing_index++;

                if (starting_index == -1) {
                    strindex = closing_index;
#ifdef POXML_DEBUG
                    qDebug("set strindex %d", strindex);
#endif
                    inside--;
                    if (!inside)
                        break;
                    continue;
                }
                if (closing_index < starting_index)
                {
                    strindex = closing_index;
                    inside--;
                } else {
                    strindex = starting_index;
                    inside++;
                }

                if (!inside)
                    break;
            }

#ifdef POXML_DEBUG
            qDebug("split into %s -AAAAAANNNNNNDDDDDD- %s", message.left(strindex).latin1(), message.mid(strindex).latin1());
#endif
            msg1.msgid = message.left(strindex);
            bool leave = formatMessage(msg1);

            msg2.msgid = message.mid(strindex);
            msg2.lines.first().offset += strindex;
            // Non-short-circuit '&': the second half has to be formatted in any case.
            leave = leave & formatMessage(msg2);

            if (msg1.lines.first().end_line > msg2.lines.first().start_line ||
                (msg1.lines.first().end_line == msg2.lines.first().start_line &&
                 msg1.lines.first().end_col > msg2.lines.first().start_col))
            {
                msg2.lines.first().start_line = msg1.lines.first().end_line;
                msg2.lines.first().start_col = msg1.lines.first().end_col;
            }

#ifdef POXML_DEBUG
            qDebug("splited %d-%d(%s) and %d-%d(%s)", msg1.lines.first().end_line,msg1.lines.first().end_col,
                   msg1.msgid.latin1(),
                   msg2.lines.first().start_line,msg2.lines.first().start_col, msg2.msgid.latin1());
#endif

            if (leave) {
                result.append(msg1);
                result.append(msg2);
                return result;
            }
            result = splitMessage(msg1);
            result += splitMessage(msg2);
            return result;
        }
    }

    if (message.at(message.length() - 1 ) == '>')
    {
        // Walk back to the "</" that starts the last closing tag.
        int endindex = message.length() - 1;
        while (endindex >= 0 && (message.at(endindex) != '<' || message.at(endindex + 1) != '/'))
            endindex--;
        TQString tag = message.mid(endindex + 2, message.length() - endindex - 3);
        if (tag.find(' ') > 0 ) {
            tag = tag.left(tag.find(' '));
        }
#ifdef POXML_DEBUG
        qDebug("behind tag %s", tag.latin1());
#endif

        if (isCuttingTag(tag))
        {
            // if the message ends with a cutting tag, this tag has to
            // start in between. We split both messages and format them
            const TQRegExp closeReg(TQString::fromLatin1("</%1[\\s>]").arg(tag));
            const TQRegExp openReg(TQString::fromLatin1("<%1[\\s>]").arg(tag));

            int strindex = endindex;

            int inside = 1;
            while (true) {
#ifdef POXML_DEBUG
                qDebug("inside %s %d", message.mid(strindex, 35).latin1(), inside);
#endif

                int closing_index = message.findRev(closeReg, strindex - 1);
                int starting_index = message.findRev(openReg, strindex - 1);

#ifdef POXML_DEBUG
                qDebug("index1 %d %d %d", closing_index, starting_index, strindex);
#endif

                if (starting_index == -1) {
                    assert(inside == 1);
                    break;
                }

                if (closing_index > starting_index)
                {
                    strindex = closing_index;
                    inside++;
                } else {
                    strindex = starting_index;
                    inside--;
                }

                if (!inside)
                    break;
            }

#ifdef POXML_DEBUG
            qDebug("split2 into \"%s\" -AAAAAANNNNNNNNNDDDDDDDDDDD- \"%s\"", message.left(strindex).latin1(), message.mid(strindex).latin1());
#endif

            msg1.msgid = message.left(strindex);
            formatMessage(msg1);

            msg2.msgid = message.mid(strindex);
            msg2.lines.first().offset += strindex;
            formatMessage(msg2);

            if (msg1.lines.first().end_line > msg2.lines.first().start_line ||
                (msg1.lines.first().end_line == msg2.lines.first().start_line &&
                 msg1.lines.first().end_col > msg2.lines.first().start_col))
            {
                msg1.lines.first().end_line = msg2.lines.first().start_line;
                msg1.lines.first().end_col = msg2.lines.first().start_col - 1;
            }

#ifdef POXML_DEBUG
            qDebug("splited %d-%d(%s) and %d-%d(%s)", msg1.lines.first().end_line,msg1.lines.first().end_col,
                   msg1.msgid.latin1(),
                   msg2.lines.first().start_line,msg2.lines.first().start_col, msg2.msgid.latin1());
#endif

            result = splitMessage(msg1);
            result += splitMessage(msg2);

            return result;
        }
    }

    // Nothing to split: hand the message back unchanged.
    result.append(mb);
    return result;
}
|
|
|
|
bool StructureParser::endElement( const TQString& , const TQString&, const TQString& qName)
|
|
{
|
|
TQString tname = qName.lower();
|
|
|
|
// qDebug("endElement %s - %s %d", tname.latin1(), message.latin1(), inside);
|
|
|
|
if (inside) {
|
|
if (!isSingleTag(qName)) {
|
|
message += TQString("</%1").arg(tname);
|
|
message += TQString(" poxml_line=\"%1\"").arg(locator->lineNumber());
|
|
message += TQString(" poxml_col=\"%1\"").arg(locator->columnNumber());
|
|
message += ">";
|
|
}
|
|
}
|
|
|
|
if (isCuttingTag(tname)) {
|
|
inside--;
|
|
if (!inside) {
|
|
MsgBlock m;
|
|
descape(message);
|
|
m.msgid = message;
|
|
|
|
BlockInfo bi;
|
|
bi.start_line = startline;
|
|
bi.start_col = startcol;
|
|
bi.end_line = locator->lineNumber();
|
|
bi.end_col = locator->columnNumber() + 1;
|
|
bi.offset = m.lines.first().offset;
|
|
m.lines.append(bi);
|
|
formatMessage(m);
|
|
|
|
MsgList messages = splitMessage(m);
|
|
for (MsgList::Iterator it = messages.begin();
|
|
it != messages.end(); ++it)
|
|
{
|
|
#ifdef POXML_DEBUG
|
|
qDebug("parser '%s' %d '%s' %d:%d", (*it).msgid.latin1(), (*it).lines.first().offset, message.mid((*it).lines.first().offset, 15).latin1(), (*it).lines.first().start_line, (*it).lines.first().start_col);
|
|
#endif
|
|
// if the remaining text still starts with a tag, the poxml_ info
|
|
// is most probably more correct
|
|
if ((*it).msgid.at(0) == '<' && isClosure((*it).msgid)) {
|
|
if (infos_reg.search((*it).msgid) >= 0) {
|
|
(*it).lines.first().start_line = infos_reg.cap(1).toInt();
|
|
(*it).lines.first().start_col = infos_reg.cap(2).toInt();;
|
|
(*it).lines.first().offset = 0;
|
|
}
|
|
}
|
|
(*it).msgid.replace(infos_reg, TQString());
|
|
|
|
if (!(*it).msgid.isEmpty())
|
|
list.append(*it);
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool StructureParser::comment ( const TQString &c )
|
|
{
|
|
if (c.left(7) != " TRANS:")
|
|
return true;
|
|
|
|
assert(false);
|
|
return true;
|
|
}
|
|
|
|
// Returns a copy of _contents in which line feeds, '<', '>' and blanks are
// replaced by POXML placeholder entities. Tabs are turned into a blank first
// and therefore end up as a space placeholder too.
TQString StructureParser::escapeLiterals( const TQString &_contents) {
    // Applied top to bottom; the tab rule has to come before the blank rule.
    static const char *const substitutions[][2] = {
        { "\n", "&POXML_LINEFEED;" },
        { "<",  "&POXML_LT;" },
        { ">",  "&POXML_GT;" },
        { "\t", " " },
        { " ",  "&POXML_SPACE;" }
    };

    TQString escaped = _contents;
    for (uint n = 0; n < sizeof(substitutions) / sizeof(substitutions[0]); ++n)
        escaped.replace(TQRegExp(substitutions[n][0]), substitutions[n][1]);

    return escaped;
}
|
|
|
|
// Returns a copy of _contents in which the POXML placeholders for line feed,
// '<', '>', blank and '&' are turned back into the characters they stand for.
TQString StructureParser::descapeLiterals( const TQString &_contents) {
    // Applied top to bottom.
    static const char *const substitutions[][2] = {
        { "&POXML_LINEFEED;", "\n" },
        { "&POXML_LT;",       "<" },
        { "&POXML_GT;",       ">" },
        { "&POXML_SPACE;",    " " },
        { "!POXML_AMP!",      "&" }
    };

    TQString plain = _contents;
    for (uint n = 0; n < sizeof(substitutions) / sizeof(substitutions[0]); ++n)
        plain.replace(TQRegExp(substitutions[n][0]), substitutions[n][1]);

    return plain;
}
|
|
|
|
// Strips surrounding whitespace from contents, then repeatedly removes
// leading and trailing line-feed and space placeholders until none is left at
// either end.
void StructureParser::stripWhiteSpace( TQString &contents)
{
    static const char *const linefeed = "&POXML_LINEFEED;";
    static const char *const space = "&POXML_SPACE;";

    contents = contents.stripWhiteSpace();

    for (bool again = true; again; ) {
        again = false;

        if (contents.startsWith(linefeed)) {
            contents = contents.mid(strlen(linefeed), contents.length());
            again = true;
        }
        if (contents.startsWith(space)) {
            contents = contents.mid(strlen(space), contents.length());
            again = true;
        }
        if (contents.endsWith(linefeed)) {
            contents = contents.left(contents.length() - strlen(linefeed));
            again = true;
        }
        if (contents.endsWith(space)) {
            contents = contents.left(contents.length() - strlen(space));
            again = true;
        }
    }
}
|
|
|
|
// Rewrites the raw document text in place before it is handed to the XML
// reader:
//  1. every '&' becomes "!POXML_AMP!";
//  2. the content of each literal tag is escaped with escapeLiterals();
//  3. whitespace before the '>' of closing tags is removed;
//  4. whitespace before the '>' or "/>" of opening tags is removed;
//  5. "<tag attrs/>" is expanded to "<tag attrs></tag>" unless the tag is a
//     single tag;
//  6. "<!-- TRANS:text-->" comments become <trans_comment> elements.
//
// The scan for the '>' ending a literal start tag is bounded by the text
// length; a start tag that is never closed ends the processing of that tag
// instead of looping forever.
void StructureParser::cleanupTags( TQString &contents )
{
    contents.replace(TQRegExp("&"), "!POXML_AMP!");

    for (int index = 0; literaltags[index]; index++) {
        TQRegExp start(TQString("<%1[\\s>]").arg(literaltags[index]));
        TQRegExp end(TQString("</%1[\\s>]").arg(literaltags[index]));
        int strindex = 0;
        while (true) {
            strindex = contents.find(start, strindex);
            if (strindex < 0)
                break;
            const int len = contents.length();
            while (strindex < len && contents.at(strindex) != '>')
                strindex++;
            if (strindex >= len)
                break; // start tag without '>': nothing left to escape
            strindex++; // one more
            // If no closing tag follows, endindex is -1 and mid() yields the
            // rest of the text, exactly as before.
            int endindex = contents.find(end, strindex);
            TQString part = contents.mid(strindex, endindex - strindex);
            TQString newpart = escapeLiterals(part);
            contents.replace(strindex, part.length(), newpart);
            // this assumes that literal tags to not overlap
            strindex = strindex + newpart.length();
        }
    }

    TQRegExp unclosed("</(\\w*)\\s\\s*>");
    int index = -1;
    while (true) {
        index = unclosed.search(contents, index + 1);
        if (index < 0)
            break;
        TQString tag = unclosed.cap(1);
        contents.replace(index, unclosed.matchedLength(), TQString("</%1>").arg(tag));
    }

    TQRegExp start("<((\\s*[^<>\\s])*)\\s\\s*(/*)>");
    start.setMinimal(true);

    index = -1;
    while (true) {
        index = start.search(contents, index + 1);
        if (index < 0)
            break;
        TQString tag = start.cap(1);
        TQString cut = start.capturedTexts().last();
        // qDebug("UNCLO %s %d -%s- -%s-", start.cap(0).latin1(), index, tag.latin1(), cut.latin1());
        contents.replace(index, start.matchedLength(), TQString("<%1%2>").arg(tag).arg(cut));
    }

    TQRegExp singletag("<(\\w*)\\s([^><]*)/>");

    index = -1;
    while (true) {
        index = singletag.search(contents, index + 1);
        if (index < 0)
            break;
        TQString tag = singletag.cap(1);
        if (!StructureParser::isSingleTag(tag)) {
            contents.replace(index, singletag.matchedLength(), TQString("<%1 %2></%3>").arg(tag).arg(singletag.cap(2)).arg(tag));
        }
    }

    TQRegExp trans_comment("<!-- TRANS:([^<>]*)-->");
    index = -1;
    while (true) {
        index = trans_comment.search(contents, index + 1);
        if (index < 0)
            break;
        TQString msgid = trans_comment.cap(1);
        contents.replace(index, trans_comment.matchedLength(), TQString("<trans_comment>%1</trans_comment>").arg(msgid));
    }

#ifdef POXML_DEBUG
    qDebug("final %s", contents.latin1());
#endif

}
|
|
|
|
// Replaces the first element named `tag` whose content is nothing but
// whitespace with a single blank. Returns true if such an element was found
// and replaced, false otherwise. At most one element is handled per call.
static bool removeEmptyTag( TQString &contents, const TQString & tag)
{
    // qDebug("cont %s %s", contents.latin1(), tag.latin1());

    TQRegExp empty(TQString("<%1[^>]*>[\\s\n][\\s\n]*</%2\\s*>").arg(tag).arg(tag));

    const int hit = contents.find(empty, 0);
    if (hit < 0)
        return false;

    qDebug("found empty tag %s", tag.latin1());
    contents.replace(hit, empty.matchedLength(), " ");
    return true;
}
|
|
|
|
// Removes whitespace-only elements from contents until a full pass finds
// nothing more to remove. Each pass removes at most one cutting-tag element
// (the first tag in list order that has an empty instance) and at most one
// empty <glossterm>.
void StructureParser::removeEmptyTags( TQString &contents )
{
    for (;;) {
        bool removed = false;

        for (const char **tag = cuttingtags; *tag && !removed; ++tag)
            removed = removeEmptyTag(contents, *tag);

        // as glossterm has two different semantics, it's likely
        // to break something when it's cuttingtag
        if (removeEmptyTag(contents, "glossterm"))
            removed = true;

        if (!removed)
            return;
    }
}
|
|
|
|
bool StructureParser::characters(const TQString &ch)
|
|
{
|
|
if (inside && !ch.isEmpty())
|
|
message += ch;
|
|
return true;
|
|
}
|
|
|
|
// Returns message with every backslash doubled and every double quote
// preceded by a backslash. Backslashes are handled first so the ones added
// for quotes are not doubled again.
TQString escape(TQString message)
{
    const TQRegExp backslash("\\\\");
    const TQRegExp quote("\"");

    message.replace(backslash, "\\\\");
    message.replace(quote, "\\\"");
    return message;
}
|
|
|
|
void outputMsg(const char *prefix, const TQString &message)
|
|
{
|
|
TQStringList list = TQStringList::split('\n', message, true);
|
|
TQString line;
|
|
|
|
if (list.count() == 1) {
|
|
line = list.first();
|
|
if (line.isEmpty())
|
|
cout << prefix << " \"\"\n";
|
|
else
|
|
cout << prefix << " \"" << escape(line).utf8().data() << "\"\n";
|
|
} else {
|
|
cout << prefix << " \"\"\n";
|
|
for (TQStringList::ConstIterator it = list.begin(); it != list.end(); it++) {
|
|
line = *it;
|
|
if (!line.isEmpty()) {
|
|
cout << " \"" << escape(line).utf8().data();
|
|
if (it == list.fromLast())
|
|
cout << "\"\n";
|
|
else
|
|
cout << "\\n\"\n";
|
|
} else {
|
|
cout << " \"";
|
|
if (it != list.fromLast())
|
|
cout << "\\n";
|
|
cout << "\"\n";
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Turns the escape sequences of a PO string back into plain characters and
// returns the result.
//
// Passes, in order:
//  1. backslash-n becomes a line feed; when it is preceded by exactly one
//     further backslash the three characters are parked as a placeholder;
//  2. backslash-quote becomes a quote, with the same placeholder rule;
//  3. backslash-t becomes a tab; when preceded by a backslash the three
//     characters become backslash-t;
//  4. double backslashes become single ones;
//  5. the placeholders are replaced (line feed placeholder by backslash-n,
//     quote placeholder by a single backslash).
//
// The look-behind tests check the index before reading, so no negative
// position is ever passed to at(). Results are the same as before, when the
// out-of-range reads happened to yield a null character.
TQString escapePO(TQString msgid)
{
    int index = 0;
    while (true) {
        index = msgid.find("\\n", index);
        if (index == -1)
            break;
        if (index >= 1 && msgid.at(index - 1) == '\\' &&
            (index < 2 || msgid.at(index - 2) != '\\')) {
            msgid.replace(index - 1, 3, "&POXML_LITERALLINEFEED;");
            index += 3;
        } else
            msgid.replace(index, 2, "\n");
    }

    index = 0;
    while (true) {
        index = msgid.find("\\\"", index);
        if (index == -1)
            break;
        if (index > 1 && msgid.at(index - 1) == '\\' && msgid.at(index - 2) != '\\')
            msgid.replace(index - 1, 3, "&POXML_LITERALTQUOTE;");
        else
            msgid.replace(index, 2, "\"");
    }

    index = 0;
    while (true) {
        index = msgid.find("\\t", index);
        if (index == -1)
            break;
        if (index >= 1 && msgid.at(index - 1) == '\\')
            msgid.replace(index - 1, 3, "\\t");
        else
            msgid.replace(index, 2, "\t");
    }

    index = 0;
    while (true) {
        index = msgid.find("\\\\", index);
        if (index == -1)
            break;
        msgid.replace(index, 2, "\\");
        // Step over the backslash just written so it is not paired again.
        index += 1;
    }

    msgid.replace(TQRegExp("&POXML_LITERALLINEFEED;"), "\\n");
    msgid.replace(TQRegExp("&POXML_LITERALTQUOTE;"), "\\");
    return msgid;
}
|
|
|
|
|
|
// Reads the XML file `filename` (decoded as UTF-8), cleans it up with
// StructureParser::cleanupTags(), replaces every <!ENTITY ...> declaration by
// the line feeds it contained, parses the result with a StructureParser and
// returns the collected messages.
//
// Afterwards msgids are made unambiguous: a msgid shorter than four characters
// is wrapped in its tag, and when the same msgid occurs with different tags,
// all messages with that msgid are wrapped in their respective tags. The pass
// is repeated until nothing changes.
//
// If the file cannot be opened, a note is written to stderr and the (empty)
// list of the unused handler is returned.
MsgList parseXML(const char *filename)
{
    StructureParser handler;
    TQFile xmlFile( filename );
    if (!xmlFile.open(IO_ReadOnly)) {
        cerr << "parseXML: cannot open " << (filename ? filename : "(null)") << endl;
        return handler.getList();
    }

    // Size the buffer by what was actually read, not by the reported file size.
    const TQByteArray raw = xmlFile.readAll();
    xmlFile.close();

    TQCString ccontents;
    ccontents.fill(0, raw.size() + 1);
    if (raw.size() > 0)
        memcpy(ccontents.data(), raw.data(), raw.size());

    TQString contents = TQString::fromUtf8( ccontents );
    StructureParser::cleanupTags(contents);

    while (true) {
        int index = contents.find("<!ENTITY");
        if (index < 0)
            break;
        int inside = 0;
        int endindex = index + 1;
        const int len = contents.length();
        // Keeps the line feeds of the declaration so line numbers stay valid.
        TQString replacement = "";
        // Bounded by the text length: an unterminated declaration is cut at
        // the end of the text instead of looping forever.
        while (endindex < len && (contents.at(endindex) != '>' || inside))
        {
            switch (contents.at(endindex).latin1()) {
            case '<':
                inside++; break;
            case '>':
                inside--; break;
            case '\n':
                replacement += '\n';
                break;
            default:
                break;
            }
            endindex++;
        }
        endindex++;
        contents.replace(index, endindex - index, replacement);
    }

    TQTextStream ts(contents.utf8(), IO_ReadOnly);
    TQXmlInputSource source( ts );
    TQXmlSimpleReader reader;
    reader.setFeature( "http://trolltech.com/xml/features/report-start-end-entity", true);
    reader.setContentHandler( &handler );
    reader.setLexicalHandler( &handler );
    reader.setDTDHandler( &handler );
    // reader.setErrorHandler( &handler );
    reader.parse( source );
    MsgList english = handler.getList();

    bool changed = false;

    do {
        changed = false;
        TQMap<TQString, TQString> msgids; // msgid -> tag of its first occurrence

        for (MsgList::Iterator it = english.begin();
             it != english.end(); it++)
        {
            TQMap<TQString,TQString>::Iterator found = msgids.find((*it).msgid);
            if ((*it).msgid.length() < 4) {
                (*it).msgid = TQString("<%1>").arg((*it).tag) + (*it).msgid +
                              TQString("</%1>").arg((*it).tag);
                changed = true;
                break;
            }
            if (found != msgids.end()) {
                if (found.data() != (*it).tag) {
#ifdef POXML_DEBUG
                    qDebug("same msgid for '%s' and '%s'", found.data().latin1(), (*it).tag.latin1());
#endif
                    changed = true;
                    TQString msgid = (*it).msgid;
                    for (MsgList::Iterator it2 = english.begin();
                         it2 != english.end(); it2++)
                    {
                        if ((*it2).msgid == msgid)
                            (*it2).msgid = TQString("<%1>").arg((*it2).tag) + msgid + TQString("</%1>").arg((*it2).tag);
                    }
                    break;
                }
            } else {
                msgids.insert((*it).msgid, (*it).tag);
            }
        }
    } while (changed);

    return english;
}
|
|
|