You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tdelibs/tdehtml/xml/xml_tokenizer.cpp

610 lines
19 KiB

/**
* This file is part of the DOM implementation for KDE.
*
* Copyright (C) 2000 Peter Kelly (pmk@post.com)
* Copyright (C) 2003 Apple Computer, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#include "xml_tokenizer.h"
#include "xml/dom_docimpl.h"
#include "xml/dom_textimpl.h"
#include "xml/dom_xmlimpl.h"
#include "html/html_tableimpl.h"
#include "html/html_headimpl.h"
#include "rendering/render_object.h"
#include "misc/htmltags.h"
#include "misc/htmlattrs.h"
#include "misc/loader.h"
#include "tdehtmlview.h"
#include "tdehtml_part.h"
#include <tqvariant.h>
#include <kdebug.h>
#include <klocale.h>
using namespace DOM;
using namespace tdehtml;
// Creates an empty incremental input source: no buffered data, read position
// at the start, and the "finished" flag cleared.
XMLIncrementalSource::XMLIncrementalSource()
    : TQXmlInputSource(),
      m_pos( 0 ),
      m_unicode( 0 ),
      m_finished( false )
{
}
// Deliberately a no-op that replaces the base-class behaviour: this source is
// fed explicitly through setData() and appendXML().
void XMLIncrementalSource::fetchData()
{
    // intentionally empty
}
// Returns the next buffered character and advances the read position.
// Yields EndOfDocument once the source has been marked finished, and
// EndOfData when every buffered character has been consumed.
TQChar XMLIncrementalSource::next()
{
    if ( m_finished )
        return TQXmlInputSource::EndOfDocument;

    if ( m_pos >= m_data.length() )
        return TQXmlInputSource::EndOfData;

    return m_unicode[m_pos++];
}
// Replaces the buffered data with str and rewinds the read position.
// Non-empty input also clears the "finished" flag; empty input leaves the
// flag as it was.
void XMLIncrementalSource::setData( const TQString& str )
{
    m_data = str;
    m_pos = 0;
    // Cache the raw character pointer of our own copy for next().
    m_unicode = m_data.unicode();

    if ( str.isEmpty() )
        return;

    m_finished = false;
}
void XMLIncrementalSource::setData( const TQByteArray& data )
{
setData( fromRawData( data, true ) );
}
// Appends str to the buffered data without touching the read position.
void XMLIncrementalSource::appendXML( const TQString& str )
{
    m_data += str;
    // Refresh the cached character pointer after growing the buffer.
    m_unicode = m_data.unicode();
}
// Returns a copy of everything currently buffered, including characters
// that next() has already handed out.
TQString XMLIncrementalSource::data()
{
    return m_data;
}
// Sets the "finished" flag; while it is set, next() reports EndOfDocument.
void XMLIncrementalSource::setFinished( bool finished )
{
    m_finished = finished;
}
// Builds a SAX handler that populates _doc. _view is stored and consulted
// before renderers are attached to newly created nodes.
// The document itself becomes the bottom of the open-node stack, so
// currentNode() refers to it until the first element is opened.
XMLHandler::XMLHandler(DocumentImpl *_doc, KHTMLView *_view)
    : errorLine(0)
{
    // Keep the error position fully initialised; previously only errorLine
    // was zeroed and errorCol held an indeterminate value until fatalError().
    errorCol = 0;
    m_doc = _doc;
    m_view = _view;
    pushNode( _doc );
}
// Nothing to release explicitly here.
XMLHandler::~XMLHandler()
{
}
// Makes node the innermost open node (top of m_nodes).
void XMLHandler::pushNode( NodeImpl *node )
{
    m_nodes.push( node );
}
// Removes the innermost open node from m_nodes and returns it.
NodeImpl *XMLHandler::popNode()
{
    return m_nodes.pop();
}
// Returns the innermost open node without removing it from m_nodes.
NodeImpl *XMLHandler::currentNode() const
{
    return m_nodes.current();
}
// Returns the error text accumulated by fatalError() since startDocument().
TQString XMLHandler::errorProtocol()
{
    return errorProt;
}
// SAX callback at the beginning of parsing: resets the handler state and
// clears any previously collected error text. Always succeeds.
bool XMLHandler::startDocument()
{
    state = StateInit;
    errorProt = "";
    return true;
}
bool XMLHandler::startPrefixMapping(const TQString& prefix, const TQString& uri)
{
namespaceInfo[prefix].push(uri);
return true;
}
bool XMLHandler::endPrefixMapping(const TQString& prefix)
{
TQValueStack<TQString>& stack = namespaceInfo[prefix];
stack.pop();
if (stack.isEmpty())
namespaceInfo.remove(prefix);
return true;
}
// Fills in uri for a prefixed qname whose namespace the XML reader left
// unresolved. The reader does not resolve the namespace of attributes that
// precede the xmlns declaration within the same tag; this repairs that case
// using the prefix bindings tracked in namespaceInfo.
// uri is modified in place, and only when it is empty, qname contains a ':'
// and the prefix is currently bound.
void XMLHandler::fixUpNSURI(TQString& uri, const TQString& qname)
{
    if (!uri.isEmpty())
        return;
    if (qname.find(':') == -1)
        return;

    TQXmlNamespaceSupport splitter;
    TQString prefix, localPart;
    splitter.splitName(qname, prefix, localPart);

    if (!namespaceInfo.contains(prefix))
        return;

    uri = namespaceInfo[prefix].top();
}
// SAX callback for an opening tag.
//
// Creates the element in namespaceURI (or the empty namespace when none is
// given), copies all attributes onto it, inserts it into the tree and makes
// it the current node.
//
// Returns false - which aborts parsing - when the element cannot be created,
// when setting an attribute raises a DOM exception, or when neither the
// current node nor any of its ancestors accepts the element as a child.
// In the latter two cases the never-inserted element is deleted.
//
// ### DOM spec states: "if there is no markup inside an element's content, the text is contained in a
// single object implementing the Text interface that is the only child of the element."... do we
// need to ensure that empty elements always have an empty text child?
bool XMLHandler::startElement( const TQString& namespaceURI, const TQString& /*localName*/,
                               const TQString& qName, const TQXmlAttributes& atts )
{
    // An open text node ends as soon as markup starts.
    if (currentNode()->nodeType() == Node::TEXT_NODE)
        exitText();

    DOMString nsURI;
    if (!namespaceURI.isNull())
        nsURI = DOMString(namespaceURI);
    else
        // No namespace declared, default to the no namespace
        nsURI = DOMString("");

    ElementImpl *newElement = m_doc->createElementNS(nsURI,qName);
    if (!newElement)
        return false;

    for (int i = 0; i < atts.length(); i++) {
        int exceptioncode = 0;
        TQString uriString = atts.uri(i);
        TQString qnString = atts.qName(i);
        fixUpNSURI(uriString, qnString);
        DOMString uri(uriString);
        DOMString qn(qnString);
        DOMString val(atts.value(i));
        newElement->setAttributeNS(uri, qn, val, exceptioncode);
        if (exceptioncode) {
            // exception setting attributes: the element has not been inserted
            // anywhere yet, so release it here instead of leaking it
            delete newElement;
            return false;
        }
    }

    if (newElement->id() == ID_SCRIPT || newElement->id() == makeId(xhtmlNamespace, ID_SCRIPT))
        static_cast<HTMLScriptElementImpl *>(newElement)->setCreatedByParser(true);

    //this is tricky. in general the node doesn't have to attach to the one it's in. as far
    //as standards go this is wrong, but there's literally thousands of documents where
    //we see <p><ul>...</ul></p>. the following code is there for those cases.
    //when we can't attach to the currently holding us node we try to attach to its parent
    bool attached = false;
    for ( NodeImpl *current = currentNode(); current; current = current->parent() ) {
        attached = current->addChild( newElement );
        if ( attached )
            break;
    }

    if (!attached) {
        delete newElement;
        return false;
    }

    if (m_view && !newElement->attached() && !m_doc->hasPendingSheets())
        newElement->attach();
    pushNode( newElement );
    return true;
}
bool XMLHandler::endElement( const TQString& /*namespaceURI*/, const TQString& /*localName*/, const TQString& /*qName*/ )
{
if (currentNode()->nodeType() == Node::TEXT_NODE)
exitText();
NodeImpl *node = popNode();
if ( node ) {
node->close();
while ( currentNode() && currentNode()->implicitNode() ) //for the implicit HTMLTableSectionElementImpl
popNode()->close();
} else
return false;
return true;
}
bool XMLHandler::startCDATA()
{
if (currentNode()->nodeType() == Node::TEXT_NODE)
exitText();
NodeImpl *newNode = m_doc->createCDATASection(new DOMStringImpl(""));
if (currentNode()->addChild(newNode)) {
if (m_view && !newNode->attached() && !m_doc->hasPendingSheets())
newNode->attach();
pushNode( newNode );
return true;
}
else {
delete newNode;
return false;
}
}
// Lexical callback at the end of a CDATA section: pops the CDATA node.
// Reports success as long as some node is still open afterwards.
bool XMLHandler::endCDATA()
{
    popNode();
    Q_ASSERT( currentNode() );
    return currentNode() != 0;
}
// SAX callback for character data. Appends ch to the current text or CDATA
// node, opening a new text node first when necessary.
// If no text node can be opened at this position, pure white-space is
// silently accepted and anything else is reported as an error.
bool XMLHandler::characters( const TQString& ch )
{
    const bool insideTextual =
        currentNode()->nodeType() == Node::TEXT_NODE ||
        currentNode()->nodeType() == Node::CDATA_SECTION_NODE;

    if (!insideTextual && !enterText()) {
        // Don't worry about white-space violating DTD
        return ch.stripWhiteSpace().isEmpty();
    }

    int exceptioncode = 0;
    static_cast<TextImpl*>(currentNode())->appendData(ch,exceptioncode);
    return exceptioncode == 0;
}
bool XMLHandler::comment(const TQString & ch)
{
if (currentNode()->nodeType() == Node::TEXT_NODE)
exitText();
// ### handle exceptions
currentNode()->addChild(m_doc->createComment(new DOMStringImpl(ch.unicode(), ch.length())));
return true;
}
bool XMLHandler::processingInstruction(const TQString &target, const TQString &data)
{
if (currentNode()->nodeType() == Node::TEXT_NODE)
exitText();
// ### handle exceptions
ProcessingInstructionImpl *pi =
m_doc->createProcessingInstruction(target, new DOMStringImpl(data.unicode(), data.length()));
currentNode()->addChild(pi);
pi->checkStyleSheet();
return true;
}
// Returns the generic, translated message used when this handler makes
// the reader stop.
TQString XMLHandler::errorString()
{
    // ### Make better error-messages
    return i18n("the document is not in the correct file format");
}
bool XMLHandler::fatalError( const TQXmlParseException& exception )
{
errorProt += i18n( "fatal parsing error: %1 in line %2, column %3" )
.arg( exception.message() )
.arg( exception.lineNumber() )
.arg( exception.columnNumber() );
errorLine = exception.lineNumber();
errorCol = exception.columnNumber();
return false;
}
bool XMLHandler::enterText()
{
NodeImpl *newNode = m_doc->createTextNode("");
if (currentNode()->addChild(newNode)) {
pushNode( newNode );
return true;
}
else {
delete newNode;
return false;
}
}
void XMLHandler::exitText()
{
if ( m_view && !currentNode()->attached() && !m_doc->hasPendingSheets() )
currentNode()->attach();
popNode();
}
// Declaration-handler callback for attribute declarations. Currently ignored;
// always returns true so parsing continues.
bool XMLHandler::attributeDecl(const TQString &/*eName*/, const TQString &/*aName*/, const TQString &/*type*/,
                               const TQString &/*valueDefault*/, const TQString &/*value*/)
{
    // qt's xml parser (as of 2.2.3) does not currently give us values for type, valueDefault and
    // value. When it does, we can store these somewhere and have default attributes on elements
    return true;
}
// Declaration-handler callback for external entity declarations. Currently
// ignored; always returns true so parsing continues.
bool XMLHandler::externalEntityDecl(const TQString &/*name*/, const TQString &/*publicId*/, const TQString &/*systemId*/)
{
    // ### insert these too - is there anything special we have to do here?
    return true;
}
// Declaration-handler callback for an internal entity declaration: creates
// an entity node named name whose only child is a text node holding value,
// and registers it in the entity map of the document's doctype.
// Always returns true.
bool XMLHandler::internalEntityDecl(const TQString &name, const TQString &value)
{
    // Without a doctype there is nowhere to register the entity. Bail out
    // before allocating anything; previously the entity and its text child
    // were created and then leaked in this case.
    if (!m_doc->doctype())
        return true;

    EntityImpl *e = new EntityImpl(m_doc,name);
    // ### further parse entities inside the value and add them as separate nodes (or entityreferences)?
    e->addChild(m_doc->createTextNode(new DOMStringImpl(value.unicode(), value.length())));
    static_cast<GenericRONamedNodeMapImpl*>(m_doc->doctype()->entities())->addNode(e);
    return true;
}
// DTD-handler callback for notation declarations. Not implemented yet (see
// the disabled code below); always returns true so parsing continues.
bool XMLHandler::notationDecl(const TQString &/*name*/, const TQString &/*publicId*/, const TQString &/*systemId*/)
{
    // ### FIXME
    // if (m_doc->document()->doctype()) {
    //     NotationImpl *n = new NotationImpl(m_doc,name,publicId,systemId);
    //     static_cast<GenericRONamedNodeMapImpl*>(m_doc->document()->doctype()->notations())->addNode(n);
    // }
    return true;
}
// DTD-handler callback for unparsed entity declarations. Currently ignored;
// always returns true so parsing continues.
bool XMLHandler::unparsedEntityDecl(const TQString &/*name*/, const TQString &/*publicId*/,
                                    const TQString &/*systemId*/, const TQString &/*notationName*/)
{
    // ###
    return true;
}
//------------------------------------------------------------------------------
// Creates a tokenizer that parses XML into _doc. A single XMLHandler is
// installed on the reader for content, lexical, error, declaration and DTD
// events, and the SAX "namespace-prefixes" feature is switched on.
XMLTokenizer::XMLTokenizer(DOM::DocumentImpl *_doc, KHTMLView *_view)
    : m_handler(_doc,_view)
{
    m_doc = _doc;
    m_view = _view;

    // No scripts collected or pending yet, and no parse error so far.
    m_scriptsIt = 0;
    m_cachedScript = 0;
    m_noErrors = true;

    // Route every kind of reader event to the one handler.
    m_reader.setContentHandler( &m_handler );
    m_reader.setLexicalHandler( &m_handler );
    m_reader.setErrorHandler( &m_handler );
    m_reader.setDeclHandler( &m_handler );
    m_reader.setDTDHandler( &m_handler );

    m_reader.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
}
// Frees the script iterator and drops our reference on a script that is
// still being loaded, if any.
XMLTokenizer::~XMLTokenizer()
{
    // deleting a null pointer is a no-op, so no guard is needed
    delete m_scriptsIt;

    if (m_cachedScript)
        m_cachedScript->deref(this);
}
// Starts parsing from m_source. The second argument asks the reader for
// incremental parsing, so later data is consumed via parseContinue() in
// write().
void XMLTokenizer::begin()
{
    m_reader.parse( &m_source, true );
}
// Feeds more input to the parser. With appendData the text is added to the
// buffered source - unless an earlier chunk already failed to parse, in
// which case the call is ignored. Without appendData the buffered source is
// replaced. Afterwards parsing continues and m_noErrors records the outcome.
void XMLTokenizer::write( const TokenizerString &str, bool appendData )
{
    if ( appendData ) {
        if ( !m_noErrors )
            return;
        m_source.appendXML( str.toString() );
    } else {
        m_source.setData( str.toString() );
    }

    m_noErrors = m_reader.parseContinue();
}
// Marks the input source as complete and announces the end of parsing via
// the finishedParsing() signal.
void XMLTokenizer::end()
{
    m_source.setFinished( true );

    // Disabled: one last parse pass after the source has been marked finished.
    //if ( m_noErrors )
    //m_noErrors = m_reader.parseContinue();

    emit finishedParsing();
}
// Called when all input has been written.
//
// On success, collects every <script> element of the document and starts
// executing them; executeScripts() eventually calls end().
//
// On a parse error, replaces the document content with a small XHTML error
// page (heading, error protocol and - when a line number is known and that
// line can be read - the offending source line with a '^' marker under the
// error column) and then calls end().
void XMLTokenizer::finish()
{
    m_source.setFinished( true );

    if (m_noErrors) {
        // Parsing was successful. Now locate all html <script> tags in the document and execute them
        // one by one
        addScripts(m_doc);
        m_scriptsIt = new TQPtrListIterator<HTMLScriptElementImpl>(m_scripts);
        executeScripts();
        return;
    }

    // An error occurred during parsing of the code. Display an error page to the user (the DOM
    // tree is created manually and includes an excerpt from the code where the error is located)
    // ### for multiple error messages, display the code for each (can this happen?)

    // Clear the document
    int exceptioncode = 0;
    while (m_doc->hasChildNodes())
        static_cast<NodeImpl*>(m_doc)->removeChild(m_doc->firstChild(),exceptioncode);

    TQString line, errorLocPtr;
    if ( m_handler.errorLine ) {
        TQString xmlCode = m_source.data();
        TQTextIStream stream(&xmlCode);

        // Skip the errorLine-1 lines preceding the error. Stop at the end of
        // the input so that an out-of-range or "unknown" line number cannot
        // keep this loop spinning.
        for (unsigned long lineno = 1; lineno < m_handler.errorLine && !stream.atEnd(); lineno++)
            stream.readLine();
        line = stream.readLine();

        // Number of blanks in front of the '^'. A column of 0 must not wrap
        // around when 1 is subtracted, and the marker is never placed beyond
        // the end of the displayed line.
        unsigned long caretOffset = 0;
        if ( m_handler.errorCol > 0 )
            caretOffset = m_handler.errorCol - 1;
        if ( caretOffset > line.length() )
            caretOffset = line.length();

        for (unsigned long colno = 0; colno < caretOffset; colno++)
            errorLocPtr += " ";
        errorLocPtr += "^";
    }

    // Create elements for display
    DocumentImpl *doc = m_doc;
    NodeImpl *html = doc->createElementNS(XHTML_NAMESPACE,"html");
    NodeImpl *body = doc->createElementNS(XHTML_NAMESPACE,"body");
    NodeImpl *h1 = doc->createElementNS(XHTML_NAMESPACE,"h1");
    NodeImpl *headingText = doc->createTextNode(i18n("XML parsing error"));
    NodeImpl *errorText = doc->createTextNode(m_handler.errorProtocol());
    NodeImpl *hr = 0;
    NodeImpl *pre = 0;
    NodeImpl *lineText = 0;
    NodeImpl *errorLocText = 0;
    if ( !line.isNull() ) {
        hr = doc->createElementNS(XHTML_NAMESPACE,"hr");
        pre = doc->createElementNS(XHTML_NAMESPACE,"pre");
        lineText = doc->createTextNode(line+"\n");
        errorLocText = doc->createTextNode(errorLocPtr);
    }

    // Construct DOM tree. We ignore exceptions as we assume they will not be thrown here (due to the
    // fact we are using a known tag set)
    // NOTE(review): body is null-checked once but dereferenced unconditionally
    // below; creation of these known XHTML elements is assumed to succeed.
    doc->appendChild(html,exceptioncode);
    html->appendChild(body,exceptioncode);
    if ( body )
        body->appendChild(h1,exceptioncode);
    h1->appendChild(headingText,exceptioncode);
    body->appendChild(errorText,exceptioncode);

    // The source excerpt exists only when the offending line could be read;
    // do not hand null children to appendChild().
    if ( hr )
        body->appendChild(hr,exceptioncode);
    if ( pre ) {
        body->appendChild(pre,exceptioncode);
        pre->appendChild(lineText,exceptioncode);
        pre->appendChild(errorLocText,exceptioncode);
    }

    // Close the renderers so that they update their display correctly
    // ### this should not be necessary, but requires changes in the rendering code...
    h1->close();
    if ( pre ) pre->close();
    body->close();

    m_doc->recalcStyle( NodeImpl::Inherit );
    m_doc->updateRendering();

    end();
}
// Walks the subtree rooted at n depth-first, in document order, and appends
// every html <script> element (with or without the XHTML namespace id) to
// m_scripts, the list the scripts are later executed from.
void XMLTokenizer::addScripts(NodeImpl *n)
{
    const bool isScript =
        n->id() == ID_SCRIPT || n->id() == makeId(xhtmlNamespace, ID_SCRIPT);
    if (isScript)
        m_scripts.append(static_cast<HTMLScriptElementImpl*>(n));

    for (NodeImpl *kid = n->firstChild(); kid; kid = kid->nextSibling())
        addScripts(kid);
}
void XMLTokenizer::executeScripts()
{
// Iterate through all of the html <script> tags in the document. For those that have a src attribute,
// start loading the script and return (executeScripts() will be called again once the script is loaded
// and continue where it left off). For scripts that don't have a src attribute, execute the code
// inside the tag
while (m_scriptsIt->current()) {
DOMString scriptSrc = m_scriptsIt->current()->getAttribute(ATTR_SRC);
TQString charset = m_scriptsIt->current()->getAttribute(ATTR_CHARSET).string();
if (!scriptSrc.isEmpty()) {
// we have a src attribute
m_cachedScript = m_doc->docLoader()->requestScript(scriptSrc, charset);
++(*m_scriptsIt);
if (m_cachedScript) {
m_cachedScript->ref(this); // will call executeScripts() again if already cached
return;
}
}
else {
// no src attribute - execute from contents of tag
TQString scriptCode = "";
NodeImpl *child;
for (child = m_scriptsIt->current()->firstChild(); child; child = child->nextSibling()) {
if ( ( child->nodeType() == Node::TEXT_NODE || child->nodeType() == Node::CDATA_SECTION_NODE) &&
static_cast<TextImpl*>(child)->string() )
scriptCode += TQConstString(static_cast<TextImpl*>(child)->string()->s,
static_cast<TextImpl*>(child)->string()->l).string();
}
// the script cannot do document.write until we support incremental parsing
// ### handle the case where the script deletes the node or redirects to
// another page, etc. (also in notifyFinished())
// ### the script may add another script node after this one which should be executed
if (m_view) {
m_view->part()->executeScript(DOM::Node(), scriptCode);
}
++(*m_scriptsIt);
}
}
// All scripts have finished executing, so calculate the style for the document and close
// the last element
m_doc->updateStyleSelector();
// We are now finished parsing
end();
}
// Loader callback, invoked when a script requested by executeScripts() has
// finished loading. Notifications for any other object are ignored.
// The script source is taken, our reference on the cached script is
// released, the script is executed (if there is a view) and executeScripts()
// is called again to continue with the remaining scripts.
void XMLTokenizer::notifyFinished(CachedObject *finishedObj)
{
    if (finishedObj != m_cachedScript)
        return;

    // Copy the source before letting go of the cached object.
    DOMString scriptSource = m_cachedScript->script();
    m_cachedScript->deref(this);
    m_cachedScript = 0;

    if (m_view)
        m_view->part()->executeScript(DOM::Node(), scriptSource.string());

    executeScripts();
}
// True while an external script requested by executeScripts() is still
// outstanding.
bool XMLTokenizer::isWaitingForScripts() const
{
    return m_cachedScript ? true : false;
}
#include "xml_tokenizer.moc"