/**
 * This file is part of the DOM implementation for KDE.
 *
 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
 * Copyright (C) 2003 Apple Computer, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB. If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */
|
|
|
|
|
|
#include "xml_tokenizer.h"
|
|
#include "xml/dom_docimpl.h"
|
|
#include "xml/dom_textimpl.h"
|
|
#include "xml/dom_xmlimpl.h"
|
|
#include "html/html_tableimpl.h"
|
|
#include "html/html_headimpl.h"
|
|
#include "rendering/render_object.h"
|
|
#include "misc/htmltags.h"
|
|
#include "misc/htmlattrs.h"
|
|
#include "misc/loader.h"
|
|
|
|
#include "khtmlview.h"
|
|
#include "khtml_part.h"
|
|
#include <tqvariant.h>
|
|
#include <kdebug.h>
|
|
#include <klocale.h>
|
|
|
|
using namespace DOM;
|
|
using namespace khtml;
|
|
|
|
// Creates an empty source: read position 0, no cached character pointer,
// and the "finished" flag cleared.
XMLIncrementalSource::XMLIncrementalSource()
    : TQXmlInputSource(),
      m_pos( 0 ),
      m_tqunicode( 0 ),
      m_finished( false )
{
}
|
|
|
|
// Deliberately empty: exists only to replace the default fetchData()
// behaviour. The buffer is filled through setData() / appendXML() instead.
void XMLIncrementalSource::fetchData()
{
}
|
|
|
|
// Returns the next buffered character and advances the read position.
// Yields EndOfDocument once the source has been marked finished, and
// EndOfData when the read position has reached the end of the buffer.
TQChar XMLIncrementalSource::next()
{
    if ( m_finished )
        return TQXmlInputSource::EndOfDocument;

    if ( m_pos >= m_data.length() )
        return TQXmlInputSource::EndOfData;

    return m_tqunicode[m_pos++];
}
|
|
|
|
void XMLIncrementalSource::setData( const TQString& str )
|
|
{
|
|
m_data = str;
|
|
m_tqunicode = m_data.tqunicode();
|
|
m_pos = 0;
|
|
if ( !str.isEmpty() )
|
|
m_finished = false;
|
|
}
|
|
void XMLIncrementalSource::setData( const TQByteArray& data )
|
|
{
|
|
setData( fromRawData( data, true ) );
|
|
}
|
|
|
|
void XMLIncrementalSource::appendXML( const TQString& str )
|
|
{
|
|
m_data += str;
|
|
m_tqunicode = m_data.tqunicode();
|
|
}
|
|
|
|
// Returns the text currently held in the buffer.
TQString XMLIncrementalSource::data()
{
    return m_data;
}
|
|
|
|
void XMLIncrementalSource::setFinished( bool finished )
|
|
{
|
|
m_finished = finished;
|
|
}
|
|
|
|
// Builds a handler that populates _doc (optionally rendered in _view).
// The document itself becomes the bottom entry of the open-node stack.
XMLHandler::XMLHandler(DocumentImpl *_doc, KHTMLView *_view)
    : errorLine(0)
{
    // errorCol is otherwise only assigned in fatalError(); give it a defined
    // value so it is never read uninitialised. Assigned in the body rather
    // than the initialiser list to stay independent of declaration order.
    errorCol = 0;

    m_doc = _doc;
    m_view = _view;
    pushNode( _doc );
}
|
|
|
|
// Nothing to release explicitly.
XMLHandler::~XMLHandler()
{
}
|
|
|
|
// Makes node the current node by pushing it onto the open-node stack.
void XMLHandler::pushNode( NodeImpl *node )
{
    m_nodes.push( node );
}
|
|
|
|
// Removes the top entry of the open-node stack and returns it.
NodeImpl *XMLHandler::popNode()
{
    NodeImpl *top = m_nodes.pop();
    return top;
}
|
|
|
|
// Returns the top entry of the open-node stack without removing it.
NodeImpl *XMLHandler::currentNode() const
{
    NodeImpl *top = m_nodes.current();
    return top;
}
|
|
|
|
// Returns the error text accumulated by fatalError().
TQString XMLHandler::errorProtocol()
{
    return errorProt;
}
|
|
|
|
|
|
bool XMLHandler::startDocument()
|
|
{
|
|
// at the beginning of parsing: do some initialization
|
|
errorProt = "";
|
|
state = StateInit;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool XMLHandler::startPrefixMapping(const TQString& prefix, const TQString& uri)
|
|
{
|
|
namespaceInfo[prefix].push(uri);
|
|
return true;
|
|
}
|
|
|
|
bool XMLHandler::endPrefixMapping(const TQString& prefix)
|
|
{
|
|
TQValueStack<TQString>& stack = namespaceInfo[prefix];
|
|
stack.pop();
|
|
if (stack.isEmpty())
|
|
namespaceInfo.remove(prefix);
|
|
return true;
|
|
}
|
|
|
|
// QXml does not resolve the namespaces of attributes that precede the xmlns
// declaration inside the same tag. For a prefixed qname whose uri is still
// empty, fill uri in from the innermost binding recorded for that prefix.
void XMLHandler::fixUpNSURI(TQString& uri, const TQString& qname)
{
    // Nothing to fix when a URI is already known or the name has no prefix.
    if (!uri.isEmpty() || qname.tqfind(':') == -1)
        return;

    TQXmlNamespaceSupport nsSupport;
    TQString prefix;
    TQString localName;
    nsSupport.splitName(qname, prefix, localName);

    if (namespaceInfo.contains(prefix))
        uri = namespaceInfo[prefix].top();
}
|
|
|
|
bool XMLHandler::startElement( const TQString& namespaceURI, const TQString& /*localName*/,
|
|
const TQString& qName, const TQXmlAttributes& atts )
|
|
{
|
|
if (currentNode()->nodeType() == Node::TEXT_NODE)
|
|
exitText();
|
|
|
|
DOMString nsURI;
|
|
if (!namespaceURI.isNull())
|
|
nsURI = DOMString(namespaceURI);
|
|
else
|
|
// No namespace declared, default to the no namespace
|
|
nsURI = DOMString("");
|
|
ElementImpl *newElement = m_doc->createElementNS(nsURI,qName);
|
|
if (!newElement)
|
|
return false;
|
|
int i;
|
|
for (i = 0; i < atts.length(); i++) {
|
|
int exceptioncode = 0;
|
|
TQString uriString = atts.uri(i);
|
|
TQString qnString = atts.qName(i);
|
|
fixUpNSURI(uriString, qnString);
|
|
DOMString uri(uriString);
|
|
DOMString qn(qnString);
|
|
DOMString val(atts.value(i));
|
|
newElement->setAttributeNS(uri, qn, val, exceptioncode);
|
|
if (exceptioncode) // exception setting attributes
|
|
return false;
|
|
}
|
|
|
|
if (newElement->id() == ID_SCRIPT || newElement->id() == makeId(xhtmlNamespace, ID_SCRIPT))
|
|
static_cast<HTMLScriptElementImpl *>(newElement)->setCreatedByParser(true);
|
|
|
|
//this is tricky. in general the node doesn't have to attach to the one it's in. as far
|
|
//as standards go this is wrong, but there's literally thousands of documents where
|
|
//we see <p><ul>...</ul></p>. the following code is there for those cases.
|
|
//when we can't attach to the currently holding us node we try to attach to its parent
|
|
bool attached = false;
|
|
for ( NodeImpl *current = currentNode(); current; current = current->parent() ) {
|
|
attached = current->addChild( newElement );
|
|
if ( attached )
|
|
break;
|
|
}
|
|
if (attached) {
|
|
if (m_view && !newElement->attached() && !m_doc->hasPendingSheets())
|
|
newElement->attach();
|
|
pushNode( newElement );
|
|
return true;
|
|
}
|
|
else {
|
|
delete newElement;
|
|
return false;
|
|
}
|
|
|
|
// ### DOM spec states: "if there is no markup inside an element's content, the text is contained in a
|
|
// single object implementing the Text interface that is the only child of the element."... do we
|
|
// need to ensure that empty elements always have an empty text child?
|
|
}
|
|
|
|
|
|
bool XMLHandler::endElement( const TQString& /*namespaceURI*/, const TQString& /*localName*/, const TQString& /*qName*/ )
|
|
{
|
|
if (currentNode()->nodeType() == Node::TEXT_NODE)
|
|
exitText();
|
|
|
|
NodeImpl *node = popNode();
|
|
if ( node ) {
|
|
node->close();
|
|
while ( currentNode() && currentNode()->implicitNode() ) //for the implicit HTMLTableSectionElementImpl
|
|
popNode()->close();
|
|
} else
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
bool XMLHandler::startCDATA()
|
|
{
|
|
if (currentNode()->nodeType() == Node::TEXT_NODE)
|
|
exitText();
|
|
|
|
NodeImpl *newNode = m_doc->createCDATASection(new DOMStringImpl(""));
|
|
if (currentNode()->addChild(newNode)) {
|
|
if (m_view && !newNode->attached() && !m_doc->hasPendingSheets())
|
|
newNode->attach();
|
|
pushNode( newNode );
|
|
return true;
|
|
}
|
|
else {
|
|
delete newNode;
|
|
return false;
|
|
}
|
|
|
|
}
|
|
|
|
bool XMLHandler::endCDATA()
|
|
{
|
|
popNode();
|
|
Q_ASSERT( currentNode() );
|
|
return currentNode();
|
|
}
|
|
|
|
bool XMLHandler::characters( const TQString& ch )
|
|
{
|
|
if (currentNode()->nodeType() == Node::TEXT_NODE ||
|
|
currentNode()->nodeType() == Node::CDATA_SECTION_NODE ||
|
|
enterText()) {
|
|
int exceptioncode = 0;
|
|
static_cast<TextImpl*>(currentNode())->appendData(ch,exceptioncode);
|
|
if (exceptioncode)
|
|
return false;
|
|
return true;
|
|
}
|
|
else {
|
|
// Don't worry about white-space violating DTD
|
|
if (ch.stripWhiteSpace().isEmpty()) return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
}
|
|
|
|
bool XMLHandler::comment(const TQString & ch)
|
|
{
|
|
if (currentNode()->nodeType() == Node::TEXT_NODE)
|
|
exitText();
|
|
// ### handle exceptions
|
|
currentNode()->addChild(m_doc->createComment(new DOMStringImpl(ch.tqunicode(), ch.length())));
|
|
return true;
|
|
}
|
|
|
|
bool XMLHandler::processingInstruction(const TQString &target, const TQString &data)
|
|
{
|
|
if (currentNode()->nodeType() == Node::TEXT_NODE)
|
|
exitText();
|
|
// ### handle exceptions
|
|
ProcessingInstructionImpl *pi =
|
|
m_doc->createProcessingInstruction(target, new DOMStringImpl(data.tqunicode(), data.length()));
|
|
currentNode()->addChild(pi);
|
|
pi->checkStyleSheet();
|
|
return true;
|
|
}
|
|
|
|
|
|
// Returns the generic (translated) message used when a handler callback
// returns false.
TQString XMLHandler::errorString()
{
    // ### Make better error-messages
    const TQString message = i18n("the document is not in the correct file format");
    return message;
}
|
|
|
|
|
|
bool XMLHandler::fatalError( const TQXmlParseException& exception )
|
|
{
|
|
errorProt += i18n( "fatal parsing error: %1 in line %2, column %3" )
|
|
.arg( exception.message() )
|
|
.arg( exception.lineNumber() )
|
|
.arg( exception.columnNumber() );
|
|
|
|
errorLine = exception.lineNumber();
|
|
errorCol = exception.columnNumber();
|
|
|
|
return false;
|
|
}
|
|
|
|
bool XMLHandler::enterText()
|
|
{
|
|
NodeImpl *newNode = m_doc->createTextNode("");
|
|
if (currentNode()->addChild(newNode)) {
|
|
pushNode( newNode );
|
|
return true;
|
|
}
|
|
else {
|
|
delete newNode;
|
|
return false;
|
|
}
|
|
}
|
|
|
|
void XMLHandler::exitText()
|
|
{
|
|
if ( m_view && !currentNode()->attached() && !m_doc->hasPendingSheets() )
|
|
currentNode()->attach();
|
|
popNode();
|
|
}
|
|
|
|
// Attribute declarations are accepted but ignored.
// qt's xml parser (as of 2.2.3) does not currently give us values for type,
// valueDefault and value. When it does, we can store these somewhere and
// have default attributes on elements.
bool XMLHandler::attributeDecl(const TQString &/*eName*/,
                               const TQString &/*aName*/,
                               const TQString &/*type*/,
                               const TQString &/*valueDefault*/,
                               const TQString &/*value*/)
{
    return true;
}
|
|
|
|
// External entity declarations are accepted but ignored.
// ### insert these too - is there anything special we have to do here?
bool XMLHandler::externalEntityDecl(const TQString &/*name*/,
                                    const TQString &/*publicId*/,
                                    const TQString &/*systemId*/)
{
    return true;
}
|
|
|
|
bool XMLHandler::internalEntityDecl(const TQString &name, const TQString &value)
|
|
{
|
|
EntityImpl *e = new EntityImpl(m_doc,name);
|
|
// ### further parse entities inside the value and add them as separate nodes (or entityreferences)?
|
|
e->addChild(m_doc->createTextNode(new DOMStringImpl(value.tqunicode(), value.length())));
|
|
if (m_doc->doctype())
|
|
static_cast<GenericRONamedNodeMapImpl*>(m_doc->doctype()->entities())->addNode(e);
|
|
return true;
|
|
}
|
|
|
|
// Notation declarations are accepted but currently not stored; the intended
// implementation is kept below, disabled.
bool XMLHandler::notationDecl(const TQString &/*name*/,
                              const TQString &/*publicId*/,
                              const TQString &/*systemId*/)
{
// ### FIXME
//     if (m_doc->document()->doctype()) {
//         NotationImpl *n = new NotationImpl(m_doc,name,publicId,systemId);
//         static_cast<GenericRONamedNodeMapImpl*>(m_doc->document()->doctype()->notations())->addNode(n);
//     }
    return true;
}
|
|
|
|
// Unparsed entity declarations are accepted but ignored.
// ### not implemented
bool XMLHandler::unparsedEntityDecl(const TQString &/*name*/,
                                    const TQString &/*publicId*/,
                                    const TQString &/*systemId*/,
                                    const TQString &/*notationName*/)
{
    return true;
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
|
|
// Creates a tokenizer feeding _doc (optionally shown in _view). The single
// XMLHandler instance is registered with the reader for every handler role.
XMLTokenizer::XMLTokenizer(DOM::DocumentImpl *_doc, KHTMLView *_view)
    : m_handler(_doc,_view)
{
    m_doc = _doc;
    m_view = _view;

    // No script list iteration and no pending external script yet.
    m_scriptsIt = 0;
    m_cachedScript = 0;
    m_noErrors = true;

    m_reader.setFeature("http://xml.org/sax/features/namespace-prefixes", true);

    m_reader.setContentHandler( &m_handler );
    m_reader.setLexicalHandler( &m_handler );
    m_reader.setErrorHandler( &m_handler );
    m_reader.setDeclHandler( &m_handler );
    m_reader.setDTDHandler( &m_handler );
}
|
|
|
|
// Frees the script iterator and drops our reference on a script that is
// still being loaded, if any.
XMLTokenizer::~XMLTokenizer()
{
    delete m_scriptsIt; // deleting a null pointer is a no-op

    if (m_cachedScript)
        m_cachedScript->deref(this);
}
|
|
|
|
|
|
void XMLTokenizer::begin()
|
|
{
|
|
// parse xml file
|
|
m_reader.parse( &m_source, true );
|
|
}
|
|
|
|
void XMLTokenizer::write( const TokenizerString &str, bool appendData )
|
|
{
|
|
if ( !m_noErrors && appendData )
|
|
return;
|
|
if ( appendData ) {
|
|
m_source.appendXML( str.toString() );
|
|
|
|
} else {
|
|
m_source.setData( str.toString() );
|
|
}
|
|
m_noErrors = m_reader.parseContinue();
|
|
}
|
|
|
|
void XMLTokenizer::end()
|
|
{
|
|
m_source.setFinished( true );
|
|
//if ( m_noErrors )
|
|
//m_noErrors = m_reader.parseContinue();
|
|
emit finishedParsing();
|
|
}
|
|
|
|
void XMLTokenizer::finish()
|
|
{
|
|
m_source.setFinished( true );
|
|
if (!m_noErrors) {
|
|
// An error occurred during parsing of the code. Display an error page to the user (the DOM
|
|
// tree is created manually and includes an excerpt from the code where the error is located)
|
|
|
|
// ### for multiple error messages, display the code for each (can this happen?)
|
|
|
|
// Clear the document
|
|
int exceptioncode = 0;
|
|
while (m_doc->hasChildNodes())
|
|
static_cast<NodeImpl*>(m_doc)->removeChild(m_doc->firstChild(),exceptioncode);
|
|
|
|
TQString line, errorLocPtr;
|
|
if ( m_handler.errorLine ) {
|
|
TQString xmlCode = m_source.data();
|
|
TQTextIStream stream(&xmlCode);
|
|
for (unsigned long lineno = 0; lineno < m_handler.errorLine-1; lineno++)
|
|
stream.readLine();
|
|
line = stream.readLine();
|
|
|
|
for (unsigned long colno = 0; colno < m_handler.errorCol-1; colno++)
|
|
errorLocPtr += " ";
|
|
errorLocPtr += "^";
|
|
}
|
|
|
|
// Create elements for display
|
|
DocumentImpl *doc = m_doc;
|
|
NodeImpl *html = doc->createElementNS(XHTML_NAMESPACE,"html");
|
|
NodeImpl *body = doc->createElementNS(XHTML_NAMESPACE,"body");
|
|
NodeImpl *h1 = doc->createElementNS(XHTML_NAMESPACE,"h1");
|
|
NodeImpl *headingText = doc->createTextNode(i18n("XML parsing error"));
|
|
NodeImpl *errorText = doc->createTextNode(m_handler.errorProtocol());
|
|
NodeImpl *hr = 0;
|
|
NodeImpl *pre = 0;
|
|
NodeImpl *lineText = 0;
|
|
NodeImpl *errorLocText = 0;
|
|
if ( !line.isNull() ) {
|
|
hr = doc->createElementNS(XHTML_NAMESPACE,"hr");
|
|
pre = doc->createElementNS(XHTML_NAMESPACE,"pre");
|
|
lineText = doc->createTextNode(line+"\n");
|
|
errorLocText = doc->createTextNode(errorLocPtr);
|
|
}
|
|
|
|
// Construct DOM tree. We ignore exceptions as we assume they will not be thrown here (due to the
|
|
// fact we are using a known tag set)
|
|
doc->appendChild(html,exceptioncode);
|
|
html->appendChild(body,exceptioncode);
|
|
if ( body )
|
|
body->appendChild(h1,exceptioncode);
|
|
h1->appendChild(headingText,exceptioncode);
|
|
body->appendChild(errorText,exceptioncode);
|
|
body->appendChild(hr,exceptioncode);
|
|
body->appendChild(pre,exceptioncode);
|
|
if ( pre ) {
|
|
pre->appendChild(lineText,exceptioncode);
|
|
pre->appendChild(errorLocText,exceptioncode);
|
|
}
|
|
|
|
// Close the renderers so that they update their display correctly
|
|
// ### this should not be necessary, but requires changes in the rendering code...
|
|
h1->close();
|
|
if ( pre ) pre->close();
|
|
body->close();
|
|
|
|
m_doc->recalcStyle( NodeImpl::Inherit );
|
|
m_doc->updateRendering();
|
|
|
|
end();
|
|
}
|
|
else {
|
|
// Parsing was successful. Now locate all html <script> tags in the document and execute them
|
|
// one by one
|
|
addScripts(m_doc);
|
|
m_scriptsIt = new TQPtrListIterator<HTMLScriptElementImpl>(m_scripts);
|
|
executeScripts();
|
|
}
|
|
|
|
}
|
|
|
|
// Walks the subtree rooted at n in document order and appends every html
// <script> element found to m_scripts, the list executeScripts() runs from.
void XMLTokenizer::addScripts(NodeImpl *n)
{
    const bool isScript =
        n->id() == ID_SCRIPT || n->id() == makeId(xhtmlNamespace, ID_SCRIPT);
    if (isScript)
        m_scripts.append(static_cast<HTMLScriptElementImpl*>(n));

    for (NodeImpl *kid = n->firstChild(); kid; kid = kid->nextSibling())
        addScripts(kid);
}
|
|
|
|
void XMLTokenizer::executeScripts()
|
|
{
|
|
// Iterate through all of the html <script> tags in the document. For those that have a src attribute,
|
|
// start loading the script and return (executeScripts() will be called again once the script is loaded
|
|
// and continue where it left off). For scripts that don't have a src attribute, execute the code
|
|
// inside the tag
|
|
while (m_scriptsIt->current()) {
|
|
DOMString scriptSrc = m_scriptsIt->current()->getAttribute(ATTR_SRC);
|
|
TQString charset = m_scriptsIt->current()->getAttribute(ATTR_CHARSET).string();
|
|
|
|
if (!scriptSrc.isEmpty()) {
|
|
// we have a src attribute
|
|
m_cachedScript = m_doc->docLoader()->requestScript(scriptSrc, charset);
|
|
++(*m_scriptsIt);
|
|
if (m_cachedScript) {
|
|
m_cachedScript->ref(this); // will call executeScripts() again if already cached
|
|
return;
|
|
}
|
|
}
|
|
else {
|
|
// no src attribute - execute from contents of tag
|
|
TQString scriptCode = "";
|
|
NodeImpl *child;
|
|
for (child = m_scriptsIt->current()->firstChild(); child; child = child->nextSibling()) {
|
|
if ( ( child->nodeType() == Node::TEXT_NODE || child->nodeType() == Node::CDATA_SECTION_NODE) &&
|
|
static_cast<TextImpl*>(child)->string() )
|
|
scriptCode += TQConstString(static_cast<TextImpl*>(child)->string()->s,
|
|
static_cast<TextImpl*>(child)->string()->l).string();
|
|
}
|
|
// the script cannot do document.write until we support incremental parsing
|
|
// ### handle the case where the script deletes the node or redirects to
|
|
// another page, etc. (also in notifyFinished())
|
|
// ### the script may add another script node after this one which should be executed
|
|
if (m_view) {
|
|
m_view->part()->executeScript(DOM::Node(), scriptCode);
|
|
}
|
|
++(*m_scriptsIt);
|
|
}
|
|
}
|
|
|
|
// All scripts have finished executing, so calculate the style for the document and close
|
|
// the last element
|
|
m_doc->updateStyleSelector();
|
|
|
|
// We are now finished parsing
|
|
end();
|
|
}
|
|
|
|
// Called when a cached object has finished loading. If it is the script
// requested from executeScripts(), its source is taken, our reference is
// released, the code is executed (when there is a view) and
// executeScripts() is called to continue with the remaining scripts.
void XMLTokenizer::notifyFinished(CachedObject *finishedObj)
{
    // Not the script we are waiting for: nothing to do.
    if (finishedObj != m_cachedScript)
        return;

    // Take the source before letting go of the cached object.
    DOMString scriptSource = m_cachedScript->script();
    m_cachedScript->deref(this);
    m_cachedScript = 0;

    if (m_view)
        m_view->part()->executeScript(DOM::Node(), scriptSource.string());

    executeScripts();
}
|
|
|
|
bool XMLTokenizer::isWaitingForScripts() const
|
|
{
|
|
return m_cachedScript != 0;
|
|
}
|
|
|
|
#include "xml_tokenizer.moc"
|
|
|