You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
704 lines
22 KiB
704 lines
22 KiB
/***************************************************************************
|
|
* Copyright (C) 2004 by Puto Moura *
|
|
* mojo@localhost.localdomain *
|
|
* *
|
|
* This program is free software; you can redistribute it and/or modify *
|
|
* it under the terms of the GNU General Public License as published by *
|
|
* the Free Software Foundation; either version 2 of the License, or *
|
|
* (at your option) any later version. *
|
|
* *
|
|
* This program is distributed in the hope that it will be useful, *
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
|
* GNU General Public License for more details. *
|
|
* *
|
|
* You should have received a copy of the GNU General Public License *
|
|
* along with this program; if not, write to the *
|
|
* Free Software Foundation, Inc., *
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
|
|
***************************************************************************/
|
|
#include "linkchecker.h"
|
|
#include "searchmanager.h"
|
|
#include "../utils/utils.h"
|
|
#include "../parser/htmlparser.h"
|
|
|
|
#include <tqstring.h>
|
|
#include <tqtimer.h>
|
|
#include <tqtextcodec.h>
|
|
#include <tqcstring.h>
|
|
|
|
#include <tdeio/netaccess.h>
|
|
#include <tdeio/global.h>
|
|
#include <tdeio/job.h>
|
|
#include <tdeio/scheduler.h>
|
|
#include <tdeio/slave.h>
|
|
#include <kmimetype.h>
|
|
#include <tdeapplication.h>
|
|
#include <tdelocale.h>
|
|
#include <tdehtml_part.h>
|
|
#include <dom/html_misc.h>
|
|
#include <dom/dom_node.h>
|
|
#include <dom/dom_string.h>
|
|
|
|
|
|
// Class-wide counter; the constructor pre-increments it and prints the value
// as a sequence number in its debug line.
int LinkChecker::count_ = 0;
|
|
|
|
// Creates a checker for one link.
//
// linkstatus - the link to verify; must be non-null and not yet checked.
// time_out   - timeout in seconds (check() multiplies it by 1000 for the timer).
// parent     - forwarded to TQObject.
// name       - forwarded to TQObject.
//
// The search manager is left unset here; it is supplied via setSearchManager().
LinkChecker::LinkChecker(LinkStatus* linkstatus, int time_out,
                         TQObject *parent, const char *name)
    : TQObject(parent, name),
      search_manager_(0),
      linkstatus_(linkstatus),
      t_job_(0),
      time_out_(time_out),
      checker_(0),
      document_charset_(),
      redirection_(false),
      header_checked_(false),
      finnished_(false),
      parsing_(false),
      is_charset_checked_(false),
      has_defined_charset_(false)
{
    Q_ASSERT(linkstatus_);
    Q_ASSERT(!linkstatus_->checked());

    // Number this checker so the debug trace of concurrent checks can be followed.
    ++count_;
    kdDebug(23100) << endl << count_ << ": " << "Checking " << linkstatus_->absoluteUrl().url() << endl;
}
|
|
|
|
// Nothing is released here: the destructor body is intentionally empty.
LinkChecker::~LinkChecker()
{
}
|
|
|
|
// Stores the search manager this checker consults (user agent settings,
// domain tests, cached HTML parts). A null pointer is reported through
// Q_ASSERT but is still stored.
void LinkChecker::setSearchManager(SearchManager* search_manager)
{
    Q_ASSERT(search_manager);

    search_manager_ = search_manager;
}
|
|
|
|
void LinkChecker::check()
|
|
{
|
|
Q_ASSERT(!finnished_);
|
|
|
|
KURL url(linkStatus()->absoluteUrl());
|
|
Q_ASSERT(url.isValid());
|
|
|
|
if(url.hasRef()) {
|
|
KMimeType::Ptr mimeType = KMimeType::findByURL(url);
|
|
if(mimeType->is("text/html") || mimeType->is("application/xml")) {
|
|
checkRef();
|
|
return;
|
|
}
|
|
}
|
|
|
|
t_job_ = TDEIO::get(url, false, false);
|
|
|
|
t_job_->addMetaData("PropagateHttpHeader", "true"); // to have the http header
|
|
|
|
if (linkstatus_->parent()) {
|
|
t_job_->addMetaData("referrer", linkstatus_->parent()->absoluteUrl().prettyURL());
|
|
}
|
|
|
|
if(search_manager_->sendIdentification())
|
|
{
|
|
t_job_->addMetaData("SendUserAgent", "true");
|
|
t_job_->addMetaData("UserAgent", search_manager_->userAgent());
|
|
}
|
|
else
|
|
t_job_->addMetaData("SendUserAgent", "false");
|
|
|
|
|
|
TQObject::connect(t_job_, TQT_SIGNAL(data(TDEIO::Job *, const TQByteArray &)),
|
|
this, TQT_SLOT(slotData(TDEIO::Job *, const TQByteArray &)));
|
|
TQObject::connect(t_job_, TQT_SIGNAL(mimetype(TDEIO::Job *, const TQString &)),
|
|
this, TQT_SLOT(slotMimetype(TDEIO::Job *, const TQString &)));
|
|
TQObject::connect(t_job_, TQT_SIGNAL(result(TDEIO::Job *)),
|
|
this, TQT_SLOT(slotResult(TDEIO::Job *)));
|
|
TQObject::connect(t_job_, TQT_SIGNAL(redirection(TDEIO::Job *, const KURL &)),
|
|
this, TQT_SLOT(slotRedirection(TDEIO::Job *, const KURL &)));
|
|
|
|
TQTimer::singleShot( time_out_ * 1000, this, TQT_SLOT(slotTimeOut()) );
|
|
|
|
t_job_->setInteractive(false);
|
|
}
|
|
|
|
void LinkChecker::slotTimeOut()
|
|
{
|
|
if(!finnished_ && !parsing_)
|
|
{
|
|
kdDebug(23100) << "timeout: " << linkstatus_->absoluteUrl().url() << endl;
|
|
if(t_job_ && t_job_->slave())
|
|
kdDebug(23100) << " - " << t_job_->slave() << "/" << t_job_->slave()->slave_pid() << endl;
|
|
else
|
|
kdDebug(23100) << endl;
|
|
|
|
|
|
// Q_ASSERT(t_job_); // can happen: e.g. bad result signal
|
|
if(t_job_->error() != TDEIO::ERR_USER_CANCELED)
|
|
{
|
|
linkstatus_->setErrorOccurred(true);
|
|
linkstatus_->setChecked(true);
|
|
linkstatus_->setError(i18n("Timeout"));
|
|
linkstatus_->setStatus(LinkStatus::TIMEOUT);
|
|
|
|
killJob();
|
|
finnish();
|
|
}
|
|
}
|
|
}
|
|
|
|
// Handles the job's mimetype signal.
//
// The mime type is always recorded on the link. For http/https URLs nothing
// else happens here, because the HTTP header only becomes available in
// slotData(). For every other protocol the link is declared OK and the job
// is stopped as soon as we know we do not need the content: either only the
// header was requested, or the document is not text/html.
// Job errors are left for slotResult() to report.
void LinkChecker::slotMimetype (TDEIO::Job* /*job*/, const TQString &type)
{
    if(finnished_)
        return;

    Q_ASSERT(t_job_);

    LinkStatus* ls = linkstatus_;
    Q_ASSERT(ls);

    ls->setMimeType(type);
    KURL url = ls->absoluteUrl();

    if(t_job_->error())
        return; // slotResult() deals with failures

    bool const header_only = ls->onlyCheckHeader();

    // http(s) has to go through slotData() to obtain the header; an http
    // job can also deliver an error page although job->error() is false.
    if(url.protocol().startsWith("http"))
        return;

    if(header_only || type != "text/html")
    {
        ls->setStatusText("OK");
        ls->setStatus(LinkStatus::SUCCESSFULL);

        killJob();
        finnish();
    }
}
|
|
|
|
// Handles a chunk of data delivered by the transfer job.
//
// For http/https URLs the HTTP header is extracted on the first chunk. When
// only the header is wanted, or the document is not text/html, or the job
// reports an error page, the HTTP status is stored, the job is killed and
// the check is finished. Otherwise the chunk is decoded with the document
// charset (ISO 8859-1 when none was found) and appended to doc_html_.
//
// data - raw bytes of the current chunk.
void LinkChecker::slotData(TDEIO::Job* /*job*/, const TQByteArray& data)
{
    if(finnished_)
        return;

    // Validate the job before touching it; the debug output below used to
    // dereference t_job_ and its slave ahead of this assertion.
    Q_ASSERT(t_job_);
    if(!t_job_)
        return;

    if(t_job_->slave())
        kdDebug(23100) << "LinkChecker::slotData -> " << linkstatus_->absoluteUrl().url()
                       << " - " << t_job_->slave() << "/" << t_job_->slave()->slave_pid() << endl;
    else
        kdDebug(23100) << "LinkChecker::slotData -> " << linkstatus_->absoluteUrl().url() << endl;

    LinkStatus* ls = linkstatus_;
    Q_ASSERT(ls);

    KURL url = ls->absoluteUrl();

    if(!t_job_->error())
    {
        if(ls->onlyCheckHeader())
        {
            Q_ASSERT(header_checked_ == false);
            // for any other protocol the job should have been killed in slotMimetype
            Q_ASSERT(url.protocol() == "http" || url.protocol() == "https");

            // get the header and quit
            if(url.protocol().startsWith("http"))
            {
                ls->setHttpHeader(getHttpHeader(t_job_));

                if(t_job_->isErrorPage())
                    ls->setIsErrorPage(true);

                if(header_checked_)
                {
                    killJob();
                    linkstatus_->setStatus(getHttpStatus());
                    linkstatus_->setChecked(true);
                    finnish();
                    return;
                }
            }
        }
        else
        {
            if(url.protocol().startsWith("http"))
            {
                if(!header_checked_)
                {
                    ls->setHttpHeader(getHttpHeader(t_job_));
                }
                if(ls->mimeType() != "text/html" && header_checked_)
                {
                    ls->setStatus(getHttpStatus());
                    // the job must be killed before finnish(): the original
                    // author observed a segfault with the opposite order
                    killJob();
                    finnish();
                    return;
                }
                else if(t_job_->isErrorPage() && header_checked_)
                {
                    ls->setIsErrorPage(true);
                    ls->setStatus(getHttpStatus());
                    killJob();
                    finnish();
                    return;
                }
            }
            else
            {
                Q_ASSERT(ls->mimeType() == "text/html");
            }

            // the charset is only looked up in the first chunk
            if(!is_charset_checked_)
                findDocumentCharset(data);

            TQTextCodec* codec = 0;
            if(has_defined_charset_)
                codec = TQTextCodec::codecForName(document_charset_);
            if(!codec)
                codec = TQTextCodec::codecForName("iso8859-1"); // default

            // NOTE(review): each chunk is decoded on its own; a multi-byte
            // character split across two chunks would presumably be decoded
            // incorrectly - confirm whether a stateful decoder is needed.
            doc_html_ += codec->toUnicode(data);
        }
    }
}
|
|
|
|
// Determines the charset of the document being downloaded.
//
// Must be called at most once per check (only the first stream of data is
// inspected). The charset announced by the HTTP header wins when a header was
// received; otherwise the meta elements found in doc are consulted.
// has_defined_charset_ is raised when a non-empty charset was found.
//
// doc - the beginning of the document.
void LinkChecker::findDocumentCharset(TQString const& doc)
{
    Q_ASSERT(!is_charset_checked_);
    is_charset_checked_ = true;

    if(header_checked_)
        document_charset_ = linkstatus_->httpHeader().charset();

    // isEmpty() is also true for a null string
    if(document_charset_.isEmpty())
        document_charset_ = HtmlParser::findCharsetInMetaElement(doc);

    if(!document_charset_.isEmpty())
        has_defined_charset_ = true;
}
|
|
|
|
// only comes here if an error happened or in case of a clean html page
|
|
// if onlyCheckHeader is false
|
|
void LinkChecker::slotResult(TDEIO::Job* /*job*/)
|
|
{
|
|
if(finnished_)
|
|
return;
|
|
|
|
kdDebug(23100) << "LinkChecker::slotResult -> " << linkstatus_->absoluteUrl().url() << endl;
|
|
|
|
Q_ASSERT(t_job_);
|
|
if(!t_job_)
|
|
return;
|
|
|
|
if(redirection_) {
|
|
if(!processRedirection(redirection_url_)) {
|
|
t_job_ = 0;
|
|
linkstatus_->setChecked(true);
|
|
finnish();
|
|
return;
|
|
}
|
|
}
|
|
|
|
TDEIO::TransferJob* job = t_job_;
|
|
t_job_ = 0;
|
|
|
|
emit jobFinnished(this);
|
|
|
|
if(job->error() == TDEIO::ERR_USER_CANCELED)
|
|
{
|
|
// FIXME This can happen! If the job is non interactive...
|
|
kdWarning(23100) << endl << "Job killed quietly, yet signal result was emited..." << endl;
|
|
kdDebug(23100) << linkstatus_->toString() << endl;
|
|
finnish();
|
|
return;
|
|
}
|
|
|
|
LinkStatus* ls = 0;
|
|
if(redirection_)
|
|
ls = linkStatus()->redirection();
|
|
else
|
|
ls = linkstatus_;
|
|
Q_ASSERT(ls);
|
|
|
|
if(!(!ls->onlyCheckHeader() ||
|
|
job->error() ||
|
|
!header_checked_))
|
|
kdWarning(23100) << ls->toString() << endl;
|
|
|
|
Q_ASSERT(!ls->onlyCheckHeader() || job->error() || !header_checked_);
|
|
|
|
if(ls->isErrorPage())
|
|
kdWarning(23100) << "\n\n" << ls->toString() << endl << endl;
|
|
|
|
Q_ASSERT(!job->isErrorPage());
|
|
|
|
if(job->error())
|
|
{
|
|
kdDebug(23100) << "Job error: " << job->errorString() << endl;
|
|
kdDebug(23100) << "Job error code: " << job->error() << endl;
|
|
|
|
if(job->error() == TDEIO::ERR_IS_DIRECTORY)
|
|
{
|
|
ls->setStatusText("OK");
|
|
ls->setStatus(LinkStatus::SUCCESSFULL);
|
|
}
|
|
else
|
|
{
|
|
ls->setErrorOccurred(true);
|
|
if(job->error() == TDEIO::ERR_SERVER_TIMEOUT)
|
|
ls->setStatus(LinkStatus::TIMEOUT);
|
|
else
|
|
ls->setStatus(LinkStatus::BROKEN);
|
|
|
|
if(job->errorString().isEmpty())
|
|
kdWarning(23100) << "\n\nError string is empty, error = " << job->error() << "\n\n\n";
|
|
if(job->error() != TDEIO::ERR_NO_CONTENT)
|
|
ls->setError(job->errorString());
|
|
else
|
|
ls->setError(i18n("No Content"));
|
|
}
|
|
}
|
|
|
|
else
|
|
{
|
|
if(!ls->absoluteUrl().protocol().startsWith("http")) {
|
|
ls->setStatusText("OK");
|
|
ls->setStatus(LinkStatus::SUCCESSFULL);
|
|
}
|
|
else
|
|
{
|
|
if(!header_checked_)
|
|
{
|
|
kdDebug(23100) << "\n\nheader not received... checking again...\n\n\n";
|
|
//check again
|
|
check();
|
|
return;
|
|
}
|
|
Q_ASSERT(header_checked_);
|
|
|
|
ls->setStatus(getHttpStatus());
|
|
}
|
|
|
|
if(!doc_html_.isNull() && !doc_html_.isEmpty())
|
|
{
|
|
ls->setDocHtml(doc_html_);
|
|
|
|
parsing_ = true;
|
|
HtmlParser parser(doc_html_);
|
|
|
|
if(parser.hasBaseUrl())
|
|
ls->setBaseURI(KURL(parser.baseUrl().url()));
|
|
if(parser.hasTitle())
|
|
ls->setHtmlDocTitle(parser.title().attributeTITLE());
|
|
|
|
ls->setChildrenNodes(parser.nodes());
|
|
parsing_ = false;
|
|
}
|
|
}
|
|
finnish();
|
|
}
|
|
|
|
|
|
// Handles the job's redirection signal: only remembers that a redirection
// happened and where it leads. The actual processing is deferred to
// slotResult(), which calls processRedirection() with the stored URL.
void LinkChecker::slotRedirection (TDEIO::Job* /*job*/, const KURL &url)
{
    kdDebug(23100) << "LinkChecker::slotRedirection -> "
                   << linkstatus_->absoluteUrl().url() << " -> " << url.url() << endl;

    redirection_url_ = url;
    redirection_ = true;
}
|
|
|
|
// Records a redirection on the checked link and creates the LinkStatus that
// describes the redirection target.
//
// The original link is marked as a checked HTTP redirection. The target is
// created as a copy of the original link, pointed at toUrl, attached to the
// original as both its redirection and its child, and given an external
// domain depth derived from the search manager's localDomain() answers.
//
// toUrl - destination of the redirection.
//
// Returns true immediately when the check already finished. Otherwise
// returns false (and leaves the target unchecked) when toUrl is invalid or
// the search manager reports via existUrl() that it already exists; returns
// true (target marked checked) in the remaining case.
bool LinkChecker::processRedirection(KURL const& toUrl)
{
    if(finnished_)
        return true;

    kdDebug(23100) << "LinkChecker::processRedirection -> " << linkstatus_->absoluteUrl().url() << " -> " << toUrl.url() << endl;

    Q_ASSERT(t_job_);
    Q_ASSERT(linkstatus_->absoluteUrl().protocol().startsWith("http"));
    Q_ASSERT(redirection_);

    // State of the link that redirected us. The header is fetched without
    // remembering it as checked (remember_check = false).
    linkstatus_->setHttpHeader(getHttpHeader(t_job_, false));
    linkstatus_->setIsRedirection(true);
    linkstatus_->setStatusText("redirection");
    linkstatus_->setStatus(LinkStatus::HTTP_REDIRECTION);
    linkstatus_->setChecked(true);

    // Build the status object for the redirection target.
    LinkStatus* ls_red = new LinkStatus(*linkstatus_);
    ls_red->setAbsoluteUrl(toUrl);
    ls_red->setRootUrl(linkstatus_->rootUrl());

    if(!linkstatus_->onlyCheckHeader())
        ls_red->setOnlyCheckHeader(false);

    linkstatus_->setRedirection(ls_red);
    ls_red->setParent(linkstatus_);
    ls_red->setOriginalUrl(toUrl.url());

    Q_ASSERT(search_manager_);

    if(search_manager_->localDomain(ls_red->absoluteUrl()))
        ls_red->setExternalDomainDepth(-1);
    else if(search_manager_->localDomain(linkstatus_->absoluteUrl()))
        ls_red->setExternalDomainDepth(linkstatus_->externalDomainDepth() + 1);
    else
        ls_red->setExternalDomainDepth(linkstatus_->externalDomainDepth());

    // existUrl() is only consulted for valid URLs (short-circuit).
    bool const accepted = toUrl.isValid()
                          && !search_manager_->existUrl(toUrl, linkstatus_->absoluteUrl());
    ls_red->setChecked(accepted);
    return accepted;
}
|
|
|
|
void LinkChecker::finnish()
|
|
{
|
|
Q_ASSERT(!t_job_);
|
|
|
|
if(!finnished_)
|
|
{
|
|
kdDebug(23100) << "LinkChecker::finnish -> " << linkstatus_->absoluteUrl().url() << endl;
|
|
|
|
finnished_ = true;
|
|
|
|
if(redirection_)
|
|
Q_ASSERT(linkstatus_->checked());
|
|
else
|
|
linkstatus_->setChecked(true);
|
|
|
|
emit transactionFinished(linkstatus_, this);
|
|
}
|
|
}
|
|
|
|
// Builds the HTTP response header from the "HTTP-Headers" meta data of the
// current job (t_job_; the job parameter is unused).
//
// remember_check - when true and a header was received, header_checked_ is
//                  set. An empty header always resets header_checked_ and is
//                  logged as a warning.
//
// Returns the header parsed from the meta data string, which may be empty.
HttpResponseHeader LinkChecker::getHttpHeader(TDEIO::Job* /*job*/, bool remember_check)
{
    Q_ASSERT(!finnished_);
    Q_ASSERT(t_job_);

    TQString header_string = t_job_->queryMetaData("HTTP-Headers");

    // isEmpty() is also true for a null string
    if(!header_string.isEmpty())
    {
        if(remember_check)
            header_checked_ = true;
    }
    else
    {
        header_checked_ = false;
        kdWarning(23100) << "header_string.isNull() || header_string.isEmpty(): "
                         << linkstatus_->toString() << endl;
    }

    return HttpResponseHeader(header_string);
}
|
|
|
|
void LinkChecker::checkRef()
|
|
{
|
|
KURL url(linkStatus()->absoluteUrl());
|
|
Q_ASSERT(url.hasRef());
|
|
|
|
TQString ref = url.ref();
|
|
if(ref == "" || ref == "top") {
|
|
linkstatus_->setStatusText("OK");
|
|
linkstatus_->setStatus(LinkStatus::SUCCESSFULL);
|
|
finnish();
|
|
return;
|
|
}
|
|
|
|
TQString url_base;
|
|
LinkStatus const* ls_parent = 0;
|
|
int i_ref = -1;
|
|
|
|
if(linkStatus()->originalUrl().startsWith("#"))
|
|
ls_parent = linkStatus()->parent();
|
|
|
|
else
|
|
{
|
|
i_ref = url.url().find("#");
|
|
url_base = url.url().left(i_ref);
|
|
//kdDebug(23100) << "url_base: " << url_base << endl;
|
|
|
|
Q_ASSERT(search_manager_);
|
|
|
|
ls_parent = search_manager_->linkStatus(url_base);
|
|
}
|
|
|
|
if(ls_parent)
|
|
checkRef(ls_parent);
|
|
else
|
|
{
|
|
url = KURL::fromPathOrURL(url.url().left(i_ref));
|
|
checkRef(url);
|
|
}
|
|
}
|
|
|
|
// Checks the fragment of this link against the document found at url.
//
// The search manager's cache of HTML parts is consulted first. On a miss the
// document is downloaded, loaded into a new TDEHTMLPart and the part is
// registered with the search manager (also when the download failed, in
// which case the part stays empty). The link ends up SUCCESSFULL when the
// anchor exists in the part and BROKEN otherwise.
//
// url - location of the document that should contain the anchor.
void LinkChecker::checkRef(KURL const& url)
{
    Q_ASSERT(search_manager_);

    TQString url_string = url.url();
    TDEHTMLPart* html_part = search_manager_->htmlPart(url_string);

    if(!html_part)
    {
        kdDebug() << "new TDEHTMLPart: " + url_string << endl;

        html_part = new TDEHTMLPart();
        html_part->setOnlyLocalReferences(true);

        TQString tmpFile;
        if(!TDEIO::NetAccess::download(url, tmpFile, 0))
        {
            kdDebug(23100) << TDEIO::NetAccess::lastErrorString() << endl;
        }
        else
        {
            TQString doc_html = FileManager::read(tmpFile);
            html_part->begin();
            html_part->write(doc_html);
            html_part->end();

            TDEIO::NetAccess::removeTempFile(tmpFile);
        }

        search_manager_->addHtmlPart(url_string, html_part);
    }

    bool const anchor_found = hasAnchor(html_part, linkStatus()->absoluteUrl().ref());
    if(!anchor_found)
    {
        linkstatus_->setErrorOccurred(true);
        linkstatus_->setError(i18n( "Link destination not found." ));
        linkstatus_->setStatus(LinkStatus::BROKEN);
    }
    else
    {
        linkstatus_->setStatusText("OK");
        linkstatus_->setStatus(LinkStatus::SUCCESSFULL);
    }

    finnish();
}
|
|
|
|
// Checks the fragment of this link against a document that was already
// downloaded for another link.
//
// The search manager's cache of HTML parts is consulted first. On a miss a
// new TDEHTMLPart is filled with the HTML stored in linkstatus_parent and
// registered with the search manager. The link ends up SUCCESSFULL when the
// anchor exists in the part and BROKEN otherwise.
//
// linkstatus_parent - link whose document should contain the anchor; it is
//                     dereferenced without a null check.
void LinkChecker::checkRef(LinkStatus const* linkstatus_parent)
{
    Q_ASSERT(search_manager_);

    TQString url_string = linkstatus_parent->absoluteUrl().url();
    TDEHTMLPart* html_part = search_manager_->htmlPart(url_string);

    if(!html_part)
    {
        kdDebug() << "new TDEHTMLPart: " + url_string << endl;

        html_part = new TDEHTMLPart();
        html_part->setOnlyLocalReferences(true);

        html_part->begin();
        html_part->write(linkstatus_parent->docHtml());
        html_part->end();

        search_manager_->addHtmlPart(url_string, html_part);
    }

    bool const anchor_found = hasAnchor(html_part, linkStatus()->absoluteUrl().ref());
    if(!anchor_found)
    {
        linkstatus_->setErrorOccurred(true);
        linkstatus_->setError(i18n( "Link destination not found." ));
        linkstatus_->setStatus(LinkStatus::BROKEN);
    }
    else
    {
        linkstatus_->setStatusText("OK");
        linkstatus_->setStatus(LinkStatus::SUCCESSFULL);
    }

    finnish();
}
|
|
|
|
// Tells whether the document loaded in html_part contains the given anchor.
//
// The anchor is looked up first by name in the document's anchors
// collection and, failing that, as an element id.
//
// html_part - part holding the parsed document.
// anchor    - fragment to look for; expected to be non-null.
//
// Returns true when either lookup yields a node.
bool LinkChecker::hasAnchor(TDEHTMLPart* html_part, TQString const& anchor)
{
    DOM::HTMLDocument htmlDocument = html_part->htmlDocument();
    DOM::HTMLCollection named_anchors = htmlDocument.anchors();

    DOM::DOMString name_ref(anchor);
    Q_ASSERT(!name_ref.isNull());

    DOM::Node match = named_anchors.namedItem(name_ref);
    if(match.isNull())
        match = htmlDocument.getElementById(name_ref);

    return !match.isNull();
}
|
|
|
|
void LinkChecker::killJob()
|
|
{
|
|
if(!t_job_)
|
|
return;
|
|
|
|
TDEIO::TransferJob* aux = t_job_;
|
|
t_job_ = 0;
|
|
aux->disconnect(this);
|
|
aux->kill(true); // quietly
|
|
}
|
|
|
|
// Maps the status code of the stored HTTP header to a LinkStatus::Status.
//
// Only the first character of the code's decimal representation is
// examined: '2' is a success, '3' a redirection, '4' a client error and
// '5' a server error. Anything else is UNDETERMINED.
LinkStatus::Status LinkChecker::getHttpStatus() const
{
    TQString const status_code = TQString::number(linkstatus_->httpHeader().statusCode());

    switch(status_code.at(0).latin1())
    {
    case '2':
        return LinkStatus::SUCCESSFULL;
    case '3':
        return LinkStatus::HTTP_REDIRECTION;
    case '4':
        return LinkStatus::HTTP_CLIENT_ERROR;
    case '5':
        return LinkStatus::HTTP_SERVER_ERROR;
    default:
        return LinkStatus::UNDETERMINED;
    }
}
|
|
|
|
#include "linkchecker.moc"
|