/***************************************************************************
    copyright            : (C) 2007 by Robby Stephenson
    email                : robby@periapsis.org
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of version 2 of the GNU General Public License as  *
 *   published by the Free Software Foundation;                            *
 *                                                                         *
 ***************************************************************************/

#include "arxivfetcher.h"
#include "messagehandler.h"
#include "../translators/xslthandler.h"
#include "../translators/tellicoimporter.h"
#include "../tellico_kernel.h"
#include "../tellico_utils.h"
#include "../collection.h"
#include "../entry.h"
#include "../core/netaccess.h"
#include "../imagefactory.h"
#include "../tellico_debug.h"

// NOTE(review): the six system header names below were lost when this file
// was mangled; they are restored from the symbols used in this file (i18n,
// locate, TDEConfigGroup, TQDomDocument, TQLabel, TQVBoxLayout) -- confirm
// against the build.
#include <tdelocale.h>
#include <kstandarddirs.h>
#include <tdeconfig.h>

#include <tqdom.h>
#include <tqlabel.h>
#include <tqlayout.h>

//#define ARXIV_TEST

namespace {
  // Value sent as the "max_results" query item of every request.
  static const int ARXIV_RETURNS_PER_REQUEST = 20;
  // Endpoint that every search URL is built from.
  static const char* ARXIV_BASE_URL = "http://export.arxiv.org/api/query";
}

using Tellico::Fetch::ArxivFetcher;

/**
 * Creates an idle fetcher: no XSLT handler, no running job, paging offset 0.
 */
ArxivFetcher::ArxivFetcher(TQObject* parent_)
    : Fetcher(parent_), m_xsltHandler(0), m_start(0), m_job(0), m_started(false) {
  // slotComplete() compares m_total against -1; give it a defined value so
  // that continueSearch() before any search() does not read garbage.
  m_total = -1;
}

ArxivFetcher::~ArxivFetcher() {
  delete m_xsltHandler;
  m_xsltHandler = 0;
}

/** Translated default name of this source. */
TQString ArxivFetcher::defaultName() {
  return i18n("arXiv.org");
}

/** Returns m_name, or the default name when m_name is empty. */
TQString ArxivFetcher::source() const {
  return m_name.isEmpty() ? defaultName() : m_name;
}

/** Only Bibtex collections can be searched with this fetcher. */
bool ArxivFetcher::canFetch(int type) const {
  return type == Data::Collection::Bibtex;
}

/** This source reads nothing from its config group. */
void ArxivFetcher::readConfigHook(const TDEConfigGroup&) {
}

/**
 * Starts a new search: stores the key and the whitespace-stripped value,
 * resets the paging offset and the known total, then issues the request.
 */
void ArxivFetcher::search(FetchKey key_, const TQString& value_) {
  m_key = key_;
  m_value = value_.stripWhiteSpace();
  m_started = true;
  m_start = 0;
  m_total = -1;
  doSearch();
}

/** Issues the next request using the stored key, value and offset. */
void ArxivFetcher::continueSearch() {
  m_started = true;
  doSearch();
}

/**
 * Builds the search URL for the stored key/value and starts an asynchronous
 * get job whose data and result signals are connected to slotData() and
 * slotComplete(). Calls stop() instead when the current collection type is
 * not supported or when no URL could be built.
 */
void ArxivFetcher::doSearch() {
  if(!canFetch(Kernel::self()->collectionType())) {
    message(i18n("%1 does not allow searching for this collection type.").arg(source()), MessageHandler::Warning);
    stop();
    return;
  }

  m_data.truncate(0);

//  myDebug() << "ArxivFetcher::search() - value = " << value_ << endl;

  KURL u = searchURL(m_key, m_value);
  if(u.isEmpty()) {
    stop();
    return;
  }

  m_job = TDEIO::get(u, false, false);
  connect(m_job, TQT_SIGNAL(data(TDEIO::Job*, const TQByteArray&)),
          TQT_SLOT(slotData(TDEIO::Job*, const TQByteArray&)));
  connect(m_job, TQT_SIGNAL(result(TDEIO::Job*)),
          TQT_SLOT(slotComplete(TDEIO::Job*)));
}

/**
 * Ends the current search: kills any running job, discards buffered data,
 * clears the started flag and emits signalDone(). Does nothing when no
 * search is in progress.
 */
void ArxivFetcher::stop() {
  if(!m_started) {
    return;
  }
//  myDebug() << "ArxivFetcher::stop()" << endl;
  if(m_job) {
    m_job->kill();
    m_job = 0;
  }
  m_data.truncate(0);
  m_started = false;
  emit signalDone(this);
}

/** Appends a chunk of downloaded bytes to m_data. */
void ArxivFetcher::slotData(TDEIO::Job*, const TQByteArray& data_) {
  TQDataStream stream(m_data, IO_WriteOnly | IO_Append);
  stream.writeRawBytes(data_.data(), data_.size());
}

/**
 * Handles the end of the download job. On success the buffered data is run
 * through the XSLT stylesheet, imported as a collection, and one
 * SearchResult is emitted per entry. Every exit path calls stop() so that
 * signalDone() is emitted.
 */
void ArxivFetcher::slotComplete(TDEIO::Job* job_) {
//  myDebug() << "ArxivFetcher::slotComplete()" << endl;
  // since the fetch is done, don't worry about holding the job pointer
  m_job = 0;

  if(job_->error()) {
    job_->showErrorDialog(Kernel::self()->widget());
    stop();
    return;
  }

  if(m_data.isEmpty()) {
    myDebug() << "ArxivFetcher::slotComplete() - no data" << endl;
    stop();
    return;
  }

#if 0
  kdWarning() << "Remove debug from arxivfetcher.cpp" << endl;
  TQFile f(TQString::fromLatin1("/tmp/test.xml"));
  if(f.open(IO_WriteOnly)) {
    TQTextStream t(&f);
    t.setEncoding(TQTextStream::UnicodeUTF8);
    t << TQCString(m_data, m_data.size()+1);
  }
  f.close();
#endif

  if(!m_xsltHandler) {
    initXSLTHandler();
    if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading
      stop();
      return;
    }
  }

  // the total is only parsed from the first response of a search
  if(m_total == -1) {
    TQDomDocument dom;
    if(!dom.setContent(m_data, true /*namespace*/)) {
      kdWarning() << "ArxivFetcher::slotComplete() - server did not return valid XML." << endl;
      // stop() is required on every exit path, otherwise signalDone() is
      // never emitted and the fetcher stays marked as started
      stop();
      return;
    }
    // the total is read from the first opensearch totalResults element
    TQDomNodeList list = dom.elementsByTagNameNS(TQString::fromLatin1("http://a9.com/-/spec/opensearch/1.1/"),
                                                 TQString::fromLatin1("totalResults"));
    if(list.count() > 0) {
      m_total = list.item(0).toElement().text().toInt();
    }
  }

  // assume result is always utf-8
  TQString str = m_xsltHandler->applyStylesheet(TQString::fromUtf8(m_data, m_data.size()));
  Import::TellicoImporter imp(str);
  Data::CollPtr coll = imp.collection();

  if(!coll) {
    myDebug() << "ArxivFetcher::slotComplete() - no valid result" << endl;
    stop();
    return;
  }

  Data::EntryVec entries = coll->entries();
  for(Data::EntryVec::Iterator entry = entries.begin(); entry != entries.end(); ++entry) {
    if(!m_started) {
      // might get aborted
      break;
    }
    // description shown with the result: author/publisher[/year]
    TQString desc = entry->field(TQString::fromLatin1("author"))
                  + TQChar('/') + entry->field(TQString::fromLatin1("publisher"));
    if(!entry->field(TQString::fromLatin1("year")).isEmpty()) {
      desc += TQChar('/') + entry->field(TQString::fromLatin1("year"));
    }

    SearchResult* r = new SearchResult(this, entry->title(), desc, entry->field(TQString::fromLatin1("isbn")));
    m_entries.insert(r->uid, Data::EntryPtr(entry));
    emit signalResultFound(r);
  }

  // the next request starts after everything received so far
  m_start = m_entries.count();
  m_hasMoreResults = m_start < m_total;
  stop(); // required
}

/**
 * Returns the entry stored for a previously emitted search result.
 *
 * If the entry has a URL but its image field is empty, a preview image of
 * the URL is requested and stored in that field. Unless the search was for
 * a versioned arXiv ID, a trailing version suffix is removed from the
 * entry's "arxiv" field.
 *
 * @param uid_ uid of the SearchResult emitted from slotComplete()
 * @return the entry, or a null pointer when no entry is stored for uid_
 */
Tellico::Data::EntryPtr ArxivFetcher::fetchEntry(uint uid_) {
  Data::EntryPtr entry = m_entries[uid_];
  if(!entry) {
    // an unknown uid yields a null pointer; do not dereference it
    kdWarning() << "ArxivFetcher::fetchEntry() - no entry in dict" << endl;
    return Data::EntryPtr();
  }

  // if URL but no cover image, fetch it
  if(!entry->field(TQString::fromLatin1("url")).isEmpty()) {
    Data::CollPtr coll = entry->collection();
    Data::FieldPtr field = coll->fieldByName(TQString::fromLatin1("cover"));
    if(!field && !coll->imageFields().isEmpty()) {
      // no "cover" field: fall back to the collection's first image field
      field = coll->imageFields().front();
    } else if(!field) {
      // no image field at all: add a new "cover" field
      field = new Data::Field(TQString::fromLatin1("cover"), i18n("Front Cover"), Data::Field::Image);
      coll->addField(field);
    }
    if(entry->field(field).isEmpty()) {
      TQPixmap pix = NetAccess::filePreview(entry->field(TQString::fromLatin1("url")));
      if(!pix.isNull()) {
        TQString id = ImageFactory::addImage(pix, TQString::fromLatin1("PNG"));
        if(!id.isEmpty()) {
          entry->setField(field, id);
        }
      }
    }
  }

  TQRegExp versionRx(TQString::fromLatin1("v\\d+$"));
  // if the original search was not for a versioned ID, remove it
  if(m_key != ArxivID || !m_value.contains(versionRx)) {
    TQString arxiv = entry->field(TQString::fromLatin1("arxiv"));
    arxiv.remove(versionRx);
    entry->setField(TQString::fromLatin1("arxiv"), arxiv);
  }
  return entry;
}

/**
 * Locates arxiv2tellico.xsl and creates m_xsltHandler from it. On any
 * failure a warning is logged and m_xsltHandler is left null.
 */
void ArxivFetcher::initXSLTHandler() {
  TQString xsltfile = locate("appdata", TQString::fromLatin1("arxiv2tellico.xsl"));
  if(xsltfile.isEmpty()) {
    kdWarning() << "ArxivFetcher::initXSLTHandler() - can not locate arxiv2tellico.xsl." << endl;
    return;
  }

  KURL u;
  u.setPath(xsltfile);

  delete m_xsltHandler;
  m_xsltHandler = new XSLTHandler(u);
  if(!m_xsltHandler->isValid()) {
    kdWarning() << "ArxivFetcher::initXSLTHandler() - error in arxiv2tellico.xsl." << endl;
    delete m_xsltHandler;
    m_xsltHandler = 0;
    return;
  }
}

/**
 * Builds the query URL for a key/value pair, using the current m_start as
 * the "start" item and ARXIV_RETURNS_PER_REQUEST as "max_results".
 *
 * @return the URL, or an empty KURL when the key is not recognized
 */
KURL ArxivFetcher::searchURL(FetchKey key_, const TQString& value_) const {
  KURL u(TQString::fromLatin1(ARXIV_BASE_URL));
  u.addQueryItem(TQString::fromLatin1("start"), TQString::number(m_start));
  u.addQueryItem(TQString::fromLatin1("max_results"), TQString::number(ARXIV_RETURNS_PER_REQUEST));

  // quotes should be used if spaces are present, just use all the time
  TQString quotedValue = '"' + value_ + '"';
  switch(key_) {
    case Title:
      u.addQueryItem(TQString::fromLatin1("search_query"), TQString::fromLatin1("ti:%1").arg(quotedValue));
      break;

    case Person:
      u.addQueryItem(TQString::fromLatin1("search_query"), TQString::fromLatin1("au:%1").arg(quotedValue));
      break;

    case Keyword:
      // keyword gets to use all the words without being quoted
      u.addQueryItem(TQString::fromLatin1("search_query"), TQString::fromLatin1("all:%1").arg(value_));
      break;

    case ArxivID:
      {
      // remove prefix and/or version number
      TQString value = value_;
      value.remove(TQRegExp(TQString::fromLatin1("^arxiv:"), false));
      value.remove(TQRegExp(TQString::fromLatin1("v\\d+$")));
      u.addQueryItem(TQString::fromLatin1("search_query"), TQString::fromLatin1("id:%1").arg(value));
      }
      break;

    default:
      // report the key actually passed in, not the member
      kdWarning() << "ArxivFetcher::search() - key not recognized: " << key_ << endl;
      return KURL();
  }

#ifdef ARXIV_TEST
  u = KURL::fromPathOrURL(TQString::fromLatin1("/home/robby/arxiv.xml"));
#endif
  myDebug() << "ArxivFetcher::search() - url: " << u.url() << endl;
  return u;
}

/**
 * Starts a search meant to update an existing entry: by its "arxiv" field
 * when set, otherwise by its title. Emits signalDone() immediately when the
 * entry has neither.
 */
void ArxivFetcher::updateEntry(Data::EntryPtr entry_) {
  TQString id = entry_->field(TQString::fromLatin1("arxiv"));
  if(!id.isEmpty()) {
    search(Fetch::ArxivID, id);
    return;
  }

  // optimistically try searching for title and rely on Collection::sameEntry() to figure things out
  TQString t = entry_->field(TQString::fromLatin1("title"));
  if(!t.isEmpty()) {
    search(Fetch::Title, t);
    return;
  }

  myDebug() << "ArxivFetcher::updateEntry() - insufficient info to search" << endl;
  emit signalDone(this); // always need to emit this if not continuing with the search
}

/**
 * Looks up the entry's arXiv ID with a blocking read and merges the first
 * result into the entry without overwriting. Returns silently when the
 * entry is null, has no "arxiv" field, the read returns nothing, or the
 * stylesheet cannot be loaded.
 */
void ArxivFetcher::updateEntrySynchronous(Data::EntryPtr entry) {
  if(!entry) {
    return;
  }
  TQString arxiv = entry->field(TQString::fromLatin1("arxiv"));
  if(arxiv.isEmpty()) {
    return;
  }

  // NOTE(review): searchURL() uses the current m_start as the "start" item;
  // if this fetcher has already paged through results, the ID lookup may
  // start past the single expected hit -- confirm with callers.
  KURL u = searchURL(ArxivID, arxiv);
  TQString xml = FileHandler::readTextFile(u, true, true);
  if(xml.isEmpty()) {
    return;
  }

  if(!m_xsltHandler) {
    initXSLTHandler();
    if(!m_xsltHandler) { // probably an error somewhere in the stylesheet loading
      return;
    }
  }

  // assume result is always utf-8
  TQString str = m_xsltHandler->applyStylesheet(xml);
  Import::TellicoImporter imp(str);
  Data::CollPtr coll = imp.collection();
  if(coll && coll->entryCount() > 0) {
    myLog() << "ArxivFetcher::updateEntrySynchronous() - found Arxiv result, merging" << endl;
    Data::Collection::mergeEntry(entry, coll->entries().front(), false /*overwrite*/);
    // the arxiv id might have a version#
    entry->setField(TQString::fromLatin1("arxiv"),
                    coll->entries().front()->field(TQString::fromLatin1("arxiv")));
  }
}

/** Creates the configuration widget for this source. */
Tellico::Fetch::ConfigWidget* ArxivFetcher::configWidget(TQWidget* parent_) const {
  return new ArxivFetcher::ConfigWidget(parent_, this);
}

/** Builds a widget that only shows a "no options" label. */
ArxivFetcher::ConfigWidget::ConfigWidget(TQWidget* parent_, const ArxivFetcher*)
    : Fetch::ConfigWidget(parent_) {
  TQVBoxLayout* l = new TQVBoxLayout(optionsWidget());
  l->addWidget(new TQLabel(i18n("This source has no options."), optionsWidget()));
  l->addStretch();
}

/** Nothing to save: this source has no options. */
void ArxivFetcher::ConfigWidget::saveConfig(TDEConfigGroup&) {
}

TQString ArxivFetcher::ConfigWidget::preferredName() const {
  return ArxivFetcher::defaultName();
}

#include "arxivfetcher.moc"