/***************************************************************************
    copyright            : (C) 2006 by Robby Stephenson
    email                : robby@periapsis.org
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of version 2 of the GNU General Public License as  *
 *   published by the Free Software Foundation;                            *
 *                                                                         *
 ***************************************************************************/

#include "dcimporter.h"
#include "../collections/bookcollection.h"
#include "tellico_xml.h"
#include "../tellico_debug.h"

using Tellico::Import::DCImporter;

namespace {

// Looks up the descendants of `parent` named `tag`, either qualified by
// `dcNamespace` (namespaced == true) or by bare tag name (namespaced == false).
TQDomNodeList dublinCoreElements(const TQDomElement& parent, const TQString& dcNamespace,
                                 const char* tag, bool namespaced) {
  const TQString tagName = TQString::tqfromLatin1(tag);
  if(namespaced) {
    return parent.elementsByTagNameNS(dcNamespace, tagName);
  }
  return parent.elementsByTagName(tagName);
}

// Joins the text of every node in `nodes` with "; ", in document order.
TQString joinedTexts(const TQDomNodeList& nodes) {
  TQStringList texts;
  for(uint k = 0; k < nodes.count(); ++k) {
    texts << nodes.item(k).toElement().text();
  }
  return texts.join(TQString::tqfromLatin1("; "));
}

} // namespace

// Import from a URL; loading is delegated to XMLImporter.
DCImporter::DCImporter(const KURL& url_) : XMLImporter(url_) {
}

// Import from XML text; parsing is delegated to XMLImporter.
DCImporter::DCImporter(const TQString& text_) : XMLImporter(text_) {
}

// Import from an already-built DOM document.
DCImporter::DCImporter(const TQDomDocument& dom_) : XMLImporter(dom_) {
}

// Builds a book collection from the DOM document. Every "recordData" element
// in the XML::nsZing namespace that carries a title becomes one entry with
// the fields title, author, publisher, keyword, pub_year and comments.
// Records without a title are logged and skipped.
Tellico::Data::CollPtr DCImporter::collection() {
  const TQString& dc = XML::nsDublinCore;
  const TQString& zing = XML::nsZing;

  Data::CollPtr c = new Data::BookCollection(true);

  TQDomDocument doc = domDocument();

  // matches a trailing date-like suffix on a creator (optional comma,
  // whitespace, four digits, optional "-" and four more digits, optional
  // period); the second capture is whatever follows it
  TQRegExp authorDateRX(TQString::tqfromLatin1(",?(\\s+\\d{4}-?(?:\\d{4})?\\.?)(.*)$"));
  // first run of four digits is taken as the publication year
  TQRegExp dateRX(TQString::tqfromLatin1("\\d{4}"));

  TQDomNodeList recordList = doc.elementsByTagNameNS(zing, TQString::tqfromLatin1("recordData"));
  const uint recordCount = recordList.count();
  myDebug() << "DCImporter::collection() - number of records: " << recordCount << endl;

  // Whether the Dublin Core children are namespace-qualified is not known up
  // front; it is decided by the first record in which a title is found.
  enum { UnknownNS, UseNS, NoNS } nsMode = UnknownNS;

  for(uint i = 0; i < recordCount; ++i) {
    Data::EntryPtr e = new Data::Entry(c);

    TQDomElement record = recordList.item(i).toElement();

    TQDomNodeList nodes = dublinCoreElements(record, dc, "title", nsMode != NoNS);
    if(nodes.count() > 0) {
      if(nsMode == UnknownNS) {
        nsMode = UseNS;
      }
    } else {
      // while still undecided, retry the title lookup without the namespace
      if(nsMode == UnknownNS) {
        nodes = record.elementsByTagName(TQString::tqfromLatin1("title"));
      }
      if(nodes.count() == 0) {
        myDebug() << "DCImporter::collection() - no title, skipping" << endl;
        continue;
      }
      // only reachable when the un-namespaced retry found a title
      nsMode = NoNS;
    }

    TQString title = nodes.item(0).toElement().text();
    title.replace('\n', ' ');
    e->setField(TQString::tqfromLatin1("title"), title.simplifyWhiteSpace());

    nodes = dublinCoreElements(record, dc, "creator", nsMode != NoNS);
    TQStringList creators;
    for(uint j = 0; j < nodes.count(); ++j) {
      TQString creator = nodes.item(j).toElement().text();
      if(authorDateRX.search(creator) < 0) {
        // no date suffix, keep the text untouched
        creators << creator;
        continue;
      }
      // check if anything after date like [publisher]
      if(!authorDateRX.cap(2).stripWhiteSpace().isEmpty()) {
        myDebug() << "DCImporter::collection() - weird creator, skipping: " << creator << endl;
        continue;
      }
      creator.remove(authorDateRX);
      creators << creator.simplifyWhiteSpace();
    }
    e->setField(TQString::tqfromLatin1("author"), creators.join(TQString::tqfromLatin1("; ")));

    nodes = dublinCoreElements(record, dc, "publisher", nsMode != NoNS);
    e->setField(TQString::tqfromLatin1("publisher"), joinedTexts(nodes));

    nodes = dublinCoreElements(record, dc, "subject", nsMode != NoNS);
    e->setField(TQString::tqfromLatin1("keyword"), joinedTexts(nodes));

    nodes = dublinCoreElements(record, dc, "date", nsMode != NoNS);
    if(nodes.count() > 0) {
      const TQString dateText = nodes.item(0).toElement().text();
      if(dateRX.search(dateText) > -1) {
        e->setField(TQString::tqfromLatin1("pub_year"), dateRX.cap());
      }
    }

    nodes = dublinCoreElements(record, dc, "description", nsMode != NoNS);
    if(nodes.count() > 0) {
      // only the first description is used, stored as the entry's comments
      e->setField(TQString::tqfromLatin1("comments"), nodes.item(0).toElement().text());
    }

    c->addEntries(e);
  }

  return c;
}