// tellico/src/translators/dcimporter.cpp

/***************************************************************************
copyright : (C) 2006 by Robby Stephenson
email : robby@periapsis.org
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of version 2 of the GNU General Public License as *
* published by the Free Software Foundation; *
* *
***************************************************************************/
#include "dcimporter.h"
#include "../collections/bookcollection.h"
#include "tellico_xml.h"
#include "../tellico_debug.h"
using Tellico::Import::DCImporter;
// Constructs the importer from a URL. The URL is handed straight to the
// XMLImporter base class; no additional state is set up here.
DCImporter::DCImporter(const KURL& url_)
    : XMLImporter(url_)
{
}
// Constructs the importer from a text string. The string is handed straight
// to the XMLImporter base class; no additional state is set up here.
DCImporter::DCImporter(const TQString& text_)
    : XMLImporter(text_)
{
}
// Constructs the importer from an existing DOM document. The document is
// handed straight to the XMLImporter base class; no additional state is set
// up here.
DCImporter::DCImporter(const TQDomDocument& dom_)
    : XMLImporter(dom_)
{
}
Tellico::Data::CollPtr DCImporter::collection() {
const TQString& dc = XML::nsDublinCore;
const TQString& zing = XML::nsZing;
Data::CollPtr c = new Data::BookCollection(true);
TQDomDocument doc = domDocument();
TQRegExp authorDateRX(TQString::fromLatin1(",?(\\s+\\d{4}-?(?:\\d{4})?\\.?)(.*)$"));
TQRegExp dateRX(TQString::fromLatin1("\\d{4}"));
TQDomNodeList recordList = doc.elementsByTagNameNS(zing, TQString::fromLatin1("recordData"));
myDebug() << "DCImporter::collection() - number of records: " << recordList.count() << endl;
enum { UnknownNS, UseNS, NoNS } useNS = UnknownNS;
#define GETELEMENTS(s) (useNS == NoNS) \
? elem.elementsByTagName(TQString::fromLatin1(s)) \
: elem.elementsByTagNameNS(dc, TQString::fromLatin1(s))
for(uint i = 0; i < recordList.count(); ++i) {
Data::EntryPtr e = new Data::Entry(c);
TQDomElement elem = recordList.item(i).toElement();
TQDomNodeList nodeList = GETELEMENTS("title");
if(nodeList.count() == 0) { // no title, skip
if(useNS == UnknownNS) {
nodeList = elem.elementsByTagName(TQString::fromLatin1("title"));
if(nodeList.count() > 0) {
useNS = NoNS;
} else {
myDebug() << "DCImporter::collection() - no title, skipping" << endl;
continue;
}
} else {
myDebug() << "DCImporter::collection() - no title, skipping" << endl;
continue;
}
} else if(useNS == UnknownNS) {
useNS = UseNS;
}
TQString s = nodeList.item(0).toElement().text();
s.replace('\n', ' ');
s = s.simplifyWhiteSpace();
e->setField(TQString::fromLatin1("title"), s);
nodeList = GETELEMENTS("creator");
TQStringList creators;
for(uint j = 0; j < nodeList.count(); ++j) {
TQString s = nodeList.item(j).toElement().text();
if(authorDateRX.search(s) > -1) {
// check if anything after date like [publisher]
if(authorDateRX.cap(2).stripWhiteSpace().isEmpty()) {
s.remove(authorDateRX);
s = s.simplifyWhiteSpace();
creators << s;
} else {
myDebug() << "DCImporter::collection() - weird creator, skipping: " << s << endl;
}
} else {
creators << s;
}
}
e->setField(TQString::fromLatin1("author"), creators.join(TQString::fromLatin1("; ")));
nodeList = GETELEMENTS("publisher");
TQStringList publishers;
for(uint j = 0; j < nodeList.count(); ++j) {
publishers << nodeList.item(j).toElement().text();
}
e->setField(TQString::fromLatin1("publisher"), publishers.join(TQString::fromLatin1("; ")));
nodeList = GETELEMENTS("subject");
TQStringList keywords;
for(uint j = 0; j < nodeList.count(); ++j) {
keywords << nodeList.item(j).toElement().text();
}
e->setField(TQString::fromLatin1("keyword"), keywords.join(TQString::fromLatin1("; ")));
nodeList = GETELEMENTS("date");
if(nodeList.count() > 0) {
TQString s = nodeList.item(0).toElement().text();
if(dateRX.search(s) > -1) {
e->setField(TQString::fromLatin1("pub_year"), dateRX.cap());
}
}
nodeList = GETELEMENTS("description");
if(nodeList.count() > 0) { // no title, skip
e->setField(TQString::fromLatin1("comments"), nodeList.item(0).toElement().text());
}
c->addEntries(e);
}
#undef GETELEMENTS
return c;
}