You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tellico/src/fetch/z3950connection.cpp

504 lines
17 KiB

/***************************************************************************
copyright : (C) 2005-2006 by Robby Stephenson
email : $EMAIL
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of version 2 of the GNU General Public License as *
* published by the Free Software Foundation; *
* *
***************************************************************************/
#include "z3950connection.h"
#include "z3950fetcher.h"
#include "messagehandler.h"
#include "../latin1literal.h"
#include "../tellico_debug.h"
#include "../iso5426converter.h"
#include "../iso6937converter.h"
#include <config.h>
#ifdef HAVE_YAZ
extern "C" {
#include <yaz/zoom.h>
#include <yaz/marcdisp.h>
#include <yaz/yaz-version.h>
}
#endif
#include <klocale.h>
#include <tqfile.h>
namespace {
  // Number of records requested per batch: it is the initial value of
  // m_limit and the amount m_limit grows by when more results remain.
  // (The anonymous namespace already gives the constant internal linkage.)
  const size_t Z3950_DEFAULT_MAX_RECORDS = 20;
}
using Tellico::Fetch::Z3950ResultFound;
using Tellico::Fetch::Z3950Connection;
// Event carrying one result string to the fetcher. The string is stored as a
// deep copy, and the count of outstanding result events is bumped for as
// long as this event object exists.
Z3950ResultFound::Z3950ResultFound(const TQString& s)
    : TQCustomEvent(uid()),
      m_result(TQDeepCopy<TQString>(s)) {
  Z3950Connection::resultsLeft += 1;
}
// Destroying the event means it is no longer pending, so give back the slot
// taken in the constructor.
Z3950ResultFound::~Z3950ResultFound() {
  Z3950Connection::resultsLeft -= 1;
}
// Holds the YAZ/ZOOM handles so that the public header does not need to
// include any YAZ headers.
class Z3950Connection::Private {
public:
#ifdef HAVE_YAZ
  // Start with null handles: the real ones are only created once a
  // connection is attempted, and the destructor must be safe to run on an
  // object that never connected.
  Private() : conn_opt(0), conn(0) {}
  ~Private() {
    // both ZOOM destroy functions accept a null handle
    ZOOM_options_destroy(conn_opt);
    ZOOM_connection_destroy(conn);
  }
  ZOOM_options conn_opt;
  ZOOM_connection conn;
#else
  Private() {}
#endif
};
// Number of Z3950ResultFound events that currently exist: incremented in the
// event's constructor, decremented in its destructor, reset at the start of
// run() and consulted by checkPendingEvents().
// NOTE(review): this is a plain int touched from the event constructor and
// destructor as well as from run() — confirm whether those can execute on
// different threads.
int Z3950Connection::resultsLeft = 0;
// Sets up a connection object for one Z39.50 source. Nothing is contacted
// here: the constructor body is empty and the ZOOM handles are only created
// later, by makeConnection().
// Every string argument is stored through TQDeepCopy (presumably so that the
// worker thread does not share implicitly-shared string data with the
// caller — TODO confirm).
// since the character set goes into a yaz api call
// I'm paranoid about user insertions, so just grab 64
// characters at most
Z3950Connection::Z3950Connection(Z3950Fetcher* fetcher,
const TQString& host,
uint port,
const TQString& dbname,
const TQString& sourceCharSet,
const TQString& syntax,
const TQString& esn)
: TQThread()
, d(new Private())
, m_connected(false)
, m_aborted(false)
, m_fetcher(fetcher)
, m_host(TQDeepCopy<TQString>(host))
, m_port(port)
, m_dbname(TQDeepCopy<TQString>(dbname))
// only the first 64 characters of the character set name are kept
, m_sourceCharSet(TQDeepCopy<TQString>(sourceCharSet.left(64)))
, m_syntax(TQDeepCopy<TQString>(syntax))
, m_esn(TQDeepCopy<TQString>(esn))
// first batch covers records [0, Z3950_DEFAULT_MAX_RECORDS)
, m_start(0)
, m_limit(Z3950_DEFAULT_MAX_RECORDS)
, m_hasMore(false) {
}
// Marks the connection as closed and releases the private data (whose own
// destructor takes care of the ZOOM handles).
Z3950Connection::~Z3950Connection() {
  m_connected = false;

  // detach the pointer first so the member never refers to freed memory
  Private* const priv = d;
  d = 0;
  delete priv;
}
// Rewinds the paging window so that the next run() starts again from the
// first record and fetches one default-sized batch.
void Z3950Connection::reset() {
  m_limit = Z3950_DEFAULT_MAX_RECORDS;
  m_start = 0;
}
void Z3950Connection::setQuery(const TQString& query_) {
m_pqn = TQDeepCopy<TQString>(query_);
}
// Stores deep copies of the credentials; makeConnection() passes them to the
// server as the "user" and "password" options.
void Z3950Connection::setUserPassword(const TQString& user_, const TQString& pword_) {
  m_password = TQDeepCopy<TQString>(pword_);
  m_user = TQDeepCopy<TQString>(user_);
}
// Thread body. Runs the stored prefix query (m_pqn) against the server,
// works out which record syntax to use, posts one Z3950ResultFound event to
// the fetcher for every record in the current [m_start, m_limit) window and
// finally posts a Z3950ConnectionDone event through done().
//
// Side effects on the object:
//  - m_syntax may change; a Z3950SyntaxChange event is posted when it does
//  - m_sourceCharSet falls back to "marc-8" when empty
//  - m_start/m_limit advance by one batch when more results remain
//  - on a search error the connection handles are released, so that the next
//    run() reconnects from scratch
// Without YAZ support only the done event is posted.
void Z3950Connection::run() {
//  myDebug() << "Z3950Connection::run() - " << m_fetcher->source() << endl;
  m_aborted = false;
  m_hasMore = false;
  resultsLeft = 0;
#ifdef HAVE_YAZ

  if(!makeConnection()) {
    // NOTE(review): makeConnection() has already posted a done event carrying
    // the error message before returning false, so the fetcher gets a second
    // Z3950ConnectionDone here — confirm that this is intended.
    done();
    return;
  }

  ZOOM_query query = ZOOM_query_create();
  myLog() << "Z3950Connection::run() - pqn = " << toCString(m_pqn) << endl;
  int errcode = ZOOM_query_prefix(query, toCString(m_pqn));
  if(errcode != 0) {
    myDebug() << "Z3950Connection::run() - query error: " << m_pqn << endl;
    ZOOM_query_destroy(query);
    TQString s = i18n("Query error!");
    s += ' ' + m_pqn;
    done(s, MessageHandler::Error);
    return;
  }

  ZOOM_resultset resultSet = ZOOM_connection_search(d->conn, query);

  // check abort status
  if(m_aborted) {
    // the result set and the query are ours to release, even when aborting
    ZOOM_resultset_destroy(resultSet);
    ZOOM_query_destroy(query);
    done();
    return;
  }

  // I know the LOC wants the syntax = "xml" and esn = "mods"
  // to get MODS data, that seems a bit odd...
  // esn only makes sense for marc and grs-1
  // if syntax is mods, set esn to mods too
  TQCString type = "raw";
  if(m_syntax == Latin1Literal("mods")) {
    m_syntax = TQString::tqfromLatin1("xml");
    ZOOM_resultset_option_set(resultSet, "elementSetName", "mods");
    type = "xml";
  } else {
    ZOOM_resultset_option_set(resultSet, "elementSetName", m_esn.latin1());
  }
  ZOOM_resultset_option_set(resultSet, "start", TQCString().setNum(m_start));
  ZOOM_resultset_option_set(resultSet, "count", TQCString().setNum(m_limit-m_start));
  // search in default syntax, unless syntax is already set
  if(!m_syntax.isEmpty()) {
    ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", m_syntax.latin1());
  }

  const char* errmsg = 0;
  const char* addinfo = 0;
  errcode = ZOOM_connection_error(d->conn, &errmsg, &addinfo);
  if(errcode != 0) {
    ZOOM_resultset_destroy(resultSet);
    ZOOM_query_destroy(query);
    m_connected = false;

    // build the message while the connection (which may own the strings
    // behind errmsg/addinfo) is still alive
    TQString s = i18n("Connection search error %1: %2").tqarg(errcode).tqarg(toString(errmsg));
    if(!TQCString(addinfo).isEmpty()) {
      s += " (" + toString(addinfo) + ")";
    }

    // m_connected is false now, so the next makeConnection() creates fresh
    // handles; release the current ones here or they would be overwritten
    // and leaked. Nulling them keeps the Private destructor safe.
    ZOOM_options_destroy(d->conn_opt);
    ZOOM_connection_destroy(d->conn);
    d->conn_opt = 0;
    d->conn = 0;

    myDebug() << "Z3950Connection::run() - " << s << endl;
    done(s, MessageHandler::Error);
    return;
  }

  const size_t numResults = ZOOM_resultset_size(resultSet);

  TQString newSyntax = m_syntax;
  if(numResults > 0) {
    myLog() << "Z3950Connection::run() - current syntax is " << m_syntax << " (" << numResults << " results)" << endl;
    // so now we know that results exist, might have to check syntax
    // len is cleared before every probe so that a call which fails without
    // writing to it is seen as "no data" rather than as a stale length
    int len = 0;
    ZOOM_record rec = ZOOM_resultset_record(resultSet, 0);
    // want raw unless it's mods
    ZOOM_record_get(rec, type, &len);
    if(len > 0 && m_syntax.isEmpty()) {
      newSyntax = TQString::tqfromLatin1(ZOOM_record_get(rec, "syntax", &len)).lower();
      myLog() << "Z3950Connection::run() - syntax guess is " << newSyntax << endl;
      if(newSyntax == Latin1Literal("mods") || newSyntax == Latin1Literal("xml")) {
        m_syntax = TQString::tqfromLatin1("xml");
        ZOOM_resultset_option_set(resultSet, "elementSetName", "mods");
      } else if(newSyntax == Latin1Literal("grs-1")) {
        // if it's defaulting to grs-1, go ahead and change it to try to get a marc
        // record since grs-1 is a last resort for us
        newSyntax.truncate(0);
      }
    }
    // right now, we just understand mods, unimarc, marc21/usmarc, and grs-1
    if(newSyntax != Latin1Literal("xml") &&
       newSyntax != Latin1Literal("usmarc") &&
       newSyntax != Latin1Literal("marc21") &&
       newSyntax != Latin1Literal("unimarc") &&
       newSyntax != Latin1Literal("grs-1")) {
      myLog() << "Z3950Connection::run() - changing z39.50 syntax to MODS" << endl;
      newSyntax = TQString::tqfromLatin1("xml");
      ZOOM_resultset_option_set(resultSet, "elementSetName", "mods");
      ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1());
      rec = ZOOM_resultset_record(resultSet, 0);
      len = 0;
      ZOOM_record_get(rec, "xml", &len);
      if(len == 0) {
        // change set name back
        ZOOM_resultset_option_set(resultSet, "elementSetName", m_esn.latin1());
      }
      // try the remaining syntaxes in order of preference, stopping at the
      // first one for which the server hands back some raw data
      static const char* const fallbacks[] = { "usmarc", "marc21", "unimarc", "grs-1" };
      const size_t numFallbacks = sizeof(fallbacks) / sizeof(fallbacks[0]);
      for(size_t i = 0; len == 0 && i < numFallbacks; ++i) {
        newSyntax = TQString::tqfromLatin1(fallbacks[i]);
        myLog() << "Z3950Connection::run() - changing z39.50 syntax to " << newSyntax.upper() << endl;
        ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", fallbacks[i]);
        rec = ZOOM_resultset_record(resultSet, 0);
        len = 0;
        ZOOM_record_get(rec, "raw", &len);
      }
      if(len == 0) {
        myLog() << "Z3950Connection::run() - giving up" << endl;
        ZOOM_resultset_destroy(resultSet);
        ZOOM_query_destroy(query);
        done(i18n("Record syntax error"), MessageHandler::Error);
        return;
      }
      myLog() << "Z3950Connection::run() - final syntax is " << newSyntax << endl;
    }
  }

  // go back to fooling ourselves and calling it mods
  if(m_syntax == Latin1Literal("xml")) {
    m_syntax = TQString::tqfromLatin1("mods");
  }
  if(newSyntax == Latin1Literal("xml")) {
    newSyntax = TQString::tqfromLatin1("mods");
  }
  // save syntax change for next time
  if(m_syntax != newSyntax) {
    kapp->postEvent(m_fetcher, new Z3950SyntaxChange(newSyntax));
    m_syntax = newSyntax;
  }

  if(m_sourceCharSet.isEmpty()) {
    m_sourceCharSet = TQString::tqfromLatin1("marc-8");
  }

  const size_t realLimit = TQMIN(numResults, m_limit);

  for(size_t i = m_start; i < realLimit && !m_aborted; ++i) {
    myLog() << "Z3950Connection::run() - grabbing index " << i << endl;
    ZOOM_record rec = ZOOM_resultset_record(resultSet, i);
    if(!rec) {
      myDebug() << "Z3950Connection::run() - no record returned for index " << i << endl;
      continue;
    }
    int len = 0;
    TQString data;
    if(m_syntax == Latin1Literal("mods")) {
      data = toString(ZOOM_record_get(rec, "xml", &len));
    } else if(m_syntax == Latin1Literal("grs-1")) { // grs-1
      // we're going to parse the rendered data, very ugly...
      data = toString(ZOOM_record_get(rec, "render", &len));
    } else {
#if 0
      kdWarning() << "Remove debug from z3950connection.cpp" << endl;
      {
        TQFile f1(TQString::tqfromLatin1("/tmp/z3950.raw"));
        if(f1.open(IO_WriteOnly)) {
          TQDataStream t(&f1);
          t << ZOOM_record_get(rec, "raw", &len);
        }
        f1.close();
      }
#endif
      data = toXML(ZOOM_record_get(rec, "raw", &len), m_sourceCharSet);
    }
    Z3950ResultFound* ev = new Z3950ResultFound(data);
    TQApplication::postEvent(m_fetcher, ev);
  }

  ZOOM_resultset_destroy(resultSet);
  ZOOM_query_destroy(query);

  // advance the window by one batch if the server has more than we asked for
  m_hasMore = m_limit < numResults;
  if(m_hasMore) {
    m_start = m_limit;
    m_limit += Z3950_DEFAULT_MAX_RECORDS;
  }
#endif
  done();
}
// Opens the connection to the server unless one is already marked as open.
//
// Returns true when the connection is usable (always the case in a build
// without YAZ). On failure the ZOOM handles are released and nulled, a done
// event carrying the error text is posted via done(), and false is returned.
bool Z3950Connection::makeConnection() {
  if(m_connected) {
    return true;
  }
//  myDebug() << "Z3950Connection::makeConnection() - " << m_fetcher->source() << endl;
// I don't know what to do except assume database, user, and password are in locale encoding
#ifdef HAVE_YAZ
  d->conn_opt = ZOOM_options_create();
  ZOOM_options_set(d->conn_opt, "implementationName", "Tellico");
  ZOOM_options_set(d->conn_opt, "databaseName",       toCString(m_dbname));
  ZOOM_options_set(d->conn_opt, "user",               toCString(m_user));
  ZOOM_options_set(d->conn_opt, "password",           toCString(m_password));

  d->conn = ZOOM_connection_create(d->conn_opt);
  ZOOM_connection_connect(d->conn, m_host.latin1(), m_port);

  const char* errmsg = 0;  // human-readable text for errcode
  const char* addinfo = 0; // optional extra detail
  const int errcode = ZOOM_connection_error(d->conn, &errmsg, &addinfo);
  if(errcode != 0) {
    m_connected = false;

    // Compose the message before tearing anything down: the strings behind
    // errmsg/addinfo may be owned by the connection object.
    TQString s = i18n("Connection error %1: %2").tqarg(errcode).tqarg(toString(errmsg));
    if(!TQCString(addinfo).isEmpty()) {
      s += " (" + toString(addinfo) + ")";
    }

    ZOOM_options_destroy(d->conn_opt);
    ZOOM_connection_destroy(d->conn);
    // the Private destructor destroys these handles again, so leave nulls
    // behind instead of dangling pointers
    d->conn_opt = 0;
    d->conn = 0;

    myDebug() << "Z3950Connection::makeConnection() - " << s << endl;
    done(s, MessageHandler::Error);
    return false;
  }
#endif
  m_connected = true;
  return true;
}
void Z3950Connection::done() {
checkPendingEvents();
kapp->postEvent(m_fetcher, new Z3950ConnectionDone(m_hasMore));
}
void Z3950Connection::done(const TQString& msg_, int type_) {
checkPendingEvents();
if(m_aborted) {
kapp->postEvent(m_fetcher, new Z3950ConnectionDone(m_hasMore));
} else {
kapp->postEvent(m_fetcher, new Z3950ConnectionDone(m_hasMore, msg_, type_));
}
}
// Gives outstanding result events a moment to go away: when none are
// counted this returns immediately, otherwise it waits one second (once,
// without re-checking the counter afterwards).
void Z3950Connection::checkPendingEvents() {
  if(resultsLeft <= 0) {
    return;
  }
  sleep(1);
}
// Encodes text_ for the server: the UTF-8 form of the string is converted
// into the source character set.
inline
TQCString Z3950Connection::toCString(const TQString& text_) {
  const TQString utf8Name = TQString::tqfromLatin1("utf-8");
  const TQCString utf8Bytes = text_.utf8();
  return iconvRun(utf8Bytes, utf8Name, m_sourceCharSet);
}
// Decodes bytes received from the server: they are converted from the
// source character set to UTF-8 and then turned into a TQString.
inline
TQString Z3950Connection::toString(const TQCString& text_) {
  const TQString utf8Name = TQString::tqfromLatin1("utf-8");
  const TQCString utf8Bytes = iconvRun(text_, m_sourceCharSet, utf8Name);
  return TQString::fromUtf8(utf8Bytes);
}
// static
// Converts text_ from fromCharSet_ to toCharSet_ using yaz_iconv.
//
// The input is returned unchanged when it is empty, when both character set
// names are equal, when the conversion is unsupported or fails, and in
// builds without YAZ. ISO 5426 and ISO 6937 input that yaz cannot open a
// converter for is first turned into UTF-8 by the bundled converters and
// then converted from there.
TQCString Z3950Connection::iconvRun(const TQCString& text_, const TQString& fromCharSet_, const TQString& toCharSet_) {
#ifdef HAVE_YAZ
  if(text_.isEmpty()) {
    return text_;
  }

  if(fromCharSet_ == toCharSet_) {
    return text_;
  }

  yaz_iconv_t cd = yaz_iconv_open(toCharSet_.latin1(), fromCharSet_.latin1());
  if(!cd) {
    // maybe it's iso 5426, which we sorta support
    TQString charSetLower = fromCharSet_.lower();
    charSetLower.remove('-').remove(' ');
    if(charSetLower == Latin1Literal("iso5426")) {
      return iconvRun(Iso5426Converter::toUtf8(text_).utf8(), TQString::tqfromLatin1("utf-8"), toCharSet_);
    } else if(charSetLower == Latin1Literal("iso6937")) {
      return iconvRun(Iso6937Converter::toUtf8(text_).utf8(), TQString::tqfromLatin1("utf-8"), toCharSet_);
    }
    kdWarning() << "Z3950Connection::iconvRun() - conversion from " << fromCharSet_
                << " to " << toCharSet_ << " is unsupported" << endl;
    return text_;
  }

  const char* input = text_;
  size_t inlen = text_.length();

  // One input byte can expand to more than two output bytes (a UTF-8
  // sequence is up to four bytes long), so leave room for four per byte.
  size_t outlen = 4 * inlen;
  TQMemArray<char> result0(outlen);
  char* result = result0.data();

  // NOTE(review): the size_t result of yaz_iconv is narrowed to int here;
  // confirm that a successful conversion can never yield 0.
  int r = yaz_iconv(cd, const_cast<char**>(&input), &inlen, &result, &outlen);
  if(r <= 0) {
    myDebug() << "Z3950Connection::iconvRun() - can't decode buffer" << endl;
    // the descriptor has to be released on this path as well
    yaz_iconv_close(cd);
    return text_;
  }
  // bug in yaz, need to flush buffer to catch last character
  yaz_iconv(cd, 0, 0, &result, &outlen);

  // length is pointer difference
  const size_t len = result - result0.data();

  // TQCString(str, maxsize) copies maxsize-1 bytes and appends the terminator
  TQCString output = TQCString(result0, len+1);
//  myDebug() << "-------------------------------------------" << endl;
//  myDebug() << output << endl;
//  myDebug() << "-------------------------------------------" << endl;
  yaz_iconv_close(cd);
  return output;
#else // no yaz
  return text_;
#endif
}
// Converts one raw MARC record into a MARCXML document.
//
// marc_    - the raw record; its first five bytes must hold the record
//            length as decimal digits
// charSet_ - character set of the record; ISO 5426 and ISO 6937 are
//            converted to UTF-8 by the bundled converters when yaz cannot
//            open a converter for them
//
// Returns the XML text preceded by an XML declaration, or an empty string
// when the record is empty, the character set is unsupported, the length
// field is unparsable or outside [25, 100000], decoding fails, or the build
// has no YAZ support.
TQString Z3950Connection::toXML(const TQCString& marc_, const TQString& charSet_) {
#ifdef HAVE_YAZ
  if(marc_.isEmpty()) {
    myDebug() << "Z3950Connection::toXML() - empty string" << endl;
    return TQString();
  }

  yaz_iconv_t cd = yaz_iconv_open("utf-8", charSet_.latin1());
  if(!cd) {
    // maybe it's iso 5426, which we sorta support
    TQString charSetLower = charSet_.lower();
    charSetLower.remove('-').remove(' ');
    if(charSetLower == Latin1Literal("iso5426")) {
      return toXML(Iso5426Converter::toUtf8(marc_).utf8(), TQString::tqfromLatin1("utf-8"));
    } else if(charSetLower == Latin1Literal("iso6937")) {
      return toXML(Iso6937Converter::toUtf8(marc_).utf8(), TQString::tqfromLatin1("utf-8"));
    }
    kdWarning() << "Z3950Connection::toXML() - conversion from " << charSet_ << " is unsupported" << endl;
    return TQString();
  }

  // first 5 bytes are length
  // (checked before the decoder is created, so that a bad record only needs
  // the iconv descriptor released)
  bool ok;
#if YAZ_VERSIONL < 0x030000
  int len = marc_.left(5).toInt(&ok);
#else
  size_t len = marc_.left(5).toInt(&ok);
#endif
  if(!ok || len < 25 || len > 100000) {
    myDebug() << "Z3950Connection::toXML() - bad length: " << (ok ? static_cast<long>(len) : -1L) << endl;
    yaz_iconv_close(cd);
    return TQString();
  }

  yaz_marc_t mt = yaz_marc_create();
  yaz_marc_iconv(mt, cd);
  yaz_marc_xml(mt, YAZ_MARC_MARCXML);

#if YAZ_VERSIONL < 0x030000
  char* result;
#else
  const char* result;
#endif
  // on success len is overwritten with the size of the decoded output
  int r = yaz_marc_decode_buf(mt, marc_, -1, &result, &len);
  if(r <= 0) {
    myDebug() << "Z3950Connection::toXML() - can't decode buffer" << endl;
    yaz_iconv_close(cd);
    yaz_marc_destroy(mt);
    return TQString();
  }

  // result lives inside mt, so copy it out before the decoder is destroyed
  TQString output = TQString::tqfromLatin1("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
  output += TQString::fromUtf8(TQCString(result, len+1), len+1);
//  myDebug() << TQCString(result) << endl;
//  myDebug() << "-------------------------------------------" << endl;
//  myDebug() << output << endl;
  yaz_iconv_close(cd);
  yaz_marc_destroy(mt);

  return output;
#else // no yaz
  return TQString();
#endif
}