You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
504 lines
17 KiB
504 lines
17 KiB
/***************************************************************************
    copyright            : (C) 2005-2006 by Robby Stephenson
    email                : $EMAIL
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of version 2 of the GNU General Public License as  *
 *   published by the Free Software Foundation;                            *
 *                                                                         *
 ***************************************************************************/
|
|
|
|
#include "z3950connection.h"
|
|
#include "z3950fetcher.h"
|
|
#include "messagehandler.h"
|
|
#include "../latin1literal.h"
|
|
#include "../tellico_debug.h"
|
|
#include "../iso5426converter.h"
|
|
#include "../iso6937converter.h"
|
|
|
|
#include <config.h>
|
|
|
|
#ifdef HAVE_YAZ
|
|
extern "C" {
|
|
#include <yaz/zoom.h>
|
|
#include <yaz/marcdisp.h>
|
|
#include <yaz/yaz-version.h>
|
|
}
|
|
#endif
|
|
|
|
#include <tdelocale.h>
|
|
|
|
#include <tqfile.h>
|
|
|
|
namespace {
  // Size of one batch of records: the initial value of m_limit and the
  // amount by which run() advances it when more results are available.
  const size_t Z3950_DEFAULT_MAX_RECORDS = 20;
}
|
|
|
|
using Tellico::Fetch::Z3950ResultFound;
|
|
using Tellico::Fetch::Z3950Connection;
|
|
|
|
// Wraps a deep copy of the result text in a custom event and counts the new
// event in Z3950Connection::resultsLeft.
Z3950ResultFound::Z3950ResultFound(const TQString& s)
    : TQCustomEvent(uid()),
      m_result(TQDeepCopy<TQString>(s)) {
  Z3950Connection::resultsLeft += 1;
}
|
|
|
|
// One result event fewer is outstanding once this event is destroyed.
Z3950ResultFound::~Z3950ResultFound() {
  Z3950Connection::resultsLeft -= 1;
}
|
|
|
|
// Holder for the YAZ/ZOOM handles used by Z3950Connection.
// Without HAVE_YAZ the class is empty.
class Z3950Connection::Private {
public:
#ifdef HAVE_YAZ
  // Start with null handles so that destroying a Private that never made a
  // connection does not hand indeterminate pointers to the ZOOM API.
  Private() : conn_opt(0), conn(0) {}

  ~Private() {
    if(conn_opt) {
      ZOOM_options_destroy(conn_opt);
    }
    if(conn) {
      ZOOM_connection_destroy(conn);
    }
  }

  ZOOM_options conn_opt;   // options the connection is created from
  ZOOM_connection conn;    // the connection handle itself
#else
  Private() {}
#endif
};
|
|
|
|
// Number of Z3950ResultFound events currently alive: incremented by the event's
// constructor, decremented by its destructor, reset to 0 at the start of run()
// and consulted by checkPendingEvents().
int Z3950Connection::resultsLeft = 0;
|
|
|
|
// since the character set goes into a yaz api call
|
|
// I'm paranoid about user insertions, so just grab 64
|
|
// characters at most
|
|
Z3950Connection::Z3950Connection(Z3950Fetcher* fetcher,
|
|
const TQString& host,
|
|
uint port,
|
|
const TQString& dbname,
|
|
const TQString& sourceCharSet,
|
|
const TQString& syntax,
|
|
const TQString& esn)
|
|
: TQThread()
|
|
, d(new Private())
|
|
, m_connected(false)
|
|
, m_aborted(false)
|
|
, m_fetcher(fetcher)
|
|
, m_host(TQDeepCopy<TQString>(host))
|
|
, m_port(port)
|
|
, m_dbname(TQDeepCopy<TQString>(dbname))
|
|
, m_sourceCharSet(TQDeepCopy<TQString>(sourceCharSet.left(64)))
|
|
, m_syntax(TQDeepCopy<TQString>(syntax))
|
|
, m_esn(TQDeepCopy<TQString>(esn))
|
|
, m_start(0)
|
|
, m_limit(Z3950_DEFAULT_MAX_RECORDS)
|
|
, m_hasMore(false) {
|
|
}
|
|
|
|
// Marks the connection as closed and releases the private data holder.
Z3950Connection::~Z3950Connection() {
  m_connected = false;

  // detach the pointer first, then free what it pointed to
  Private* const priv = d;
  d = 0;
  delete priv;
}
|
|
|
|
// Rewinds paging to the first batch of records.
void Z3950Connection::reset() {
  m_limit = Z3950_DEFAULT_MAX_RECORDS;
  m_start = 0;
}
|
|
|
|
void Z3950Connection::setQuery(const TQString& query_) {
|
|
m_pqn = TQDeepCopy<TQString>(query_);
|
|
}
|
|
|
|
// Stores deep copies of the credentials that makeConnection() passes to the
// "user" and "password" connection options.
void Z3950Connection::setUserPassword(const TQString& user_, const TQString& pword_) {
  m_password = TQDeepCopy<TQString>(pword_);
  m_user = TQDeepCopy<TQString>(user_);
}
|
|
|
|
// Thread body: sends the stored prefix query to the server, works out which
// record syntax to use, posts one Z3950ResultFound event per record in the
// current [m_start, m_limit) window to the fetcher and finishes with done().
//
// Every exit path after the query/result set have been created releases them.
// Without HAVE_YAZ the function only resets its state and calls done().
void Z3950Connection::run() {
//  myDebug() << "Z3950Connection::run() - " << m_fetcher->source() << endl;
  m_aborted = false;
  m_hasMore = false;
  resultsLeft = 0;
#ifdef HAVE_YAZ

  if(!makeConnection()) {
    done();
    return;
  }

  ZOOM_query query = ZOOM_query_create();
  myLog() << "Z3950Connection::run() - pqn = " << toCString(m_pqn) << endl;
  int errcode = ZOOM_query_prefix(query, toCString(m_pqn));
  if(errcode != 0) {
    myDebug() << "Z3950Connection::run() - query error: " << m_pqn << endl;
    ZOOM_query_destroy(query);
    TQString s = i18n("Query error!");
    s += ' ' + m_pqn;
    done(s, MessageHandler::Error);
    return;
  }

  ZOOM_resultset resultSet = ZOOM_connection_search(d->conn, query);

  // check abort status
  if(m_aborted) {
    // the search objects already exist at this point, so release them
    ZOOM_resultset_destroy(resultSet);
    ZOOM_query_destroy(query);
    done();
    return;
  }

  // I know the LOC wants the syntax = "xml" and esn = "mods"
  // to get MODS data, that seems a bit odd...
  // esn only makes sense for marc and grs-1
  // if syntax is mods, set esn to mods too
  TQCString type = "raw";
  if(m_syntax == Latin1Literal("mods")) {
    m_syntax = TQString::fromLatin1("xml");
    ZOOM_resultset_option_set(resultSet, "elementSetName", "mods");
    type = "xml";
  } else {
    ZOOM_resultset_option_set(resultSet, "elementSetName", m_esn.latin1());
  }
  ZOOM_resultset_option_set(resultSet, "start", TQCString().setNum(m_start));
  ZOOM_resultset_option_set(resultSet, "count", TQCString().setNum(m_limit-m_start));
  // search in default syntax, unless syntax is already set
  if(!m_syntax.isEmpty()) {
    ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", m_syntax.latin1());
  }

  const char* errmsg = 0;
  const char* addinfo = 0;
  errcode = ZOOM_connection_error(d->conn, &errmsg, &addinfo);
  if(errcode != 0) {
    ZOOM_resultset_destroy(resultSet);
    ZOOM_query_destroy(query);
    m_connected = false;

    // undo the temporary mods -> xml renaming, exactly as the normal path
    // does further down, so the next run starts from the same state
    if(m_syntax == Latin1Literal("xml")) {
      m_syntax = TQString::fromLatin1("mods");
    }

    TQString s = i18n("Connection search error %1: %2").arg(errcode).arg(toString(errmsg));
    if(!TQCString(addinfo).isEmpty()) {
      s += " (" + toString(addinfo) + ")";
    }
    myDebug() << "Z3950Connection::run() - " << s << endl;
    done(s, MessageHandler::Error);
    return;
  }

  const size_t numResults = ZOOM_resultset_size(resultSet);

  TQString newSyntax = m_syntax;
  if(numResults > 0) {
    myLog() << "Z3950Connection::run() - current syntax is " << m_syntax << " (" << numResults << " results)" << endl;
    // so now we know that results exist, might have to check syntax
    int len = 0;
    ZOOM_record rec = ZOOM_resultset_record(resultSet, 0);
    // want raw unless it's mods
    ZOOM_record_get(rec, type, &len);
    if(len > 0 && m_syntax.isEmpty()) {
      newSyntax = TQString::fromLatin1(ZOOM_record_get(rec, "syntax", &len)).lower();
      myLog() << "Z3950Connection::run() - syntax guess is " << newSyntax << endl;
      if(newSyntax == Latin1Literal("mods") || newSyntax == Latin1Literal("xml")) {
        m_syntax = TQString::fromLatin1("xml");
        ZOOM_resultset_option_set(resultSet, "elementSetName", "mods");
      } else if(newSyntax == Latin1Literal("grs-1")) {
        // if it's defaulting to grs-1, go ahead and change it to try to get a marc
        // record since grs-1 is a last resort for us
        newSyntax.truncate(0);
      }
    }
    // right now, we just understand mods, unimarc, marc21/usmarc, and grs-1
    if(newSyntax != Latin1Literal("xml") &&
       newSyntax != Latin1Literal("usmarc") &&
       newSyntax != Latin1Literal("marc21") &&
       newSyntax != Latin1Literal("unimarc") &&
       newSyntax != Latin1Literal("grs-1")) {
      myLog() << "Z3950Connection::run() - changing z39.50 syntax to MODS" << endl;
      newSyntax = TQString::fromLatin1("xml");
      ZOOM_resultset_option_set(resultSet, "elementSetName", "mods");
      ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1());
      rec = ZOOM_resultset_record(resultSet, 0);
      ZOOM_record_get(rec, "xml", &len);
      if(len == 0) {
        // change set name back
        ZOOM_resultset_option_set(resultSet, "elementSetName", m_esn.latin1());

        // try the raw syntaxes in order of preference until one yields data
        static const char* const fallbackSyntax[] = { "usmarc", "marc21", "unimarc", "grs-1" };
        static const char* const fallbackLabel[]  = { "USMARC", "MARC21", "UNIMARC", "GRS-1" };
        const int fallbackCount = sizeof(fallbackSyntax) / sizeof(fallbackSyntax[0]);
        for(int k = 0; k < fallbackCount && len == 0; ++k) {
          newSyntax = TQString::fromLatin1(fallbackSyntax[k]);
          myLog() << "Z3950Connection::run() - changing z39.50 syntax to " << fallbackLabel[k] << endl;
          ZOOM_resultset_option_set(resultSet, "preferredRecordSyntax", newSyntax.latin1());
          rec = ZOOM_resultset_record(resultSet, 0);
          ZOOM_record_get(rec, "raw", &len);
        }
      }
      if(len == 0) {
        myLog() << "Z3950Connection::run() - giving up" << endl;
        ZOOM_resultset_destroy(resultSet);
        ZOOM_query_destroy(query);
        // same state restoration as on the normal path below
        if(m_syntax == Latin1Literal("xml")) {
          m_syntax = TQString::fromLatin1("mods");
        }
        done(i18n("Record syntax error"), MessageHandler::Error);
        return;
      }
      myLog() << "Z3950Connection::run() - final syntax is " << newSyntax << endl;
    }
  }

  // go back to fooling ourselves and calling it mods
  if(m_syntax == Latin1Literal("xml")) {
    m_syntax = TQString::fromLatin1("mods");
  }
  if(newSyntax == Latin1Literal("xml")) {
    newSyntax = TQString::fromLatin1("mods");
  }
  // save syntax change for next time
  if(m_syntax != newSyntax) {
    kapp->postEvent(m_fetcher, new Z3950SyntaxChange(newSyntax));
    m_syntax = newSyntax;
  }

  if(m_sourceCharSet.isEmpty()) {
    m_sourceCharSet = TQString::fromLatin1("marc-8");
  }

  const size_t realLimit = TQMIN(numResults, m_limit);

  for(size_t i = m_start; i < realLimit && !m_aborted; ++i) {
    myLog() << "Z3950Connection::run() - grabbing index " << i << endl;
    ZOOM_record rec = ZOOM_resultset_record(resultSet, i);
    if(!rec) {
      myDebug() << "Z3950Connection::run() - no record returned for index " << i << endl;
      continue;
    }
    int len = 0;
    TQString data;
    if(m_syntax == Latin1Literal("mods")) {
      data = toString(ZOOM_record_get(rec, "xml", &len));
    } else if(m_syntax == Latin1Literal("grs-1")) { // grs-1
      // we're going to parse the rendered data, very ugly...
      data = toString(ZOOM_record_get(rec, "render", &len));
    } else {
#if 0
      kdWarning() << "Remove debug from z3950connection.cpp" << endl;
      {
        TQFile f1(TQString::fromLatin1("/tmp/z3950.raw"));
        if(f1.open(IO_WriteOnly)) {
          TQDataStream t(&f1);
          t << ZOOM_record_get(rec, "raw", &len);
        }
        f1.close();
      }
#endif
      data = toXML(ZOOM_record_get(rec, "raw", &len), m_sourceCharSet);
    }
    Z3950ResultFound* ev = new Z3950ResultFound(data);
    TQApplication::postEvent(m_fetcher, ev);
  }

  ZOOM_resultset_destroy(resultSet);
  ZOOM_query_destroy(query);

  // advance the paging window when the server has more than we asked for
  m_hasMore = m_limit < numResults;
  if(m_hasMore) {
    m_start = m_limit;
    m_limit += Z3950_DEFAULT_MAX_RECORDS;
  }
#endif
  done();
}
|
|
|
|
// Opens the ZOOM connection unless m_connected is already set.
// Returns true when connected (always true without HAVE_YAZ). On failure an
// error is reported through done(msg, MessageHandler::Error), the freshly
// created handles are released and false is returned.
bool Z3950Connection::makeConnection() {
  if(m_connected) {
    return true;
  }
//  myDebug() << "Z3950Connection::makeConnection() - " << m_fetcher->source() << endl;
#ifdef HAVE_YAZ
  // database, user, and password are converted with toCString(), i.e. from
  // utf-8 to the configured source character set
  d->conn_opt = ZOOM_options_create();
  ZOOM_options_set(d->conn_opt, "implementationName", "Tellico");
  ZOOM_options_set(d->conn_opt, "databaseName", toCString(m_dbname));
  ZOOM_options_set(d->conn_opt, "user", toCString(m_user));
  ZOOM_options_set(d->conn_opt, "password", toCString(m_password));

  d->conn = ZOOM_connection_create(d->conn_opt);
  ZOOM_connection_connect(d->conn, m_host.latin1(), m_port);

  const char* errmsg = 0;
  const char* addinfo = 0;
  const int errcode = ZOOM_connection_error(d->conn, &errmsg, &addinfo);
  if(errcode != 0) {
    // Build the message while the connection is still alive: the strings
    // returned by ZOOM_connection_error() must not be read after the
    // connection has been destroyed.
    TQString s = i18n("Connection error %1: %2").arg(errcode).arg(toString(errmsg));
    if(!TQCString(addinfo).isEmpty()) {
      s += " (" + toString(addinfo) + ")";
    }

    ZOOM_options_destroy(d->conn_opt);
    ZOOM_connection_destroy(d->conn);
    // clear the handles so Private's destructor does not destroy them again
    d->conn_opt = 0;
    d->conn = 0;
    m_connected = false;

    myDebug() << "Z3950Connection::makeConnection() - " << s << endl;
    done(s, MessageHandler::Error);
    return false;
  }
#endif
  m_connected = true;
  return true;
}
|
|
|
|
void Z3950Connection::done() {
|
|
checkPendingEvents();
|
|
kapp->postEvent(m_fetcher, new Z3950ConnectionDone(m_hasMore));
|
|
}
|
|
|
|
void Z3950Connection::done(const TQString& msg_, int type_) {
|
|
checkPendingEvents();
|
|
if(m_aborted) {
|
|
kapp->postEvent(m_fetcher, new Z3950ConnectionDone(m_hasMore));
|
|
} else {
|
|
kapp->postEvent(m_fetcher, new Z3950ConnectionDone(m_hasMore, msg_, type_));
|
|
}
|
|
}
|
|
|
|
void Z3950Connection::checkPendingEvents() {
|
|
// if there's still some pending result events, go ahead and just wait 1 second
|
|
if(resultsLeft > 0) {
|
|
sleep(1);
|
|
}
|
|
}
|
|
|
|
inline
|
|
TQCString Z3950Connection::toCString(const TQString& text_) {
|
|
return iconvRun(text_.utf8(), TQString::fromLatin1("utf-8"), m_sourceCharSet);
|
|
}
|
|
|
|
inline
|
|
TQString Z3950Connection::toString(const TQCString& text_) {
|
|
return TQString::fromUtf8(iconvRun(text_, m_sourceCharSet, TQString::fromLatin1("utf-8")));
|
|
}
|
|
|
|
// static
|
|
TQCString Z3950Connection::iconvRun(const TQCString& text_, const TQString& fromCharSet_, const TQString& toCharSet_) {
|
|
#ifdef HAVE_YAZ
|
|
if(text_.isEmpty()) {
|
|
return text_;
|
|
}
|
|
|
|
if(fromCharSet_ == toCharSet_) {
|
|
return text_;
|
|
}
|
|
|
|
yaz_iconv_t cd = yaz_iconv_open(toCharSet_.latin1(), fromCharSet_.latin1());
|
|
if(!cd) {
|
|
// maybe it's iso 5426, which we sorta support
|
|
TQString charSetLower = fromCharSet_.lower();
|
|
charSetLower.remove('-').remove(' ');
|
|
if(charSetLower == Latin1Literal("iso5426")) {
|
|
return iconvRun(Iso5426Converter::toUtf8(text_).utf8(), TQString::fromLatin1("utf-8"), toCharSet_);
|
|
} else if(charSetLower == Latin1Literal("iso6937")) {
|
|
return iconvRun(Iso6937Converter::toUtf8(text_).utf8(), TQString::fromLatin1("utf-8"), toCharSet_);
|
|
}
|
|
kdWarning() << "Z3950Connection::iconvRun() - conversion from " << fromCharSet_
|
|
<< " to " << toCharSet_ << " is unsupported" << endl;
|
|
return text_;
|
|
}
|
|
|
|
const char* input = text_;
|
|
size_t inlen = text_.length();
|
|
|
|
size_t outlen = 2 * inlen; // this is enough, right?
|
|
TQMemArray<char> result0(outlen);
|
|
char* result = result0.data();
|
|
|
|
int r = yaz_iconv(cd, const_cast<char**>(&input), &inlen, &result, &outlen);
|
|
if(r <= 0) {
|
|
myDebug() << "Z3950Connection::iconvRun() - can't decode buffer" << endl;
|
|
return text_;
|
|
}
|
|
// bug in yaz, need to flush buffer to catch last character
|
|
yaz_iconv(cd, 0, 0, &result, &outlen);
|
|
|
|
// length is pointer difference
|
|
size_t len = result - result0;
|
|
|
|
TQCString output = TQCString(result0, len+1);
|
|
// myDebug() << "-------------------------------------------" << endl;
|
|
// myDebug() << output << endl;
|
|
// myDebug() << "-------------------------------------------" << endl;
|
|
yaz_iconv_close(cd);
|
|
return output;
|
|
#endif
|
|
return text_;
|
|
}
|
|
|
|
// Decodes a raw MARC record in character set charSet_ into a MARCXML string
// prefixed with an XML declaration.
// Returns an empty TQString when the input is empty, the character set is
// unsupported, the length prefix is out of range, decoding fails, or the
// build has no HAVE_YAZ. ISO 5426 and ISO 6937 input, which yaz_iconv_open()
// may reject, is converted to utf-8 first and then decoded.
TQString Z3950Connection::toXML(const TQCString& marc_, const TQString& charSet_) {
#ifdef HAVE_YAZ
  if(marc_.isEmpty()) {
    myDebug() << "Z3950Connection::toXML() - empty string" << endl;
    return TQString();
  }

  yaz_iconv_t cd = yaz_iconv_open("utf-8", charSet_.latin1());
  if(!cd) {
    // maybe it's iso 5426, which we sorta support
    TQString charSetLower = charSet_.lower();
    charSetLower.remove('-').remove(' ');
    if(charSetLower == Latin1Literal("iso5426")) {
      return toXML(Iso5426Converter::toUtf8(marc_).utf8(), TQString::fromLatin1("utf-8"));
    } else if(charSetLower == Latin1Literal("iso6937")) {
      return toXML(Iso6937Converter::toUtf8(marc_).utf8(), TQString::fromLatin1("utf-8"));
    }
    kdWarning() << "Z3950Connection::toXML() - conversion from " << charSet_ << " is unsupported" << endl;
    return TQString();
  }

  yaz_marc_t mt = yaz_marc_create();
  yaz_marc_iconv(mt, cd);
  yaz_marc_xml(mt, YAZ_MARC_MARCXML);

  // first 5 bytes are length
  bool ok;
#if YAZ_VERSIONL < 0x030000
  int len = marc_.left(5).toInt(&ok);
#else
  size_t len = marc_.left(5).toInt(&ok);
#endif
  if(ok && (len < 25 || len > 100000)) {
    myDebug() << "Z3950Connection::toXML() - bad length: " << (ok ? len : -1) << endl;
    // release the converter and decoder on every exit path
    yaz_iconv_close(cd);
    yaz_marc_destroy(mt);
    return TQString();
  }

#if YAZ_VERSIONL < 0x030000
  char* result;
#else
  const char* result;
#endif
  int r = yaz_marc_decode_buf(mt, marc_, -1, &result, &len);
  if(r <= 0) {
    myDebug() << "Z3950Connection::toXML() - can't decode buffer" << endl;
    yaz_iconv_close(cd);
    yaz_marc_destroy(mt);
    return TQString();
  }

  // copy the decoded text before the decoder is destroyed below
  TQString output = TQString::fromLatin1("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
  output += TQString::fromUtf8(TQCString(result, len+1), len+1);
//  myDebug() << TQCString(result) << endl;
//  myDebug() << "-------------------------------------------" << endl;
//  myDebug() << output << endl;
  yaz_iconv_close(cd);
  yaz_marc_destroy(mt);

  return output;
#else // no yaz
  return TQString();
#endif
}
|