You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
kbibtex/src/z3950connection.cpp

590 lines
20 KiB

/***************************************************************************
copyright : (C) 2005-2006 by Robby Stephenson
email : $EMAIL
***************************************************************************/
/***************************************************************************
* *
* This file has been modified to match the requirements of KBibTeX. *
* In case of problems or bugs arising from this implementation, please *
* contact the KBibTeX team first. *
* Thomas Fischer <fischer@unix-ag.uni-kl.de> *
* *
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of version 2 of the GNU General Public License as *
* published by the Free Software Foundation; *
* *
***************************************************************************/
#include "z3950connection.h"
// #include "z3950fetcher.h"
#include "messagehandler.h"
#include "latin1literal.h"
#include <kdebug.h>
#include "iso5426converter.h"
#include "iso6937converter.h"
#include <ntqapplication.h>
#include <config.h>
#ifdef HAVE_YAZ
extern "C"
{
#include <yaz/zoom.h>
#include <yaz/marcdisp.h>
#include <yaz/yaz-version.h>
}
#endif
#include <tdelocale.h>
#include <ntqfile.h>
namespace
{
// Page size for result fetching: used as the initial record limit and as the
// amount the limit grows by when more results are available.
// (Names in an unnamed namespace already have internal linkage.)
const size_t Z3950_DEFAULT_MAX_RECORDS = 20;
}
using KBibTeX::Z3950ResultFound;
using KBibTeX::Z3950Connection;
// Event carrying one result string. The string is deep-copied, and the
// connection's count of outstanding result events is bumped.
Z3950ResultFound::Z3950ResultFound( const TQString& s )
    : TQCustomEvent( uid() ),
      m_result( TQDeepCopy<TQString>( s ) )
{
    Z3950Connection::resultsLeft += 1;
}
// One result event less is outstanding once this event is destroyed.
Z3950ResultFound::~Z3950ResultFound()
{
    Z3950Connection::resultsLeft -= 1;
}
// Holds the YAZ/ZOOM handles of a Z3950Connection.
class Z3950Connection::Private
{
public:
#ifdef HAVE_YAZ
    // Start with null handles so that the destructor is safe even when
    // no connection was ever created.
    Private() : conn_opt( 0 ), conn( 0 ) {}
    ~Private()
    {
        if ( conn_opt )
        {
            ZOOM_options_destroy( conn_opt );
            conn_opt = 0;
        }
        if ( conn )
        {
            ZOOM_connection_destroy( conn );
            conn = 0;
        }
    }
    ZOOM_options conn_opt;
    ZOOM_connection conn;
#else
    Private() {}
#endif
};
// Count of Z3950ResultFound events currently alive: incremented by that
// event's constructor, decremented by its destructor and reset in run().
int Z3950Connection::resultsLeft = 0;
// All string arguments are deep-copied into the members.
// The character set name is later handed to a yaz API call; out of caution
// about user-supplied input only its first 64 characters are kept.
Z3950Connection::Z3950Connection( TQObject* fetcher,
                                  const TQString& host,
                                  uint port,
                                  const TQString& dbname,
                                  const TQString& sourceCharSet,
                                  const TQString& syntax,
                                  const TQString& esn ) :
    TQThread(),
    d( new Private() ),
    m_connected( false ),
    m_aborted( false ),
    m_fetcher( fetcher ),
    m_host( TQDeepCopy<TQString>( host ) ),
    m_port( port ),
    m_dbname( TQDeepCopy<TQString>( dbname ) ),
    m_sourceCharSet( TQDeepCopy<TQString>( sourceCharSet.left( 64 ) ) ),
    m_syntax( TQDeepCopy<TQString>( syntax ) ),
    m_esn( TQDeepCopy<TQString>( esn ) ),
    m_start( 0 ),
    m_limit( Z3950_DEFAULT_MAX_RECORDS ),
    m_hasMore( false )
{
}
// Marks the connection as closed and releases the private data.
Z3950Connection::~Z3950Connection()
{
    m_connected = false;

    // Private's destructor takes care of the ZOOM handles.
    delete d;
    d = 0;
}
// Rewinds paging to the first record and the default page size.
void Z3950Connection::reset()
{
    m_limit = Z3950_DEFAULT_MAX_RECORDS;
    m_start = 0;
}
// Stores a deep copy of the query and caps the record limit at
// Z3950_DEFAULT_MAX_RECORDS (a smaller numHits is taken as is).
void Z3950Connection::setQuery( const TQString& query_, unsigned int numHits )
{
    m_pqn = TQDeepCopy<TQString>( query_ );
    if ( numHits < Z3950_DEFAULT_MAX_RECORDS )
    {
        m_limit = numHits;
    }
    else
    {
        m_limit = Z3950_DEFAULT_MAX_RECORDS;
    }
}
// Stores deep copies of the credentials; makeConnection() passes them
// on as connection options.
void Z3950Connection::setUserPassword( const TQString& user_, const TQString& pword_ )
{
    m_password = TQDeepCopy<TQString>( pword_ );
    m_user = TQDeepCopy<TQString>( user_ );
}
// Thread body: connects if necessary, runs the stored prefix query,
// negotiates a record syntax we understand, posts one Z3950ResultFound event
// per fetched record to m_fetcher and finishes with a Z3950ConnectionDone
// event (via done()).
//
// Paging: records [m_start, m_limit) are fetched; if the server has more
// results than m_limit, m_start/m_limit are advanced for the next run.
void Z3950Connection::run()
{
    // kdDebug() << "Z3950Connection::run() - " << m_fetcher->source() << endl;
    m_aborted = false;
    m_hasMore = false;
    resultsLeft = 0;
#ifdef HAVE_YAZ
    if ( !makeConnection() )
    {
        // NOTE(review): makeConnection() already called done(msg, Error) on
        // failure, so a second "done" event is posted here - confirm that the
        // receiver copes with that.
        done();
        return;
    }

    ZOOM_query query = ZOOM_query_create();
    kdDebug() << "Z3950Connection::run() - pqn = " << toCString( m_pqn ) << endl;
    int errcode = ZOOM_query_prefix( query, toCString( m_pqn ) );
    if ( errcode != 0 )
    {
        kdDebug() << "Z3950Connection::run() - query error: " << m_pqn << endl;
        ZOOM_query_destroy( query );
        TQString s = i18n( "Query error!" );
        s += ' ' + m_pqn;
        done( s, MessageHandler::Error );
        return;
    }

    ZOOM_resultset resultSet = ZOOM_connection_search( d->conn, query );

    // check abort status
    if ( m_aborted )
    {
        // release the search objects, like every other exit path does
        ZOOM_resultset_destroy( resultSet );
        ZOOM_query_destroy( query );
        done();
        return;
    }

    // I know the LOC wants the syntax = "xml" and esn = "mods"
    // to get MODS data, that seems a bit odd...
    // esn only makes sense for marc and grs-1
    // if syntax is mods, set esn to mods too
    TQCString type = "raw";
    if ( m_syntax == Latin1Literal( "mods" ) )
    {
        m_syntax = TQString::fromLatin1( "xml" );
        ZOOM_resultset_option_set( resultSet, "elementSetName", "mods" );
        type = "xml";
    }
    else
    {
        ZOOM_resultset_option_set( resultSet, "elementSetName", m_esn.latin1() );
    }
    ZOOM_resultset_option_set( resultSet, "start", TQCString().setNum( m_start ) );
    ZOOM_resultset_option_set( resultSet, "count", TQCString().setNum( m_limit - m_start ) );

    // search in default syntax, unless syntax is already set
    if ( !m_syntax.isEmpty() )
    {
        ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", m_syntax.latin1() );
    }

    const char* errmsg;
    const char* addinfo;
    errcode = ZOOM_connection_error( d->conn, &errmsg, &addinfo );
    if ( errcode != 0 )
    {
        ZOOM_resultset_destroy( resultSet );
        ZOOM_query_destroy( query );
        m_connected = false;
        TQString s = i18n( "Connection search error %1: %2" ).arg( errcode ).arg( toString( errmsg ) );
        if ( !TQCString( addinfo ).isEmpty() )
        {
            s += " (" + toString( addinfo ) + ")";
        }
        kdDebug() << "Z3950Connection::run() - " << s << endl;
        done( s, MessageHandler::Error );
        return;
    }

    const size_t numResults = ZOOM_resultset_size( resultSet );
    TQString newSyntax = m_syntax;
    if ( numResults > 0 )
    {
        kdDebug() << "Z3950Connection::run() - current syntax is " << m_syntax << " (" << numResults << " results)" << endl;
        // so now we know that results exist, might have to check syntax
        // initialised so that the checks below never read an indeterminate
        // value should ZOOM_record_get() not write to it
        int len = 0;
        ZOOM_record rec = ZOOM_resultset_record( resultSet, 0 );
        // want raw unless it's mods
        ZOOM_record_get( rec, type, &len );
        if ( len > 0 && m_syntax.isEmpty() )
        {
            newSyntax = TQString::fromLatin1( ZOOM_record_get( rec, "syntax", &len ) ).lower();
            kdDebug() << "Z3950Connection::run() - syntax guess is " << newSyntax << endl;
            if ( newSyntax == Latin1Literal( "mods" ) || newSyntax == Latin1Literal( "xml" ) )
            {
                m_syntax = TQString::fromLatin1( "xml" );
                ZOOM_resultset_option_set( resultSet, "elementSetName", "mods" );
            }
            else if ( newSyntax == Latin1Literal( "grs-1" ) )
            {
                // if it's defaulting to grs-1, go ahead and change it to try to get a marc
                // record since grs-1 is a last resort for us
                newSyntax.truncate( 0 );
            }
        }
        // right now, we just understand mods, unimarc, marc21/usmarc, and grs-1
        if ( newSyntax != Latin1Literal( "xml" ) &&
                newSyntax != Latin1Literal( "usmarc" ) &&
                newSyntax != Latin1Literal( "marc21" ) &&
                newSyntax != Latin1Literal( "unimarc" ) &&
                newSyntax != Latin1Literal( "grs-1" ) )
        {
            // Try the known syntaxes one after another until the first
            // record comes back non-empty.
            kdDebug() << "Z3950Connection::run() - changing z39.50 syntax to MODS" << endl;
            newSyntax = TQString::fromLatin1( "xml" );
            ZOOM_resultset_option_set( resultSet, "elementSetName", "mods" );
            ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", newSyntax.latin1() );
            rec = ZOOM_resultset_record( resultSet, 0 );
            ZOOM_record_get( rec, "xml", &len );
            if ( len == 0 )
            {
                // change set name back
                ZOOM_resultset_option_set( resultSet, "elementSetName", m_esn.latin1() );
                newSyntax = TQString::fromLatin1( "usmarc" ); // try usmarc
                kdDebug() << "Z3950Connection::run() - changing z39.50 syntax to USMARC" << endl;
                ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", newSyntax.latin1() );
                rec = ZOOM_resultset_record( resultSet, 0 );
                ZOOM_record_get( rec, "raw", &len );
            }
            if ( len == 0 )
            {
                newSyntax = TQString::fromLatin1( "marc21" ); // try marc21
                kdDebug() << "Z3950Connection::run() - changing z39.50 syntax to MARC21" << endl;
                ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", newSyntax.latin1() );
                rec = ZOOM_resultset_record( resultSet, 0 );
                ZOOM_record_get( rec, "raw", &len );
            }
            if ( len == 0 )
            {
                newSyntax = TQString::fromLatin1( "unimarc" ); // try unimarc
                kdDebug() << "Z3950Connection::run() - changing z39.50 syntax to UNIMARC" << endl;
                ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", newSyntax.latin1() );
                rec = ZOOM_resultset_record( resultSet, 0 );
                ZOOM_record_get( rec, "raw", &len );
            }
            if ( len == 0 )
            {
                newSyntax = TQString::fromLatin1( "grs-1" ); // try grs-1
                kdDebug() << "Z3950Connection::run() - changing z39.50 syntax to GRS-1" << endl;
                ZOOM_resultset_option_set( resultSet, "preferredRecordSyntax", newSyntax.latin1() );
                rec = ZOOM_resultset_record( resultSet, 0 );
                ZOOM_record_get( rec, "raw", &len );
            }
            if ( len == 0 )
            {
                kdDebug() << "Z3950Connection::run() - giving up" << endl;
                ZOOM_resultset_destroy( resultSet );
                ZOOM_query_destroy( query );
                done( i18n( "Record syntax error" ), MessageHandler::Error );
                return;
            }
            kdDebug() << "Z3950Connection::run() - final syntax is " << newSyntax << endl;
        }
    }

    // go back to fooling ourselves and calling it mods
    if ( m_syntax == Latin1Literal( "xml" ) )
    {
        m_syntax = TQString::fromLatin1( "mods" );
    }
    if ( newSyntax == Latin1Literal( "xml" ) )
    {
        newSyntax = TQString::fromLatin1( "mods" );
    }
    // save syntax change for next time
    if ( m_syntax != newSyntax )
    {
        tqApp->postEvent( m_fetcher, new Z3950SyntaxChange( newSyntax ) );
        m_syntax = newSyntax;
    }

    if ( m_sourceCharSet.isEmpty() )
    {
        m_sourceCharSet = TQString::fromLatin1( "marc-8" );
    }

    const size_t realLimit = TQMIN( numResults, m_limit );
    for ( size_t i = m_start; i < realLimit && !m_aborted; ++i )
    {
        kdDebug() << "Z3950Connection::run() - grabbing index " << i << endl;
        ZOOM_record rec = ZOOM_resultset_record( resultSet, i );
        if ( !rec )
        {
            kdDebug() << "Z3950Connection::run() - no record returned for index " << i << endl;
            continue;
        }
        int len = 0;
        TQString data;
        if ( m_syntax == Latin1Literal( "mods" ) )
        {
            data = toString( ZOOM_record_get( rec, "xml", &len ) );
        }
        else if ( m_syntax == Latin1Literal( "grs-1" ) ) // grs-1
        {
            // we're going to parse the rendered data, very ugly...
            data = toString( ZOOM_record_get( rec, "render", &len ) );
        }
        else
        {
#if 0
            kdWarning() << "Remove debug from z3950connection.cpp" << endl;
            {
                TQFile f1( TQString::fromLatin1( "/tmp/z3950.raw" ) );
                if ( f1.open( IO_WriteOnly ) )
                {
                    TQDataStream t( &f1 );
                    t << ZOOM_record_get( rec, "raw", &len );
                }
                f1.close();
            }
#endif
            data = toXML( ZOOM_record_get( rec, "raw", &len ), m_sourceCharSet );
        }
        Z3950ResultFound* ev = new Z3950ResultFound( data );
        TQApplication::postEvent( m_fetcher, ev );
    }

    ZOOM_resultset_destroy( resultSet );
    ZOOM_query_destroy( query );

    // prepare the window for the next page, if there is one
    m_hasMore = m_limit < numResults;
    if ( m_hasMore )
    {
        m_start = m_limit;
        m_limit += Z3950_DEFAULT_MAX_RECORDS;
    }
#endif
    done();
}
// Opens the ZOOM connection unless one is already marked as connected.
// Returns true if connected (or if built without YAZ). On failure an error
// is reported through done( msg, MessageHandler::Error ) and false is returned.
bool Z3950Connection::makeConnection()
{
    if ( m_connected )
    {
        return true;
    }
    // kdDebug() << "Z3950Connection::makeConnection() - " << m_fetcher->source() << endl;
    // I don't know what to do except assume database, user, and password are in locale encoding
#ifdef HAVE_YAZ
    // NOTE(review): when reconnecting after run() cleared m_connected, any
    // handles still stored in d are overwritten here without being destroyed.
    d->conn_opt = ZOOM_options_create();
    ZOOM_options_set( d->conn_opt, "implementationName", "KBibTeX" );
    ZOOM_options_set( d->conn_opt, "databaseName", toCString( m_dbname ) );
    ZOOM_options_set( d->conn_opt, "user", toCString( m_user ) );
    ZOOM_options_set( d->conn_opt, "password", toCString( m_password ) );

    d->conn = ZOOM_connection_create( d->conn_opt );
    ZOOM_connection_connect( d->conn, m_host.latin1(), m_port );

    const char* errmsg = 0;  // textual form of the error code
    const char* addinfo = 0; // optional additional information
    const int errcode = ZOOM_connection_error( d->conn, &errmsg, &addinfo );
    if ( errcode != 0 )
    {
        // Build the message first: the strings were handed out by the
        // connection and must not be read after it has been destroyed.
        TQString s = i18n( "Connection error %1: %2" ).arg( errcode ).arg( toString( errmsg ) );
        if ( !TQCString( addinfo ).isEmpty() )
        {
            s += " (" + toString( addinfo ) + ")";
        }

        // Reset the handles after destroying them so that Private's
        // destructor does not destroy them a second time.
        ZOOM_options_destroy( d->conn_opt );
        d->conn_opt = 0;
        ZOOM_connection_destroy( d->conn );
        d->conn = 0;
        m_connected = false;

        kdDebug() << "Z3950Connection::makeConnection() - " << s << endl;
        done( s, MessageHandler::Error );
        return false;
    }
#endif
    m_connected = true;
    return true;
}
void Z3950Connection::done()
{
checkPendingEvents();
tqApp->postEvent( m_fetcher, new Z3950ConnectionDone( m_hasMore ) );
}
void Z3950Connection::done( const TQString& msg_, int type_ )
{
checkPendingEvents();
if ( m_aborted )
{
tqApp->postEvent( m_fetcher, new Z3950ConnectionDone( m_hasMore ) );
}
else
{
tqApp->postEvent( m_fetcher, new Z3950ConnectionDone( m_hasMore, msg_, type_ ) );
}
}
// Gives still-pending result events a moment to go away: waits one second
// (once, not in a loop) if any are outstanding.
void Z3950Connection::checkPendingEvents()
{
    if ( resultsLeft <= 0 )
    {
        return;
    }
    sleep( 1 );
}
// Converts text_ from UTF-8 into the source character set.
inline
TQCString Z3950Connection::toCString( const TQString& text_ )
{
    const TQString utf8Name = TQString::fromLatin1( "utf-8" );
    const TQCString utf8Text = text_.utf8();
    return iconvRun( utf8Text, utf8Name, m_sourceCharSet );
}
// Converts text_ from the source character set into a Unicode string,
// going through UTF-8.
inline
TQString Z3950Connection::toString( const TQCString& text_ )
{
    const TQString utf8Name = TQString::fromLatin1( "utf-8" );
    const TQCString utf8Text = iconvRun( text_, m_sourceCharSet, utf8Name );
    return TQString::fromUtf8( utf8Text );
}
// static
// Converts text_ from fromCharSet_ to toCharSet_ using yaz_iconv.
// Returns the input unchanged if it is empty, if both character sets are
// equal, if the conversion is unsupported or fails, or if built without YAZ.
// ISO 5426 and ISO 6937 input is first converted to UTF-8 by the dedicated
// converters when yaz cannot open a converter for it.
TQCString Z3950Connection::iconvRun( const TQCString& text_, const TQString& fromCharSet_, const TQString& toCharSet_ )
{
#ifdef HAVE_YAZ
    if ( text_.isEmpty() )
    {
        return text_;
    }
    if ( fromCharSet_ == toCharSet_ )
    {
        return text_;
    }

    yaz_iconv_t cd = yaz_iconv_open( toCharSet_.latin1(), fromCharSet_.latin1() );
    if ( !cd )
    {
        // maybe it's iso 5426, which we sorta support
        TQString charSetLower = fromCharSet_.lower();
        charSetLower.remove( '-' ).remove( ' ' );
        if ( charSetLower == Latin1Literal( "iso5426" ) )
        {
            return iconvRun( Iso5426Converter::toUtf8( text_ ).utf8(), TQString::fromLatin1( "utf-8" ), toCharSet_ );
        }
        else if ( charSetLower == Latin1Literal( "iso6937" ) )
        {
            return iconvRun( Iso6937Converter::toUtf8( text_ ).utf8(), TQString::fromLatin1( "utf-8" ), toCharSet_ );
        }
        kdWarning() << "Z3950Connection::iconvRun() - conversion from " << fromCharSet_
        << " to " << toCharSet_ << " is unsupported" << endl;
        return text_;
    }

    const char* input = text_;
    size_t inlen = text_.length();
    size_t outlen = 2 * inlen; // this is enough, right?
    TQMemArray<char> result0( outlen );
    char* result = result0.data();

    int r = yaz_iconv( cd, const_cast<char**>( &input ), &inlen, &result, &outlen );
    if ( r <= 0 )
    {
        kdDebug() << "Z3950Connection::iconvRun() - can't decode buffer" << endl;
        // do not leak the conversion descriptor on failure
        yaz_iconv_close( cd );
        return text_;
    }
    // bug in yaz, need to flush buffer to catch last character
    yaz_iconv( cd, 0, 0, &result, &outlen );

    // length is pointer difference
    size_t len = result - result0.data();
    TQCString output = TQCString( result0, len + 1 );
    // kdDebug() << "-------------------------------------------" << endl;
    // kdDebug() << output << endl;
    // kdDebug() << "-------------------------------------------" << endl;
    yaz_iconv_close( cd );
    return output;
#else
    return text_;
#endif
}
// Decodes a raw MARC record (marc_, encoded in charSet_) into MARCXML text
// prefixed with an XML declaration. Returns a null string for empty input,
// an unsupported character set, an implausible record length, a decoding
// failure, or when built without YAZ.
TQString Z3950Connection::toXML( const TQCString& marc_, const TQString& charSet_ )
{
#ifdef HAVE_YAZ
    if ( marc_.isEmpty() )
    {
        kdDebug() << "Z3950Connection::toXML() - empty string" << endl;
        return TQString::null;
    }

    yaz_iconv_t cd = yaz_iconv_open( "utf-8", charSet_.latin1() );
    if ( !cd )
    {
        // maybe it's iso 5426, which we sorta support
        TQString charSetLower = charSet_.lower();
        charSetLower.remove( '-' ).remove( ' ' );
        if ( charSetLower == Latin1Literal( "iso5426" ) )
        {
            return toXML( Iso5426Converter::toUtf8( marc_ ).utf8(), TQString::fromLatin1( "utf-8" ) );
        }
        else if ( charSetLower == Latin1Literal( "iso6937" ) )
        {
            return toXML( Iso6937Converter::toUtf8( marc_ ).utf8(), TQString::fromLatin1( "utf-8" ) );
        }
        kdWarning() << "Z3950Connection::toXML() - conversion from " << charSet_ << " is unsupported" << endl;
        return TQString::null;
    }

    yaz_marc_t mt = yaz_marc_create();
    yaz_marc_iconv( mt, cd );
    yaz_marc_xml( mt, YAZ_MARC_MARCXML );

    // first 5 bytes are length
    bool ok;
#if YAZ_VERSIONL < 0x030000
    int len = marc_.left( 5 ).toInt( &ok );
#else
    size_t len = marc_.left( 5 ).toInt( &ok );
#endif
    if ( ok && ( len < 25 || len > 100000 ) )
    {
        // 'ok' is known to be true in this branch
        kdDebug() << "Z3950Connection::toXML() - bad length: " << len << endl;
        // release the converter and the MARC handle on this early exit as well
        yaz_iconv_close( cd );
        yaz_marc_destroy( mt );
        return TQString::null;
    }

#if YAZ_VERSIONL < 0x030000
    char* result;
#else
    const char* result;
#endif
    int r = yaz_marc_decode_buf( mt, marc_, -1, &result, &len );
    if ( r <= 0 )
    {
        kdDebug() << "Z3950Connection::toXML() - can't decode buffer" << endl;
        yaz_iconv_close( cd );
        yaz_marc_destroy( mt );
        return TQString::null;
    }

    // copy the decoded text before the MARC handle is destroyed
    TQString output = TQString::fromLatin1( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" );
    output += TQString::fromUtf8( TQCString( result, len + 1 ), len + 1 );
    // kdDebug() << TQCString(result) << endl;
    // kdDebug() << "-------------------------------------------" << endl;
    // kdDebug() << output << endl;
    yaz_iconv_close( cd );
    yaz_marc_destroy( mt );
    return output;
#else // no yaz
    return TQString::null;
#endif
}