/***************************************************** vim:set ts=4 sw=4 sts=4:
SSMLConvert class
This class is in charge of converting SSML text into a format that can
be handled by individual synths .
- - - - - - - - - - - - - - - - - - -
Copyright :
( C ) 2004 by Paul Giannaros < ceruleanblaze @ gmail . com >
( C ) 2004 by Gary Cramblitt < garycramblitt @ comcast . net >
- - - - - - - - - - - - - - - - - - -
Original author : Paul Giannaros < ceruleanblaze @ gmail . com >
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/***************************************************************************
* *
* This program is free software ; you can redistribute it and / or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation ; version 2 of the License . *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// TQt includes.
# include <tqstring.h>
# include <tqstringlist.h>
# include <tqdom.h>
# include <tqfile.h>
# include <tqtextstream.h>
// KDE includes.
# include <tdeversion.h>
# include <kstandarddirs.h>
# include <kprocess.h>
# include <tdetempfile.h>
# include <kdebug.h>
// SSMLConvert includes.
# include "ssmlconvert.h"
# include "ssmlconvert.moc"
/// Default constructor.
/// Leaves the converter idle, with an empty talker list and no xsltproc
/// process object allocated.
SSMLConvert::SSMLConvert()
{
    m_state = tsIdle;
    m_xsltProc = 0;
    m_talkers = TQStringList();
}
/// Constructor taking the initial talker list.
/// @param talkers  Talker codes used as the reference set when matching
///                 entered text. The converter starts idle with no
///                 xsltproc process object allocated.
SSMLConvert::SSMLConvert(const TQStringList &talkers)
{
    m_state = tsIdle;
    m_xsltProc = 0;
    m_talkers = talkers;
}
/// Destructor.
/// Deletes the xsltproc process object and removes whichever temporary
/// input/output files are still recorded in the member file names.
SSMLConvert::~SSMLConvert()
{
    delete m_xsltProc;

    // Only touch the filesystem for names that were actually assigned.
    const bool haveInputFile = !m_inFilename.isEmpty();
    if (haveInputFile) {
        TQFile::remove(m_inFilename);
    }

    const bool haveOutputFile = !m_outFilename.isEmpty();
    if (haveOutputFile) {
        TQFile::remove(m_outFilename);
    }
}
/// Set the talkers to be used as reference for entered text.
void SSMLConvert : : setTalkers ( const TQStringList & talkers ) {
m_talkers = talkers ;
}
/// Derive a short synthesizer name from a talker code.
/// @param talkercode  The talker code to inspect.
/// @returns The fixed flite name when the synthesizer value mentions flite;
///          otherwise the text of the synthesizer value up to the position
///          reported by find(), converted to lower case.
TQString SSMLConvert::extractTalker(const TQString &talkercode)
{
    // Second section of the talker code when split on the synthesizer marker,
    // then the second section of that when split on the quote separator.
    const TQString afterMarker = talkercode.section(" synthesizer= ", 1, 1);
    const TQString synthValue = afterMarker.section(' " ', 1, 1);

    if (!synthValue.contains(" flite ")) {
        const int cutAt = synthValue.find(" ");
        return synthValue.left(cutAt).lower();
    }
    return " flite ";
}
/**
 * Return the most appropriate talker for the given SSML text, as a talker code.
 * @param text the text that will be parsed.
 * @returns the talker code of the first talker that survives all filters, or
 *          a null string if the text is not SSML (root element is not
 *          "speak") or if no configured talker survives the filters.
 *
 * The appropriate talker is the one that has the most features that are required in the
 * SSML markup. In the future the importance of individual features should become
 * configurable, but better to walk before you can run.
 * Currently the search works like a filter: talkers that meet the criteria we are
 * searching for stay, while the others are sifted out. This should leave us with the right
 * talker to use. It is not a very good method, but it should be appropriate in most cases.
 *
 * The things that are looked for, in order of importance:
 *   - Language
 *       The most important. If a language is specified, keep the talkers that support it.
 *       Defaults to en (or some form of en: en_US, en_GB, etc). Only one language at a
 *       time is allowed at the moment, and it must be specified on the root speak element
 *       (<speak xml:lang="en-US">).
 *   - Gender
 *       If a gender of male or female is specified, keep the talkers that comply. There is
 *       no default, so if no gender is specified no talkers are removed. If no talker has
 *       the requested gender the request is ignored rather than emptying the list. Only
 *       the gender given on the root speak element is considered.
 *   - Prosody
 *       Prefer talkers whose synth allows prosody modification. Currently this is
 *       hardcoded: Festival Interactive and Hadifix.
 *
 * Bear in mind that the XSL stylesheet that will be applied to the SSML is the same
 * regardless of how the talker is chosen, so features of the talker that this search
 * does not cover are not lost.
 *
 * TQDom is used for the matching; only the root element is inspected.
 */
TQString SSMLConvert::appropriateTalker(const TQString &text) const
{
    TQDomDocument ssml;
    ssml.setContent(text, false);  // No namespace processing.

    /// Check that this is (well formed) SSML so that the searching is not in vain.
    /// A failed parse yields a null root element, which also fails this test.
    TQDomElement root = ssml.documentElement();
    if (root.tagName() != " speak ") {
        // Not SSML.
        return TQString();
    }

    /// Matches are stored here. To begin with every talker matches; each rule
    /// below removes the talkers that do not comply.
    TQStringList matches = m_talkers;

    kdDebug() << " SSMLConvert::appropriateTalker: BEFORE LANGUAGE SEARCH: " << matches.join(" ") << endl;

    /**
     * Language searching
     */
    if (root.hasAttribute(" xml:lang ")) {
        TQString lang = root.attribute(" xml:lang ");
        kdDebug() << " SSMLConvert::appropriateTalker: xml:lang found ( " << lang << " ) " << endl;
        /// If it is set to en*, then match all english speakers.
        if (lang.contains(" en- ")) {
            kdDebug() << " SSMLConvert::appropriateTalker: English " << endl;
            lang = " en ";
        }
        /// Find all hits and place them in matches. We don't search for the closing quote
        /// because a talker may carry a longer code (e.g. en-UK) that we do not want to ignore.
        matches = matches.grep(" lang= \" " + lang);
    }
    else {
        kdDebug() << " SSMLConvert::appropriateTalker: no xml:lang found. Defaulting to en.. " << endl;
        matches = matches.grep(" lang= \" en ");
    }

    kdDebug() << " SSMLConvert::appropriateTalker: AFTER LANGUAGE SEARCH: " << matches.join(" ") << endl;

    /**
     * Gender searching
     * If, for example, male is specified and only female is found,
     * ignore the choice and just use female.
     */
    if (root.hasAttribute(" gender ")) {
        const TQString gender = root.attribute(" gender ");
        kdDebug() << " SSMLConvert::appropriateTalker: gender found ( " << gender << " ) " << endl;
        /// Only 'male' and 'female' are honoured; any other value is ignored.
        /// (The original test was negated and therefore filtered on exactly
        /// the values it was meant to ignore.)
        if (gender == " male " || gender == " female ") {
            /// Make sure that we don't strip away all the talkers because of no matches.
            const TQStringList genderMatches = matches.grep(" gender= \" " + gender);
            if (genderMatches.count() >= 1)
                matches = genderMatches;
        }
    }
    else {
        kdDebug() << " SSMLConvert::appropriateTalker: no gender found. " << endl;
    }

    /**
     * Prosody
     * Search for talkers that allow modification of the synth output - louder, higher,
     * slower, etc. There should be a direct way to query each synth to find out if this
     * is supported (some function in PlugInConf), but for now it is hardcoded.
     */
    /// Known to support (feel free to add to the list and the test below):
    /// Festival Int (not flite), Hadifix
    const TQStringList festivalMatches = matches.grep(" synthesizer= \" Festival Interactive ");
    const TQStringList hadifixMatches = matches.grep(" synthesizer= \" Hadifix ");
    if (festivalMatches.count() >= 1 || hadifixMatches.count() >= 1) {
        kdDebug() << " SSMLConvert::appropriateTalker: Prosody allowed " << endl;
        // Festival Interactive talkers are listed ahead of Hadifix ones.
        matches = festivalMatches + hadifixMatches;
    }
    else
        kdDebug() << " SSMLConvert::appropriateTalker: No prosody-supporting talkers found " << endl;

    /// The language filter can leave nothing behind; never index an empty list.
    if (matches.isEmpty())
        return TQString();

    /// Return the first match that complies. Maybe a discrete way to choose between
    /// all the matches could be offered in the future. Some form of preference.
    return matches.first();
}
/**
* Applies the spreadsheet for a talker to the SSML and returns the talker - native output .
* @ param text The markup to apply the spreadsheet to .
* @ param xsltFilename The name of the stylesheet file that will be applied ( i . e freetts , flite ) .
* @ returns False if an error occurs .
*
* This converts a piece of SSML into a format the given talker can understand . It applies
* an XSLT spreadsheet to the SSML and returns the output .
*
* Emits transformFinished signal when completed . Caller then calls getOutput to retrieve
* the transformed text .
*/
bool SSMLConvert : : transform ( const TQString & text , const TQString & xsltFilename ) {
m_xsltFilename = xsltFilename ;
/// Write @param text to a temporary file.
KTempFile inFile ( locateLocal ( " tmp " , " kttsd- " ) , " .ssml " ) ;
m_inFilename = inFile . file ( ) - > name ( ) ;
TQTextStream * wstream = inFile . textStream ( ) ;
if ( wstream = = 0 ) {
/// wtf...
kdDebug ( ) < < " SSMLConvert::transform: Can't write to " < < m_inFilename < < endl ; ;
return false ;
}
// TODO: Is encoding an issue here?
// TODO: It would be nice if we detected whether the XML is properly formed
// with the required xml processing instruction and encoding attribute. If
// not wrap it in such. But maybe this should be handled by SpeechData::setText()?
* wstream < < text ;
inFile . close ( ) ;
# if TDE_VERSION >= TDE_MAKE_VERSION (3,3,0)
inFile . sync ( ) ;
# endif
// Get a temporary output file name.
KTempFile outFile ( locateLocal ( " tmp " , " kttsd- " ) , " .output " ) ;
m_outFilename = outFile . file ( ) - > name ( ) ;
outFile . close ( ) ;
// outFile.unlink(); // only activate this if necessary.
/// Spawn an xsltproc process to apply our stylesheet to our SSML file.
m_xsltProc = new TDEProcess ;
* m_xsltProc < < " xsltproc " ;
* m_xsltProc < < " -o " < < m_outFilename < < " --novalid "
< < m_xsltFilename < < m_inFilename ;
// Warning: This won't compile under KDE 3.2. See FreeTTS::argsToStringList().
// kdDebug() << "SSMLConvert::transform: executing command: " <<
// m_xsltProc->args() << endl;
connect ( m_xsltProc , TQT_SIGNAL ( processExited ( TDEProcess * ) ) ,
this , TQT_SLOT ( slotProcessExited ( TDEProcess * ) ) ) ;
if ( ! m_xsltProc - > start ( TDEProcess : : NotifyOnExit , TDEProcess : : NoCommunication ) )
{
kdDebug ( ) < < " SSMLConvert::transform: Error starting xsltproc " < < endl ;
return false ;
}
m_state = tsTransforming ;
return true ;
}
void SSMLConvert : : slotProcessExited ( TDEProcess * /*proc*/ )
{
m_xsltProc - > deleteLater ( ) ;
m_xsltProc = 0 ;
m_state = tsFinished ;
emit transformFinished ( ) ;
}
/**
* Returns current processing state .
*/
int SSMLConvert : : getState ( ) { return m_state ; }
/**
 * Returns the output from the last call to transform.
 * @returns The contents of the temporary output file, or a null string if
 *          that file could not be opened for reading.
 *
 * Whether or not the read succeeds, both temporary files are removed, their
 * recorded names are cleared and the state returns to idle, so the converter
 * is ready for another transform and no temporary file is orphaned.
 */
TQString SSMLConvert::getOutput()
{
    TQString convertedData;

    /// Read back the data that xsltproc wrote to the temporary output file.
    TQFile readfile(m_outFilename);
    if (readfile.open(IO_ReadOnly)) {
        TQTextStream rstream(&readfile);
        convertedData = rstream.read();
        readfile.close();
    }
    else {
        /// Could not read the transformed output; fall through to the clean up
        /// and hand back a null string.
        kdDebug() << " SSMLConvert::getOutput: Could not read file " << m_outFilename << endl;
    }

    // kdDebug() << "SSMLConvert::getOutput: Read SSML file at " + m_inFilename + " and created " + m_outFilename + " based on the stylesheet at " << m_xsltFilename << endl;

    // Clean up. Done on the failure path too: the next transform() overwrites
    // these names, after which the files could never be removed.
    if (!m_inFilename.isEmpty())
        TQFile::remove(m_inFilename);
    m_inFilename = TQString();
    if (!m_outFilename.isEmpty())
        TQFile::remove(m_outFilename);
    m_outFilename = TQString();

    // Ready for another transform.
    m_state = tsIdle;

    return convertedData;
}