You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tdeaccessibility/kttsd/plugins/festivalint/festivalintproc.cpp

663 lines
24 KiB

/***************************************************** vim:set ts=4 sw=4 sts=4:
Main speaking functions for the Festival (Interactive) Plug in
-------------------
Copyright:
(C) 2004 by Gary Cramblitt <garycramblitt@comcast.net>
-------------------
Original author: Gary Cramblitt <garycramblitt@comcast.net>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
******************************************************************************/
// C++ includes.
#include <math.h>
// TQt includes.
#include <tqstring.h>
#include <tqstringlist.h>
#include <tqthread.h>
#include <tqtextcodec.h>
// KDE includes.
#include <kdebug.h>
#include <kconfig.h>
#include <kstandarddirs.h>
// KTTS includes.
#include "utils.h"
// FestivalInt includes.
#include "festivalintproc.h"
#include "festivalintproc.moc"
/**
 * Constructor.
 *
 * Puts the plug-in into a well-defined idle state: no Festival process,
 * nothing queued, English / ISO8859-1 as the default language and codec.
 * The settings normally filled in by init() are given the same defaults
 * init() uses, so that methods which may run before init() (for example
 * queryVoices() followed by stopText()) never read indeterminate members.
 *
 * @param parent  Parent object, forwarded to PlugInProc.
 * @param name    Object name, forwarded to PlugInProc.
 */
FestivalIntProc::FestivalIntProc( TQObject* parent, const char* name, const TQStringList& ) :
    PlugInProc( parent, name ){
    // kdDebug() << "FestivalIntProc::FestivalIntProc: Running" << endl;
    m_ready = true;
    m_writingStdin = false;
    m_waitingQueryVoices = false;
    m_waitingStop = false;
    m_festProc = 0;
    m_state = psIdle;
    m_supportsSSML = ssUnknown;
    m_languageCode = "en";
    m_codec = TQTextCodec::codecForName("ISO8859-1");
    // Same defaults init() falls back to when the config has no entry.
    m_preload = false;
    m_time = 100;
    m_pitch = 100;
    m_volume = 100;
    // Same values startEngine() assigns when it launches a fresh process.
    m_runningTime = 100;
    m_runningPitch = 100;
}
/**
 * Destructor.
 *
 * If a Festival process is still running it is either asked to quit (when it
 * is idle at its prompt) or killed (when it is busy), and the process object
 * is then deleted.
 */
FestivalIntProc::~FestivalIntProc(){
    // kdDebug() << "FestivalIntProc::~FestivalIntProc: Running" << endl;
    if (m_festProc)
    {
        if (m_festProc->isRunning())
        {
            if (m_ready)
            {
                // Static storage so the buffer outlives this call.
                static const char quitCommand[] = "(quit)";
                m_state = psIdle;
                // kdDebug() << "FestivalIntProc::~FestivalIntProc: telling Festival to quit." << endl;
                m_ready = false;
                m_waitingStop = true;
                // The second argument is a byte count (see sendIfReady()).
                // The former "true" was converted to 1, so only "(" was sent.
                m_festProc->writeStdin(quitCommand,
                    static_cast<int>(sizeof(quitCommand) - 1));
            }
            else
            {
                // kdDebug() << "FestivalIntProc::~FestivalIntProc: killing Festival." << endl;
                m_waitingStop = true;
                m_festProc->kill();
            }
        }
        // NOTE(review): the process object is deleted immediately after the
        // quit request is handed over; confirm the request has a chance to
        // reach Festival before the object goes away.
        delete m_festProc;
    }
}
/**
 * Load the talker settings from the configuration and, when requested,
 * start Festival right away.
 *
 * @param config       Configuration object to read from.
 * @param configGroup  Group within @p config holding this talker's settings.
 * @return Always true.
 */
bool FestivalIntProc::init(KConfig *config, const TQString &configGroup)
{
    // kdDebug() << "FestivalIntProc::init: Initializing plug in: Festival" << endl;
    config->setGroup(configGroup);

    // Which voice to use and where the Festival executable lives.
    m_voiceCode = config->readEntry("Voice");
    m_festivalExePath = config->readEntry("FestivalExecutablePath", "festival");

    // Prosody settings, all percentages with 100 as the default.
    m_time = config->readNumEntry("time", 100);
    m_pitch = config->readNumEntry("pitch", 100);
    m_volume = config->readNumEntry("volume", 100);

    // Whether the voice should be loaded up front.
    m_preload = config->readBoolEntry("Preload", false);

    m_languageCode = config->readEntry("LanguageCode", "en");
    m_supportsSSML = static_cast<SupportsSSML>(config->readNumEntry("SupportsSSML", ssUnknown));

    const TQString configuredCodec = config->readEntry("Codec", "Latin1");
    m_codec = codecNameToCodec(configuredCodec);

    if (m_preload)
    {
        // Pre-loading: start Festival now and load the voice.
        startEngine(m_festivalExePath, m_voiceCode, m_languageCode, m_codec);
    }
    return true;
}
/**
* Say a text. Synthesize and audibilize it.
* @param text The text to be spoken.
*
* If the plugin supports asynchronous operation, it should return immediately.
*/
void FestivalIntProc::sayText(const TQString &text)
{
synth(m_festivalExePath, text, TQString(), m_voiceCode, m_time, m_pitch, m_volume,
m_languageCode, m_codec);
}
/**
 * Synthesize a text into an audio file without playing it.
 * @param text               The text to be synthesized.
 * @param suggestedFilename  Full pathname of the file to create. It is passed
 *                           to synth() unchanged; the name actually used can
 *                           be queried afterwards with getFilename().
 *
 * Delegates to synth() using the configured talker settings.
 */
void FestivalIntProc::synthText(const TQString& text, const TQString& suggestedFilename)
{
    synth(
        m_festivalExePath,
        text,
        suggestedFilename,
        m_voiceCode,
        m_time,
        m_pitch,
        m_volume,
        m_languageCode,
        m_codec);
}
/**
 * Sends command to Festival to query for a list of supported voice codes.
 * Fires queryVoicesFinished when completed.
 * @param festivalExePath  Path to the Festival executable, or just "festival".
 * @return                 False if busy doing something else and therefore cannot
 *                         do the query.
 */
bool FestivalIntProc::queryVoices(const TQString &festivalExePath)
{
    // kdDebug() << "FestivalIntProc::queryVoices: Running" << endl;
    // Busy if ANY of these holds: a say/synth job is in progress or not yet
    // acknowledged, a previous voice query is still pending, or a stop is
    // pending. (The conditions used to be joined with &&, which only refused
    // the query when all three were true at once.)
    if (m_state != psIdle || m_waitingQueryVoices || m_waitingStop) return false;
    // Start Festival if not already running.
    startEngine(festivalExePath, TQString(), m_languageCode, m_codec);
    // Set state, waiting for voice codes list from Festival.
    m_waitingQueryVoices = true;
    // Voice rab_diphone is needed in order to support SSML.
    // Reset until the reply tells us whether that voice is installed.
    m_supportsSSML = ssUnknown;
    // Send command to query the voice codes.
    sendToFestival("(print (mapcar (lambda (pair) (car pair)) voice-locations))");
    return true;
}
/**
* Start Festival engine.
* @param festivalExePath Path to the Festival executable, or just "festival".
* @param voiceCode Voice code in which to speak text.
* @param languageCode Language code, for example, "en".
*/
void FestivalIntProc::startEngine(const TQString &festivalExePath, const TQString &voiceCode,
const TQString &languageCode, TQTextCodec* codec)
{
// Initialize Festival only if it's not initialized.
if (m_festProc)
{
// Stop Festival if a different EXE is requested or different language code.
// If festProc exists but is not running, it is because it was stopped.
if ((festivalExePath != m_festivalExePath) || !m_festProc->isRunning() ||
(m_languageCode != languageCode) || (codec->name() != m_codec->name()))
{
delete m_festProc;
m_festProc = 0;
}
}
if(!m_festProc)
{
// kdDebug()<< "FestivalIntProc::startEngine: Creating Festival object" << endl;
m_festProc = new KProcess;
*m_festProc << festivalExePath;
*m_festProc << "--interactive";
m_festProc->setEnvironment("LANG", languageCode + "." + codec->mimeName());
m_festProc->setEnvironment("LC_CTYPE", languageCode + "." + codec->mimeName());
// kdDebug() << "FestivalIntProc::startEngine: setting LANG = LC_CTYPE = " << languageCode << "." << codec->mimeName() << endl;
connect(m_festProc, TQT_SIGNAL(processExited(KProcess*)),
this, TQT_SLOT(slotProcessExited(KProcess*)));
connect(m_festProc, TQT_SIGNAL(receivedStdout(KProcess*, char*, int)),
this, TQT_SLOT(slotReceivedStdout(KProcess*, char*, int)));
connect(m_festProc, TQT_SIGNAL(receivedStderr(KProcess*, char*, int)),
this, TQT_SLOT(slotReceivedStderr(KProcess*, char*, int)));
connect(m_festProc, TQT_SIGNAL(wroteStdin(KProcess*)),
this, TQT_SLOT(slotWroteStdin(KProcess*)));
}
if (!m_festProc->isRunning())
{
// kdDebug() << "FestivalIntProc::startEngine: Starting Festival process" << endl;
m_runningVoiceCode = TQString();
m_runningTime = 100;
m_runningPitch = 100;
m_ready = false;
m_outputQueue.clear();
if (m_festProc->start(KProcess::NotifyOnExit, KProcess::All))
{
// kdDebug()<< "FestivalIntProc:startEngine: Festival initialized" << endl;
m_festivalExePath = festivalExePath;
m_languageCode = languageCode;
m_codec = codec;
// Load the SABLE to Wave module.
sendToFestival("(load \"" +
KGlobal::dirs()->resourceDirs("data").last() + "kttsd/festivalint/sabletowave.scm\")");
}
else
{
kdDebug() << "FestivalIntProc::startEngine: Error starting Festival process. Is festival in the PATH?" << endl;
m_ready = true;
m_state = psIdle;
return;
}
}
// If we just started Festival, or voiceCode has changed, send code to Festival.
if (m_runningVoiceCode != voiceCode && !voiceCode.isEmpty()) {
sendToFestival("(voice_" + voiceCode + ")");
m_runningVoiceCode = voiceCode;
}
}
/**
* Say or Synthesize text.
*
* Makes sure Festival is running with the requested voice, sends rate and
* pitch commands when they differ from what was last sent, prepares the text
* (long-sentence splitting, quote escaping) and finally queues either a
* SayText command or a synthesize-to-file command.
*
* @param festivalExePath Path to the Festival executable, or just "festival".
* @param text The text to be synthesized.
* @param synthFilename If not Null, synthesize only to this filename, otherwise
* synthesize and audibilize the text.
* @param voiceCode Voice code in which to speak text.
* @param time Speed percentage. 50 to 200. 200% = 2x normal.
* @param pitch Pitch percentage. 50 to 200.
* @param volume Volume percentage. 50 to 200. Only applied when
* synthesizing to a file.
* @param languageCode Language code, for example, "en".
* @param codec Codec handed on to startEngine().
*/
void FestivalIntProc::synth(
const TQString &festivalExePath,
const TQString &text,
const TQString &synthFilename,
const TQString &voiceCode,
int time,
int pitch,
int volume,
const TQString &languageCode,
TQTextCodec* codec)
{
// kdDebug() << "FestivalIntProc::synth: festivalExePath = " << festivalExePath
// << " voiceCode = " << voiceCode << endl;
// Initialize Festival only if it's not initialized
startEngine(festivalExePath, voiceCode, languageCode, codec);
// If we just started Festival, or rate changed, tell festival.
// m_runningTime remembers the rate last sent to the running process.
if (m_runningTime != time) {
TQString timeMsg;
// Voices whose code contains "_hts" get a different duration command.
if (voiceCode.contains("_hts") > 0)
{
// Map 50% to 200% onto 0 to 1000.
// slider = alpha * (log(percent)-log(50))
// with alpha = 1000/(log(200)-log(50))
double alpha = 1000 / (log(200) - log(50));
int slider = (int)floor (0.5 + alpha * (log(time)-log(50)));
// Center at 0.
slider = slider - 500;
// Map -500 to 500 onto 0.15 to -0.15.
float stretchValue = -float(slider) * 0.15 / 500.0;
timeMsg = TQString("(set! hts_duration_stretch %1)").tqarg(
stretchValue, 0, 'f', 3);
}
else
// Other voices: the stretch factor is the inverse of the speed, so 200% gives 0.50.
timeMsg = TQString("(Parameter.set 'Duration_Stretch %1)").tqarg(
1.0/(float(time)/100.0), 0, 'f', 2);
sendToFestival(timeMsg);
m_runningTime = time;
}
// If we just started Festival, or pitch changed, tell festival.
// m_runningPitch remembers the pitch last sent to the running process.
if (m_runningPitch != pitch) {
// Pitch values range from 50 to 200 %, with 100% as the midpoint,
// while frequency values range from 41 to 500 with 105 as the "midpoint".
// The mapping is piecewise linear, with separate slopes below and above 100%.
int pitchValue;
if (pitch <= 100)
{
pitchValue = (((pitch - 50) * 64) / 50) + 41;
}
else
{
pitchValue = (((pitch - 100) * 395) / 100) + 105;
}
TQString pitchMsg = TQString(
"(set! int_lr_params '((target_f0_mean %1) (target_f0_std 14)"
"(model_f0_mean 170) (model_f0_std 34)))").tqarg(pitchValue, 0, 10);
sendToFestival(pitchMsg);
m_runningPitch = pitch;
}
// Work on a copy; the caller's text is left untouched.
TQString saidText = text;
// Split really long sentences into shorter sentences, by looking for commas and converting
// to periods.
// The search runs backwards from the end of the text in steps of two thirds
// of c_tooLong (declared outside this block) and stops once the search
// position is within c_tooLong of the start or no further ", " is found.
int len = saidText.length();
while (len > c_tooLong)
{
len = saidText.findRev(", ", len - (c_tooLong * 2 / 3), true);
if (len != -1)
{
// Only split where the character after ", " changes when upper-cased.
TQString c = saidText.mid(len+2, 1);
if (c != c.upper())
{
saidText.replace(len, 2, ". ");
saidText.replace(len+2, 1, c.upper());
kdDebug() << "FestivalIntProc::synth: Splitting long sentence at " << len << endl;
// kdDebug() << saidText << endl;
}
}
}
// Encode quotation characters.
// Already-escaped quotes are parked under a placeholder so that they are not
// escaped a second time, then every quote ends up as \" .
saidText.replace("\\\"", "#!#!");
saidText.replace("\"", "\\\"");
saidText.replace("#!#!", "\\\"");
// Remove certain comment characters.
saidText.replace("--", "");
// Ok, let's rock.
if (synthFilename.isNull())
{
// Audible path: no output file is recorded.
m_state = psSaying;
m_synthFilename = TQString();
// kdDebug() << "FestivalIntProc::synth: Saying text: '" << saidText << "' using Festival plug in with voice "
// << voiceCode << endl;
saidText = "(SayText \"" + saidText + "\")";
sendToFestival(saidText);
} else {
// File path: remember the filename so getFilename() can report it.
m_state = psSynthing;
m_synthFilename = synthFilename;
// Volume must be given for each utterance.
// Volume values range from 50 to 200%, with 100% = normal.
// Map onto rescale range of .5 to 2.
float volumeValue = float(volume) / 100;
// Expand to range .25 to 4.
// float volumeValue = exp(log(volumeValue) * 2);
// kdDebug() << "FestivalIntProc::synth: Synthing text: '" << saidText << "' using Festival plug in with voice "
// << voiceCode << endl;
// Text with a SABLE root element goes through ktts_sabletowave,
// presumably defined by the sabletowave.scm module that startEngine() loads.
if (isSable(saidText))
{
// Synth the text and adjust volume.
saidText =
"(ktts_sabletowave \"" + saidText + "\" \"" +
synthFilename + "\" " +
TQString::number(volumeValue) + ")";
}
else
{
saidText =
// Suppress pause at the beginning of each utterance.
"(define (insert_initial_pause utt) "
"(item.set_feat (utt.relation.first utt 'Segment) 'end 0.0))"
// Synth the text and adjust volume.
"(set! utt1 (Utterance Text \"" + saidText +
"\"))(utt.synth utt1)" +
"(utt.wave.rescale utt1 " + TQString::number(volumeValue) + " t)" +
"(utt.save.wave utt1 \"" + synthFilename + "\")";
}
sendToFestival(saidText);
}
}
/**
 * Queue a command for Festival and try to transmit it straight away.
 * The command is always appended to the output queue; sendIfReady() then
 * decides whether the head of the queue can be written now.
 * @param text  Command to send or queue. A null string is ignored.
 */
void FestivalIntProc::sendToFestival(const TQString& text)
{
    if (!text.isNull())
    {
        m_outputQueue.append(text);
        sendIfReady();
    }
}
/**
* If Festival is ready for more input and there is more output to send, send it.
* To be ready for more input, the Stdin buffer must be empty and the "festival>"
* prompt must have been received (m_ready = true).
* @return False when Festival is ready for more input
* but there is nothing to be sent, or if Festival
* has exited.
*/
bool FestivalIntProc::sendIfReady()
{
if (!m_ready) return true;
if (m_writingStdin) return true;
if (m_outputQueue.isEmpty()) return false;
if (!m_festProc->isRunning()) return false;
TQString text = m_outputQueue[0];
text += "\n";
TQCString encodedText;
if (m_codec)
encodedText = m_codec->fromUnicode(text);
else
encodedText = text.latin1(); // Should not happen, but just in case.
m_outputQueue.pop_front();
m_ready = false;
// kdDebug() << "FestivalIntProc::sendIfReady: sending to Festival: " << text << endl;
m_writingStdin = true;
m_festProc->writeStdin(encodedText, encodedText.length());
return true;
}
/**
 * Tell whether a text is a SABLE document, i.e. has a SABLE root element.
 * Such text has to go through a different synthesis method.
 * @param text  The text to examine.
 * @return True if KttsUtils reports a "SABLE" root element.
 */
bool FestivalIntProc::isSable(const TQString &text)
{
    const bool hasSableRoot = KttsUtils::hasRootElement( text, "SABLE" );
    return hasSableRoot;
}
/**
 * Report the audio file of the most recent synthText request.
 * @return Name of the audio file the plugin generated.
 *         Null if no such file.
 *
 * The plugin must not re-use the filename.
 */
TQString FestivalIntProc::getFilename()
{
    return m_synthFilename;
}
/**
 * Stop the current say/synth operation.
 *
 * When Festival is absent, not running, or idle at its prompt, the plug-in
 * simply returns to psIdle. Otherwise a stop is recorded as pending and
 * Festival is killed, except when a preloaded voice is in the middle of
 * synthesizing to a file.
 */
void FestivalIntProc::stopText(){
    // kdDebug() << "FestivalIntProc::stopText: Running" << endl;
    const bool engineRunning = m_festProc && m_festProc->isRunning();
    if (!engineRunning || m_ready)
    {
        // Nothing in progress that needs interrupting.
        m_state = psIdle;
        return;
    }

    m_waitingStop = true;

    // If using a preloaded voice, killing Festival is a bad idea because of
    // huge startup times. So if synthing (not saying), let Festival continue
    // synthing. When it completes, we will emit the stopped signal.
    const bool letSynthFinish = m_preload && (m_state == psSynthing);
    if (!letSynthFinish)
    {
        // kdDebug() << "FestivalIntProc::stopText: killing Festival." << endl;
        m_festProc->kill();
    }
}
/**
 * Called when the Festival process has exited.
 *
 * Resolves whatever was pending: a requested stop emits stopped(), a pending
 * voice query is cancelled, and an ordinary say/synth job is reported as
 * finished. The process object is then deleted and the queue emptied.
 */
void FestivalIntProc::slotProcessExited(KProcess*)
{
    // kdDebug() << "FestivalIntProc:slotProcessExited: Festival process has exited." << endl;
    m_ready = true;
    const pluginState stateAtExit = m_state;

    if (!m_waitingStop && !m_waitingQueryVoices)
    {
        // Festival went away during normal operation: report the job as finished.
        if (m_state != psIdle) m_state = psFinished;
        switch (stateAtExit)
        {
            case psSaying:
                emit sayFinished();
                break;
            case psSynthing:
                emit synthFinished();
                break;
            default:
                break;
        }
    }
    else
    {
        if (m_waitingStop)
        {
            // The exit was requested through stopText() or the destructor.
            m_waitingStop = false;
            m_state = psIdle;
            emit stopped();
        }
        if (m_waitingQueryVoices)
        {
            // The voice query cannot complete any more; cancel it.
            m_waitingQueryVoices = false;
            m_state = psIdle;
        }
    }

    // The dead process is of no further use; startEngine() creates a new one.
    delete m_festProc;
    m_festProc = 0;
    m_outputQueue.clear();
}
/**
 * Handles a chunk of text Festival wrote to stdout.
 *
 * Two things are looked for:
 *  - the reply to a pending voice query (either "nil" or a parenthesized,
 *    space separated list of voice codes), and
 *  - the "festival>" prompt, which means Festival can take the next command.
 *
 * @param buffer  Received bytes (not necessarily NUL terminated).
 * @param buflen  Number of bytes in @p buffer.
 */
void FestivalIntProc::slotReceivedStdout(KProcess*, char* buffer, int buflen)
{
    TQString buf = TQString::tqfromLatin1(buffer, buflen);
    // kdDebug() << "FestivalIntProc::slotReceivedStdout: Received from Festival: " << buf << endl;
    // Detect the prompt on the raw text, before buf is trimmed for parsing.
    bool promptSeen = (buf.contains("festival>") > 0);
    bool emitQueryVoicesFinished = false;
    TQStringList voiceCodesList;
    // Only parse once the query command itself has left the queue.
    if (m_waitingQueryVoices && m_outputQueue.isEmpty())
    {
        // Look for opening ( and closing ).
        // simplifyWhiteSpace() returns the cleaned-up string instead of
        // modifying buf, so the result has to be assigned back. Without the
        // assignment, leading whitespace or line breaks hid the reply.
        buf = buf.simplifyWhiteSpace();
        if (buf.left(3) == "nil") {
            // No voices available.
            emitQueryVoicesFinished = true;
            m_waitingQueryVoices = false;
        } else {
            if (buf.left(1) == "(")
            {
                int rightParen = buf.find(')');
                if (rightParen > 0)
                {
                    m_waitingQueryVoices = false;
                    // Extract contents between parens.
                    buf = buf.mid(1, rightParen - 1);
                    // Space separated list.
                    voiceCodesList = TQStringList::split(" ", buf, false);
                    emitQueryVoicesFinished = true;
                }
            }
        }
    }
    if (promptSeen)
    {
        // kdDebug() << "FestivalIntProc::slotReceivedStdout: Prompt seen" << endl;
        m_ready = true;
        // sendIfReady() returns false when there is nothing more to send,
        // i.e. the current job is complete.
        if (!sendIfReady())
        {
            // kdDebug() << "FestivalIntProc::slotReceivedStdout: All output sent. " << endl;
            pluginState prevState = m_state;
            if (m_state != psIdle) m_state = psFinished;
            if (prevState == psSaying)
            {
                // kdDebug() << "FestivalIntProc::slotReceivedStdout: emitting sayFinished signal" << endl;
                emit sayFinished();
            } else
                if (prevState == psSynthing)
                {
                    if (m_waitingStop)
                    {
                        // stopText() let a preloaded voice finish synthing
                        // instead of killing Festival; report the stop now.
                        m_waitingStop = false;
                        m_state = psIdle;
                        // kdDebug() << "FestivalIntProc::slotReceivedStdout: emitting optimized stopped signal" << endl;
                        emit stopped();
                    }
                    else
                    {
                        // kdDebug() << "FestivalIntProc::slotReceivedStdout: emitting synthFinished signal" << endl;
                        emit synthFinished();
                    }
                }
        }
    }
    if (emitQueryVoicesFinished)
    {
        // kdDebug() << "FestivalIntProc::slotReceivedStdout: emitting queryVoicesFinished" << endl;
        // SSML support depends on the rab_diphone voice being installed.
        m_supportsSSML = (voiceCodesList.contains("rab_diphone")) ? ssYes : ssNo;
        emit queryVoicesFinished(voiceCodesList);
    }
}
/**
 * Log whatever Festival wrote to stderr; the text is not acted upon.
 * @param buffer  Received bytes.
 * @param buflen  Number of bytes in @p buffer.
 */
void FestivalIntProc::slotReceivedStderr(KProcess*, char* buffer, int buflen)
{
    const TQString errorText = TQString::tqfromLatin1(buffer, buflen);
    kdDebug() << "FestivalIntProc::slotReceivedStderr: Received error from Festival: " << errorText << endl;
}
/**
 * Called when a write to Festival's stdin has completed.
 *
 * Clears the pending-write flag and tries to send the next queued command.
 * When sendIfReady() reports that nothing remains to be done, the current
 * job is marked finished and the matching signal is emitted.
 */
void FestivalIntProc::slotWroteStdin(KProcess* /*proc*/)
{
    // kdDebug() << "FestivalIntProc::slotWroteStdin: Running" << endl;
    m_writingStdin = false;
    if (sendIfReady())
        return;

    // kdDebug() << "FestivalIntProc::slotWroteStdin: all output sent" << endl;
    const pluginState stateBefore = m_state;
    if (m_state != psIdle) m_state = psFinished;
    switch (stateBefore)
    {
        case psSaying:
            emit sayFinished();
            break;
        case psSynthing:
            emit synthFinished();
            break;
        default:
            break;
    }
}
/**
 * @return The current value of the ready flag, which is set when the
 *         "festival>" prompt has been seen and cleared while a command
 *         is being processed.
 */
bool FestivalIntProc::isReady()
{
    return m_ready;
}
/**
 * Report what the plugin is currently doing.
 * Only meaningful in asynchronous mode.
 * @return The pluginState of the plugin.
 *
 * @see pluginState
 */
pluginState FestivalIntProc::getState()
{
    return m_state;
}
/**
 * Acknowledge a finished operation and return the plugin to psIdle.
 *
 * Does nothing unless the plugin is in state psFinished. The stored
 * filename is blanked (the file itself is not deleted), so the calling
 * program should call getFilename before ackFinished.
 */
void FestivalIntProc::ackFinished()
{
    if (m_state != psFinished)
        return;

    m_state = psIdle;
    m_synthFilename = TQString();
}
/**
 * Tell whether sayText and synthText return immediately.
 * @return Always true for this plugin.
 *
 * A plugin returning True must also implement @ref getState and must emit
 * @ref sayFinished or @ref synthFinished when saying or synthesis is
 * completed.
 */
bool FestivalIntProc::supportsAsync()
{
    return true;
}
/**
 * Tell whether the plugin implements synthText, i.e. can synthesize text
 * to a sound file without audibilizing it.
 * @return Always true for this plugin.
 */
bool FestivalIntProc::supportsSynth()
{
    return true;
}
/**
 * Name the XSLT stylesheet that converts a valid SSML file into something
 * this synth can process.
 *
 * When SSML is known to be supported, the stylesheet converting SSML into
 * SABLE is returned. Otherwise the PlugInProc default is used.
 * @return Name of the XSLT file.
 */
TQString FestivalIntProc::getSsmlXsltFilename()
{
    if (m_supportsSSML != ssYes)
        return PlugInProc::getSsmlXsltFilename();

    const TQString dataDir = KGlobal::dirs()->resourceDirs("data").last();
    return dataDir + "kttsd/festivalint/xslt/SSMLtoSable.xsl";
}