//
// HtSGMLCodec.cc
//
// HtSGMLCodec: A Specialized HtWordCodec class to convert between SGML
// ISO 8859-1 entities and high-bit characters.
//
// Part of the ht://Dig package
// Copyright (c) 1995-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
//
//
// $Id: HtSGMLCodec.cc,v 1.6 2004/06/01 18:25:01 angusgb Exp $
//
#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */
#include "HtSGMLCodec.h"
#include "HtConfiguration.h"
// Constructor: parses the appropriate parameters using the
// encapsulated HtWordCodec class.
// Not meant to be called directly; instance() below creates the single object.
HtSGMLCodec::HtSGMLCodec()
{
  // Builds three parallel lists and hands them to two HtWordCodec objects:
  //   myTextFromList - named entity references      (&foo;)
  //   myNumFromList  - numeric character references (&#nnn;)
  //   myToList       - the single character each reference stands for
  // Entry k of either "from" list corresponds to entry k of myToList, so
  // the three lists must be filled in the same order.
  //
  // The "translate_latin1" configuration attribute is read with
  // Boolean("translate_latin1", 1); the 1 is presumably the default when
  // the attribute is unset.  When it is false, only the first high-bit
  // entry (&nbsp; / &#160;) is installed in addition to the four
  // low-bit entities added at the end.
  HtConfiguration* config = HtConfiguration::config();
  int translate_latin1 = config->Boolean("translate_latin1", 1);

  StringList *myTextFromList = new StringList(); // For &foo;
  StringList *myNumFromList = new StringList();  // For &#nnn;
  StringList *myToList = new StringList();       // Replacement characters
  String myTextFromString(770);                  // Full text list

  // Is this really the best way to do this?
  if (!translate_latin1)
  {
    myTextFromString = "&nbsp;";
  }
  else
  {
    // One named entity per character code 160..255, in code order, so it
    // lines up with the loop below.
    // This set has been slightly modified in order to manage the &euro;
    // entity (it takes the slot of code 164); the resulting charset is
    // therefore ISO-8859-1 partially moved to ISO-8859-15.
    myTextFromString = "&nbsp;|&iexcl;|&cent;|&pound;|&euro;|&yen;|&brvbar;|&sect;|";
    myTextFromString << "&uml;|&copy;|&ordf;|&laquo;|&not;|&shy;|&reg;|&macr;|&deg;|";
    myTextFromString << "&plusmn;|&sup2;|&sup3;|&acute;|&micro;|&para;|&middot;|&cedil;|";
    myTextFromString << "&sup1;|&ordm;|&raquo;|&frac14;|&frac12;|&frac34;|&iquest;|&Agrave;|";
    myTextFromString << "&Aacute;|&Acirc;|&Atilde;|&Auml;|&Aring;|&AElig;|&Ccedil;|&Egrave;|";
    myTextFromString << "&Eacute;|&Ecirc;|&Euml;|&Igrave;|&Iacute;|&Icirc;|&Iuml;|&ETH;|";
    myTextFromString << "&Ntilde;|&Ograve;|&Oacute;|&Ocirc;|&Otilde;|&Ouml;|&times;|&Oslash;|";
    myTextFromString << "&Ugrave;|&Uacute;|&Ucirc;|&Uuml;|&Yacute;|&THORN;|&szlig;|&agrave;|";
    myTextFromString << "&aacute;|&acirc;|&atilde;|&auml;|&aring;|&aelig;|&ccedil;|&egrave;|";
    myTextFromString << "&eacute;|&ecirc;|&euml;|&igrave;|&iacute;|&icirc;|&iuml;|&eth;|";
    myTextFromString << "&ntilde;|&ograve;|&oacute;|&ocirc;|&otilde;|&ouml;|&divide;|&oslash;|";
    myTextFromString << "&ugrave;|&uacute;|&ucirc;|&uuml;|&yacute;|&thorn;|&yuml;";
  }

  // Split the '|'-separated entity names into individual list entries.
  myTextFromList->Create(myTextFromString, '|');

  // For each high-bit code, add the raw character and its &#nnn; form.
  for (int i = 160; i <= 255; i++)
  {
    String temp = 0;
    temp << (char) i;
    myToList->Add(temp);

    temp = 0;
    temp << "&#" << i << ";";
    myNumFromList->Add(temp);

    // Without Latin-1 translation only the first code (160) is kept,
    // matching the single "&nbsp;" entry in the text list.
    if (!translate_latin1)
      break;
  }

  // Now let's take care of the low-bit characters with encodings.
  myTextFromList->Add("&quot;");
  myToList->Add("\"");
  myNumFromList->Add("&#34;");

  myTextFromList->Add("&amp;");
  myToList->Add("&");
  myNumFromList->Add("&#38;");

  myTextFromList->Add("&lt;");
  myToList->Add("<");
  myNumFromList->Add("&#60;");

  myTextFromList->Add("&gt;");
  myToList->Add(">");
  myNumFromList->Add("&#62;");

  // Both codecs receive the same myToList pointer.
  // NOTE(review): whether HtWordCodec takes ownership of the lists it is
  // given is not visible here - confirm against HtWordCodec before
  // changing how these lists are allocated or freed.
  myTextWordCodec = new HtWordCodec(myTextFromList, myToList, '|');
  myNumWordCodec = new HtWordCodec(myNumFromList, myToList, '|');
}
// Destructor: releases the two HtWordCodec objects that the constructor
// allocated with new (text-entity codec first, then numeric-entity codec).
HtSGMLCodec::~HtSGMLCodec()
{
  delete myTextWordCodec;   // codec for &foo; style references
  delete myNumWordCodec;    // codec for &#nnn; style references
}
// Supposedly used as HtSGMLCodec::instance()->ErrMsg()
// to check if HtWordCodec liked what was fed.
String&
HtSGMLCodec::ErrMsg()
{
  // Hands back a reference to the member itself, not a copy.
  // NOTE(review): no code visible in this file assigns to myErrMsg.
  return this->myErrMsg;
}
// Canonical singleton interface.
HtSGMLCodec *
HtSGMLCodec::instance()
{
  // The one codec object; created on the first call and returned on
  // every later call.  It is never deleted by this function.
  static HtSGMLCodec *theCodec = 0;

  if (theCodec != 0)
    return theCodec;

  theCodec = new HtSGMLCodec();
  return theCodec;
}
// End of HtSGMLCodec.cc