You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
125 lines
3.7 KiB
125 lines
3.7 KiB
//
|
|
// HtSGMLCodec.cc
|
|
//
|
|
// HtSGMLCodec: A Specialized HtWordCodec class to convert between SGML
|
|
// ISO 8859-1 entities and high-bit characters.
|
|
//
|
|
// Part of the ht://Dig package <http://www.htdig.org/>
|
|
// Copyright (c) 1995-2004 The ht://Dig Group
|
|
// For copyright details, see the file COPYING in your distribution
|
|
// or the GNU Library General Public License (LGPL) version 2 or later
|
|
// <http://www.gnu.org/copyleft/lgpl.html>
|
|
//
|
|
// $Id: HtSGMLCodec.cc,v 1.6 2004/06/01 18:25:01 angusgb Exp $
|
|
//
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "htconfig.h"
|
|
#endif /* HAVE_CONFIG_H */
|
|
|
|
#include "HtSGMLCodec.h"
|
|
#include "HtConfiguration.h"
|
|
|
|
// Constructor: parses the appropriate parameters using the
|
|
// encapsulated HtWordCodec class.
|
|
// Only used in privacy.
|
|
HtSGMLCodec::HtSGMLCodec()
|
|
{
|
|
HtConfiguration* config= HtConfiguration::config();
|
|
int translate_latin1 = config->Boolean("translate_latin1", 1);
|
|
StringList *myTextFromList = new StringList(); // For &foo;
|
|
StringList *myNumFromList = new StringList(); // For &#nnn;
|
|
StringList *myToList = new StringList();
|
|
String myTextFromString(770); // Full text list
|
|
|
|
// Is this really the best way to do this?
|
|
if (!translate_latin1 )
|
|
{
|
|
myTextFromString = " ";
|
|
}
|
|
else
|
|
{
|
|
// this set has been slightly modified in order to manage the € entity
|
|
// the resulting charset is therefore a ISO-8859-1 partially moved to ISO-8859-15
|
|
myTextFromString = " |¡|¢|£|€|¥|¦|§|";
|
|
myTextFromString << "¨|©|ª|«|¬|­|®|¯|°|";
|
|
myTextFromString << "±|²|³|´|µ|¶|·|¸|";
|
|
myTextFromString << "¹|º|»|¼|½|¾|¿|À|";
|
|
myTextFromString << "Á|Â|Ã|Ä|Å|Æ|Ç|È|";
|
|
myTextFromString << "É|Ê|Ë|Ì|Í|Î|Ï|Ð|";
|
|
myTextFromString << "Ñ|Ò|Ó|Ô|Õ|Ö|×|Ø|";
|
|
myTextFromString << "Ù|Ú|Û|Ü|Ý|Þ|ß|à|";
|
|
myTextFromString << "á|â|ã|ä|å|æ|ç|è|";
|
|
myTextFromString << "é|ê|ë|ì|í|î|ï|ð|";
|
|
myTextFromString << "ñ|ò|ó|ô|õ|ö|÷|ø|";
|
|
myTextFromString << "ù|ú|û|ü|ý|þ|ÿ";
|
|
}
|
|
|
|
myTextFromList->Create(myTextFromString, '|');
|
|
|
|
for (int i = 160; i <= 255; i++)
|
|
{
|
|
String temp = 0;
|
|
temp << (char) i;
|
|
myToList->Add(temp);
|
|
|
|
temp = 0;
|
|
temp << "&#" << i << ";";
|
|
myNumFromList->Add(temp);
|
|
if (!translate_latin1 )
|
|
break;
|
|
}
|
|
|
|
// Now let's take care of the low-bit characters with encodings.
|
|
myTextFromList->Add(""");
|
|
myToList->Add("\"");
|
|
myNumFromList->Add(""");
|
|
|
|
myTextFromList->Add("&");
|
|
myToList->Add("&");
|
|
myNumFromList->Add("&");
|
|
|
|
myTextFromList->Add("<");
|
|
myToList->Add("<");
|
|
myNumFromList->Add("<");
|
|
|
|
myTextFromList->Add(">");
|
|
myToList->Add(">");
|
|
myNumFromList->Add(">");
|
|
|
|
myTextWordCodec = new HtWordCodec(myTextFromList, myToList, '|');
|
|
myNumWordCodec = new HtWordCodec(myNumFromList, myToList, '|');
|
|
}
|
|
|
|
|
|
// Destructor: releases the two HtWordCodec objects created by the
// constructor.
HtSGMLCodec::~HtSGMLCodec()
{
    delete myTextWordCodec;     // codec built from the &foo; list
    delete myNumWordCodec;      // codec built from the &#nnn; list
}
|
|
|
|
|
|
// Supposedly used as HtSGMLCodec::instance()->ErrMsg()
|
|
// to check if HtWordCodec liked what was fed.
|
|
String& HtSGMLCodec::ErrMsg()
|
|
{
|
|
return myErrMsg;
|
|
}
|
|
|
|
|
|
// Canonical singleton interface.
|
|
HtSGMLCodec *
|
|
HtSGMLCodec::instance()
|
|
{
|
|
static HtSGMLCodec *_instance = 0;
|
|
|
|
if (_instance == 0)
|
|
{
|
|
_instance = new HtSGMLCodec();
|
|
}
|
|
|
|
return _instance;
|
|
}
|
|
|
|
// End of HtSGMLCodec.cc
|