You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
210 lines
4.6 KiB
210 lines
4.6 KiB
//
|
|
// HtWordList.cc
|
|
//
|
|
// HtWordList: Specialized WordList class that can hold a list
|
|
// of words waiting to be inserted in the database.
|
|
//
|
|
// Part of the ht://Dig package <http://www.htdig.org/>
|
|
// Copyright (c) 1995-2004 The ht://Dig Group
|
|
// For copyright details, see the file COPYING in your distribution
|
|
// or the GNU Library General Public License (LGPL) version 2 or later
|
|
// <http://www.gnu.org/copyleft/lgpl.html>
|
|
//
|
|
// $Id: HtWordList.cc,v 1.7 2004/05/28 13:15:12 lha Exp $
|
|
//
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "htconfig.h"
|
|
#endif /* HAVE_CONFIG_H */
|
|
|
|
#include "HtWordList.h"
|
|
#include "HtWordReference.h"
|
|
#include "WordRecord.h"
|
|
#include "WordType.h"
|
|
#include "HtConfiguration.h"
|
|
#include "htString.h"
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <ctype.h>
|
|
|
|
#ifdef HAVE_STD
|
|
#include <iostream>
|
|
#include <fstream>
|
|
#ifdef HAVE_NAMESPACES
|
|
using namespace std;
|
|
#endif
|
|
#else
|
|
#include <iostream.h>
|
|
#include <fstream.h>
|
|
#endif /* HAVE_STD */
|
|
|
|
#include <errno.h>
|
|
|
|
//*****************************************************************************
|
|
// HtWordList::~HtWordList()
|
|
//
|
|
// Destructor: frees the List object that the constructor allocated for
// holding pending word references.
// NOTE(review): whether references still queued in the list are freed as
// well depends on List's destructor, which is not visible in this file.
HtWordList::~HtWordList()
{
    delete words;
}
|
|
|
|
//*****************************************************************************
|
|
//
|
|
// Constructor: forwards the configuration to the WordList base class and
// allocates the (initially empty) list in which word references are
// queued until Flush() or Skip() is called.
HtWordList::HtWordList(const HtConfiguration& config_arg)
    : WordList(config_arg)
{
    // Allocated here, released in the destructor.
    words = new List;
}
|
|
|
|
//*****************************************************************************
|
|
//
|
|
void HtWordList::Replace(const WordReference& arg)
|
|
{
|
|
//
|
|
// New word. Create a new reference for it and cache it in the object.
|
|
//
|
|
words->Add(new WordReference(arg));
|
|
}
|
|
|
|
//*****************************************************************************
|
|
// void HtWordList::Flush()
|
|
// Dump the current list of words to the database. After
|
|
// the words have been dumped, the list will be destroyed to make
|
|
// room for the words of the next document.
|
|
//
|
|
void HtWordList::Flush()
|
|
{
|
|
HtWordReference *wordRef;
|
|
|
|
// Provided for backwards compatibility
|
|
if (!isopen)
|
|
Open(config["word_db"], O_RDWR);
|
|
|
|
words->Start_Get();
|
|
while ((wordRef = (HtWordReference *) words->Get_Next()))
|
|
{
|
|
if (wordRef->Word().length() == 0) {
|
|
cerr << "HtWordList::Flush: unexpected empty word\n";
|
|
continue;
|
|
}
|
|
|
|
Override(*wordRef);
|
|
}
|
|
|
|
// Cleanup
|
|
words->Destroy();
|
|
}
|
|
|
|
//*****************************************************************************
|
|
// void HtWordList::Skip()
|
|
// The current document has disappeared or been modified.
|
|
// We do not need to store these words.
|
|
//
|
|
void HtWordList::Skip()
|
|
{
|
|
words->Destroy();
|
|
}
|
|
|
|
//
|
|
// Callback data dedicated to Dump and dump_word communication
|
|
//
|
|
class DumpWordData : public Object
|
|
{
|
|
public:
|
|
DumpWordData(FILE* fl_arg) { fl = fl_arg; }
|
|
|
|
FILE* fl;
|
|
};
|
|
|
|
//*****************************************************************************
|
|
//
|
|
// Write the ascii representation of a word occurrence. Helper
|
|
// of HtWordList::Dump
|
|
//
|
|
// Cursor callback: writes one word reference to the stream carried in
// <data> (which must be a DumpWordData). The WordList and cursor
// arguments are unused. Always returns OK.
static int dump_word(WordList *, WordDBCursor &, const WordReference *word, Object &data)
{
    DumpWordData &sink = (DumpWordData &)data;

    // The reference is dumped through the HtWordReference interface.
    ((const HtWordReference *)word)->Dump(sink.fl);

    return OK;
}
|
|
|
|
//*****************************************************************************
|
|
// int HtWordList::Dump(const String& filename)
|
|
//
|
|
// Write an ascii version of the word database in <filename>
|
|
//
|
|
int HtWordList::Dump(const String& filename)
|
|
{
|
|
FILE *fl;
|
|
|
|
if (!isopen) {
|
|
cerr << "WordList::Dump: database must be opened first\n";
|
|
return NOTOK;
|
|
}
|
|
|
|
if((fl = fopen(filename, "w")) == 0) {
|
|
perror(form("WordList::Dump: opening %s for writing", (const char*)filename));
|
|
return NOTOK;
|
|
}
|
|
|
|
HtWordReference::DumpHeader(fl);
|
|
DumpWordData data(fl);
|
|
WordCursor* search = Cursor(dump_word, &data);
|
|
search->Walk();
|
|
delete search;
|
|
|
|
fclose(fl);
|
|
|
|
return OK;
|
|
}
|
|
|
|
//*****************************************************************************
|
|
// int HtWordList::Load(const String& filename)
|
|
//
|
|
// Read in an ascii version of the word database in <filename>
|
|
//
|
|
int HtWordList::Load(const String& filename)
|
|
{
|
|
FILE *fl;
|
|
String data;
|
|
HtWordReference *next;
|
|
|
|
if (!isopen) {
|
|
cerr << "WordList::Load: database must be opened first\n";
|
|
return NOTOK;
|
|
}
|
|
|
|
if((fl = fopen(filename, "r")) == 0) {
|
|
perror(form("WordList::Load: opening %s for reading", (const char*)filename));
|
|
return NOTOK;
|
|
}
|
|
|
|
if (HtWordReference::LoadHeader(fl) != OK)
|
|
{
|
|
cerr << "WordList::Load: header is not correct\n";
|
|
return NOTOK;
|
|
}
|
|
|
|
while (data.readLine(fl))
|
|
{
|
|
next = new HtWordReference;
|
|
if (next->Load(data) != OK)
|
|
{
|
|
delete next;
|
|
continue;
|
|
}
|
|
|
|
words->Add(next);
|
|
}
|
|
|
|
Flush();
|
|
fclose(fl);
|
|
|
|
return OK;
|
|
}
|