You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

117 lines
2.8 KiB

//
// Substring.cc
//
// Substring: The substring fuzzy algorithm. Currently a rather slow, naive approach
// that checks the substring against every word in the word db.
// It does not generate a separate database.
//
// Part of the ht://Dig package <http://www.htdig.org/>
// Copyright (c) 1995-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
// <http://www.gnu.org/copyleft/lgpl.html>
//
// $Id: Substring.cc,v 1.15 2004/05/28 13:15:20 lha Exp $
//
#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */
#include <fcntl.h>
#include "Substring.h"
#include "htString.h"
#include "List.h"
#include "StringMatch.h"
#include "HtConfiguration.h"
//*****************************************************************************
// Substring::Substring(const HtConfiguration& config_arg)
//   Hands the configuration to the Fuzzy base class constructor and sets
//   this algorithm's name to "substring".
//
Substring::Substring(const HtConfiguration& config_arg)
    : Fuzzy(config_arg)
{
    name = "substring";
}
//*****************************************************************************
// Substring::~Substring()
//   The destructor body is intentionally empty.
//
Substring::~Substring() {}
//*****************************************************************************
// void Substring::getWords(char *w, List &words)
//   Strips punctuation from `w`, then scans the word database named by the
//   "word_db" configuration entry and appends to `words` a newly allocated
//   String copy of every entry the stripped pattern matches.  At most
//   "substring_max_words" matches are added (25 is passed to config.Value()
//   as the second argument, presumably the fallback when the attribute is
//   unset).
//   NOTE(review): the Strings added to `words` are allocated with `new`
//   here; presumably the caller / the List takes ownership -- confirm.
//
//   A very simplistic and inefficient substring search.  For every word
//   that is looked for we do a complete linear search through the word
//   database.
//   Maybe a better method of doing this would be to mmap a list of words
//   to memory and then run the StringMatch on it.  It would still be a
//   linear search, but with much less overhead.
//
void
Substring::getWords(char *w, List &words)
{
    // First strip the punctuation
    String stripped = w;
    HtStripPunctuation(stripped);

    // Now set up the StringMatch object
    StringMatch match;
    match.Pattern(stripped);

    // And get the list of all possible words
    HtWordList wordDB(config);
    wordDB.Open(config["word_db"], O_RDONLY);
    List *wordList = wordDB.Words();

    // Check the list BEFORE touching it: previously it was dereferenced by
    // Start_Get()/Get_Next() and only tested for null afterwards, so a null
    // result would have crashed before the check was ever reached.
    if (wordList)
    {
        const int maximumWords = config.Value("substring_max_words", 25);
        int wordCount = 0;
        String *key;

        wordList->Start_Get();
        while (wordCount < maximumWords && (key = (String *) wordList->Get_Next()))
        {
            if (match.FindFirst((char*)*key) >= 0)
            {
                // Store a copy; the temporary list is torn down below.
                words.Add(new String(*key));
                wordCount++;
            }
        }

        wordList->Destroy();
        delete wordList;
    }

    // Always close the database, whether or not a word list was obtained.
    wordDB.Close();
}
//*****************************************************************************
// int Substring::openIndex()
//   Does no work and always returns 0.  (Per the file header, this
//   algorithm does not generate a separate database.)
//
int Substring::openIndex()
{
    return 0;
}
//*****************************************************************************
// void Substring::generateKey(char *, String &)
//   Intentionally a no-op: both arguments are ignored and the String
//   argument is left unmodified.
//
void Substring::generateKey(char *, String &)
{
}
//*****************************************************************************
// void Substring::addWord(char *)
//   Intentionally a no-op: the argument is ignored.
//
void Substring::addWord(char *)
{
}