You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
275 lines
6.0 KiB
275 lines
6.0 KiB
3 years ago
|
//
|
||
|
// WordDict.cc
|
||
|
//
|
||
|
// Part of the ht://Dig package <http://www.htdig.org/>
|
||
|
// Copyright (c) 1999-2004 The ht://Dig Group
|
||
|
// For copyright details, see the file COPYING in your distribution
|
||
|
// or the GNU Library General Public License (LGPL) version 2 or later
|
||
|
// <http://www.gnu.org/copyleft/lgpl.html>
|
||
|
//
|
||
|
// $Id: WordDict.cc,v 1.4 2004/05/28 13:15:26 lha Exp $
|
||
|
//
|
||
|
#ifdef HAVE_CONFIG_H
|
||
|
#include "htconfig.h"
|
||
|
#endif /* HAVE_CONFIG_H */
|
||
|
|
||
|
#include <fcntl.h>
|
||
|
|
||
|
#include "WordDict.h"
|
||
|
#include "WordListOne.h"
|
||
|
|
||
|
#define WORD_DICT_CURSOR_FIRST 1
|
||
|
#define WORD_DICT_CURSOR_NEXT 2
|
||
|
|
||
|
class WordDictCursor {
|
||
|
public:
|
||
|
int info;
|
||
|
String prefix;
|
||
|
WordDBCursor* cursor;
|
||
|
};
|
||
|
|
||
|
//
// Destructor: dispose of the WordDB object held in db.
//
WordDict::~WordDict()
{
  delete db;
}
|
||
|
|
||
|
//
// Attach the dictionary to the word list <nwords> and allocate the
// WordDB object from the DB info of that list's context.
// Always returns OK.
//
int WordDict::Initialize(WordList* nwords)
{
  words = nwords;
  // words was just set to nwords, so the context is the same one.
  db = new WordDB(words->GetContext()->GetDBInfo());

  return OK;
}
|
||
|
|
||
|
int WordDict::Open()
|
||
|
{
|
||
|
const String& filename = words->Filename();
|
||
|
int flags = words->Flags();
|
||
|
|
||
|
db->set_pagesize(words->Pagesize());
|
||
|
|
||
|
return db->Open(filename, "dict", DB_BTREE, flags, 0666, WORD_DB_DICT) == 0 ? OK : NOTOK;
|
||
|
}
|
||
|
|
||
|
int WordDict::Remove()
|
||
|
{
|
||
|
return db->Remove(words->Filename(), "dict") == 0 ? OK : NOTOK;
|
||
|
}
|
||
|
|
||
|
int WordDict::Close()
|
||
|
{
|
||
|
return db->Close() == 0 ? OK : NOTOK;
|
||
|
}
|
||
|
|
||
|
int WordDict::Serial(const String& word, unsigned int& serial)
|
||
|
{
|
||
|
int ret;
|
||
|
WordDictRecord entry;
|
||
|
if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND)
|
||
|
return NOTOK;
|
||
|
if(ret == DB_NOTFOUND) {
|
||
|
words->Meta()->Serial(WORD_META_SERIAL_WORD, entry.id);
|
||
|
if(entry.Put(db, word) != 0) return NOTOK;
|
||
|
}
|
||
|
serial = entry.id;
|
||
|
|
||
|
return OK;
|
||
|
}
|
||
|
|
||
|
int WordDict::SerialExists(const String& word, unsigned int& serial)
|
||
|
{
|
||
|
int ret;
|
||
|
WordDictRecord entry;
|
||
|
if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND)
|
||
|
return NOTOK;
|
||
|
|
||
|
serial = ret == DB_NOTFOUND ? WORD_DICT_SERIAL_INVALID : entry.id;
|
||
|
|
||
|
return OK;
|
||
|
}
|
||
|
|
||
|
int WordDict::SerialRef(const String& word, unsigned int& serial)
|
||
|
{
|
||
|
int ret;
|
||
|
WordDictRecord entry;
|
||
|
if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND)
|
||
|
return NOTOK;
|
||
|
if(ret == DB_NOTFOUND)
|
||
|
words->Meta()->Serial(WORD_META_SERIAL_WORD, entry.id);
|
||
|
entry.count++;
|
||
|
if(entry.Put(db, word) != 0) return NOTOK;
|
||
|
serial = entry.id;
|
||
|
|
||
|
return OK;
|
||
|
}
|
||
|
|
||
|
int WordDict::Noccurrence(const String& word, unsigned int& noccurrence) const
|
||
|
{
|
||
|
if(word.empty()) {
|
||
|
fprintf(stderr, "WordDict::Noccurrence: null word\n");
|
||
|
return NOTOK;
|
||
|
}
|
||
|
WordDictRecord entry;
|
||
|
noccurrence = 0;
|
||
|
int ret;
|
||
|
if((ret = entry.Get(db, word)) != 0) {
|
||
|
if(ret != DB_NOTFOUND)
|
||
|
return NOTOK;
|
||
|
}
|
||
|
noccurrence = entry.count;
|
||
|
|
||
|
return OK;
|
||
|
}
|
||
|
|
||
|
//
// Normalize <word> in place with the WordType object of the word list
// context and return whatever WordType::Normalize returns.
//
int WordDict::Normalize(String& word) const
{
  const WordType& type = words->GetContext()->GetType();
  const int status = type.Normalize(word);

  return status;
}
|
||
|
|
||
|
int WordDict::Incr(const String& word, unsigned int incr)
|
||
|
{
|
||
|
int ret;
|
||
|
WordDictRecord entry;
|
||
|
if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND)
|
||
|
return NOTOK;
|
||
|
if(ret == DB_NOTFOUND)
|
||
|
words->Meta()->Serial(WORD_META_SERIAL_WORD, entry.id);
|
||
|
entry.count += incr;
|
||
|
if(entry.Put(db, word) != 0) return NOTOK;
|
||
|
return OK;
|
||
|
}
|
||
|
|
||
|
int WordDict::Decr(const String& word, unsigned int decr)
|
||
|
{
|
||
|
WordDictRecord entry;
|
||
|
int ret;
|
||
|
if((ret = entry.Get(db, word)) != 0) {
|
||
|
if(ret == DB_NOTFOUND)
|
||
|
fprintf(stderr, "WordDict::Unref(%s) Unref on non existing word occurrence\n", (const char*)word);
|
||
|
return NOTOK;
|
||
|
}
|
||
|
entry.count -= decr;
|
||
|
if(entry.count > 0)
|
||
|
ret = entry.Put(db, word) == 0 ? OK : NOTOK;
|
||
|
else
|
||
|
ret = entry.Del(db, word) == 0 ? OK : NOTOK;
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
int WordDict::Put(const String& word, unsigned int noccurrence)
|
||
|
{
|
||
|
int ret;
|
||
|
WordDictRecord entry;
|
||
|
if((ret = entry.Get(db, word)) != 0 && ret != DB_NOTFOUND)
|
||
|
return NOTOK;
|
||
|
if(ret == DB_NOTFOUND)
|
||
|
words->Meta()->Serial(WORD_META_SERIAL_WORD, entry.id);
|
||
|
entry.count = noccurrence;
|
||
|
if(entry.Put(db, word) != 0) return NOTOK;
|
||
|
return OK;
|
||
|
}
|
||
|
|
||
|
//
// Return a newly allocated List holding a newly allocated String copy
// of every key met while walking the dictionary with DB_NEXT.
// The walk stops at the first non zero return of the cursor.
// Both the List and the String objects are created with new here.
//
List *WordDict::Words() const
{
  String word;
  String data;
  WordDBCursor* walker = db->Cursor();
  List* result = new List;

  for(;;) {
    if(walker->Get(word, data, DB_NEXT) != 0)
      break;
    result->Add(new String(word));
  }

  delete walker;

  return result;
}
|
||
|
|
||
|
int WordDict::Exists(const String& word) const
|
||
|
{
|
||
|
String tmp_word = word;
|
||
|
String coded;
|
||
|
|
||
|
return db->Get(0, tmp_word, coded, 0) == 0;
|
||
|
}
|
||
|
|
||
|
//
// Allocate a WordDictCursor wrapping a new database cursor, to be
// walked with Next. Only the cursor member is assigned here.
//
WordDictCursor* WordDict::Cursor() const
{
  WordDictCursor* handle = new WordDictCursor;

  handle->cursor = db->Cursor();

  return handle;
}
|
||
|
|
||
|
//
// Move <cursor> to the next dictionary entry with DB_NEXT. On success
// (0) the key is stored in <word> and the data is unpacked into
// <record>. On any non zero return the database cursor and <cursor>
// itself are deleted: the caller must not use <cursor> afterwards.
// Returns the value returned by the database cursor.
//
int WordDict::Next(WordDictCursor* cursor, String& word, WordDictRecord& record)
{
  String data;
  const int status = cursor->cursor->Get(word, data, DB_NEXT);

  if(status == 0) {
    record.Unpack(data);
    return status;
  }

  // End of walk or error: the cursor is released here.
  delete cursor->cursor;
  delete cursor;

  return status;
}
|
||
|
|
||
|
//
// Allocate a WordDictCursor set up to walk, with NextPrefix, the words
// starting with <prefix>. The state is WORD_DICT_CURSOR_FIRST so that
// the first NextPrefix call positions the database cursor.
//
WordDictCursor* WordDict::CursorPrefix(const String& prefix) const
{
  WordDictCursor* handle = new WordDictCursor;

  handle->cursor = db->Cursor();
  handle->prefix = prefix;
  handle->info = WORD_DICT_CURSOR_FIRST;

  return handle;
}
|
||
|
|
||
|
//
// Return in <word> and <record> the next dictionary entry starting
// with the prefix stored in <cursor>. The first call positions the
// database cursor with DB_SET_RANGE on the prefix, later calls
// advance with DB_NEXT.
// Returns 0 when an entry is returned. Otherwise the database cursor
// and <cursor> itself are deleted (the caller must not use <cursor>
// afterwards) and the return value is the database error, or
// DB_NOTFOUND if an entry was read but does not match the prefix.
//
int WordDict::NextPrefix(WordDictCursor* cursor, String& word, WordDictRecord& record)
{
  String data;
  int status;

  if(cursor->info != WORD_DICT_CURSOR_FIRST) {
    status = cursor->cursor->Get(word, data, DB_NEXT);
  } else {
    word = cursor->prefix;
    status = cursor->cursor->Get(word, data, DB_SET_RANGE);
    cursor->info = WORD_DICT_CURSOR_NEXT;
  }

  //
  // The walk goes on only if 1) an entry was read, 2) the word found
  // is at least as long as the required prefix, 3) the word found
  // starts with the required prefix.
  //
  const bool matches =
    status == 0 &&
    cursor->prefix.length() <= word.length() &&
    strncmp(cursor->prefix.get(), word.get(), cursor->prefix.length()) == 0;

  if(matches) {
    record.Unpack(data);
    return status;
  }

  delete cursor->cursor;
  delete cursor;

  return status == 0 ? DB_NOTFOUND : status;
}
|
||
|
|
||
|
int WordDict::Write(FILE* f)
|
||
|
{
|
||
|
WordDBCursor* cursor = db->Cursor();
|
||
|
String key;
|
||
|
String coded;
|
||
|
unsigned int occurrence;
|
||
|
unsigned int id;
|
||
|
|
||
|
while(cursor->Get(key, coded, DB_NEXT) == 0) {
|
||
|
int offset = 0;
|
||
|
coded.ber_shift(offset, occurrence);
|
||
|
coded.ber_shift(offset, id);
|
||
|
fprintf(f, "%s %d %d\n", (char*)key, id, occurrence);
|
||
|
}
|
||
|
|
||
|
delete cursor;
|
||
|
|
||
|
return OK;
|
||
|
}
|