You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

591 lines
17 KiB

//
// WordCursorOne.cc
//
// Part of the ht://Dig package <http://www.htdig.org/>
// Copyright (c) 1999-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
// <http://www.gnu.org/copyleft/lgpl.html>
//
// $Id: WordCursorOne.cc,v 1.4 2004/05/28 13:15:26 lha Exp $
//
#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */
#include <stdlib.h>
#include "WordCursorOne.h"
#include "WordListOne.h"
#include "WordDead.h"
#include <stdio.h>
//
// WordCursorOne implementation
//
// *****************************************************************************
WordCursorOne::WordCursorOne(WordList *words) :
WordCursor(words->GetContext()),
prefixKey(words->GetContext())
{
Clear();
}
// *****************************************************************************
WordCursorOne::WordCursorOne(WordList *words, wordlist_walk_callback_t callback, Object * callback_data) :
WordCursor(words->GetContext()),
prefixKey(words->GetContext())
{
Clear();
Initialize(words, WordKey(words->GetContext()), callback, callback_data, HTDIG_WORDLIST_WALKER);
}
// *****************************************************************************
WordCursorOne::WordCursorOne(WordList *words, const WordKey &searchKey, int action = HTDIG_WORDLIST_WALKER) :
WordCursor(words->GetContext()),
prefixKey(words->GetContext())
{
Clear();
Initialize(words, searchKey, 0, 0, action);
}
// *****************************************************************************
WordCursorOne::WordCursorOne(WordList *words, const WordKey &searchKey, wordlist_walk_callback_t callback, Object * callback_data) :
WordCursor(words->GetContext()),
prefixKey(words->GetContext())
{
Clear();
Initialize(words, searchKey, callback, callback_data, HTDIG_WORDLIST_WALKER);
}
// *****************************************************************************
//
int WordCursorOne::Initialize(WordList *nwords, const WordKey &nsearchKey, wordlist_walk_callback_t ncallback, Object *ncallback_data, int naction)
{
action = naction;
searchKey = nsearchKey;
callback = ncallback;
callback_data = ncallback_data;
words = nwords;
cursor = ((WordListOne*)nwords)->db->Cursor();
return OK;
}
// *****************************************************************************
//
void
WordCursorOne::Clear()
{
searchKey.Clear();
action = 0;
callback = 0;
callback_data = 0;
ClearResult();
ClearInternal();
words = 0;
//
// Debugging section.
//
traceRes = 0;
}
// *****************************************************************************
//
void
WordCursorOne::ClearInternal()
{
key.trunc();
data.trunc();
prefixKey.Clear();
cursor_get_flags = DB_SET_RANGE;
searchKeyIsSameAsPrefix = 0;
}
// *****************************************************************************
//
void
WordCursorOne::ClearResult()
{
collectRes = 0;
found.Clear();
status = OK;
}
int
WordCursorOne::ContextRestore(const String& buffer)
{
int ret = OK;
if(!buffer.empty()) {
WordKey key(words->GetContext(), buffer);
if((ret = Seek(key)) != OK)
return ret;
//
// Move to restored position so that next call to
// WalkNext will go above the restored position.
//
if((ret = WalkNext()) != OK)
return ret;
}
return ret;
}
// *****************************************************************************
//
// Walk and collect data from the word database.
//
// If action bit HTDIG_WORDLIST_COLLECTOR is set WordReferences are
// stored in a list and the list is returned.
// If action bit HTDIG_WORDLIST_WALKER is set the <callback> function
// is called for each WordReference found. No list is built and the
// function returns a null pointer.
//
// The <searchKey> argument may be a fully qualified key, containing precise values for each
// field of the key. It may also contain only some fields of the key. In both cases
// all the word occurrences matching the fields set in the key are retrieved. It may
// be fast if key is a prefix (see WordKey::Prefix for a definition). It may
// be *slow* if key is not a prefix because it forces a complete walk of the
// index.
//
int
WordCursorOne::Walk()
{
int ret;
if((ret = WalkInit()) != OK) return ret;
while((ret = WalkNext()) == OK)
;
int ret1;
if((ret1 = WalkFinish()) != OK) return ret1;
return ret == WORD_WALK_ATEND ? OK : NOTOK;
}
int
WordCursorOne::WalkInit()
{
ClearResult();
ClearInternal();
WordReference wordRef(words->GetContext());
{
int ret;
if((ret = cursor->Open()) != 0)
return ret;
}
if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: action = %d, SearchKey = %s\n", action, (char*)searchKey.Get());
if(action & HTDIG_WORDLIST_COLLECTOR) {
collectRes = new List;
}
WordKey first_key(words->GetContext());
//
// Move the cursor to start walking and do some sanity checks.
//
if(searchKey.Empty()) {
//
// Move past the stat data
//
if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: at start of keys because search key is empty\n");
} else {
prefixKey = searchKey;
//
// If the key is a prefix, the start key is
// the longest possible prefix contained in the key. If the
// key does not contain any prefix, start from the beginning
// of the file.
//
if(prefixKey.PrefixOnly() == NOTOK) {
if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: at start of keys because search key is not a prefix\n");
prefixKey.Clear();
} else {
if(words->verbose) fprintf(stderr, "WordCursorOne::WalkInit: go to %s \n", (char*)prefixKey.Get());
first_key = prefixKey;
}
}
first_key.Pack(key);
//
// Allow Seek immediately after Init
//
found.Key() = first_key;
status = OK;
searchKeyIsSameAsPrefix = searchKey.ExactEqual(prefixKey);
cursor_get_flags = DB_SET_RANGE;
return OK;
}
int
WordCursorOne::WalkRewind()
{
WordKey first_key(words->GetContext());
//
// Move the cursor to start walking and do some sanity checks.
//
if(searchKey.Empty()) {
first_key.Clear();
} else {
prefixKey = searchKey;
//
// If the key is a prefix, the start key is
// the longest possible prefix contained in the key. If the
// key does not contain any prefix, start from the beginning
// of the file.
//
if(prefixKey.PrefixOnly() == NOTOK) {
prefixKey.Clear();
first_key.Clear();
} else {
first_key = prefixKey;
}
}
first_key.Pack(key);
//
// Allow Seek immediately after Rewind
//
found.Key() = first_key;
status = OK;
searchKeyIsSameAsPrefix = searchKey.ExactEqual(prefixKey);
cursor_get_flags = DB_SET_RANGE;
return OK;
}
int
WordCursorOne::WalkNext()
{
int ret;
while((ret = WalkNextStep()) == WORD_WALK_NOMATCH_FAILED)
if(words->verbose > 1) fprintf(stderr, "WordCursorOne::WalkNext: got false match, retry\n");
return ret;
}
int
WordCursorOne::WalkNextStep()
{
status = OK;
{
int error;
if((error = cursor->Get(key, data, cursor_get_flags)) != 0) {
if(error == DB_NOTFOUND) {
if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, no more matches\n", (char*)searchKey.Get());
return (status = WORD_WALK_ATEND);
} else {
return WORD_WALK_GET_FAILED;
}
}
}
//
// Next step operation is always sequential walk
//
cursor_get_flags = DB_NEXT;
found.Unpack(key, data);
if(words->Dead()->Exists(found.Key()))
return WORD_WALK_NOMATCH_FAILED;
if(traceRes) traceRes->Add(new WordReference(found));
if(words->verbose > 1) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, candidate is %s\n", (char*)searchKey.Get(), (char*)found.Get());
//
// Don't bother to compare keys if we want to walk all the entries
//
if(!(searchKey.Empty())) {
// examples
// searchKey: aabc 1 ? ? ?
// prefixKey: aabc 1 ? ? ?
//
// Stop loop if we reach a record whose key does not
// match prefix key requirement, provided we have a valid
// prefix key.
// (ie. stop loop if we're past last possible match...)
//
if(!prefixKey.Empty() &&
!prefixKey.Equal(found.Key())) {
if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, no more matches because found a key that is greater than searchKey\n", (char*)searchKey.Get());
return (status = WORD_WALK_ATEND);
}
//
// Skip entries that do not exactly match the specified key.
//
if(!searchKeyIsSameAsPrefix &&
!searchKey.Equal(found.Key())) {
int ret;
switch((ret = SkipUselessSequentialWalking())) {
case OK:
if(words->verbose > 1) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, false match jump to %s\n", (char*)searchKey.Get(), (char*)found.Get());
return WORD_WALK_NOMATCH_FAILED;
break;
case WORD_WALK_ATEND:
if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, no more matches according to SkipUselessSequentialWalking\n", (char*)searchKey.Get());
return (status = WORD_WALK_ATEND);
break;
default:
fprintf(stderr, "WordCursorOne::WalkNextStep: SkipUselessSequentialWalking failed %d\n", ret);
return NOTOK;
break;
}
}
}
if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: looking for %s, found %s\n", (char*)searchKey.Get(), (char*)found.Get());
if(collectRes) {
if(words->verbose > 2) fprintf(stderr, "WordCursorOne::WalkNextStep: collect\n");
collectRes->Add(new WordReference(found));
} else if(callback) {
if(words->verbose > 2) fprintf(stderr, "WordCursorOne::WalkNextStep: calling callback\n");
int ret = (*callback)(words, *cursor, &found, *(callback_data) );
//
// The callback function tells us that something went wrong, might
// as well stop walking.
//
if(ret != OK) {
if(words->verbose) fprintf(stderr, "WordCursorOne::WalkNextStep: callback returned NOTOK");
return WORD_WALK_CALLBACK_FAILED|(status = WORD_WALK_ATEND);
}
}
return OK;
}
int
WordCursorOne::WalkFinish()
{
if(words->verbose) fprintf(stderr, "WordCursorOne::WalkFinish\n");
return cursor->Close() == 0 ? OK : NOTOK;
}
// *****************************************************************************
//
// Helper for SkipUselessSequentialWalking.
// Undefine in foundKey all fields defined in searchKey
// so that they are not considered by SetToFollowing.
// It could become a method of WordKey but lacks generalisation and
// from what I see it is a rather specific operation.
//
static inline void complement(WordContext* context, WordKey& key, const WordKey& mask)
{
int nfields = context->GetKeyInfo().nfields;
int i;
//
// Undefine in 'key' all fields defined in 'mask'
//
for(i = 0; i < nfields; i++) {
if(mask.IsDefined(i))
key.Undefined(i);
else
key.SetDefined(i);
}
}
// *****************************************************************************
//
// Find out if we should better jump to the next possible key (DB_SET_RANGE) instead of
// sequential iterating (DB_NEXT).
// If it is decided that jump is a better move :
// cursor_set_flags = DB_SET_RANGE
// key = calculated next possible key
// Else
// do nothing
// Return values
// OK: skipping successfull.
// WORD_WALK_ATEND : no more possible match, reached the maximum
// WORD_WALK_FAILED: general failure, occurs if called and no skipping
// necessary.
//
// Sequential searching can waste time by searching all keys, for example:
// If searching for Key: argh <DEF> <UNDEF> 10
// Under normal circonstances we would do the following
//
// DATA STATUS ACTION
// 1: argh 1 10 match DB_NEXT
// 2: argh 2 11 nomatch DB_NEXT
// 3: argh 2 15 nomatch DB_NEXT
// 4: argh 2 20 nomatch DB_NEXT
// 5: argh 2 30 nomatch DB_NEXT
// 6: argh 5 1 nomatch DB_NEXT
// 7: argh 5 8 nomatch DB_NEXT
// 8: argh 8 6 nomatch DB_NEXT
//
// But the optimal would be
//
// DATA STATUS ACTION
// 1: argh 1 10 match DB_NEXT
// 2: argh 2 11 nomatch DB_SET_RANGE argh 3 10
// 3: argh 2 15
// 4: argh 2 20
// 5: argh 2 30
// 6: argh 5 1 nomatch DB_SET_RANGE argh 5 10
// 7: argh 5 8
// 8: argh 8 6 nomatch DB_SET_RANGE argh 8 10
//
// That saves a lot of unecessary hit. The underlying logic is a bit
// more complex but you have the idea.
//
int
WordCursorOne::SkipUselessSequentialWalking()
{
WordKey& foundKey = found.Key();
int nfields = words->GetContext()->GetKeyInfo().nfields;
int i;
//
// Find out how the searchKey and the foundKey differ.
//
int diff_field = 0;
int lower = 0;
if(!foundKey.Diff(searchKey, diff_field, lower)) {
//
// foundKey matches searchKey (no difference), don't
// skip, everything is fine. The caller of SkipUselessSequentialWalking
// is expected to avoid this case for efficiency.
//
return WORD_WALK_FAILED;
}
if(words->verbose > 2) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: looking for %s, candidate is %s\n", (char*)searchKey.Get(), (char*)foundKey.Get());
//
// Undefine in foundKey all fields defined in searchKey
// so that they are not considered by SetToFollowing.
//
complement(words->GetContext(), foundKey, searchKey);
//
// If the key found is lower than the searched key when
// considering only the fields defined in the search key,
// we only need to enforce the key to get the match.
// Otherwise we need to increment the found key to jump
// properly.
//
if(lower) {
if(words->verbose > 1) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: enforcing the search constraint is enough to jump forward\n");
for(i = diff_field + 1; i < nfields; i++)
if(foundKey.IsDefined(i)) foundKey.Set(i, 0);
} else {
if(words->verbose > 1) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: increment the key to jump forward\n");
//
// diff_field - 1 is not really necessary because diff_field is undefined
// in foundKey and would therefore be ignored by SetToFollowing. We write
// diff_field - 1 to clearly state that incrementing begins just before the
// field for which a difference was found.
//
int ret;
if((ret = foundKey.SetToFollowing(diff_field - 1)) != OK)
return ret;
}
//
// Copy all fields defined in searchKey into foundKey. This will copy
// searchKey in foundKey because all these fields have been
// previously undefined in foundKey.
//
foundKey.Merge(searchKey);
if(words->verbose > 2) fprintf(stderr, "WordCursorOne::SkipUselessSequentialWalking: looking for %s, jump to %s\n", (char*)searchKey.Get(), (char*)foundKey.Get());
//
// Instruct Next function to jump to the calculated key
//
if(foundKey.Pack(key) == NOTOK) {
return WORD_WALK_FAILED;
}
cursor_get_flags = DB_SET_RANGE;
return OK;
}
// *****************************************************************************
//
// Copy defined fields in patch into foundKey and
// initialize internal state so that WalkNext jumps to
// this key next time it's called.
//
// Technically this means : Override latest key found (found data member)
// with patch fields values, starting from the first field set in
// patch up to the last. Pack the result in the key field and set
// cursor_get_flags to DB_SET_RANGE.
//
int
WordCursorOne::Seek(const WordKey& patch)
{
int nfields = words->GetContext()->GetKeyInfo().nfields;
WordKey pos = searchKey;
if(patch.Empty()) {
fprintf(stderr, "WordCursorOne::Seek: empty patch is useless\n");
return NOTOK;
}
int i;
//
// Leave the most significant fields untouched
//
for(i = WORD_KEY_WORD + 1; i < nfields; i++)
if(patch.IsDefined(i))
break;
//
// From the first value set in the patch to the end
// override.
//
for(; i < nfields; i++) {
if(patch.IsDefined(i))
pos.Set(i, patch.Get(i));
else
pos.Set(i, 0);
}
if(!pos.Filled()) {
fprintf(stderr, "WordCursorOne::Seek: only make sense if the resulting key is fully defined\n");
return NOTOK;
}
if(words->verbose > 2) fprintf(stderr, "WordCursorOne::Seek: seek to %s\n", (char*)pos.Get());
//
// Next move will jump to the patched key
//
pos.Pack(key);
cursor_get_flags = DB_SET_RANGE;
return OK;
}
//
// Convert the whole structure to an ascii string description
//
int WordCursorOne::Get(String& bufferout) const
{
String tmp;
bufferout.trunc();
searchKey.Get(tmp);
bufferout << "Input: searchKey = " << tmp << ", action = " << action << "; Output: collectRes " << (collectRes ? "set" : "not set");
found.Get(tmp);
bufferout << ", found = " << tmp << ", status = " << status;
prefixKey.Get(tmp);
bufferout << "; Internal State: prefixKey = " << tmp << ", cursor_get_flags = " << cursor_get_flags;
return OK;
}