You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
446 lines
13 KiB
446 lines
13 KiB
//
|
|
// WordList.h
|
|
//
|
|
// NAME
|
|
//
|
|
// search specification and results for WordList.
|
|
//
|
|
// SYNOPSIS
|
|
//
|
|
// #include <WordList.h>
|
|
//
|
|
// int callback(WordList *, WordDBCursor& , const WordReference *, Object &)
|
|
// {
|
|
// ...
|
|
// }
|
|
//
|
|
// Object* data = ...
|
|
//
|
|
// WordList *words = ...;
|
|
//
|
|
// WordCursor *search = words->Cursor(callback, data);
|
|
// WordCursor *search = words->Cursor(WordKey("word <DEF> <UNDEF> <UNDEF>"));
|
|
// WordCursor *search = words->Cursor(WordKey("word <DEF> <UNDEF> <UNDEF>"), callback, data);
|
|
//
|
|
// ...
|
|
//
|
|
// if(search->Walk() == NOTOK) bark;
|
|
// List* results = search->GetResults();
|
|
//
|
|
// if(search->WalkNext() == OK)
|
|
// dosomething(search->GetFound());
|
|
//
|
|
// DESCRIPTION
|
|
//
|
|
// WordCursor is an iterator on an inverted index. It is created by
|
|
// asking a <i>WordList</i> object with the <i>Cursor.</i> There is
|
|
// no other way to create a WordCursor object.
|
|
// When the <i>Walk*</i> methods return,
|
|
// the WordCursor object contains the result of the search and
|
|
// status information that indicates if it reached the end of
|
|
// the list (IsAtEnd() method).
|
|
//
|
|
// The <b>callback</b> function that is called each time a match is
|
|
// found takes the following arguments:
|
|
// <pre>
|
|
// WordList* words pointer to the inverted index handle.
|
|
// WordDBCursor& cursor to call Del() and delete the current match
|
|
// WordReference* wordRef is the match
|
|
// Object& data is the user data provided by the caller when
|
|
// search began.
|
|
// </pre>
|
|
//
|
|
// The <i>WordKey</i> object that specifies the search criterion
|
|
// may be used as follows (assuming word is followed by DOCID and
|
|
// LOCATION):
|
|
//
|
|
// Ex1: <b>WordKey("word <DEF> <UNDEF> <UNDEF>")</b> find all occurrences
|
|
// of <i>word</i>.
|
|
//
|
|
// Ex2: <b>WordKey("meet <UNDEF> <UNDEF> <UNDEF>")</b> find all occurrences
|
|
// starting with <i>meet</i>, including <i>meeting</i> etc.
|
|
//
|
|
// Ex3: <b>WordKey("meet <DEF> <UNDEF> 1")</b> find all occurrences of
|
|
// <i>meet</i> that occur at LOCATION 1 in any DOCID. This can
|
|
// be inefficient since the search has to scan all occurrences
|
|
// of <i>meet</i> to find the ones that occur at LOCATION 1.
|
|
//
|
|
// Ex4: <b>WordKey("meet <DEF> 2 <UNDEF>")</b> find all occurrences of
|
|
// <i>meet</i> that occur in DOCID 2, at any location.
|
|
//
|
|
// Interface functions are virtual so that a derivation of the
|
|
// class is possible. Some functions are meant to be used by derived
|
|
// classes such as the <b>Initialize</b> function. All data members
|
|
// should be accessed using the corresponding accessor if possible.
|
|
//
|
|
// END
|
|
//
|
|
// Part of the ht://Dig package <http://www.htdig.org/>
|
|
// Copyright (c) 1999-2004 The ht://Dig Group
|
|
// For copyright details, see the file COPYING in your distribution
|
|
// or the GNU Library General Public License (LGPL) version 2 or later
|
|
// <http://www.gnu.org/copyleft/lgpl.html>
|
|
//
|
|
// $Id: WordCursor.h,v 1.4 2004/05/28 13:15:26 lha Exp $
|
|
//
|
|
|
|
#ifndef _WordCursor_h_
|
|
#define _WordCursor_h_
|
|
|
|
#ifndef SWIG
|
|
#include "htString.h"
|
|
#include "WordKey.h"
|
|
#include "WordDB.h"
|
|
|
|
class WordList;
|
|
class WordDBCursor;
|
|
#endif /* SWIG */
|
|
//
|
|
// Possible values of the action argument of WordList::Walk
|
|
// check walk function in WordList.cc for info on these:
|
|
//
|
|
#define HTDIG_WORDLIST_COLLECTOR 0x0001
|
|
#define HTDIG_WORDLIST_WALKER 0x0002
|
|
|
|
#ifndef SWIG
|
|
//
|
|
// Type of the callback argument in WordCursor
|
|
//
|
|
typedef int (*wordlist_walk_callback_t)(WordList *, WordDBCursor& , const WordReference *, Object &);
|
|
#endif /* SWIG */
|
|
|
|
//
|
|
// Possible values of the status member
|
|
//
|
|
//
|
|
// WalkNext reached the end of the matches
|
|
//
|
|
#define WORD_WALK_ATEND 0x0001
|
|
//
|
|
// Failed to acquire Berkeley DB cursor
|
|
//
|
|
#define WORD_WALK_CURSOR_FAILED 0x0002
|
|
//
|
|
// Berkeley DB Get operation failed
|
|
//
|
|
#define WORD_WALK_GET_FAILED 0x0004
|
|
//
|
|
// Callback function returned NOTOK
|
|
//
|
|
#define WORD_WALK_CALLBACK_FAILED 0x0008
|
|
//
|
|
// WalkNextStep hit an entry that does not match the
|
|
// searched key.
|
|
//
|
|
#define WORD_WALK_NOMATCH_FAILED 0x0010
|
|
//
|
|
// WordCursor contains undefined data
|
|
//
|
|
#define WORD_WALK_FAILED 0xffffffff
|
|
|
|
//
|
|
// Possible return values of the IsA() method
|
|
//
|
|
#define WORD_CURSOR 1
|
|
#define WORD_CURSORS 2
|
|
|
|
//
|
|
// Wordlist::Walk uses WordCursor for :
|
|
// state information : cursor
|
|
// search term description
|
|
// debug/trace/benchmarking
|
|
// search result format description
|
|
//
|
|
class WordCursor
|
|
{
|
|
public:
|
|
#ifndef SWIG
|
|
//
|
|
// Private constructor. Creator of the object must then call Initialize()
|
|
// prior to using any other methods.
|
|
//
|
|
WordCursor() { Clear(); }
|
|
//-
|
|
// Private constructor. See WordList::Cursor method with same prototype for
|
|
// description.
|
|
//
|
|
WordCursor(WordList *words, wordlist_walk_callback_t callback, Object * callback_data) { Clear(); Initialize(words, WordKey(), callback, callback_data, HTDIG_WORDLIST_WALKER); }
|
|
//-
|
|
// Private constructor. See WordList::Cursor method with same prototype for
|
|
// description.
|
|
//
|
|
WordCursor(WordList *words, const WordKey &searchKey, int action = HTDIG_WORDLIST_WALKER) { Clear(); Initialize(words, searchKey, 0, 0, action); }
|
|
//-
|
|
// Private constructor. See WordList::Cursor method with same prototype for
|
|
// description.
|
|
//
|
|
WordCursor(WordList *words, const WordKey &searchKey, wordlist_walk_callback_t callback, Object * callback_data) { Clear(); Initialize(words, searchKey, callback, callback_data, HTDIG_WORDLIST_WALKER); }
|
|
#endif /* SWIG */
|
|
virtual ~WordCursor() {}
|
|
//-
|
|
// Clear all data in object, set <b>GetResult()</b> data to NULL but
|
|
// do not delete it (the application is responsible for that).
|
|
//
|
|
virtual void Clear();
|
|
virtual void ClearInternal();
|
|
virtual void ClearResult();
|
|
|
|
//-
|
|
// Returns the type of the object. May be overloaded by
|
|
// derived classes to differentiate them at runtime.
|
|
// Returns WORD_CURSOR.
|
|
//
|
|
virtual int IsA() const { return WORD_CURSOR; }
|
|
|
|
//-
|
|
// Returns true if WalkNext() step entries in strictly increasing
|
|
// order, false if it step entries in random order.
|
|
//
|
|
virtual int Ordered() const { return 1; }
|
|
|
|
//-
|
|
// Optimize the cursor before starting a Walk.
|
|
// Returns OK on success, NOTOK otherwise.
|
|
//
|
|
virtual int Optimize() { return OK; }
|
|
|
|
//-
|
|
// Save in <b>buffer</b> all the information necessary to resume
|
|
// the walk at the point it left. The ASCII representation of the
|
|
// last key found (GetFound()) is written in <b>buffer</b> using the
|
|
// WordKey::Get method.
|
|
//
|
|
virtual int ContextSave(String& buffer) const { found.Get(buffer); return OK; }
|
|
//-
|
|
// Restore from buffer all the information necessary to
|
|
// resume the walk at the point it left. The <b>buffer</b> is expected
|
|
// to contain an ASCII representation of a WordKey (see WordKey::Set
|
|
// method). A <b>Seek</b> is done on the key and the object is prepared
|
|
// to jump to the next occurrence when <b>WalkNext</b> is called (the
|
|
// cursor_get_flags is set to <i>DB_NEXT.</i>
|
|
//
|
|
virtual int ContextRestore(const String& buffer);
|
|
|
|
#ifndef SWIG
|
|
//-
|
|
// Walk and collect data from the index.
|
|
// Returns OK on success, NOTOK otherwise.
|
|
//
|
|
virtual int Walk();
|
|
#endif /* SWIG */
|
|
//-
|
|
// Must be called before other Walk methods are used.
|
|
// Fill internal state according to input parameters
|
|
// and move before the first matching entry.
|
|
// Returns OK on success, NOTOK otherwise.
|
|
//
|
|
virtual int WalkInit();
|
|
//-
|
|
// Move before the first index matching entry.
|
|
// Returns OK on success, NOTOK otherwise.
|
|
//
|
|
virtual int WalkRewind();
|
|
//-
|
|
// Move to the next matching entry.
|
|
// At end of list, WORD_WALK_ATEND is returned.
|
|
// Returns OK on success, NOTOK otherwise.
|
|
//
|
|
virtual int WalkNext();
|
|
#ifndef SWIG
|
|
//-
|
|
// Advance the cursor one step. The entry pointed to by the cursor may
|
|
// or may not match the requirements. Returns OK if entry pointed
|
|
// by cursor matches requirements. Returns NOTOK on
|
|
// failure. Returns WORD_WALK_NOMATCH_FAILED if the current entry
|
|
// does not match requirements, it's safe to call WalkNextStep again
|
|
// until either OK or NOTOK is returned.
|
|
//
|
|
virtual int WalkNextStep();
|
|
#endif /* SWIG */
|
|
//-
|
|
// Terminate Walk, free allocated resources.
|
|
// Returns OK on success, NOTOK otherwise.
|
|
//
|
|
virtual int WalkFinish();
|
|
//
|
|
// Find out if cursor should better jump to the next possible key
|
|
// (DB_SET_RANGE) instead of sequential iterating (DB_NEXT). If it
|
|
// is decided that jump is a better move : cursor_set_flags =
|
|
// DB_SET_RANGE key = calculated next possible key Else do nothing
|
|
// Return OK if skipping successfull. Returns WORD_WALK_ATEND if no
|
|
// more possible match, reached the maximum. Returns
|
|
// WORD_WALK_FAILED on general failure, occurs if called and no
|
|
// skipping necessary.
|
|
//
|
|
int SkipUselessSequentialWalking();
|
|
|
|
//-
|
|
// Move before the inverted index position specified in <b>patch.</b>
|
|
// May only be called after a successfull call to the <i>WalkNext</i>
|
|
// or <i>WalkNextStep</i>method.
|
|
// Copy defined fields from <b>patch</b> into a copy of the
|
|
// <i>found</i> data member and
|
|
// initialize internal state so that <i>WalkNext</i> jumps to
|
|
// this key next time it's called (cursor_get_flag set to DB_SET_RANGE).
|
|
// Returns OK if successfull, NOTOK otherwise.
|
|
//
|
|
virtual int Seek(const WordKey& patch);
|
|
|
|
//-
|
|
// Returns true if cursor is positioned after the last possible
|
|
// match, false otherwise.
|
|
//
|
|
virtual int IsAtEnd() const { return status == WORD_WALK_ATEND; }
|
|
|
|
//
|
|
// Accessors for input parameters
|
|
//
|
|
//-
|
|
// Returns the search criterion.
|
|
//
|
|
WordKey& GetSearch() { return searchKey; }
|
|
#ifndef SWIG
|
|
const WordKey& GetSearch() const { return searchKey; }
|
|
#endif /* SWIG */
|
|
//-
|
|
// Returns the type of action when a matching entry
|
|
// is found.
|
|
//
|
|
int GetAction() const { return action; }
|
|
//
|
|
// Accessors for output parameters
|
|
//
|
|
//-
|
|
// Returns the list of WordReference found. The application
|
|
// is responsible for deallocation of the list.
|
|
//
|
|
List *GetResults() { return collectRes; }
|
|
//-
|
|
// For debugging purposes. Returns the list of WordReference hit
|
|
// during the search
|
|
// process. Some of them match the searched key, some don't.
|
|
// The application is responsible for deallocation of the list.
|
|
//
|
|
List *GetTraces() { return traceRes; }
|
|
//-
|
|
// For debugging purposes. Set the list of WordReference hit
|
|
// during the search process.
|
|
//
|
|
void SetTraces(List* traceRes_arg) { traceRes = traceRes_arg; }
|
|
//-
|
|
// Returns the last entry hit by the search. Only contains
|
|
// a valid value if the last <i>WalkNext</i> or <i>WalkNextStep</i>
|
|
// call was successfull (i.e. returned OK).
|
|
//
|
|
const WordReference& GetFound() { return found; }
|
|
//-
|
|
// Returns the number of occurrences of the searched word
|
|
// in the inverted index in the <b>noccurrence</b> parameter.
|
|
// Returns OK on success, NOTOK on failure.
|
|
//
|
|
virtual int Noccurrence(unsigned int& noccurrence) const;
|
|
|
|
#ifndef SWIG
|
|
//-
|
|
// Convert the whole structure to an ASCII string description
|
|
// Returns OK if successfull, NOTOK otherwise.
|
|
//
|
|
virtual int Get(String& bufferout) const;
|
|
String Get() const { String tmp; Get(tmp); return tmp; }
|
|
|
|
protected:
|
|
|
|
//-
|
|
// Protected method. Derived classes should use this function to initialize
|
|
// the object if they do not call a WordCursor constructor in their own
|
|
// constructutor. Initialization may occur after the object is created
|
|
// and must occur before a <b>Walk*</b> method is called. See the
|
|
// DESCRIPTION section for the semantics of the arguments.
|
|
// Return OK on success, NOTOK on error.
|
|
//
|
|
int Initialize(WordList *nwords, const WordKey &nsearchKey, wordlist_walk_callback_t ncallback, Object * ncallback_data, int naction);
|
|
|
|
//
|
|
// Input parameters
|
|
//
|
|
//-
|
|
// Input data. The key to be searched, see DESCRIPTION for more information.
|
|
//
|
|
WordKey searchKey;
|
|
//
|
|
// Input data. What do do when a WordReference is found.
|
|
// Can either be
|
|
// HTDIG_WORDLIST_COLLECTOR WordReference found stored in collectRes
|
|
// HTDIG_WORDLIST_WALKER callback is called for each WordReference found
|
|
//
|
|
int action;
|
|
|
|
//
|
|
// Input data. Callback function called for each match found.
|
|
//
|
|
wordlist_walk_callback_t callback;
|
|
//
|
|
// Input data. Argument given to callback, contains arbitrary
|
|
// caller defined data.
|
|
//
|
|
Object *callback_data;
|
|
|
|
//
|
|
// Output parameters
|
|
//
|
|
//
|
|
// Output data. List of WordReference found in the search.
|
|
//
|
|
List *collectRes;
|
|
|
|
//-
|
|
// Output data. Last match found. Use GetFound() to retrieve it.
|
|
//
|
|
WordReference found;
|
|
//-
|
|
// Output data. WORD_WALK_ATEND if cursor is past last match,
|
|
// OK otherwise. Use GetStatus() to retrieve it.
|
|
//
|
|
int status;
|
|
|
|
//
|
|
// Debugging section. Do not use unless you know exactly what you do.
|
|
//
|
|
//
|
|
// Collect everything found while searching (not necessarily matching)
|
|
//
|
|
List *traceRes;
|
|
|
|
//
|
|
// Internal state
|
|
//
|
|
//
|
|
// The actual Berkeley DB cursor.
|
|
//
|
|
WordDBCursor cursor;
|
|
//
|
|
// The latest retrieved key and data
|
|
//
|
|
String key;
|
|
String data;
|
|
//
|
|
// The shorted prefix key computed from searchKey
|
|
//
|
|
WordKey prefixKey;
|
|
//-
|
|
// WalkNext leap is either DB_NEXT or DB_SET_RANGE.
|
|
//
|
|
int cursor_get_flags;
|
|
//
|
|
// True if search key is a prefix key
|
|
//
|
|
int searchKeyIsSameAsPrefix;
|
|
//-
|
|
// The inverted index used by this cursor.
|
|
//
|
|
WordList *words;
|
|
#endif /* SWIG */
|
|
};
|
|
|
|
#endif /* _WordCursor_h_ */
|