You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

223 lines
5.1 KiB
C++

// DocMatch.cc
//
// DocMatch: Data object only. Contains information related to a given
// document that was matched by a search. For instance, the
// score of the document for this search.
//
// Part of the ht://Dig package <http://www.htdig.org/>
// Copyright (c) 1995-2004 The ht://Dig Group
// For copyright details, see the file COPYING in your distribution
// or the GNU Library General Public License (LGPL) version 2 or later
// <http://www.gnu.org/copyleft/lgpl.html>
//
// $Id: DocMatch.cc,v 1.8 2004/05/28 13:15:24 lha Exp $
//
#ifdef HAVE_CONFIG_H
#include "htconfig.h"
#endif /* HAVE_CONFIG_H */
#include "DocMatch.h"
#include "HtConfiguration.h"
#include "HtWordReference.h"
#ifdef HAVE_STD
#include <iostream>
#ifdef HAVE_NAMESPACES
using namespace std;
#endif
#else
#include <iostream.h>
#endif /* HAVE_STD */
//*******************************************************************************
// DocMatch::DocMatch()
//
//*******************************************************************************
// DocMatch::~DocMatch()
//
DocMatch::~DocMatch()
{
    // Empty: nothing is released here.
    // NOTE(review): `locations` is allocated with `new List` in the copy
    // constructor and deleted in SetLocations()/AddLocations(), but it is
    // not freed by this destructor. Confirm whether that is a leak or
    // whether the list is released elsewhere before adding a delete; no
    // assignment operator is visible in this file, so a shallow-copied
    // pointer would be freed twice.
}
//
// Merge another match into this one.
//
// The anchor ends up as the lower of the two anchors, and the
// locations of `match` are folded into this object's own list
// by AddLocations().
//
void
DocMatch::Merge(const DocMatch &match)
{
    // keep whichever anchor is lower
    if(match.anchor < anchor)
        anchor = match.anchor;

    // combining the two location lists is delegated to AddLocations()
    AddLocations(match.GetLocations());
}
//
// Merge the locations in `locs` into this match's own location list.
//
// The result is built in a fresh list ordered by (from, to). Entries
// that have the same `from` and `to` in both lists are collapsed into
// a single entry whose weight is the sum of both weights; that entry
// keeps the flags of the one already held by this match.
//
// Entries already held by this match are moved into the result by
// pointer. Entries of `locs` are always copied, so `locs` is left
// untouched. A null `locs` is treated as "nothing to add".
//
// Assumes both lists are already ordered by (from, to), as for any
// two-way merge -- TODO confirm for lists handed to SetLocations().
//
void
DocMatch::AddLocations(const List *locs)
{
    // nothing to merge; also avoids dereferencing a null list
    if(!locs)
    {
        return;
    }

    List *merge = new List;
    ListCursor c;

    // this object's list is walked with its built-in cursor,
    // the other list with an external one
    locations->Start_Get();
    locs->Start_Get(c);
    Location *a = (Location *)locations->Get_Next();
    Location *b = (Location *)locs->Get_Next(c);

    while(a && b)
    {
        if(a->from < b->from || (a->from == b->from && a->to < b->to))
        {
            // `a` sorts first: reuse the existing object
            merge->Add(a);
            a = (Location *)locations->Get_Next();
        }
        else if(b->from < a->from || (b->from == a->from && b->to < a->to))
        {
            // `b` sorts first: copy it, the source list keeps its own object
            merge->Add(new Location(*b));
            b = (Location *)locs->Get_Next(c);
        }
        else // same from and same to
        {
            // Same span on both sides: add the weights up on the entry we
            // already hold instead of allocating a replacement. The old
            // entry would otherwise be orphaned by Release() below.
            a->weight += b->weight;
            merge->Add(a);
            a = (Location *)locations->Get_Next();
            b = (Location *)locs->Get_Next(c);
        }
    }

    // whatever is left of our own list is moved over as is
    while(a)
    {
        merge->Add(a);
        a = (Location *)locations->Get_Next();
    }

    // whatever is left of the other list is copied
    while(b)
    {
        merge->Add(new Location(*b));
        b = (Location *)locs->Get_Next(c);
    }

    // Every entry of the old list now lives in `merge`. Release() is
    // expected to drop the old list's nodes without deleting the entries
    // themselves -- confirm against the List class.
    locations->Release();
    delete locations;
    locations = merge;
}
//
// Replace the location list.
//
// The list currently held is deleted and `locs` is stored in its
// place; the pointer is kept as is, not copied. Passing the list that
// is already held is a no-op, because deleting it first would leave
// this object pointing at freed memory.
//
void
DocMatch::SetLocations(List *locs)
{
    // self-assignment: keep the list we already have
    if(locs == locations)
    {
        return;
    }

    delete locations;
    locations = locs;
}
//
// copy constructor
//
// Copies id and anchor and gives the new object its own list holding
// copies of the other match's locations. The score is not copied: it
// is reset to -1.0, the value GetScore() treats as "not computed yet".
// If the other match has no location list, the copy gets an empty one.
//
DocMatch::DocMatch(const DocMatch &other)
{
    // the cached score of `other` is not inherited
    //score = other.score;
    score = -1.0;
    id = other.id;
    anchor = other.anchor;

    // start from an empty list; AddLocations() copies each entry it
    // takes from the list passed to it
    locations = new List;

    const List *source = other.GetLocations();
    if(source)
    {
        AddLocations(source);
    }
}
//
// set weight of all locations
//
void
DocMatch::SetWeight(double weight)
{
locations->Start_Get();
for(int i = 0; i < locations->Count(); i++)
{
Location *loc = (Location *)locations->Get_Next();
loc->weight = weight;
}
}
//
// debug dump
//
void
DocMatch::Dump()
{
cerr << "DocMatch id: " << id << " {" << endl;
locations->Start_Get();
for(int i = 0; i < locations->Count(); i++)
{
Location *loc = (Location *)locations->Get_Next();
cerr << "location [" << loc->from;
cerr << ", " << loc->to << "] ";
cerr << "weight " << loc->weight;
cerr << " flags " << loc->flags;
cerr << endl;
}
cerr << "score: " << GetScore() << endl << "}" << endl;
}
double
DocMatch::GetScore()
{
HtConfiguration* config= HtConfiguration::config();
static double text_factor = config->Double("text_factor", 1);
static double caps_factor = config->Double("caps_factor", 1);
static double title_factor = config->Double("title_factor", 1);
static double heading_factor = config->Double("heading_factor", 1);
static double keywords_factor = config->Double("keywords_factor", 1);
static double meta_desc_factor = config->Double("meta_description_factor", 1);
static double author_factor = config->Double("author_factor", 1);
static double description_factor = config->Double("description_factor", 1);
static double url_text_factor = config->Double("url_text_factor", 1);
if (score == -1.0)
{
score = 0.0;
double locresult = 0.0;
ListCursor c;
locations->Start_Get(c);
Location *loc = (Location *)locations->Get_Next(c);
while(loc)
{
locresult = 0.0;
if (loc->flags == FLAG_TEXT) locresult += text_factor;
if (loc->flags & FLAG_CAPITAL) locresult += caps_factor;
if (loc->flags & FLAG_TITLE) locresult += title_factor;
if (loc->flags & FLAG_HEADING) locresult += heading_factor;
if (loc->flags & FLAG_KEYWORDS) locresult += keywords_factor;
if (loc->flags & FLAG_DESCRIPTION) locresult += meta_desc_factor;
if (loc->flags & FLAG_AUTHOR) locresult += author_factor;
if (loc->flags & FLAG_LINK_TEXT) locresult += description_factor;
if (loc->flags & FLAG_URL) locresult += url_text_factor;
score += loc->weight * locresult;
loc = (Location *)locations->Get_Next(c);
}
}
return score;
}