You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
412 lines
11 KiB
412 lines
11 KiB
3 years ago
|
//
|
||
|
// Part of the ht://Dig package <http://www.htdig.org/>
|
||
|
// Copyright (c) 1999-2004 The ht://Dig Group
|
||
|
// For copyright details, see the file COPYING in your distribution
|
||
|
// or the GNU Library General Public License (LGPL) version 2 or later
|
||
|
// <http://www.gnu.org/copyleft/lgpl.html>
|
||
|
//
|
||
|
#ifdef HAVE_CONFIG_H
|
||
|
#include "htconfig.h"
|
||
|
#endif /* HAVE_CONFIG_H */
|
||
|
|
||
|
#include <stdio.h>
|
||
|
|
||
|
#ifndef _MSC_VER /* _WIN32 */
|
||
|
#include <unistd.h>
|
||
|
#endif
|
||
|
|
||
|
#include <stdlib.h>
|
||
|
#include <sys/stat.h>
|
||
|
|
||
|
#include "WordKey.h"
|
||
|
#include "WordDB.h"
|
||
|
#include "WordDBCache.h"
|
||
|
#include "WordMeta.h"
|
||
|
#include "ber.h"
|
||
|
|
||
|
int WordDBCaches::Add(char* key, int key_size, char* data, int data_size)
|
||
|
{
|
||
|
int ret;
|
||
|
if((ret = cache.Allocate(key_size + data_size)) == ENOMEM) {
|
||
|
if((ret = CacheFlush()) != 0) return ret;
|
||
|
if((ret = cache.Allocate(key_size + data_size))) return ret;
|
||
|
}
|
||
|
|
||
|
return cache.Add(key, key_size, data, data_size);
|
||
|
}
|
||
|
|
||
|
//
// Build the name of a new cache file and record it in the files table.
//
// filename (out) receives words->Filename() followed by "C" and the
// zero padded value of (serial - 1), where serial is obtained from
// words->Meta()->Serial(WORD_META_SERIAL_FILE, ...).
// The size member is updated to (cache.GetMax() / 1024) * serial.
//
// Returns OK on success, NOTOK if no valid serial could be obtained or
// if the name could not be stored with files->Put().
//
int WordDBCaches::AddFile(String& filename)
{
  char tmp[32];
  //
  // Start from the invalid value so that a Serial() call that leaves
  // its argument untouched is detected instead of reading garbage.
  //
  unsigned int serial = WORD_META_SERIAL_INVALID;
  words->Meta()->Serial(WORD_META_SERIAL_FILE, serial);
  if(serial == WORD_META_SERIAL_INVALID)
    return NOTOK;
  filename = words->Filename();
  //
  // serial is unsigned: use the matching conversion. "C" plus at most
  // ten digits always fits in tmp.
  //
  sprintf(tmp, "C%08u", serial - 1);
  filename << tmp;

  String dummy;
  if(files->Put(0, filename, dummy, 0) != 0)
    return NOTOK;

  size = (cache.GetMax() / 1024) * serial;

  return OK;
}
|
||
|
|
||
|
int WordDBCaches::CacheFlush()
|
||
|
{
|
||
|
if(cache.Empty()) return OK;
|
||
|
|
||
|
if(cache.Sort() != OK) return NOTOK;
|
||
|
String filename;
|
||
|
int locking = 0;
|
||
|
if(!lock) {
|
||
|
words->Meta()->Lock("cache", lock);
|
||
|
locking = 1;
|
||
|
}
|
||
|
if(AddFile(filename) != OK) return NOTOK;
|
||
|
if(CacheWrite(filename) != OK) return NOTOK;
|
||
|
|
||
|
unsigned int serial;
|
||
|
words->Meta()->GetSerial(WORD_META_SERIAL_FILE, serial);
|
||
|
if(serial >= (unsigned int)file_max || Full())
|
||
|
if(Merge() != OK) return NOTOK;
|
||
|
if(locking) words->Meta()->Unlock("cache", lock);
|
||
|
|
||
|
return OK;
|
||
|
}
|
||
|
|
||
|
//
// Comparison callback handed to myqsort: orders cache files by
// decreasing size (biggest first). The first argument is unused.
//
// Returns a positive value if b is bigger than a, a negative value if
// b is smaller than a and 0 if both have the same size. An explicit
// three way comparison is used instead of a subtraction so that the
// sign of the result cannot be altered by wrap-around or overflow.
//
static int merge_cmp_size(WordDBCaches* , WordDBCacheFile* a, WordDBCacheFile* b)
{
  if(b->size > a->size) return 1;
  if(b->size < a->size) return -1;
  return 0;
}
|
||
|
|
||
|
int WordDBCaches::Merge()
|
||
|
{
|
||
|
if(CacheFlush() != OK) return NOTOK;
|
||
|
|
||
|
int locking = 0;
|
||
|
if(!lock) {
|
||
|
words->Meta()->Lock("cache", lock);
|
||
|
locking = 1;
|
||
|
}
|
||
|
unsigned int serial;
|
||
|
words->Meta()->GetSerial(WORD_META_SERIAL_FILE, serial);
|
||
|
if(serial <= 1) return OK;
|
||
|
|
||
|
//
|
||
|
// heap lists all the files in decreasing size order (biggest first)
|
||
|
//
|
||
|
WordDBCacheFile* heap = new WordDBCacheFile[serial];
|
||
|
{
|
||
|
String filename;
|
||
|
String dummy;
|
||
|
WordDBCursor* cursor = files->Cursor();
|
||
|
struct stat stat_buf;
|
||
|
int i;
|
||
|
int ret;
|
||
|
for(i = 0; (ret = cursor->Get(filename, dummy, DB_NEXT)) == 0; i++) {
|
||
|
WordDBCacheFile& file = heap[i];
|
||
|
file.filename = filename;
|
||
|
if(stat((char*)file.filename, &stat_buf) == 0) {
|
||
|
file.size = stat_buf.st_size;
|
||
|
} else {
|
||
|
const String message = String("WordDBCaches::Merge: cannot stat ") + file.filename;
|
||
|
perror((const char*)message);
|
||
|
return NOTOK;
|
||
|
}
|
||
|
cursor->Del();
|
||
|
}
|
||
|
delete cursor;
|
||
|
myqsort((void*)heap, serial, sizeof(WordDBCacheFile), (myqsort_cmp)merge_cmp_size, (void*)this);
|
||
|
}
|
||
|
|
||
|
String tmpname = words->Filename() + String("C.tmp");
|
||
|
|
||
|
while(serial > 1) {
|
||
|
WordDBCacheFile* a = &heap[serial - 1];
|
||
|
WordDBCacheFile* b = &heap[serial - 2];
|
||
|
|
||
|
if(Merge(a->filename, b->filename, tmpname) != OK) return NOTOK;
|
||
|
|
||
|
//
|
||
|
// Remove file a
|
||
|
//
|
||
|
if(unlink((char*)a->filename) != 0) {
|
||
|
const String message = String("WordDBCaches::Merge: unlink ") + a->filename;
|
||
|
perror((const char*)message);
|
||
|
return NOTOK;
|
||
|
}
|
||
|
|
||
|
//
|
||
|
// Remove file b
|
||
|
//
|
||
|
if(unlink((char*)b->filename) != 0) {
|
||
|
const String message = String("WordDBCaches::Merge: unlink ") + b->filename;
|
||
|
perror((const char*)message);
|
||
|
return NOTOK;
|
||
|
}
|
||
|
|
||
|
//
|
||
|
// Rename tmp file into file b
|
||
|
//
|
||
|
if(rename((char*)tmpname, (char*)b->filename) != 0) {
|
||
|
const String message = String("WordDBCaches::Merge: rename ") + tmpname + String(" ") + b->filename;
|
||
|
perror((const char*)message);
|
||
|
return NOTOK;
|
||
|
}
|
||
|
|
||
|
//
|
||
|
// Update b file size. The size need not be accurate number as long
|
||
|
// as it reflects the relative size of each file.
|
||
|
//
|
||
|
b->size += a->size;
|
||
|
|
||
|
serial--;
|
||
|
//
|
||
|
// update heap
|
||
|
//
|
||
|
myqsort((void*)heap, serial, sizeof(WordDBCacheFile), (myqsort_cmp)merge_cmp_size, (void*)this);
|
||
|
}
|
||
|
|
||
|
{
|
||
|
String newname(words->Filename());
|
||
|
newname << "C00000000";
|
||
|
|
||
|
if(rename((char*)heap[0].filename, (char*)newname) != 0) {
|
||
|
const String message = String("WordDBCaches::Merge: rename ") + heap[0].filename + String(" ") + newname;
|
||
|
perror((const char*)message);
|
||
|
return NOTOK;
|
||
|
}
|
||
|
|
||
|
String dummy;
|
||
|
if(files->Put(0, newname, dummy, 0) != 0)
|
||
|
return NOTOK;
|
||
|
words->Meta()->SetSerial(WORD_META_SERIAL_FILE, serial);
|
||
|
}
|
||
|
if(locking) words->Meta()->Unlock("cache", lock);
|
||
|
|
||
|
return OK;
|
||
|
}
|
||
|
|
||
|
int WordDBCaches::Merge(const String& filea, const String& fileb, const String& tmpname)
|
||
|
{
|
||
|
FILE* ftmp = fopen((const char*)tmpname, "w");
|
||
|
FILE* fa = fopen((const char*)filea, "r");
|
||
|
FILE* fb = fopen((const char*)fileb, "r");
|
||
|
|
||
|
unsigned int buffertmp_size = 128;
|
||
|
unsigned char* buffertmp = (unsigned char*)malloc(buffertmp_size);
|
||
|
unsigned int buffera_size = 128;
|
||
|
unsigned char* buffera = (unsigned char*)malloc(buffera_size);
|
||
|
unsigned int bufferb_size = 128;
|
||
|
unsigned char* bufferb = (unsigned char*)malloc(bufferb_size);
|
||
|
|
||
|
unsigned int entriesa_length;
|
||
|
if(ber_file2value(fa, entriesa_length) < 1) return NOTOK;
|
||
|
unsigned int entriesb_length;
|
||
|
if(ber_file2value(fb, entriesb_length) < 1) return NOTOK;
|
||
|
|
||
|
if(ber_value2file(ftmp, entriesa_length + entriesb_length) < 1) return NOTOK;
|
||
|
|
||
|
WordDBCacheEntry entrya;
|
||
|
WordDBCacheEntry entryb;
|
||
|
|
||
|
if(entriesa_length > 0 && entriesb_length > 0) {
|
||
|
|
||
|
if(ReadEntry(fa, entrya, buffera, buffera_size) != OK) return NOTOK;
|
||
|
if(ReadEntry(fb, entryb, bufferb, bufferb_size) != OK) return NOTOK;
|
||
|
|
||
|
while(entriesa_length > 0 && entriesb_length > 0) {
|
||
|
if(WordKey::Compare(words->GetContext(), (const unsigned char*)entrya.key, entrya.key_size, (const unsigned char*)entryb.key, entryb.key_size) < 0) {
|
||
|
if(WriteEntry(ftmp, entrya, buffertmp, buffertmp_size) != OK) return NOTOK;
|
||
|
if(--entriesa_length > 0)
|
||
|
if(ReadEntry(fa, entrya, buffera, buffera_size) != OK) return NOTOK;
|
||
|
} else {
|
||
|
if(WriteEntry(ftmp, entryb, buffertmp, buffertmp_size) != OK) return NOTOK;
|
||
|
if(--entriesb_length > 0)
|
||
|
if(ReadEntry(fb, entryb, bufferb, bufferb_size) != OK) return NOTOK;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if(entriesa_length > 0 || entriesb_length > 0) {
|
||
|
FILE* fp = entriesa_length > 0 ? fa : fb;
|
||
|
unsigned int& entries_length = entriesa_length > 0 ? entriesa_length : entriesb_length;
|
||
|
WordDBCacheEntry& entry = entriesa_length > 0 ? entrya : entryb;
|
||
|
while(entries_length > 0) {
|
||
|
if(WriteEntry(ftmp, entry, buffertmp, buffertmp_size) != OK) return NOTOK;
|
||
|
if(--entries_length > 0)
|
||
|
if(ReadEntry(fp, entry, buffera, buffera_size) != OK) return NOTOK;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
free(buffera);
|
||
|
free(bufferb);
|
||
|
free(buffertmp);
|
||
|
|
||
|
fclose(fa);
|
||
|
fclose(fb);
|
||
|
fclose(ftmp);
|
||
|
|
||
|
return OK;
|
||
|
}
|
||
|
|
||
|
//
// Merge all the cache files and insert the resulting entries in db.
//
// Merge() is called to reduce the cache files to a single one. The
// first name of the files table is fetched and removed from the table,
// the matching file is read entry by entry and each key/data pair is
// stored with db.db->put(). On success the file is removed, the
// WORD_META_SERIAL_FILE serial is set to 0 and the size member is reset
// to 0.
//
// The "cache" lock is taken here only when the lock member is not
// already set. The lock, the file and the read buffer are released on
// every exit path.
//
// Returns OK on success, NOTOK otherwise.
//
// NOTE(review): the result of db.db->put() is not checked, a failed
// insertion goes unnoticed -- confirm whether this is intended.
//
int WordDBCaches::Merge(WordDB& db)
{
  int locking = 0;
  if(!lock) {
    words->Meta()->Lock("cache", lock);
    locking = 1;
  }

  int status = OK;
  if(Merge() != OK) status = NOTOK;

  String filename;

  if(status == OK) {
    String dummy;
    WordDBCursor* cursor = files->Cursor();
    if(cursor->Get(filename, dummy, DB_FIRST) != 0)
      status = NOTOK;
    else
      cursor->Del();
    delete cursor;
  }

  FILE* fp = 0;
  if(status == OK && !(fp = fopen((char*)filename, "r"))) {
    const String message = String("WordDBCaches::Merge: cannot open ") + filename;
    perror((const char*)message);
    status = NOTOK;
  }

  unsigned int buffer_size = 128;
  unsigned char* buffer = 0;
  if(status == OK && !(buffer = (unsigned char*)malloc(buffer_size))) {
    fprintf(stderr, "WordDBCaches::Merge: out of memory\n");
    status = NOTOK;
  }

  unsigned int entries_length = 0;
  if(status == OK && ber_file2value(fp, entries_length) < 1)
    status = NOTOK;

  if(status == OK) {
    WordDBCacheEntry entry;

    unsigned int i;
    for(i = 0; i < entries_length; i++) {
      if(ReadEntry(fp, entry, buffer, buffer_size) != OK) {
        status = NOTOK;
        break;
      }
      //
      // Kept although not referenced below: presumably the
      // WORD_DBT_INIT macro expects a user_data name in scope
      // -- confirm against its definition before removing.
      //
      void* user_data = words->GetContext();
      WORD_DBT_INIT(rkey, (void*)entry.key, entry.key_size);
      WORD_DBT_INIT(rdata, (void*)entry.data, entry.data_size);
      db.db->put(db.db, 0, &rkey, &rdata, 0);
    }
  }

  //
  // Close the file before it is removed.
  //
  free(buffer);
  if(fp) fclose(fp);

  if(status == OK) {
    if(unlink((char*)filename) != 0) {
      const String message = String("WordDBCaches::Merge: unlink ") + filename;
      perror((const char*)message);
      status = NOTOK;
    } else {
      words->Meta()->SetSerial(WORD_META_SERIAL_FILE, 0);
      size = 0;
    }
  }

  if(locking) words->Meta()->Unlock("cache", lock);

  return status;
}
|
||
|
|
||
|
int WordDBCaches::CacheWrite(const String& filename)
|
||
|
{
|
||
|
FILE* fp = fopen(filename, "w");
|
||
|
if(!fp) {
|
||
|
String message;
|
||
|
message << "WordDBCaches::CacheWrite()" << filename << "): ";
|
||
|
perror((char*)message);
|
||
|
return NOTOK;
|
||
|
}
|
||
|
|
||
|
int entries_length;
|
||
|
WordDBCacheEntry* entries;
|
||
|
int ret;
|
||
|
if((ret = cache.Entries(entries, entries_length)) != 0)
|
||
|
return ret;
|
||
|
|
||
|
if(ber_value2file(fp, entries_length) < 1) return NOTOK;
|
||
|
|
||
|
unsigned int buffer_size = 1024;
|
||
|
unsigned char* buffer = (unsigned char*)malloc(buffer_size);
|
||
|
int i;
|
||
|
for(i = 0; i < entries_length; i++) {
|
||
|
if(WriteEntry(fp, entries[i], buffer, buffer_size) != OK) return NOTOK;
|
||
|
}
|
||
|
free(buffer);
|
||
|
fclose(fp);
|
||
|
|
||
|
cache.Flush();
|
||
|
|
||
|
return OK;
|
||
|
}
|
||
|
|
||
|
int WordDBCaches::WriteEntry(FILE* fp, WordDBCacheEntry& entry, unsigned char*& buffer, unsigned int& buffer_size)
|
||
|
{
|
||
|
if(entry.key_size + entry.data_size + 64 > buffer_size) {
|
||
|
buffer_size = entry.key_size + entry.data_size + 64;
|
||
|
buffer = (unsigned char*)realloc(buffer, buffer_size);
|
||
|
}
|
||
|
|
||
|
int p_size = buffer_size;
|
||
|
unsigned char* p = buffer;
|
||
|
|
||
|
int ber_len;
|
||
|
if((ber_len = ber_value2buf(p, p_size, entry.key_size)) < 1) {
|
||
|
fprintf(stderr, "WordDBCaches::WriteEntry: BER failed for key %d\n", entry.key_size);
|
||
|
return NOTOK;
|
||
|
}
|
||
|
p += ber_len;
|
||
|
memcpy(p, entry.key, entry.key_size);
|
||
|
p += entry.key_size;
|
||
|
|
||
|
p_size -= ber_len + entry.key_size;
|
||
|
|
||
|
if((ber_len = ber_value2buf(p, p_size, entry.data_size)) < 1) {
|
||
|
fprintf(stderr, "WordDBCaches::WriteEntry: BER failed for data %d\n", entry.data_size);
|
||
|
return NOTOK;
|
||
|
}
|
||
|
p += ber_len;
|
||
|
memcpy(p, entry.data, entry.data_size);
|
||
|
p += entry.data_size;
|
||
|
|
||
|
if(fwrite((void*)buffer, p - buffer, 1, fp) != 1) {
|
||
|
perror("WordDBCaches::WriteEntry: cannot write entry ");
|
||
|
return NOTOK;
|
||
|
}
|
||
|
|
||
|
return OK;
|
||
|
}
|
||
|
|
||
|
int WordDBCaches::ReadEntry(FILE* fp, WordDBCacheEntry& entry, unsigned char*& buffer, unsigned int& buffer_size)
|
||
|
{
|
||
|
if(ber_file2value(fp, entry.key_size) < 1) return NOTOK;
|
||
|
|
||
|
if(entry.key_size > buffer_size) {
|
||
|
buffer_size += entry.key_size;
|
||
|
if(!(buffer = (unsigned char*)realloc(buffer, buffer_size))) return NOTOK;
|
||
|
}
|
||
|
|
||
|
if(fread((void*)buffer, entry.key_size, 1, fp) != 1) {
|
||
|
perror("WordDBCaches::ReadEntry(): cannot read key entry ");
|
||
|
return NOTOK;
|
||
|
}
|
||
|
|
||
|
if(ber_file2value(fp, entry.data_size) < 1) return NOTOK;
|
||
|
|
||
|
if(entry.data_size > 0) {
|
||
|
if(entry.data_size + entry.key_size > buffer_size) {
|
||
|
buffer_size += entry.data_size;
|
||
|
if(!(buffer = (unsigned char*)realloc(buffer, buffer_size))) return NOTOK;
|
||
|
}
|
||
|
|
||
|
if(fread((void*)(buffer + entry.key_size), entry.data_size, 1, fp) != 1) {
|
||
|
perror("WordDBCaches::ReadEntry(): cannot read data entry ");
|
||
|
return NOTOK;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
entry.key = (char*)buffer;
|
||
|
entry.data = (char*)(buffer + entry.key_size);
|
||
|
|
||
|
return OK;
|
||
|
}
|