You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
607 lines
13 KiB
607 lines
13 KiB
/*-
|
|
* See the file LICENSE for redistribution information.
|
|
*
|
|
* Copyright (c) 1998, 1999
|
|
* Sleepycat Software. All rights reserved.
|
|
*/
|
|
|
|
#include "db_config.h"
|
|
|
|
#ifndef lint
|
|
static const char sccsid[] = "@(#)db_am.c 11.8 (Sleepycat) 11/15/99";
|
|
#endif /* not lint */
|
|
|
|
#ifndef NO_SYSTEM_INCLUDES
|
|
#include <sys/types.h>
|
|
|
|
#include <errno.h>
|
|
#include <string.h>
|
|
#endif
|
|
|
|
#include "db_int.h"
|
|
#include "db_page.h"
|
|
#include "db_shash.h"
|
|
#include "btree.h"
|
|
#include "hash.h"
|
|
#include "qam.h"
|
|
#include "lock.h"
|
|
#include "mp.h"
|
|
#include "txn.h"
|
|
#include "db_am.h"
|
|
#include "db_ext.h"
|
|
|
|
#ifdef DEBUG
|
|
#include "WordMonitor.h"
|
|
#endif /* DEBUG */
|
|
|
|
static int CDB___db_c_close __P((DBC *));
|
|
|
|
/*
|
|
* CDB___db_cursor --
|
|
* Allocate and return a cursor.
|
|
*
|
|
* PUBLIC: int CDB___db_cursor __P((DB *, DB_TXN *, DBC **, u_int32_t));
|
|
*/
|
|
int
|
|
CDB___db_cursor(dbp, txn, dbcp, flags)
|
|
DB *dbp;
|
|
DB_TXN *txn;
|
|
DBC **dbcp;
|
|
u_int32_t flags;
|
|
{
|
|
DBC *dbc, *adbc;
|
|
db_lockmode_t mode;
|
|
u_int32_t op;
|
|
int ret;
|
|
|
|
PANIC_CHECK(dbp->dbenv);
|
|
DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->cursor");
|
|
|
|
/* Check for invalid flags. */
|
|
if ((ret = CDB___db_cursorchk(dbp, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
|
|
return (ret);
|
|
|
|
/* Take one from the free list if it's available. */
|
|
MUTEX_THREAD_LOCK(dbp->mutexp);
|
|
if ((dbc = TAILQ_FIRST(&dbp->free_queue)) != NULL)
|
|
TAILQ_REMOVE(&dbp->free_queue, dbc, links);
|
|
else {
|
|
MUTEX_THREAD_UNLOCK(dbp->mutexp);
|
|
|
|
if ((ret = CDB___os_calloc(1, sizeof(DBC), &dbc)) != 0)
|
|
return (ret);
|
|
|
|
dbc->dbp = dbp;
|
|
dbc->c_close = CDB___db_c_close;
|
|
dbc->c_dup = CDB___db_c_dup;
|
|
|
|
/* Set up locking information. */
|
|
if (F_ISSET(dbp->dbenv, DB_ENV_CDB | DB_ENV_LOCKING)) {
|
|
/*
|
|
* If we are not threaded, then there is no need to
|
|
* create new locker ids. We know that no one else
|
|
* is running concurrently using this DB, so we can
|
|
* take a peek at any cursors on the active queue.
|
|
*/
|
|
if (!F_ISSET(dbp->dbenv, DB_ENV_THREAD) &&
|
|
(adbc = TAILQ_FIRST(&dbp->active_queue)) != NULL)
|
|
dbc->lid = adbc->lid;
|
|
else
|
|
if ((ret = CDB_lock_id(dbp->dbenv, &dbc->lid)) != 0)
|
|
goto err;
|
|
|
|
memcpy(dbc->lock.fileid, dbp->fileid, DB_FILE_ID_LEN);
|
|
if (F_ISSET(dbp->dbenv, DB_ENV_CDB)) {
|
|
dbc->lock_dbt.size = DB_FILE_ID_LEN;
|
|
dbc->lock_dbt.data = dbc->lock.fileid;
|
|
} else {
|
|
dbc->lock.type = DB_PAGE_LOCK;
|
|
dbc->lock_dbt.size = sizeof(dbc->lock);
|
|
dbc->lock_dbt.data = &dbc->lock;
|
|
}
|
|
}
|
|
|
|
switch (dbp->type) {
|
|
case DB_BTREE:
|
|
case DB_RECNO:
|
|
if ((ret = CDB___bam_c_init(dbc)) != 0)
|
|
goto err;
|
|
break;
|
|
case DB_HASH:
|
|
if ((ret = CDB___ham_c_init(dbc)) != 0)
|
|
goto err;
|
|
break;
|
|
case DB_QUEUE:
|
|
if ((ret = CDB___qam_c_init(dbc)) != 0)
|
|
goto err;
|
|
break;
|
|
default:
|
|
ret = EINVAL;
|
|
goto err;
|
|
}
|
|
|
|
MUTEX_THREAD_LOCK(dbp->mutexp);
|
|
}
|
|
|
|
if ((dbc->txn = txn) == NULL)
|
|
dbc->locker = dbc->lid;
|
|
else
|
|
dbc->locker = txn->txnid;
|
|
|
|
TAILQ_INSERT_TAIL(&dbp->active_queue, dbc, links);
|
|
MUTEX_THREAD_UNLOCK(dbp->mutexp);
|
|
|
|
/*
|
|
* If this is CDB, then we do all locking in the interface, which is
|
|
* right here. However, if we are duplicating a cursor, then we do
|
|
* not want to acquire any locks here, because we'll do that in the
|
|
* dup code for the correct locker.
|
|
*/
|
|
op = LF_ISSET(DB_OPFLAGS_MASK);
|
|
if (op != DB_DUPCURSOR && F_ISSET(dbp->dbenv, DB_ENV_CDB)) {
|
|
mode = (op == DB_WRITELOCK) ? DB_LOCK_WRITE :
|
|
(LF_ISSET(DB_WRITECURSOR) ? DB_LOCK_IWRITE : DB_LOCK_READ);
|
|
if ((ret = CDB_lock_get(dbp->dbenv, dbc->locker, 0,
|
|
&dbc->lock_dbt, mode, &dbc->mylock)) != 0) {
|
|
(void)CDB___db_c_close(dbc);
|
|
return (ret);
|
|
}
|
|
if (LF_ISSET(DB_WRITECURSOR))
|
|
F_SET(dbc, DBC_WRITECURSOR);
|
|
if (op == DB_WRITELOCK)
|
|
F_SET(dbc, DBC_WRITER);
|
|
}
|
|
|
|
*dbcp = dbc;
|
|
return (0);
|
|
|
|
err: CDB___os_free(dbc, sizeof(*dbc));
|
|
return (ret);
|
|
}
|
|
|
|
/*
|
|
* CDB___db_c_close --
|
|
* Close the cursor (recycle for later use).
|
|
*/
|
|
static int
|
|
CDB___db_c_close(dbc)
|
|
DBC *dbc;
|
|
{
|
|
DB *dbp;
|
|
int ret, t_ret;
|
|
|
|
dbp = dbc->dbp;
|
|
|
|
PANIC_CHECK(dbp->dbenv);
|
|
|
|
ret = 0;
|
|
|
|
/*
|
|
* Remove the cursor from the active queue.
|
|
*
|
|
* !!!
|
|
* This must happen before the access specific cursor close routine
|
|
* is called, Btree depends on it.
|
|
*/
|
|
MUTEX_THREAD_LOCK(dbp->mutexp);
|
|
TAILQ_REMOVE(&dbp->active_queue, dbc, links);
|
|
MUTEX_THREAD_UNLOCK(dbp->mutexp);
|
|
|
|
/* Call the access specific cursor close routine. */
|
|
if ((t_ret = dbc->c_am_close(dbc)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
|
|
/*
|
|
* Release the lock after calling the access method specific close
|
|
* routine, a Btree cursor may have had pending deletes.
|
|
*/
|
|
if (F_ISSET(dbc->dbp->dbenv, DB_ENV_CDB) &&
|
|
dbc->mylock.off != LOCK_INVALID) {
|
|
ret = CDB_lock_put(dbc->dbp->dbenv, &dbc->mylock);
|
|
dbc->mylock.off = LOCK_INVALID;
|
|
}
|
|
|
|
/* Clean up the cursor. */
|
|
dbc->flags = 0;
|
|
|
|
#ifdef CLOSE_CURSOR_CHECK_FOR_LEFTOVER_LOCKS
|
|
/*
|
|
* Check for leftover locks, unless we're running with transactions.
|
|
*
|
|
* If we're running tests, display any locks currently held. It's
|
|
* possible that some applications may hold locks for long periods,
|
|
* e.g., conference room locks, but the DB tests should never close
|
|
* holding locks.
|
|
*/
|
|
if (F_ISSET(dbp->dbenv, DB_ENV_LOCKING) && dbc->lid == dbc->locker) {
|
|
DB_LOCKREQ request;
|
|
|
|
request.op = DB_LOCK_DUMP;
|
|
if ((t_ret = CDB_lock_vec(dbp->dbenv,
|
|
dbc->locker, 0, &request, 1, NULL)) != 0 && ret == 0)
|
|
ret = EINVAL;
|
|
}
|
|
#endif
|
|
/* Move the cursor to the free queue. */
|
|
MUTEX_THREAD_LOCK(dbp->mutexp);
|
|
TAILQ_INSERT_TAIL(&dbp->free_queue, dbc, links);
|
|
MUTEX_THREAD_UNLOCK(dbp->mutexp);
|
|
|
|
return (ret);
|
|
}
|
|
|
|
/*
|
|
* CDB___db_c_dup --
|
|
* Duplicate a cursor
|
|
*
|
|
* PUBLIC: int CDB___db_c_dup __P((DBC *, DBC **, u_int32_t));
|
|
*/
|
|
int
|
|
CDB___db_c_dup(orig_dbc, dbcp, flags)
|
|
DBC *orig_dbc;
|
|
DBC **dbcp;
|
|
u_int32_t flags;
|
|
{
|
|
DB *dbp;
|
|
DBC *dbc;
|
|
int ret;
|
|
|
|
PANIC_CHECK(orig_dbc->dbp->dbenv);
|
|
|
|
/*
|
|
* We can never have two write cursors open in CDB, so do not
|
|
* allow duplication of a write cursor.
|
|
*/
|
|
if (F_ISSET(orig_dbc, DBC_WRITER | DBC_WRITECURSOR) &&
|
|
flags != DB_POSITIONI)
|
|
return (EINVAL);
|
|
|
|
dbp = orig_dbc->dbp;
|
|
|
|
/* Allocate a new cursor. */
|
|
if ((ret = dbp->cursor(dbp, orig_dbc->txn, &dbc, DB_DUPCURSOR)) != 0)
|
|
return (ret);
|
|
|
|
/* Assign local locker to be the same as the original. */
|
|
dbc->locker = orig_dbc->locker;
|
|
|
|
/* If the user wants the cursor positioned, do it here. */
|
|
if (flags == DB_POSITION || flags == DB_POSITIONI) {
|
|
switch(dbp->type) {
|
|
case DB_QUEUE:
|
|
if ((ret = CDB___qam_c_dup(orig_dbc, dbc)) != 0)
|
|
goto err;
|
|
break;
|
|
case DB_BTREE:
|
|
case DB_RECNO:
|
|
if ((ret = CDB___bam_c_dup(orig_dbc, dbc)) != 0)
|
|
goto err;
|
|
break;
|
|
case DB_HASH:
|
|
if ((ret = CDB___ham_c_dup(orig_dbc, dbc)) != 0)
|
|
goto err;
|
|
break;
|
|
default:
|
|
ret = EINVAL;
|
|
goto err;
|
|
}
|
|
dbc->flags = orig_dbc->flags;
|
|
}
|
|
*dbcp = dbc;
|
|
return (0);
|
|
|
|
err: (void)dbc->c_close(dbc);
|
|
return (ret);
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
/*
|
|
* CDB___db_cprint --
|
|
* Display the current cursor list.
|
|
*
|
|
* PUBLIC: int CDB___db_cprint __P((DB *));
|
|
*/
|
|
int
|
|
CDB___db_cprint(dbp)
|
|
DB *dbp;
|
|
{
|
|
static const FN fn[] = {
|
|
{ DBC_RECOVER, "recover" },
|
|
{ DBC_RMW, "read-modify-write" },
|
|
{ DBC_WRITECURSOR, "write cursor" },
|
|
{ DBC_WRITER, "short-term write cursor" },
|
|
{ 0, NULL }
|
|
};
|
|
BTREE_CURSOR *cp;
|
|
DBC *dbc;
|
|
|
|
MUTEX_THREAD_LOCK(dbp->mutexp);
|
|
for (dbc = TAILQ_FIRST(&dbp->active_queue);
|
|
dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
|
|
fprintf(stderr,
|
|
"%#0x: dbp: %#0x txn: %#0x lid: %lu locker: %lu",
|
|
(u_int)dbc, (u_int)dbc->dbp, (u_int)dbc->txn,
|
|
(u_long)dbc->lid, (u_long)dbc->locker);
|
|
if (dbp->type == DB_BTREE) {
|
|
cp = dbc->internal;
|
|
fprintf(stderr, "p/i: %lu/%lu dp/di: %lu/%lu",
|
|
(u_long)cp->pgno, (u_long)cp->indx,
|
|
(u_long)cp->dpgno, (u_long)cp->dindx);
|
|
}
|
|
CDB___db_prflags(dbc->flags, fn, stderr);
|
|
fprintf(stderr, "\n");
|
|
}
|
|
MUTEX_THREAD_UNLOCK(dbp->mutexp);
|
|
|
|
return (0);
|
|
}
|
|
#endif /* DEBUG */
|
|
|
|
/*
|
|
* CDB___db_c_destroy --
|
|
* Destroy the cursor.
|
|
*
|
|
* PUBLIC: int CDB___db_c_destroy __P((DBC *));
|
|
*/
|
|
int
|
|
CDB___db_c_destroy(dbc)
|
|
DBC *dbc;
|
|
{
|
|
DB *dbp;
|
|
int ret;
|
|
|
|
dbp = dbc->dbp;
|
|
|
|
/* Remove the cursor from the free queue. */
|
|
MUTEX_THREAD_LOCK(dbp->mutexp);
|
|
TAILQ_REMOVE(&dbp->free_queue, dbc, links);
|
|
MUTEX_THREAD_UNLOCK(dbp->mutexp);
|
|
|
|
/* Call the access specific cursor destroy routine. */
|
|
ret = dbc->c_am_destroy == NULL ? 0 : dbc->c_am_destroy(dbc);
|
|
|
|
/* Free up allocated memory. */
|
|
if (dbc->rkey.data != NULL)
|
|
CDB___os_free(dbc->rkey.data, dbc->rkey.ulen);
|
|
if (dbc->rdata.data != NULL)
|
|
CDB___os_free(dbc->rdata.data, dbc->rdata.ulen);
|
|
CDB___os_free(dbc, sizeof(*dbc));
|
|
|
|
return (ret);
|
|
}
|
|
|
|
/*
|
|
* db_fd --
|
|
* Return a file descriptor for flock'ing.
|
|
*
|
|
* PUBLIC: int CDB___db_fd __P((DB *, int *));
|
|
*/
|
|
int
|
|
CDB___db_fd(dbp, fdp)
|
|
DB *dbp;
|
|
int *fdp;
|
|
{
|
|
DB_FH *fhp;
|
|
int ret;
|
|
|
|
PANIC_CHECK(dbp->dbenv);
|
|
DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->fd");
|
|
|
|
/*
|
|
* XXX
|
|
* Truly spectacular layering violation.
|
|
*/
|
|
if ((ret = CDB___mp_xxx_fh(dbp->mpf, &fhp)) != 0)
|
|
return (ret);
|
|
|
|
if (F_ISSET(fhp, DB_FH_VALID)) {
|
|
*fdp = fhp->fd;
|
|
return (0);
|
|
} else {
|
|
*fdp = -1;
|
|
return (ENOENT);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* CDB___db_get --
|
|
* Return a key/data pair.
|
|
*
|
|
* PUBLIC: int CDB___db_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
|
|
*/
|
|
int
|
|
CDB___db_get(dbp, txn, key, data, flags)
|
|
DB *dbp;
|
|
DB_TXN *txn;
|
|
DBT *key, *data;
|
|
u_int32_t flags;
|
|
{
|
|
DBC *dbc;
|
|
int ret, t_ret;
|
|
|
|
PANIC_CHECK(dbp->dbenv);
|
|
DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get");
|
|
|
|
if ((ret = CDB___db_getchk(dbp, key, data, flags)) != 0)
|
|
return (ret);
|
|
|
|
if ((ret = dbp->cursor(dbp, txn, &dbc, 0)) != 0)
|
|
return (ret);
|
|
|
|
DEBUG_LREAD(dbc, txn, "CDB___db_get", key, NULL, flags);
|
|
|
|
ret = dbc->c_get(dbc, key, data,
|
|
flags == 0 || flags == DB_RMW ? flags | DB_SET : flags);
|
|
|
|
if ((t_ret = CDB___db_c_close(dbc)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
|
|
#ifdef DEBUG
|
|
switch(flags) {
|
|
case 0:
|
|
word_monitor_add(WORD_MONITOR_GET, 1);
|
|
break;
|
|
case DB_NEXT:
|
|
word_monitor_add(WORD_MONITOR_GET_NEXT, 1);
|
|
break;
|
|
case DB_SET_RANGE:
|
|
word_monitor_add(WORD_MONITOR_GET_SET_RANGE, 1);
|
|
break;
|
|
default:
|
|
word_monitor_add(WORD_MONITOR_GET_OTHER, 1);
|
|
break;
|
|
}
|
|
#endif /* DEBUG */
|
|
|
|
return (ret);
|
|
}
|
|
|
|
/*
|
|
* CDB___db_put --
|
|
* Store a key/data pair.
|
|
*
|
|
* PUBLIC: int CDB___db_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
|
|
*/
|
|
int
|
|
CDB___db_put(dbp, txn, key, data, flags)
|
|
DB *dbp;
|
|
DB_TXN *txn;
|
|
DBT *key, *data;
|
|
u_int32_t flags;
|
|
{
|
|
DBC *dbc;
|
|
DBT tdata;
|
|
int ret, t_ret;
|
|
|
|
PANIC_CHECK(dbp->dbenv);
|
|
DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->put");
|
|
|
|
if ((ret = CDB___db_putchk(dbp, key, data,
|
|
flags, F_ISSET(dbp, DB_AM_RDONLY), F_ISSET(dbp, DB_AM_DUP))) != 0)
|
|
return (ret);
|
|
|
|
if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
|
|
return (ret);
|
|
|
|
DEBUG_LWRITE(dbc, txn, "CDB___db_put", key, data, flags);
|
|
|
|
if (flags == DB_NOOVERWRITE) {
|
|
/*
|
|
* Set DB_DBT_USERMEM, this might be a threaded application and
|
|
* the flags checking will catch us. We don't want the actual
|
|
* data, so request a partial of length 0.
|
|
*/
|
|
memset(&tdata, 0, sizeof(tdata));
|
|
F_SET(&tdata, DB_DBT_USERMEM | DB_DBT_PARTIAL);
|
|
|
|
/*
|
|
* If we're locking, set the read-modify-write flag, we're
|
|
* going to overwrite immediately.
|
|
*/
|
|
if ((ret = dbc->c_get(dbc, key, &tdata, DB_SET |
|
|
(F_ISSET(dbp->dbenv, DB_ENV_LOCKING) ? DB_RMW : 0))) == 0)
|
|
ret = DB_KEYEXIST;
|
|
else if (ret == DB_NOTFOUND)
|
|
ret = 0;
|
|
}
|
|
if (ret == 0)
|
|
ret = dbc->c_put(dbc, key, data, DB_KEYLAST);
|
|
|
|
if ((t_ret = CDB___db_c_close(dbc)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
#ifdef DEBUG
|
|
word_monitor_add(WORD_MONITOR_PUT, 1);
|
|
#endif /* DEBUG */
|
|
|
|
return (ret);
|
|
}
|
|
|
|
/*
|
|
* CDB___db_sync --
|
|
* Flush the database cache.
|
|
*
|
|
* PUBLIC: int CDB___db_sync __P((DB *, u_int32_t));
|
|
*/
|
|
int
|
|
CDB___db_sync(dbp, flags)
|
|
DB *dbp;
|
|
u_int32_t flags;
|
|
{
|
|
int ret, t_ret;
|
|
|
|
PANIC_CHECK(dbp->dbenv);
|
|
DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->sync");
|
|
|
|
if ((ret = CDB___db_syncchk(dbp, flags)) != 0)
|
|
return (ret);
|
|
|
|
/* Read-only trees never need to be sync'd. */
|
|
if (F_ISSET(dbp, DB_AM_RDONLY))
|
|
return (0);
|
|
|
|
/* If it's a Recno tree, write the backing source text file. */
|
|
if (dbp->type == DB_RECNO)
|
|
ret = CDB___ram_writeback(dbp);
|
|
|
|
/* If the tree was never backed by a database file, we're done. */
|
|
if (F_ISSET(dbp, DB_AM_INMEM))
|
|
return (0);
|
|
|
|
/* Flush any dirty pages from the cache to the backing file. */
|
|
if ((t_ret = CDB_memp_fsync(dbp->mpf)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
return (ret);
|
|
}
|
|
|
|
/*
|
|
* CDB___db_log_page
|
|
* Log a meta-data or root page during a create operation.
|
|
*
|
|
* PUBLIC: int CDB___db_log_page __P((DB *,
|
|
* PUBLIC: const char *, DB_LSN *, db_pgno_t, PAGE *));
|
|
*/
|
|
int
|
|
CDB___db_log_page(dbp, name, lsn, pgno, page)
|
|
DB *dbp;
|
|
const char *name;
|
|
DB_LSN *lsn;
|
|
db_pgno_t pgno;
|
|
PAGE *page;
|
|
{
|
|
DBT name_dbt, page_dbt;
|
|
DB_LSN new_lsn;
|
|
int ret;
|
|
|
|
if (dbp->open_txn == NULL)
|
|
return (0);
|
|
|
|
memset(&page_dbt, 0, sizeof(page_dbt));
|
|
page_dbt.size = dbp->pgsize;
|
|
page_dbt.data = page;
|
|
if (pgno == PGNO_BASE_MD) {
|
|
/*
|
|
* !!!
|
|
* Make sure that we properly handle a null name. The old
|
|
* Tcl sent us pathnames of the form ""; it may be the case
|
|
* that the new Tcl doesn't do that, so we can get rid of
|
|
* the second check here.
|
|
*/
|
|
memset(&name_dbt, 0, sizeof(name_dbt));
|
|
name_dbt.data = (char *)name;
|
|
if (name == NULL || *name == '\0')
|
|
name_dbt.size = 0;
|
|
else
|
|
name_dbt.size = strlen(name) + 1;
|
|
|
|
ret = CDB___crdel_metapage_log(dbp->dbenv,
|
|
dbp->open_txn, &new_lsn, DB_FLUSH,
|
|
dbp->log_fileid, &name_dbt, pgno, &page_dbt);
|
|
} else
|
|
ret = CDB___crdel_metasub_log(dbp->dbenv, dbp->open_txn,
|
|
&new_lsn, 0, dbp->log_fileid, pgno, &page_dbt, lsn);
|
|
|
|
if (ret == 0)
|
|
page->lsn = new_lsn;
|
|
return (ret);
|
|
}
|