You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1681 lines
44 KiB
1681 lines
44 KiB
/*-
|
|
* See the file LICENSE for redistribution information.
|
|
*
|
|
* Copyright (c) 1996, 1997, 1998, 1999
|
|
* Sleepycat Software. All rights reserved.
|
|
*/
|
|
/*
|
|
* Copyright (c) 1990, 1993, 1994, 1995, 1996
|
|
* Keith Bostic. All rights reserved.
|
|
*/
|
|
/*
|
|
* Copyright (c) 1990, 1993, 1994, 1995
|
|
* The Regents of the University of California. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. Neither the name of the University nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "db_config.h"
|
|
|
|
#ifndef lint
|
|
static const char sccsid[] = "@(#)db.c 11.31 (Sleepycat) 11/12/99";
|
|
#endif /* not lint */
|
|
|
|
#ifndef NO_SYSTEM_INCLUDES
|
|
#include <sys/types.h>
|
|
|
|
#include <errno.h>
|
|
#include <stddef.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#endif
|
|
|
|
#include "db_int.h"
|
|
#include "db_page.h"
|
|
#include "db_shash.h"
|
|
#include "db_swap.h"
|
|
#include "btree.h"
|
|
#include "db_am.h"
|
|
#include "hash.h"
|
|
#include "lock.h"
|
|
#include "log.h"
|
|
#include "mp.h"
|
|
#include "qam.h"
|
|
|
|
static int CDB___db_dbopen __P((DB *, const char *, u_int32_t, int, db_pgno_t));
|
|
static int CDB___db_dbenv_setup __P((DB *, const char *, u_int32_t));
|
|
static int CDB___db_file_setup __P((DB *,
|
|
const char *, u_int32_t, int, db_pgno_t, int *));
|
|
static int CDB___db_master_open __P((DB_ENV *,
|
|
DB_TXN *, const char *, u_int32_t, int, DB **));
|
|
static int CDB___db_master_update __P((DB *,
|
|
const char *, u_int32_t, db_pgno_t *, int, u_int32_t));
|
|
static int CDB___db_metabegin __P((DB *, DB_LOCK *));
|
|
static int CDB___db_metaend __P((DB *,
|
|
DB_LOCK *, int, int (*)(DB *, void *), void *));
|
|
static int CDB___db_refresh __P((DB *));
|
|
static int CDB___db_remove_callback __P((DB *, void *));
|
|
static int CDB___db_set_pgsize __P((DB *, DB_FH *, char *));
|
|
static int CDB___db_subdb_remove __P((DB *, const char *, const char *));
|
|
#if CONFIG_TEST
|
|
static void __db_makecopy __P((const char *, const char *));
|
|
#endif
|
|
|
|
/*
|
|
* CDB___db_open --
|
|
* Main library interface to the DB access methods.
|
|
*
|
|
* PUBLIC: int CDB___db_open __P((DB *,
|
|
* PUBLIC: const char *, const char *, DBTYPE, u_int32_t, int));
|
|
*/
|
|
int
|
|
CDB___db_open(dbp, name, subdb, type, flags, mode)
|
|
DB *dbp;
|
|
const char *name, *subdb;
|
|
DBTYPE type;
|
|
u_int32_t flags;
|
|
int mode;
|
|
{
|
|
DB_ENV *dbenv;
|
|
DB_LOCK open_lock;
|
|
DB *mdbp;
|
|
db_pgno_t meta_pgno;
|
|
u_int32_t ok_flags;
|
|
int ret, t_ret;
|
|
|
|
dbenv = dbp->dbenv;
|
|
mdbp = NULL;
|
|
|
|
/* Validate arguments. */
|
|
#define OKFLAGS \
|
|
(DB_CREATE | DB_EXCL | DB_FCNTL_LOCKING | \
|
|
DB_NOMMAP | DB_RDONLY | DB_THREAD | DB_TRUNCATE | DB_COMPRESS)
|
|
if ((ret = CDB___db_fchk(dbenv, "DB->open", flags, OKFLAGS)) != 0)
|
|
return (ret);
|
|
if (LF_ISSET(DB_EXCL) && !LF_ISSET(DB_CREATE))
|
|
return (CDB___db_ferr(dbenv, "DB->open", 1));
|
|
if (LF_ISSET(DB_RDONLY) && LF_ISSET(DB_CREATE))
|
|
return (CDB___db_ferr(dbenv, "DB->open", 1));
|
|
/*
|
|
* Transparent I/O compression does not work on mmap'd files.
|
|
*/
|
|
if(LF_ISSET(DB_COMPRESS))
|
|
LF_SET(DB_NOMMAP);
|
|
|
|
switch (type) {
|
|
case DB_UNKNOWN:
|
|
ok_flags = 0;
|
|
break;
|
|
case DB_BTREE:
|
|
ok_flags = DB_OK_BTREE;
|
|
break;
|
|
case DB_HASH:
|
|
ok_flags = DB_OK_HASH;
|
|
break;
|
|
case DB_QUEUE:
|
|
ok_flags = DB_OK_QUEUE;
|
|
break;
|
|
case DB_RECNO:
|
|
ok_flags = DB_OK_RECNO;
|
|
break;
|
|
default:
|
|
CDB___db_err(dbp->dbenv, "unknown type: %lu", type);
|
|
return (EINVAL);
|
|
}
|
|
if (ok_flags)
|
|
DB_ILLEGAL_METHOD(dbp, ok_flags);
|
|
|
|
/* The environment may have been created, but never opened. */
|
|
if (!F_ISSET(dbenv, DB_ENV_DBLOCAL | DB_ENV_OPEN_CALLED)) {
|
|
CDB___db_err(dbenv, "environment not yet opened");
|
|
return (EINVAL);
|
|
}
|
|
|
|
/*
|
|
* Historically, you could pass in an environment that didn't have a
|
|
* mpool, and DB would create a private one behind the scenes. This
|
|
* no longer works.
|
|
*/
|
|
if (!F_ISSET(dbenv, DB_ENV_DBLOCAL) && dbenv->mp_handle == NULL) {
|
|
CDB___db_err(dbenv, "environment did not include a memory pool.");
|
|
return (EINVAL);
|
|
}
|
|
|
|
/*
|
|
* You can't specify threads during DB->open if subsystems in the
|
|
* environment weren't configured with them.
|
|
*/
|
|
if (LF_ISSET(DB_THREAD) &&
|
|
!F_ISSET(dbenv, DB_ENV_DBLOCAL | DB_ENV_THREAD)) {
|
|
CDB___db_err(dbenv, "environment not created using DB_THREAD");
|
|
return (EINVAL);
|
|
}
|
|
|
|
/* DB_TRUNCATE is not transaction recoverable. */
|
|
if (LF_ISSET(DB_TRUNCATE) && F_ISSET(dbenv, DB_ENV_TXN)) {
|
|
CDB___db_err(dbenv,
|
|
"DB_TRUNCATE illegal in a transaction protected environment");
|
|
return (EINVAL);
|
|
}
|
|
|
|
/* Subdatabase checks. */
|
|
if (subdb != NULL) {
|
|
/* Subdatabases must be created in named files. */
|
|
if (name == NULL) {
|
|
CDB___db_err(dbenv,
|
|
"subdatabases cannot be created in temporary files");
|
|
return (EINVAL);
|
|
}
|
|
|
|
/* QAM can't be done as a subdatabase. */
|
|
if (type == DB_QUEUE) {
|
|
CDB___db_err(dbenv, "subdatabases cannot be queue files");
|
|
return (EINVAL);
|
|
}
|
|
}
|
|
|
|
/* Convert any DB->open flags. */
|
|
if (LF_ISSET(DB_RDONLY))
|
|
F_SET(dbp, DB_AM_RDONLY);
|
|
if (LF_ISSET(DB_COMPRESS))
|
|
F_SET(dbp, DB_AM_CMPR);
|
|
|
|
/* Fill in the type. */
|
|
dbp->type = type;
|
|
|
|
/*
|
|
* If we're potentially creating a database, wrap the open inside of
|
|
* a transaction.
|
|
*/
|
|
if (F_ISSET(dbenv, DB_ENV_TXN) && LF_ISSET(DB_CREATE))
|
|
if ((ret = CDB___db_metabegin(dbp, &open_lock)) != 0)
|
|
return (ret);
|
|
|
|
/*
|
|
* If we're opening a subdatabase, we have to open (and potentially
|
|
* create) the main database, and then get (and potentially store)
|
|
* our base page number in that database. Then, we can finally open
|
|
* the subdatabase.
|
|
*/
|
|
if (subdb == NULL)
|
|
meta_pgno = PGNO_BASE_MD;
|
|
else {
|
|
/*
|
|
* Open the master database, optionally updating it, and
|
|
* retrieving the metadata page number.
|
|
*/
|
|
if ((ret = CDB___db_master_open(dbp->dbenv, dbp->open_txn,
|
|
name, flags, mode, &mdbp)) != 0)
|
|
goto err;
|
|
|
|
/* Copy the page size and file id from the master. */
|
|
dbp->pgsize = mdbp->pgsize;
|
|
F_SET(dbp, DB_AM_SUBDB);
|
|
memcpy(dbp->fileid, mdbp->fileid, DB_FILE_ID_LEN);
|
|
|
|
if ((ret = CDB___db_master_update(mdbp,
|
|
subdb, type, &meta_pgno, 0, flags)) != 0)
|
|
goto err;
|
|
|
|
/*
|
|
* Clear the exclusive open and truncation flags, they only
|
|
* apply to the open of the master database.
|
|
*/
|
|
LF_CLR(DB_EXCL | DB_TRUNCATE);
|
|
}
|
|
|
|
ret = CDB___db_dbopen(dbp, name, flags, mode, meta_pgno);
|
|
|
|
/*
|
|
* You can open the database that describes the subdatabases in the
|
|
* rest of the file read-only. The content of each key's data is
|
|
* unspecified and applications should never be adding new records
|
|
* or updating existing records. However, during recovery, we need
|
|
* to open these databases R/W so we can redo/undo changes in them.
|
|
*/
|
|
if (subdb == NULL &&
|
|
(dbenv->lg_handle == NULL ||
|
|
!F_ISSET((DB_LOG *)(dbenv->lg_handle), DBC_RECOVER)) &&
|
|
!LF_ISSET(DB_RDONLY) && F_ISSET(dbp, DB_AM_SUBDB)) {
|
|
CDB___db_err(dbenv,
|
|
"databases containing subdatabase lists may only be opened read-only");
|
|
ret = EINVAL;
|
|
goto err;
|
|
}
|
|
|
|
err: /*
|
|
* End any transaction, committing if we were successful, aborting
|
|
* otherwise.
|
|
*/
|
|
if (F_ISSET(dbenv, DB_ENV_TXN) && LF_ISSET(DB_CREATE))
|
|
if ((t_ret = CDB___db_metaend(dbp,
|
|
&open_lock, ret == 0, NULL, NULL)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
|
|
/* If we were successful, don't discard the file on close. */
|
|
if (ret == 0)
|
|
F_CLR(dbp, DB_AM_DISCARD);
|
|
|
|
/* If we were unsuccessful, destroy the DB handle. */
|
|
if (ret != 0)
|
|
CDB___db_refresh(dbp);
|
|
|
|
if (mdbp != NULL) {
|
|
/* If we were successful, don't discard the file on close. */
|
|
if (ret == 0)
|
|
F_CLR(mdbp, DB_AM_DISCARD);
|
|
if ((t_ret = mdbp->close(mdbp, 0)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
}
|
|
|
|
return (ret);
|
|
}
|
|
|
|
/*
|
|
* CDB___db_dbopen --
|
|
* Open a database.
|
|
*/
|
|
static int
|
|
CDB___db_dbopen(dbp, name, flags, mode, meta_pgno)
|
|
DB *dbp;
|
|
const char *name;
|
|
u_int32_t flags;
|
|
int mode;
|
|
db_pgno_t meta_pgno;
|
|
{
|
|
DB_ENV *dbenv;
|
|
int ret;
|
|
int zero_length;
|
|
|
|
dbenv = dbp->dbenv;
|
|
|
|
/* Set up the underlying file. */
|
|
if ((ret = CDB___db_file_setup(dbp,
|
|
name, flags, mode, meta_pgno, &zero_length)) != 0)
|
|
return (ret);
|
|
|
|
/* Set up the underlying environment. */
|
|
if ((ret = CDB___db_dbenv_setup(dbp, name, flags)) != 0)
|
|
return (ret);
|
|
|
|
/*
|
|
* Do access method specific initialization.
|
|
*
|
|
* !!!
|
|
* Set the open flag. (The underlying access method open functions
|
|
* may want to do things like acquire cursors, so the open flag has
|
|
* to be set before calling them.)
|
|
*/
|
|
F_SET(dbp, DB_OPEN_CALLED);
|
|
|
|
if (zero_length)
|
|
return (0);
|
|
|
|
switch (dbp->type) {
|
|
case DB_BTREE:
|
|
ret = CDB___bam_open(dbp, name, meta_pgno);
|
|
break;
|
|
case DB_HASH:
|
|
ret = CDB___ham_open(dbp, name, meta_pgno);
|
|
break;
|
|
case DB_RECNO:
|
|
ret = CDB___ram_open(dbp, name, meta_pgno);
|
|
break;
|
|
case DB_QUEUE:
|
|
ret = CDB___qam_open(dbp, name, meta_pgno);
|
|
break;
|
|
case DB_UNKNOWN:
|
|
ret = EINVAL; /* Shouldn't be possible. */
|
|
break;
|
|
}
|
|
return (ret);
|
|
}
|
|
|
|
/*
|
|
* CDB___db_master_open --
|
|
* Open up a handle on a master database.
|
|
*/
|
|
static int
|
|
CDB___db_master_open(dbenv, txn, name, flags, mode, dbpp)
|
|
DB_ENV *dbenv;
|
|
DB_TXN *txn;
|
|
const char *name;
|
|
u_int32_t flags;
|
|
int mode;
|
|
DB **dbpp;
|
|
{
|
|
DB *dbp;
|
|
int ret;
|
|
|
|
/*
|
|
* Open up a handle on the main database.
|
|
*/
|
|
if ((ret = CDB_db_create(
|
|
&dbp, F_ISSET(dbenv, DB_ENV_DBLOCAL) ? NULL : dbenv, 0)) != 0)
|
|
return (ret);
|
|
dbp->open_txn = txn;
|
|
|
|
/*
|
|
* It's always a btree; flag that we're creating a database with
|
|
* subdatabases.
|
|
*/
|
|
dbp->type = DB_BTREE;
|
|
F_SET(dbp, DB_AM_SUBDB);
|
|
|
|
ret = CDB___db_dbopen(dbp, name, flags, mode, PGNO_BASE_MD);
|
|
|
|
*dbpp = dbp;
|
|
return (ret);
|
|
}
|
|
|
|
/*
|
|
* CDB___db_master_update --
|
|
* Add/Remove a subdatabase from a master database.
|
|
*/
|
|
static int
|
|
CDB___db_master_update(mdbp, subdb, type, meta_pgnop, is_remove, flags)
|
|
DB *mdbp;
|
|
const char *subdb;
|
|
u_int32_t type;
|
|
db_pgno_t *meta_pgnop; /* !NULL if creating/reading. */
|
|
int is_remove;
|
|
u_int32_t flags;
|
|
{
|
|
DBC *dbc;
|
|
DBT key, data;
|
|
PAGE *p;
|
|
int ret, t_ret;
|
|
|
|
dbc = NULL;
|
|
p = NULL;
|
|
|
|
/* Open up a cursor. */
|
|
if ((ret = mdbp->cursor(mdbp, mdbp->open_txn, &dbc, 0)) != 0)
|
|
goto err;
|
|
|
|
/*
|
|
* Try to point the cursor at the record.
|
|
*
|
|
* If we're removing or potentially creating an entry, lock the page
|
|
* with DB_RMW.
|
|
*
|
|
* !!!
|
|
* We don't include the name's nul termination in the database.
|
|
*/
|
|
memset(&key, 0, sizeof(key));
|
|
memset(&data, 0, sizeof(data));
|
|
key.data = (char *)subdb;
|
|
key.size = strlen(subdb);
|
|
ret = dbc->c_get(dbc, &key, &data, DB_SET |
|
|
(meta_pgnop == NULL || (F_ISSET(
|
|
mdbp->dbenv, DB_ENV_LOCKING) && LF_ISSET(DB_CREATE)) ? DB_RMW : 0));
|
|
|
|
if (is_remove) {
|
|
/* We should have found something if we're removing it. */
|
|
if (ret != 0)
|
|
goto err;
|
|
|
|
memcpy(meta_pgnop, data.data, sizeof(db_pgno_t));
|
|
|
|
/* Delete the subdatabase entry. */
|
|
if ((ret = dbc->c_del(dbc, 0)) != 0)
|
|
goto err;
|
|
|
|
if ((ret = CDB_memp_fget(mdbp->mpf, meta_pgnop, 0, &p)) != 0)
|
|
goto err;
|
|
|
|
/* Free and put the page. */
|
|
if ((ret = CDB___db_free(dbc, p)) != 0)
|
|
goto err;
|
|
p = NULL;
|
|
} else {
|
|
/*
|
|
* Get the subdatabase information. If it already exists,
|
|
* copy out the page number and we're done.
|
|
*/
|
|
switch (ret) {
|
|
case 0:
|
|
memcpy(meta_pgnop, data.data, sizeof(db_pgno_t));
|
|
goto done;
|
|
case DB_NOTFOUND:
|
|
if (LF_ISSET(DB_CREATE))
|
|
break;
|
|
ret = ENOENT;
|
|
goto err;
|
|
default:
|
|
goto err;
|
|
}
|
|
|
|
if ((ret = CDB___db_new(dbc,
|
|
type == DB_HASH ? P_HASHMETA : P_BTREEMETA, &p)) != 0)
|
|
goto err;
|
|
data.data = &PGNO(p);
|
|
data.size = sizeof(db_pgno_t);
|
|
if ((ret = dbc->c_put(dbc, &key, &data, DB_KEYLAST)) != 0)
|
|
goto err;
|
|
|
|
*meta_pgnop = PGNO(p);
|
|
}
|
|
|
|
err:
|
|
done: /*
|
|
* If we allocated a page: if we're successful, mark the page dirty
|
|
* and return it to the cache, otherwise, discard/free it.
|
|
*/
|
|
if (p != NULL) {
|
|
if (ret == 0) {
|
|
if ((t_ret =
|
|
CDB_memp_fput(mdbp->mpf, p, DB_MPOOL_DIRTY)) != 0)
|
|
ret = t_ret;
|
|
/*
|
|
* Since we cannot close this file until after
|
|
* transaction commit, we need to sync the dirty
|
|
* pages, because we'll read these directly from
|
|
* disk to open.
|
|
*/
|
|
if ((t_ret = mdbp->sync(mdbp, 0)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
} else
|
|
(void)CDB___db_free(dbc, p);
|
|
}
|
|
|
|
/* Discard the cursor. */
|
|
if (dbc != NULL && (t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
|
|
return (ret);
|
|
}
|
|
|
|
/*
|
|
* CDB___db_dbenv_setup --
|
|
* Set up the underlying environment during a db_open.
|
|
*/
|
|
static int
|
|
CDB___db_dbenv_setup(dbp, name, flags)
|
|
DB *dbp;
|
|
const char *name;
|
|
u_int32_t flags;
|
|
{
|
|
DB_ENV *dbenv;
|
|
DBT pgcookie;
|
|
DB_MPOOL_FINFO finfo;
|
|
DB_PGINFO pginfo;
|
|
int ret;
|
|
|
|
dbenv = dbp->dbenv;
|
|
|
|
/* If the environment is local, it's time to create it. */
|
|
if (F_ISSET(dbenv, DB_ENV_DBLOCAL)) {
|
|
/* Make sure we have at least DB_MINCACHE pages in our cache. */
|
|
if (dbenv->mp_gbytes == 0 &&
|
|
dbenv->mp_bytes < dbp->pgsize * DB_MINPAGECACHE &&
|
|
(ret = dbenv->set_cachesize(
|
|
dbenv, 0, dbp->pgsize * DB_MINPAGECACHE, 0)) != 0)
|
|
return (ret);
|
|
|
|
if ((ret = dbenv->open(dbenv, NULL, NULL, DB_CREATE |
|
|
DB_INIT_MPOOL | DB_PRIVATE | LF_ISSET(DB_THREAD), 0)) != 0)
|
|
return (ret);
|
|
}
|
|
|
|
/* Register DB's pgin/pgout functions. */
|
|
if ((ret =
|
|
CDB_memp_register(dbenv, DB_FTYPE_SET, CDB___db_pgin, CDB___db_pgout)) != 0)
|
|
return (ret);
|
|
|
|
/*
|
|
* Open a backing file in the memory pool.
|
|
*
|
|
* If we need to pre- or post-process a file's pages on I/O, set the
|
|
* file type. If it's a hash file, always call the pgin and pgout
|
|
* routines. This means that hash files can never be mapped into
|
|
* process memory. If it's a btree file and requires swapping, we
|
|
* need to page the file in and out. This has to be right -- we can't
|
|
* mmap files that are being paged in and out.
|
|
*/
|
|
memset(&finfo, 0, sizeof(finfo));
|
|
switch (dbp->type) {
|
|
case DB_BTREE:
|
|
case DB_RECNO:
|
|
finfo.ftype =
|
|
F_ISSET(dbp, DB_AM_SWAP) ? DB_FTYPE_SET : DB_FTYPE_NOTSET;
|
|
finfo.clear_len = DB_PAGE_DB_LEN;
|
|
break;
|
|
case DB_HASH:
|
|
finfo.ftype = DB_FTYPE_SET;
|
|
finfo.clear_len = DB_PAGE_DB_LEN;
|
|
break;
|
|
case DB_QUEUE:
|
|
finfo.ftype =
|
|
F_ISSET(dbp, DB_AM_SWAP) ? DB_FTYPE_SET : DB_FTYPE_NOTSET;
|
|
finfo.clear_len = DB_PAGE_QUEUE_LEN;
|
|
break;
|
|
case DB_UNKNOWN:
|
|
return (EINVAL); /* Shouldn't be possible. */
|
|
}
|
|
/*
|
|
* Better compression is achieved if the page does not contain random data.
|
|
*/
|
|
if(F_ISSET(dbp, DB_AM_CMPR))
|
|
finfo.clear_len = 0;
|
|
|
|
finfo.pgcookie = &pgcookie;
|
|
finfo.fileid = dbp->fileid;
|
|
finfo.lsn_offset = 0;
|
|
|
|
pginfo.db_pagesize = dbp->pgsize;
|
|
/*
|
|
* Forbiding byte swap when compression is enabled
|
|
* makes things simpler for the compression.
|
|
*/
|
|
if(F_ISSET(dbp, DB_AM_SWAP) && LF_ISSET(DB_COMPRESS))
|
|
return (EINVAL);
|
|
pginfo.needswap = F_ISSET(dbp, DB_AM_SWAP);
|
|
pgcookie.data = &pginfo;
|
|
pgcookie.size = sizeof(DB_PGINFO);
|
|
|
|
if ((ret = CDB_memp_fopen(dbenv, name,
|
|
LF_ISSET(DB_RDONLY | DB_NOMMAP | DB_COMPRESS),
|
|
0666, dbp->pgsize, &finfo, &dbp->mpf)) != 0)
|
|
return (ret);
|
|
|
|
/*
|
|
* We may need a per-thread mutex. Allocate it from the environment
|
|
* region, there's supposed to be extra space there for that purpose.
|
|
*/
|
|
if (LF_ISSET(DB_THREAD)) {
|
|
if ((ret = CDB___db_mutex_alloc(
|
|
dbenv, dbenv->reginfo, (MUTEX **)&dbp->mutexp)) != 0)
|
|
return (ret);
|
|
if ((ret = __db_mutex_init(
|
|
dbenv, dbp->mutexp, 0, MUTEX_THREAD)) != 0)
|
|
return (ret);
|
|
}
|
|
|
|
/* Get a log file id. */
|
|
if (F_ISSET(dbenv, DB_ENV_LOGGING) &&
|
|
#if !defined(DEBUG_ROP)
|
|
!F_ISSET(dbp, DB_AM_RDONLY) &&
|
|
#endif
|
|
(ret = CDB_log_register(dbenv, dbp, name, &dbp->log_fileid)) != 0)
|
|
return (ret);
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* CDB___db_file_setup --
|
|
* Setup the file or in-memory data.
|
|
* Read the database metadata and resolve it with our arguments.
|
|
*/
|
|
static int
|
|
CDB___db_file_setup(dbp, name, flags, mode, meta_pgno, zerop)
|
|
DB *dbp;
|
|
const char *name;
|
|
u_int32_t flags;
|
|
int mode;
|
|
db_pgno_t meta_pgno;
|
|
int *zerop;
|
|
{
|
|
DBT namedbt;
|
|
DB_ENV *dbenv;
|
|
DB_FH *fhp, fh;
|
|
DB_LSN lsn;
|
|
DB_TXN *txn;
|
|
ssize_t nr;
|
|
u_int32_t magic, oflags;
|
|
int ret, retry_cnt, t_ret;
|
|
char *real_name, mbuf[256];
|
|
|
|
#define IS_SUBDB_SETUP (meta_pgno != PGNO_BASE_MD)
|
|
|
|
dbenv = dbp->dbenv;
|
|
txn = NULL;
|
|
*zerop = 0;
|
|
|
|
/*
|
|
* If we open a file handle and our caller is doing fcntl(2) locking,
|
|
* we can't close it because that would discard the caller's lock.
|
|
* Save it until we close the DB handle.
|
|
*/
|
|
if (LF_ISSET(DB_FCNTL_LOCKING)) {
|
|
if ((ret = CDB___os_malloc(sizeof(*fhp), NULL, &fhp)) != 0)
|
|
return (ret);
|
|
} else
|
|
fhp = &fh;
|
|
F_CLR(fhp, DB_FH_VALID);
|
|
|
|
/*
|
|
* If the file is in-memory, set up is simple. Otherwise, do the
|
|
* hard work of opening and reading the file.
|
|
*
|
|
* If we have a file name, try and read the first page, figure out
|
|
* what type of file it is, and initialize everything we can based
|
|
* on that file's meta-data page.
|
|
*
|
|
* !!!
|
|
* There's a reason we don't push this code down into the buffer cache.
|
|
* The problem is that there's no information external to the file that
|
|
* we can use as a unique ID. UNIX has dev/inode pairs, but they are
|
|
* not necessarily unique after reboot, if the file was mounted via NFS.
|
|
* Windows has similar problems, as the FAT filesystem doesn't maintain
|
|
* dev/inode numbers across reboot. So, we must get something from the
|
|
* file we can use to ensure that, even after a reboot, the file we're
|
|
* joining in the cache is the right file for us to join. The solution
|
|
* we use is to maintain a file ID that's stored in the database, and
|
|
* that's why we have to open and read the file before calling into the
|
|
* buffer cache.
|
|
*
|
|
* The secondary reason is that there's additional information that
|
|
* we want to have before instantiating a file in the buffer cache:
|
|
* the page size, file type (btree/hash), if swapping is required,
|
|
* and flags (DB_RDONLY, DB_CREATE, DB_TRUNCATE). We could handle
|
|
* needing this information by allowing it to be set for a file in
|
|
* the buffer cache even after the file has been opened, and, of
|
|
* course, supporting the ability to flush a file from the cache as
|
|
* necessary, e.g., if we guessed wrongly about the page size. Given
|
|
* that we have to read the file anyway to get the file ID, we might
|
|
* as well get the rest, too.
|
|
*
|
|
* Get the real file name.
|
|
*/
|
|
if (name == NULL) {
|
|
F_SET(dbp, DB_AM_INMEM);
|
|
|
|
if (dbp->type == DB_UNKNOWN) {
|
|
CDB___db_err(dbenv,
|
|
"DBTYPE of unknown without existing file");
|
|
return (EINVAL);
|
|
}
|
|
real_name = NULL;
|
|
|
|
/*
|
|
* If the file is a temporary file and we're doing locking,
|
|
* then we have to create a unique file ID. We can't use our
|
|
* normal dev/inode pair (or whatever this OS uses in place of
|
|
* dev/inode pairs) because no backing file will be created
|
|
* until the mpool cache is filled forcing the buffers to disk.
|
|
* Grab a random locker ID to use as a file ID. The created
|
|
* ID must never match a potential real file ID -- we know it
|
|
* won't because real file IDs contain a time stamp after the
|
|
* dev/inode pair, and we're simply storing a 4-byte value.
|
|
*
|
|
* !!!
|
|
* Store the locker in the file id structure -- we can get it
|
|
* from there as necessary, and it saves having two copies.
|
|
*/
|
|
if (F_ISSET(dbenv, DB_ENV_LOCKING | DB_ENV_CDB) &&
|
|
(ret = CDB_lock_id(dbenv, (u_int32_t *)dbp->fileid)) != 0)
|
|
return (ret);
|
|
|
|
return (0);
|
|
}
|
|
|
|
/* Get the real backing file name. */
|
|
if ((ret = CDB___db_appname(dbenv,
|
|
DB_APP_DATA, NULL, name, 0, NULL, &real_name)) != 0)
|
|
return (ret);
|
|
|
|
/*
|
|
* Open the backing file. We need to make sure that multiple processes
|
|
* attempting to create the file at the same time are properly ordered
|
|
* so that only one of them creates the "unique" file ID, so we open it
|
|
* O_EXCL and O_CREAT so two simultaneous attempts to create the region
|
|
* will return failure in one of the attempts. If we're the one that
|
|
* fails, simply retry without the O_CREAT flag, which will require the
|
|
* meta-data page exist.
|
|
*/
|
|
|
|
/* Fill in the default file mode. */
|
|
if (mode == 0)
|
|
mode = CDB___db_omode("rwrw--");
|
|
|
|
oflags = 0;
|
|
if (LF_ISSET(DB_RDONLY))
|
|
oflags |= DB_OSO_RDONLY;
|
|
if (LF_ISSET(DB_TRUNCATE))
|
|
oflags |= DB_OSO_TRUNC;
|
|
|
|
retry_cnt = 0;
|
|
open_retry:
|
|
*zerop = 0;
|
|
ret = 0;
|
|
if (LF_ISSET(DB_CREATE)) {
|
|
if (dbp->open_txn != NULL) {
|
|
/*
|
|
* Start a child transaction to wrap this individual
|
|
* create.
|
|
*/
|
|
if ((ret =
|
|
CDB_txn_begin(dbenv, dbp->open_txn, &txn, 0)) != 0)
|
|
goto err_msg;
|
|
|
|
memset(&namedbt, 0, sizeof(namedbt));
|
|
namedbt.data = (char *)name;
|
|
namedbt.size = strlen(name) + 1;
|
|
if ((ret = CDB___crdel_fileopen_log(dbenv, txn,
|
|
&lsn, DB_FLUSH, &namedbt, mode)) != 0)
|
|
goto err_msg;
|
|
}
|
|
DB_TEST_RECOVERY(dbp, DB_TEST_PREOPEN, ret, name);
|
|
if ((ret = CDB___os_open(real_name,
|
|
oflags | DB_OSO_CREATE | DB_OSO_EXCL, mode, fhp)) == 0) {
|
|
DB_TEST_RECOVERY(dbp, DB_TEST_POSTOPEN, ret, name);
|
|
|
|
/* Commit the file create. */
|
|
if (dbp->open_txn != NULL) {
|
|
if ((ret = CDB_txn_commit(txn, DB_TXN_SYNC)) != 0)
|
|
goto err_msg;
|
|
txn = NULL;
|
|
}
|
|
|
|
/*
|
|
* We created the file. This means that if we later
|
|
* fail, we need to delete the file and if we're going
|
|
* to do that, we need to trash any pages in the
|
|
* memory pool. Since we only know here that we
|
|
* created the file, we're going to set the flag here
|
|
* and clear it later if we commit successfully.
|
|
*/
|
|
F_SET(dbp, DB_AM_DISCARD);
|
|
} else {
|
|
/*
|
|
* Abort the file create. If the abort fails, report
|
|
* the error returned by CDB_txn_abort(), rather than the
|
|
* open error, for no particular reason.
|
|
*/
|
|
if (dbp->open_txn != NULL) {
|
|
if ((t_ret = CDB_txn_abort(txn)) != 0) {
|
|
ret = t_ret;
|
|
goto err_msg;
|
|
}
|
|
txn = NULL;
|
|
}
|
|
|
|
/*
|
|
* If we were not doing an exclusive open, try again
|
|
* without the create flag.
|
|
*/
|
|
if (ret == EEXIST && !LF_ISSET(DB_EXCL)) {
|
|
LF_CLR(DB_CREATE);
|
|
DB_TEST_RECOVERY(dbp,
|
|
DB_TEST_POSTOPEN, ret, name);
|
|
goto open_retry;
|
|
}
|
|
}
|
|
} else
|
|
ret = CDB___os_open(real_name, oflags, mode, fhp);
|
|
|
|
/*
|
|
* Be quiet if we couldn't open the file because it didn't exist,
|
|
* the customers don't like those messages appearing in the logs.
|
|
* Otherwise, complain loudly.
|
|
*/
|
|
if (ret != 0) {
|
|
if (ret == ENOENT)
|
|
goto err;
|
|
goto err_msg;
|
|
}
|
|
|
|
/* Set the page size if we don't have one yet. */
|
|
if (dbp->pgsize == 0 &&
|
|
(ret = CDB___db_set_pgsize(dbp, fhp, real_name)) != 0)
|
|
goto err;
|
|
|
|
/*
|
|
* Seek to the metadata offset; if it's a master database open or a
|
|
* database without subdatabases, we're seeking to 0, but that's OK.
|
|
*/
|
|
if ((ret = CDB___os_seek(fhp,
|
|
dbp->pgsize, meta_pgno, 0, 0, DB_OS_SEEK_SET)) != 0)
|
|
goto err_msg;
|
|
|
|
/*
|
|
* Read the metadata page. We read 256 bytes, which is larger than
|
|
* any access method's metadata page and smaller than any disk sector.
|
|
*/
|
|
if ((ret = CDB___os_read(fhp, mbuf, sizeof(mbuf), &nr)) != 0)
|
|
goto err_msg;
|
|
|
|
if (nr == sizeof(mbuf)) {
|
|
/*
|
|
* Figure out what access method we're dealing with, and then
|
|
* call access method specific code to check error conditions
|
|
* based on conflicts between the found file and application
|
|
* arguments. A found file overrides some user information --
|
|
* we don't consider it an error, for example, if the user set
|
|
* an expected byte order and the found file doesn't match it.
|
|
*/
|
|
F_CLR(dbp, DB_AM_SWAP);
|
|
magic = ((DBMETA *)mbuf)->magic;
|
|
|
|
swap_retry: switch (magic) {
|
|
case DB_BTREEMAGIC:
|
|
if ((ret =
|
|
CDB___bam_metachk(dbp, name, (BTMETA *)mbuf)) != 0)
|
|
goto err;
|
|
break;
|
|
case DB_HASHMAGIC:
|
|
if ((ret =
|
|
CDB___ham_metachk(dbp, name, (HMETA *)mbuf)) != 0)
|
|
goto err;
|
|
break;
|
|
case DB_QAMMAGIC:
|
|
if ((ret =
|
|
CDB___qam_metachk(dbp, name, (QMETA *)mbuf)) != 0)
|
|
goto err;
|
|
break;
|
|
case 0:
|
|
/*
|
|
* There are two ways we can get a 0 magic number.
|
|
* If we're creating a subdatabase, then the magic
|
|
* number will be 0. We allocate a page as part of
|
|
* finding out what the base page number will be for
|
|
* the new subdatabase, but it's not initialized in
|
|
* any way.
|
|
*
|
|
* The second case happens if we are in recovery
|
|
* and we are going to recreate a database, it's
|
|
* possible that it's page was created (on systems
|
|
* where pages must be created explicitly to avoid
|
|
* holes in files) but is still 0.
|
|
*/
|
|
if (IS_SUBDB_SETUP) /* Case 1 */
|
|
goto empty;
|
|
|
|
if (!LF_ISSET(DB_CREATE | DB_TRUNCATE)) { /* Case 2 */
|
|
*zerop = 1;
|
|
goto empty;
|
|
}
|
|
goto bad_format;
|
|
default:
|
|
if (F_ISSET(dbp, DB_AM_SWAP))
|
|
goto bad_format;
|
|
|
|
M_32_SWAP(magic);
|
|
F_SET(dbp, DB_AM_SWAP);
|
|
goto swap_retry;
|
|
}
|
|
} else {
|
|
/*
|
|
* Only newly created files are permitted to fail magic
|
|
* number tests.
|
|
*/
|
|
if (nr != 0 || IS_SUBDB_SETUP)
|
|
goto bad_format;
|
|
|
|
|
|
/* Let the caller know that we had a 0-length file. */
|
|
if (!LF_ISSET(DB_CREATE | DB_TRUNCATE))
|
|
*zerop = 1;
|
|
|
|
/*
|
|
* The only way we can reach here with the DB_CREATE flag set
|
|
* is if we created the file. If that's not the case, then
|
|
* either (a) someone else created the file but has not yet
|
|
* written out the metadata page, or (b) we truncated the file
|
|
* (DB_TRUNCATE) leaving it zero-length. In the case of (a),
|
|
* we want to sleep and give the file creator time to write
|
|
* the metadata page. In the case of (b), we want to continue.
|
|
*
|
|
* !!!
|
|
* There's a race in the case of two processes opening the file
|
|
* with the DB_TRUNCATE flag set at roughly the same time, and
|
|
* they could theoretically hurt each other. Sure hope that's
|
|
* unlikely.
|
|
*/
|
|
if (!LF_ISSET(DB_CREATE | DB_TRUNCATE) &&
|
|
(dbenv->lg_handle == NULL ||
|
|
!F_ISSET((DB_LOG *)dbenv->lg_handle, DBC_RECOVER))) {
|
|
if (retry_cnt++ < 3) {
|
|
CDB___os_sleep(1, 0);
|
|
goto open_retry;
|
|
}
|
|
bad_format: CDB___db_err(dbenv,
|
|
"%s: unexpected file type or format", name);
|
|
ret = EINVAL;
|
|
goto err;
|
|
}
|
|
if (dbp->type == DB_UNKNOWN) {
|
|
CDB___db_err(dbenv,
|
|
"%s: DB_UNKNOWN type specified with empty file",
|
|
name);
|
|
ret = EINVAL;
|
|
goto err;
|
|
}
|
|
|
|
empty: /*
|
|
* The file is empty, and that's OK. If it's not a subdatabase,
|
|
* though, we do need to generate a unique file ID for it. The
|
|
* unique file ID includes a timestampe so that we can't collide
|
|
* with any other files, even when the file IDs (dev/inode pair)
|
|
* are reused.
|
|
*/
|
|
if (*zerop == 1)
|
|
memset(dbp->fileid, 0, DB_FILE_ID_LEN);
|
|
else if (!IS_SUBDB_SETUP &&
|
|
(ret = CDB___os_fileid(dbenv, real_name, 1, dbp->fileid)) != 0)
|
|
goto err_msg;
|
|
}
|
|
|
|
if (0) {
|
|
err_msg: CDB___db_err(dbenv, "%s: %s", name, CDB_db_strerror(ret));
|
|
}
|
|
|
|
/*
|
|
* Abort any running transaction -- it can only exist if something
|
|
* went wrong.
|
|
*/
|
|
err: if (txn != NULL)
|
|
(void)CDB_txn_abort(txn);
|
|
|
|
DB_TEST_RECOVERY_LABEL
|
|
/*
|
|
* If we opened a file handle and our caller is doing fcntl(2) locking,
|
|
* then we can't close it because that would discard the caller's lock.
|
|
* Otherwise, close the handle.
|
|
*/
|
|
if (F_ISSET(fhp, DB_FH_VALID)) {
|
|
if (ret == 0 && LF_ISSET(DB_FCNTL_LOCKING))
|
|
dbp->saved_open_fhp = fhp;
|
|
else
|
|
if ((t_ret = CDB___os_closehandle(fhp)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
}
|
|
|
|
if (real_name != NULL)
|
|
CDB___os_freestr(real_name);
|
|
|
|
return (ret);
|
|
}
|
|
|
|
/*
|
|
* CDB___db_set_pgsize --
|
|
* Set the page size based on file information.
|
|
*/
|
|
static int
|
|
CDB___db_set_pgsize(dbp, fhp, name)
|
|
DB *dbp;
|
|
DB_FH *fhp;
|
|
char *name;
|
|
{
|
|
DB_ENV *dbenv;
|
|
u_int32_t iopsize;
|
|
int ret;
|
|
|
|
dbenv = dbp->dbenv;
|
|
|
|
/*
|
|
* Use the filesystem's optimum I/O size as the pagesize if a pagesize
|
|
* not specified. Some filesystems have 64K as their optimum I/O size,
|
|
* but as that results in fairly large default caches, we limit the
|
|
* default pagesize to 16K.
|
|
*/
|
|
if ((ret = CDB___os_ioinfo(name, fhp, NULL, NULL, &iopsize)) != 0) {
|
|
CDB___db_err(dbenv, "%s: %s", name, CDB_db_strerror(ret));
|
|
return (ret);
|
|
}
|
|
if (iopsize < 512)
|
|
iopsize = 512;
|
|
if (iopsize > 16 * 1024)
|
|
iopsize = 16 * 1024;
|
|
|
|
/*
|
|
* If compression is on, the minimum page size must be multiplied
|
|
* by the compression factor.
|
|
*/
|
|
#ifdef HAVE_LIBZ
|
|
if(F_ISSET(dbp, DB_AM_CMPR)) {
|
|
if(iopsize < DB_CMPR_MULTIPLY(dbenv, DB_MIN_PGSIZE))
|
|
iopsize = DB_CMPR_MULTIPLY(dbenv, DB_MIN_PGSIZE);
|
|
}
|
|
#endif /* HAVE_LIBZ */
|
|
|
|
/*
|
|
* Sheer paranoia, but we don't want anything that's not a power-of-2
|
|
* (we rely on that for alignment of various types on the pages), and
|
|
* we want a multiple of the sector size as well.
|
|
*/
|
|
OS_ROUNDOFF(iopsize, 512);
|
|
|
|
dbp->pgsize = iopsize;
|
|
F_SET(dbp, DB_AM_PGDEF);
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* CDB___db_close --
|
|
* DB destructor.
|
|
*
|
|
* PUBLIC: int CDB___db_close __P((DB *, u_int32_t));
|
|
*/
|
|
int
|
|
CDB___db_close(dbp, flags)
|
|
DB *dbp;
|
|
u_int32_t flags;
|
|
{
|
|
DB_ENV *dbenv;
|
|
DBC *dbc;
|
|
int ret, t_ret;
|
|
|
|
ret = 0;
|
|
|
|
PANIC_CHECK(dbp->dbenv);
|
|
|
|
/* Validate arguments. */
|
|
if ((ret = CDB___db_closechk(dbp, flags)) != 0)
|
|
return (ret);
|
|
|
|
/* If never opened, or not currently open, it's easy. */
|
|
if (!F_ISSET((dbp), DB_OPEN_CALLED))
|
|
goto never_opened;
|
|
|
|
/* Sync the underlying access method. */
|
|
if (!LF_ISSET(DB_NOSYNC) && !F_ISSET(dbp, DB_AM_DISCARD) &&
|
|
(t_ret = dbp->sync(dbp, 0)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
|
|
/*
|
|
* Go through the active cursors and call the cursor recycle routine,
|
|
* which resolves pending operations and moves the cursors onto the
|
|
* free list. Then, walk the free list and call the cursor destroy
|
|
* routine.
|
|
*/
|
|
while ((dbc = TAILQ_FIRST(&dbp->active_queue)) != NULL)
|
|
if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
while ((dbc = TAILQ_FIRST(&dbp->free_queue)) != NULL)
|
|
if ((t_ret = CDB___db_c_destroy(dbc)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
|
|
/* Sync the memory pool. */
|
|
if (!LF_ISSET(DB_NOSYNC) && !F_ISSET(dbp, DB_AM_DISCARD) &&
|
|
(t_ret = CDB_memp_fsync(dbp->mpf)) != 0 &&
|
|
t_ret != DB_INCOMPLETE && ret == 0)
|
|
ret = t_ret;
|
|
|
|
/* Close any handle we've been holding since the open. */
|
|
if (dbp->saved_open_fhp != NULL &&
|
|
F_ISSET(dbp->saved_open_fhp, DB_FH_VALID) &&
|
|
(t_ret = CDB___os_closehandle(dbp->saved_open_fhp)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
|
|
never_opened:
|
|
/*
|
|
* Call the access specific close function.
|
|
*
|
|
* !!!
|
|
* Because of where the function is called in the close process,
|
|
* these routines can't do anything that would dirty pages or
|
|
* otherwise affect closing down the database.
|
|
*/
|
|
if ((t_ret = CDB___ham_db_close(dbp)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
if ((t_ret = CDB___bam_db_close(dbp)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
if ((t_ret = CDB___qam_db_close(dbp)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
|
|
/* Refresh the structure and close any local environment. */
|
|
dbenv = dbp->dbenv;
|
|
if ((t_ret = CDB___db_refresh(dbp)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
if (F_ISSET(dbenv, DB_ENV_DBLOCAL) &&
|
|
(t_ret = dbenv->close(dbenv, 0)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
|
|
memset(dbp, CLEAR_BYTE, sizeof(*dbp));
|
|
CDB___os_free(dbp, sizeof(*dbp));
|
|
|
|
return (ret);
|
|
}
|
|
|
|
/*
|
|
* CDB___db_refresh --
|
|
* Refresh the DB structure, releasing any allocated resources.
|
|
*/
|
|
static int
|
|
CDB___db_refresh(dbp)
|
|
DB *dbp;
|
|
{
|
|
DB_ENV *dbenv;
|
|
int ret, t_ret;
|
|
|
|
ret = 0;
|
|
|
|
dbenv = dbp->dbenv;
|
|
|
|
dbp->type = 0;
|
|
|
|
/* Close the memory pool file handle. */
|
|
if (dbp->mpf != NULL) {
|
|
if (F_ISSET(dbp, DB_AM_DISCARD))
|
|
(void)CDB___memp_fremove(dbp->mpf);
|
|
if ((t_ret = CDB_memp_fclose(dbp->mpf)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
dbp->mpf = NULL;
|
|
}
|
|
|
|
/* Discard the thread mutex. */
|
|
if (dbp->mutexp != NULL) {
|
|
CDB___db_mutex_free(dbenv, dbenv->reginfo, dbp->mutexp);
|
|
dbp->mutexp = NULL;
|
|
}
|
|
|
|
/* Discard the log file id. */
|
|
if (dbp->log_fileid != DB_LOGFILEID_INVALID) {
|
|
(void)CDB_log_unregister(dbenv, dbp->log_fileid);
|
|
dbp->log_fileid = DB_LOGFILEID_INVALID;
|
|
}
|
|
|
|
TAILQ_INIT(&dbp->free_queue);
|
|
TAILQ_INIT(&dbp->active_queue);
|
|
|
|
F_CLR(dbp, DB_AM_DISCARD);
|
|
F_CLR(dbp, DB_AM_INMEM);
|
|
F_CLR(dbp, DB_AM_RDONLY);
|
|
F_CLR(dbp, DB_AM_SWAP);
|
|
F_CLR(dbp, DB_DBM_ERROR);
|
|
F_CLR(dbp, DB_OPEN_CALLED);
|
|
|
|
return (ret);
|
|
}
|
|
|
|
/*
|
|
* CDB___db_remove
|
|
* Remove method for DB.
|
|
*
|
|
* PUBLIC: int CDB___db_remove __P((DB *, const char *, const char *, u_int32_t));
|
|
*/
|
|
int
|
|
CDB___db_remove(dbp, name, subdb, flags)
|
|
DB *dbp;
|
|
const char *name, *subdb;
|
|
u_int32_t flags;
|
|
{
|
|
DBT namedbt;
|
|
DB_ENV *dbenv;
|
|
DB_LOCK remove_lock;
|
|
DB_LSN newlsn;
|
|
int ret, t_ret;
|
|
char *backup, *real_back, *real_name;
|
|
|
|
dbenv = dbp->dbenv;
|
|
ret = 0;
|
|
backup = real_back = real_name = NULL;
|
|
|
|
PANIC_CHECK(dbenv);
|
|
DB_ILLEGAL_AFTER_OPEN(dbp, "remove");
|
|
|
|
/* Validate arguments. */
|
|
if ((ret = CDB___db_removechk(dbp, flags)) != 0)
|
|
return (ret);
|
|
|
|
/*
|
|
* Subdatabases.
|
|
*/
|
|
if (subdb != NULL) {
|
|
/* Subdatabases must be created in named files. */
|
|
if (name == NULL) {
|
|
CDB___db_err(dbenv,
|
|
"subdatabases cannot be created in temporary files");
|
|
return (EINVAL);
|
|
}
|
|
return (CDB___db_subdb_remove(dbp, name, subdb));
|
|
}
|
|
|
|
/* Start the transaction and log the delete. */
|
|
if (F_ISSET(dbenv, DB_ENV_TXN)) {
|
|
if ((ret = CDB___db_metabegin(dbp, &remove_lock)) != 0)
|
|
return (ret);
|
|
|
|
memset(&namedbt, 0, sizeof(namedbt));
|
|
namedbt.data = (char *)name;
|
|
namedbt.size = strlen(name) + 1;
|
|
|
|
if ((ret = CDB___crdel_delete_log(dbenv,
|
|
dbp->open_txn, &newlsn, DB_FLUSH, &namedbt)) != 0) {
|
|
CDB___db_err(dbenv,
|
|
"%s: %s", name, CDB_db_strerror(ret));
|
|
goto err;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* XXX
|
|
* We need to open the file and call CDB___memp_fremove on the mpf. I'm
|
|
* not sure that we need to do this. Is it our responsibility or the
|
|
* application's responsibility to make sure someone else isn't busily
|
|
* deleting pages behind our backs?
|
|
*/
|
|
|
|
/* Find the real name of the file. */
|
|
if ((ret = CDB___db_appname(dbenv,
|
|
DB_APP_DATA, NULL, name, 0, NULL, &real_name)) != 0)
|
|
goto err;
|
|
|
|
/* Create name for backup file. */
|
|
if ((ret = CDB___db_backup_name(name, &backup, &newlsn)) != 0)
|
|
goto err;
|
|
if ((ret = CDB___db_appname(dbenv,
|
|
DB_APP_DATA, NULL, backup, 0, NULL, &real_back)) != 0)
|
|
goto err;
|
|
|
|
DB_TEST_RECOVERY(dbp, DB_TEST_PRERENAME, ret, name);
|
|
ret = CDB___os_rename(real_name, real_back);
|
|
DB_TEST_RECOVERY(dbp, DB_TEST_POSTRENAME, ret, name);
|
|
|
|
err:
|
|
DB_TEST_RECOVERY_LABEL
|
|
/*
|
|
* End the transaction, committing the transaction if we were
|
|
* successful, aborting otherwise.
|
|
*/
|
|
if (dbp->open_txn != NULL && (t_ret = CDB___db_metaend(dbp, &remove_lock,
|
|
ret == 0, CDB___db_remove_callback, real_back)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
|
|
if (real_name != NULL)
|
|
CDB___os_freestr(real_name);
|
|
if (backup != NULL)
|
|
CDB___os_freestr(backup);
|
|
|
|
return (ret);
|
|
}
|
|
|
|
/*
|
|
* CDB___db_subdb_remove --
|
|
* Remove a subdatabase.
|
|
*/
|
|
static int
|
|
CDB___db_subdb_remove(dbp, name, subdb)
|
|
DB *dbp;
|
|
const char *name, *subdb;
|
|
{
|
|
DB *mdbp;
|
|
DBC *dbc;
|
|
DB_ENV *dbenv;
|
|
DB_LOCK remove_lock;
|
|
db_pgno_t meta_pgno;
|
|
int ret, t_ret;
|
|
|
|
mdbp = NULL;
|
|
dbc = NULL;
|
|
dbenv = dbp->dbenv;
|
|
|
|
/* Start the transaction. */
|
|
if (F_ISSET(dbenv, DB_ENV_TXN) &&
|
|
(ret = CDB___db_metabegin(dbp, &remove_lock)) != 0)
|
|
return (ret);
|
|
|
|
/*
|
|
* Open the subdatabase. We can use the user's DB handle for this
|
|
* purpose, I think.
|
|
*/
|
|
if ((ret = CDB___db_open(dbp, name, subdb, DB_UNKNOWN, 0, 0)) != 0)
|
|
goto err;
|
|
|
|
/* Free up the pages in the subdatabase. */
|
|
switch (dbp->type) {
|
|
case DB_BTREE:
|
|
case DB_RECNO:
|
|
if ((ret = CDB___bam_reclaim(dbp, dbp->open_txn)) != 0)
|
|
goto err;
|
|
break;
|
|
case DB_HASH:
|
|
if ((ret = CDB___ham_reclaim(dbp, dbp->open_txn)) != 0)
|
|
goto err;
|
|
break;
|
|
default:
|
|
ret = EINVAL; /* Shouldn't be possible. */
|
|
goto err;
|
|
}
|
|
|
|
/*
|
|
* Remove the entry from the main database and free the subdatabase
|
|
* metadata page.
|
|
*/
|
|
if ((ret = CDB___db_master_open(dbp->dbenv,
|
|
dbp->open_txn, name, 0, 0, &mdbp)) != 0)
|
|
goto err;
|
|
|
|
if ((ret = CDB___db_master_update(mdbp,
|
|
subdb, dbp->type, &meta_pgno, 1, 0)) != 0)
|
|
goto err;
|
|
|
|
|
|
err: /*
|
|
* End the transaction, committing the transaction if we were
|
|
* successful, aborting otherwise.
|
|
*/
|
|
if (dbp->open_txn != NULL && (t_ret = CDB___db_metaend(dbp,
|
|
&remove_lock, ret == 0, NULL, NULL)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
|
|
/*
|
|
* Close the user's DB handle -- do this LAST to avoid smashing the
|
|
* the transaction information.
|
|
*/
|
|
if ((t_ret = dbp->close(dbp, 0)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
|
|
if (mdbp != NULL && (t_ret = mdbp->close(mdbp, 0)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
|
|
return (ret);
|
|
}
|
|
|
|
/*
|
|
* CDB___db_metabegin --
|
|
*
|
|
* Begin a meta-data operation. This involves doing any required locking,
|
|
* potentially beginning a transaction and then telling the caller if you
|
|
* did or did not begin the transaction.
|
|
*
|
|
* The writing flag indicates if the caller is actually allowing creates
|
|
* or doing deletes (i.e., if the caller is opening and not creating, then
|
|
* we don't need to do any of this).
|
|
*/
|
|
static int
|
|
CDB___db_metabegin(dbp, lockp)
|
|
DB *dbp;
|
|
DB_LOCK *lockp;
|
|
{
|
|
DB_ENV *dbenv;
|
|
DBT dbplock;
|
|
u_int32_t locker, lockval;
|
|
int ret;
|
|
|
|
dbenv = dbp->dbenv;
|
|
|
|
lockp->off = LOCK_INVALID;
|
|
|
|
/*
|
|
* There is no single place where we can know that we are or are not
|
|
* going to be creating any files and/or subdatabases, so we will
|
|
* always begin a tranasaction when we start creating one. If we later
|
|
* discover that this was unnecessary, we will abort the transaction.
|
|
* Recovery is written so that if we log a file create, but then
|
|
* discover that we didn't have to do it, we recover correctly. The
|
|
* file recovery design document has details.
|
|
*
|
|
* We need to single thread all create and delete operations, so if we
|
|
* are running with locking, we must obtain a lock. We use CDB_lock_id to
|
|
* generate a unique locker id and use a handcrafted DBT as the object
|
|
* on which we are locking.
|
|
*/
|
|
if (F_ISSET(dbenv, DB_ENV_LOCKING | DB_ENV_CDB)) {
|
|
if ((ret = CDB_lock_id(dbenv, &locker)) != 0)
|
|
return (ret);
|
|
lockval = 0;
|
|
dbplock.data = &lockval;
|
|
dbplock.size = sizeof(lockval);
|
|
if ((ret = CDB_lock_get(dbenv,
|
|
locker, 0, &dbplock, DB_LOCK_WRITE, lockp)) != 0)
|
|
return(ret);
|
|
}
|
|
|
|
return (CDB_txn_begin(dbenv, NULL, &dbp->open_txn, 0));
|
|
}
|
|
|
|
/*
|
|
* CDB___db_metaend --
|
|
* End a meta-data operation.
|
|
*/
|
|
static int
|
|
CDB___db_metaend(dbp, lockp, commit, callback, cookie)
|
|
DB *dbp;
|
|
DB_LOCK *lockp;
|
|
int commit, (*callback) __P((DB *, void *));
|
|
void *cookie;
|
|
{
|
|
DB_ENV *dbenv;
|
|
int ret, t_ret;
|
|
|
|
dbenv = dbp->dbenv;
|
|
|
|
/* End the transaction. */
|
|
if (commit) {
|
|
if ((ret = CDB_txn_commit(dbp->open_txn, DB_TXN_SYNC)) == 0) {
|
|
/*
|
|
* Unlink any underlying file, we've committed the
|
|
* transaction.
|
|
*/
|
|
if (callback != NULL)
|
|
ret = callback(dbp, cookie);
|
|
}
|
|
} else
|
|
ret = CDB_txn_abort(dbp->open_txn);
|
|
|
|
/* Release our lock. */
|
|
if (lockp->off != LOCK_INVALID &&
|
|
(t_ret = CDB_lock_put(dbenv, lockp)) != 0 && ret == 0)
|
|
ret = t_ret;
|
|
|
|
return (ret);
|
|
}
|
|
|
|
/*
|
|
* CDB___db_backup_name
|
|
* Create the backup file name for a given file.
|
|
*
|
|
* PUBLIC: int CDB___db_backup_name __P((const char *, char **, DB_LSN *));
|
|
*/
|
|
#undef BACKUP_PREFIX
|
|
#define BACKUP_PREFIX "__db."
|
|
|
|
#undef MAX_LSN_TO_TEXT
|
|
#define MAX_LSN_TO_TEXT 21
|
|
int
|
|
CDB___db_backup_name(name, backup, lsn)
|
|
const char *name;
|
|
char **backup;
|
|
DB_LSN *lsn;
|
|
{
|
|
size_t len;
|
|
int ret;
|
|
char *retp;
|
|
|
|
len = strlen(name) + strlen(BACKUP_PREFIX) + MAX_LSN_TO_TEXT + 1;
|
|
|
|
if ((ret = CDB___os_malloc(len, NULL, &retp)) != 0)
|
|
return (ret);
|
|
|
|
/*
|
|
* Create the name. Backup file names are of the form:
|
|
*
|
|
* __db.name.0x[lsn-file].0x[lsn-offset]
|
|
*
|
|
* which guarantees uniqueness.
|
|
*/
|
|
snprintf(retp, len,
|
|
"%s%s.0x%x0x%x", BACKUP_PREFIX, name, lsn->file, lsn->offset);
|
|
|
|
*backup = retp;
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* CDB___db_remove_callback --
|
|
* Callback function -- on file remove commit, it unlinks the backing
|
|
* file.
|
|
*/
|
|
static int
|
|
CDB___db_remove_callback(dbp, cookie)
|
|
DB *dbp;
|
|
void *cookie;
|
|
{
|
|
COMPQUIET(dbp, NULL);
|
|
|
|
return (CDB___os_unlink(cookie));
|
|
}
|
|
|
|
#if CONFIG_TEST
|
|
/*
|
|
* __db_testcopy
|
|
* Create a copy of all backup files and our "main" DB.
|
|
*
|
|
* PUBLIC: int __db_testcopy __P((DB *, const char *));
|
|
*/
|
|
int
|
|
__db_testcopy(dbp, name)
|
|
DB *dbp;
|
|
const char *name;
|
|
{
|
|
size_t len;
|
|
int dircnt, i, ret;
|
|
char **namesp, *backup, *copy, *dir, *p, *real_name;
|
|
|
|
real_name = NULL;
|
|
/* Get the real backing file name. */
|
|
if ((ret = CDB___db_appname(dbp->dbenv,
|
|
DB_APP_DATA, NULL, name, 0, NULL, &real_name)) != 0)
|
|
return (ret);
|
|
|
|
/*
|
|
* Maximum size of file, including adding a ".afterop".
|
|
*/
|
|
len = strlen(real_name) + strlen(BACKUP_PREFIX) + MAX_LSN_TO_TEXT + 9;
|
|
|
|
if ((ret = CDB___os_malloc(len, NULL, ©)) != 0)
|
|
goto out;
|
|
|
|
if ((ret = CDB___os_malloc(len, NULL, &backup)) != 0)
|
|
goto out;
|
|
|
|
/*
|
|
* First copy the file itself.
|
|
*/
|
|
snprintf(copy, len, "%s.afterop", real_name);
|
|
__db_makecopy(real_name, copy);
|
|
|
|
if ((ret = CDB___os_strdup(real_name, &dir)) != 0)
|
|
goto out;
|
|
CDB___os_freestr(real_name);
|
|
real_name = NULL;
|
|
/*
|
|
* Create the name. Backup file names are of the form:
|
|
*
|
|
* __db.name.0x[lsn-file].0x[lsn-offset]
|
|
*
|
|
* which guarantees uniqueness. We want to look for the
|
|
* backup name, followed by a '.0x' (so that if they have
|
|
* files named, say, 'a' and 'abc' we won't match 'abc' when
|
|
* looking for 'a'.
|
|
*/
|
|
snprintf(backup, len, "%s%s.0x", BACKUP_PREFIX, name);
|
|
|
|
/*
|
|
* We need the directory path to do the CDB___os_dirlist.
|
|
*/
|
|
p = CDB___db_rpath(dir);
|
|
if (p != NULL)
|
|
*p = '\0';
|
|
ret = CDB___os_dirlist(dir, &namesp, &dircnt);
|
|
#if DIAGNOSTIC
|
|
/*
|
|
* XXX
|
|
* To get the memory guard code to work because
|
|
* it uses strlen and we just moved the end of the
|
|
* string somewhere sooner. This causes the guard
|
|
* code to fail as it looks at one byte past the end
|
|
* of the string.
|
|
* XXX
|
|
*/
|
|
*p = '/';
|
|
#endif
|
|
CDB___os_freestr(dir);
|
|
if (ret != 0)
|
|
goto out;
|
|
for (i = 0; i < dircnt; i++) {
|
|
/*
|
|
* Need to check if it is a backup file for this.
|
|
* No idea what namesp[i] may be or how long, so
|
|
* must use strncmp and not memcmp. We don't want
|
|
* to use strcmp either because we are only matching
|
|
* the first part of the real file's name. We don't
|
|
* know its LSN's.
|
|
*/
|
|
if (strncmp(namesp[i], backup, strlen(backup)) == 0) {
|
|
if ((ret = CDB___db_appname(dbp->dbenv, DB_APP_DATA,
|
|
NULL, namesp[i], 0, NULL, &real_name)) != 0)
|
|
goto out;
|
|
|
|
/*
|
|
* This should not happen. Check that old
|
|
* .afterop files aren't around.
|
|
* If so, just move on.
|
|
*/
|
|
if (strstr(real_name, ".afterop") != NULL) {
|
|
CDB___os_freestr(real_name);
|
|
real_name = NULL;
|
|
continue;
|
|
}
|
|
snprintf(copy, len, "%s.afterop", real_name);
|
|
__db_makecopy(real_name, copy);
|
|
CDB___os_freestr(real_name);
|
|
real_name = NULL;
|
|
}
|
|
}
|
|
out:
|
|
if (real_name)
|
|
CDB___os_freestr(real_name);
|
|
return (ret);
|
|
}
|
|
|
|
static void
|
|
__db_makecopy(src, dest)
|
|
const char *src, *dest;
|
|
{
|
|
DB_FH rfh, wfh;
|
|
ssize_t rcnt, wcnt;
|
|
char *buf;
|
|
|
|
memset(&rfh, 0, sizeof(rfh));
|
|
memset(&wfh, 0, sizeof(wfh));
|
|
|
|
if (CDB___os_malloc(1024, NULL, &buf) != 0)
|
|
return;
|
|
|
|
if (CDB___os_open(src, DB_OSO_RDONLY, CDB___db_omode("rw----"), &rfh) != 0)
|
|
goto err;
|
|
if (CDB___os_open(dest,
|
|
DB_OSO_CREATE | DB_OSO_TRUNC, CDB___db_omode("rw----"), &wfh) != 0)
|
|
goto err;
|
|
|
|
for (;;)
|
|
if (CDB___os_read(&rfh, buf, 1024, &rcnt) < 0 || rcnt == 0 ||
|
|
CDB___os_write(&wfh, buf, rcnt, &wcnt) < 0 || wcnt != rcnt)
|
|
break;
|
|
|
|
err: CDB___os_free(buf, 1024);
|
|
if (F_ISSET(&rfh, DB_FH_VALID))
|
|
CDB___os_closehandle(&rfh);
|
|
if (F_ISSET(&wfh, DB_FH_VALID))
|
|
CDB___os_closehandle(&wfh);
|
|
}
|
|
#endif
|