You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
321 lines
9.7 KiB
321 lines
9.7 KiB
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997, 1998, 1999
 *	Sleepycat Software.  All rights reserved.
 *
 *	@(#)mp.h	11.3 (Sleepycat) 10/6/99
 */

/*
 * Forward declarations of the memory pool structures defined below, each
 * paired with the short typedef name used throughout the mpool code.
 */
struct __bh;		typedef struct __bh BH;
struct __db_mpool;	typedef struct __db_mpool DB_MPOOL;
struct __db_mpreg;	typedef struct __db_mpreg DB_MPREG;
struct __mcache;	typedef struct __mcache MCACHE;
struct __mpool;		typedef struct __mpool MPOOL;
struct __mpoolfile;	typedef struct __mpoolfile MPOOLFILE;
struct __cmpr;		typedef struct __cmpr CMPR;
struct __cmpr_context;	typedef struct __cmpr_context CMPR_CONTEXT;

/* We require at least 20K of cache. */
#define	DB_CACHESIZE_MIN	(20 * 1024)

/*
 * By default, environments have room for 500 files.
 */
#define	DB_MPOOLFILE_DEF	500

/*
|
|
* DB_MPOOL --
|
|
* Per-process memory pool structure.
|
|
*/
|
|
struct __db_mpool {
|
|
/* These fields need to be protected for multi-threaded support. */
|
|
MUTEX *mutexp; /* Structure thread lock. */
|
|
|
|
/* List of pgin/pgout routines. */
|
|
LIST_HEAD(__db_mpregh, __db_mpreg) dbregq;
|
|
|
|
/* List of DB_MPOOLFILE's. */
|
|
TAILQ_HEAD(__db_mpoolfileh, __db_mpoolfile) dbmfq;
|
|
|
|
/* These fields are not thread-protected. */
|
|
DB_ENV *dbenv; /* Reference to error information. */
|
|
|
|
REGINFO reginfo; /* Main shared region. */
|
|
|
|
int nc_reg; /* N underlying cache regions. */
|
|
REGINFO *c_reginfo; /* Underlying cache regions. */
|
|
|
|
/* I'm not sure if these need to be thread-protected... */
|
|
int recursion_level; /* limit recur'n from weak compr'n */
|
|
|
|
};
|
|
|
|
/*
|
|
* DB_MPREG --
|
|
* DB_MPOOL registry of pgin/pgout functions.
|
|
*/
|
|
struct __db_mpreg {
|
|
LIST_ENTRY(__db_mpreg) q; /* Linked list. */
|
|
|
|
int ftype; /* File type. */
|
|
/* Pgin, pgout routines. */
|
|
int (*pgin) __P((db_pgno_t, void *, DBT *));
|
|
int (*pgout) __P((db_pgno_t, void *, DBT *));
|
|
};
|
|
|
|
/*
|
|
* CMPR_CONTEXT --
|
|
* Shared compresssion information.
|
|
*/
|
|
struct __cmpr_context {
|
|
#define DB_CMPR_SUFFIX "_weakcmpr"
|
|
DB *weakcmpr; /* Free weakcmpr pages pool. */
|
|
};
|
|
|
|
/*
|
|
* DB_MPOOLFILE --
|
|
* Per-process DB_MPOOLFILE information.
|
|
*/
|
|
struct __db_mpoolfile {
|
|
/* These fields need to be protected for multi-threaded support. */
|
|
MUTEX *mutexp; /* Structure thread lock. */
|
|
|
|
DB_FH fh; /* Underlying file handle. */
|
|
|
|
u_int32_t ref; /* Reference count. */
|
|
|
|
/*
|
|
* !!!
|
|
* This field is a special case -- it's protected by the region lock
|
|
* NOT the thread lock. The reason for this is that we always have
|
|
* the region lock immediately before or after we modify the field,
|
|
* and we don't want to use the structure lock to protect it because
|
|
* then I/O (which is done with the structure lock held because of
|
|
* the race between the seek and write of the file descriptor) will
|
|
* block any other put/get calls using this DB_MPOOLFILE structure.
|
|
*/
|
|
u_int32_t pinref; /* Pinned block reference count. */
|
|
|
|
/*
|
|
* !!!
|
|
* This field is a special case -- it's protected by the region lock
|
|
* since it's manipulated only when new files are added to the list.
|
|
*/
|
|
TAILQ_ENTRY(__db_mpoolfile) q; /* Linked list of DB_MPOOLFILE's. */
|
|
|
|
/* These fields are not thread-protected. */
|
|
DB_MPOOL *dbmp; /* Overlying DB_MPOOL. */
|
|
MPOOLFILE *mfp; /* Underlying MPOOLFILE. */
|
|
|
|
void *addr; /* Address of mmap'd region. */
|
|
size_t len; /* Length of mmap'd region. */
|
|
|
|
/* These fields need to be protected for multi-threaded support. */
|
|
#define MP_READONLY 0x01 /* File is readonly. */
|
|
#define MP_UPGRADE 0x02 /* File descriptor is readwrite. */
|
|
#define MP_UPGRADE_FAIL 0x04 /* Upgrade wasn't possible. */
|
|
#define MP_CMPR 0x08 /* Transparent I/O compression. */
|
|
u_int32_t flags;
|
|
|
|
CMPR_CONTEXT cmpr_context; /* Shared compression information */
|
|
|
|
};
|
|
|
|
/*
 * NCACHE --
 *	Select a cache based on the page number.  This assumes accesses are
 *	uniform across pages, which is probably OK -- what we really want to
 *	avoid is anything that puts all the pages for any single file in the
 *	same cache, as we expect that file access will be bursty.
 *
 *	mp is cast to MPOOL * and the result is pgno modulo its nc_reg
 *	field.  The argument is fully parenthesized before the cast so that
 *	a compound expression (pointer arithmetic, ?:, ...) is cast as a
 *	whole rather than only its first operand.
 */
#define	NCACHE(mp, pgno)						\
	((pgno) % ((MPOOL *)(mp))->nc_reg)

/*
 * NBUCKET --
 *	We make the assumption that early pages of the file are more likely
 *	to be retrieved than the later pages, which means the top bits will
 *	be more interesting for hashing as they're less likely to collide.
 *	That said, 512 8K pages represent a 4MB file, so only reasonably
 *	large files will have page numbers with any other than the bottom 9
 *	bits set.  We XOR in the MPOOL offset of the MPOOLFILE that backs the
 *	page, since that should also be unique for the page.  We don't want
 *	to do anything very fancy -- speed is more important to us than using
 *	good hashing.
 *
 *	The result is (pgno ^ (mf_offset << 9)) modulo mc->htab_buckets.
 */
#define	NBUCKET(mc, mf_offset, pgno)					\
	(((pgno) ^ ((mf_offset) << 9)) % (mc)->htab_buckets)

/*
|
|
* MPOOL --
|
|
* Shared memory pool region. One of these is allocated in shared
|
|
* memory, and describes the entire pool.
|
|
*/
|
|
struct __mpool {
|
|
SH_TAILQ_HEAD(__mpfq) mpfq; /* List of MPOOLFILEs. */
|
|
|
|
/*
|
|
* We single-thread CDB_memp_sync and CDB_memp_fsync calls.
|
|
*
|
|
* This mutex is intended *only* to single-thread access to the call,
|
|
* it is not used to protect the lsn and lsn_cnt fields, the region
|
|
* lock is used to protect them.
|
|
*/
|
|
MUTEX sync_mutex; /* Checkpoint lock. */
|
|
DB_LSN lsn; /* Maximum checkpoint LSN. */
|
|
u_int32_t lsn_cnt; /* Checkpoint buffers left to write. */
|
|
|
|
u_int32_t nc_reg; /* Number of underlying REGIONS. */
|
|
roff_t c_regids; /* Array of underlying REGION Ids. */
|
|
|
|
#define MP_LSN_RETRY 0x01 /* Retry all BH_WRITE buffers. */
|
|
u_int32_t flags;
|
|
|
|
/* HACK!! */
|
|
/* a pointers allocated for this structure is (erroneously?) used */
|
|
/* in CDB___memp_alloc() to refer to a MCACHE structure. Make sure */
|
|
/* the allocation is big enough. */
|
|
int dummy [100];
|
|
|
|
};
|
|
|
|
/*
|
|
* MCACHE --
|
|
* The memory pool may be broken up into individual pieces/files. Not
|
|
* what we would have liked, but on Solaris you can allocate only a
|
|
* little more than 2GB of memory in a single contiguous chunk, and I
|
|
* expect to see more systems with similar issues. An MCACHE structure
|
|
* describes a backing piece of memory used as a cache.
|
|
*/
|
|
struct __mcache {
|
|
SH_TAILQ_HEAD(__bhq) bhq; /* LRU list of buffer headers. */
|
|
|
|
int htab_buckets; /* Number of hash table entries. */
|
|
roff_t htab; /* Hash table offset. */
|
|
|
|
DB_MPOOL_STAT stat; /* Per-cache mpool statistics. */
|
|
};
|
|
|
|
/*
|
|
* MPOOLFILE --
|
|
* Shared DB_MPOOLFILE information.
|
|
*/
|
|
struct __mpoolfile {
|
|
SH_TAILQ_ENTRY q; /* List of MPOOLFILEs */
|
|
|
|
int ftype; /* File type. */
|
|
|
|
int32_t lsn_off; /* Page's LSN offset. */
|
|
u_int32_t clear_len; /* Bytes to clear on page create. */
|
|
|
|
roff_t path_off; /* File name location. */
|
|
roff_t fileid_off; /* File identification location. */
|
|
|
|
roff_t pgcookie_len; /* Pgin/pgout cookie length. */
|
|
roff_t pgcookie_off; /* Pgin/pgout cookie location. */
|
|
|
|
u_int32_t lsn_cnt; /* Checkpoint buffers left to write. */
|
|
|
|
db_pgno_t last_pgno; /* Last page in the file. */
|
|
db_pgno_t orig_last_pgno; /* Original last page in the file. */
|
|
|
|
#define MP_CAN_MMAP 0x01 /* If the file can be mmap'd. */
|
|
#define MP_REMOVED 0x02 /* Backing file has been removed. */
|
|
#define MP_TEMP 0x04 /* Backing file is a temporary. */
|
|
u_int32_t flags;
|
|
|
|
DB_MPOOL_FSTAT stat; /* Per-file mpool statistics. */
|
|
};
|
|
|
|
/*
 * BH_TO_CACHE --
 *	Return the cache where we can find the specified buffer header.
 *
 *	Indexes dbmp->c_reginfo with NCACHE() applied to the main region's
 *	primary pointer and the buffer's page number, and yields that
 *	entry's primary pointer.  The expansion is wrapped in parentheses so
 *	it behaves as a single operand wherever it is used.
 */
#define	BH_TO_CACHE(dbmp, bhp)						\
	((dbmp)->c_reginfo[NCACHE((dbmp)->reginfo.primary, (bhp)->pgno)].primary)

/*
 * DB_CMPR --
 *	Page compression information
 *
 * !!!
 * There is no need to keep the length of the data written
 * in the page since it's already encoded in the compressed
 * data.
 */

/*
 * Convert size to expected compressed size (DB_CMPR_DIVIDE) and back
 * (DB_CMPR_MULTIPLY) by shifting size right or left by the value returned
 * from CDB___memp_cmpr_coefficient(dbenv).
 */
#define	DB_CMPR_DIVIDE(dbenv, size)					\
	((size) >> CDB___memp_cmpr_coefficient(dbenv))
#define	DB_CMPR_MULTIPLY(dbenv, size)					\
	((size) << CDB___memp_cmpr_coefficient(dbenv))

struct __cmpr {
|
|
#define DB_CMPR_FIRST 0x01 /* Head of chain. */
|
|
#define DB_CMPR_INTERNAL 0x02 /* Weak compression data. */
|
|
#define DB_CMPR_CHAIN 0x04 /* More data in next page. */
|
|
#define DB_CMPR_FREE 0x08 /* Not in use. */
|
|
|
|
u_int16_t flags;
|
|
|
|
/*
|
|
* Filled if DB_CMPR_CHAIN set
|
|
*/
|
|
db_pgno_t next;
|
|
};
|
|
|
|
/*
 * Reserved information at the beginning of each compressed page
 */
#define	DB_CMPR_OVERHEAD	(sizeof(struct __cmpr))
/*
 * Size of IO page, without the reserved information.
 * io is a pointer to a structure with a pagesize member; the argument is
 * parenthesized so that any pointer expression may be passed.
 */
#define	DB_CMPR_PAGESIZE(io)	((io)->pagesize - DB_CMPR_OVERHEAD)
/*
 * Pointer to data within raw compressed buffer.
 * io is a pointer to a structure with a buf member; the argument is
 * parenthesized so that any pointer expression may be passed.
 */
#define	DB_CMPR_DATA(io)	((io)->buf + DB_CMPR_OVERHEAD)

/*
|
|
* BH --
|
|
* Buffer header.
|
|
*/
|
|
struct __bh {
|
|
MUTEX mutex; /* Buffer thread/process lock. */
|
|
|
|
u_int16_t ref; /* Reference count. */
|
|
|
|
#define BH_CALLPGIN 0x001 /* Page needs to be reworked... */
|
|
#define BH_DIRTY 0x002 /* Page was modified. */
|
|
#define BH_DISCARD 0x004 /* Page is useless. */
|
|
#define BH_LOCKED 0x008 /* Page is locked (I/O in progress). */
|
|
#define BH_TRASH 0x010 /* Page is garbage. */
|
|
#define BH_WRITE 0x020 /* Page scheduled for writing. */
|
|
#define BH_CMPR 0x040 /* Chain contains valid data. */
|
|
#define BH_CMPR_POOL 0x080 /* Chain allocated in pool. */
|
|
#define BH_CMPR_OS 0x100 /* Chain allocate with malloc. */
|
|
u_int16_t flags;
|
|
|
|
db_pgno_t *chain; /* Compression chain. */
|
|
|
|
SH_TAILQ_ENTRY q; /* LRU queue. */
|
|
SH_TAILQ_ENTRY hq; /* MPOOL hash bucket queue. */
|
|
|
|
db_pgno_t pgno; /* Underlying MPOOLFILE page number. */
|
|
roff_t mf_offset; /* Associated MPOOLFILE offset. */
|
|
|
|
/*
|
|
* !!!
|
|
* This array must be size_t aligned -- the DB access methods put PAGE
|
|
* and other structures into it, and expect to be able to access them
|
|
* directly. (We guarantee size_t alignment in the documentation too.)
|
|
*/
|
|
u_int8_t buf[1]; /* Variable length data. */
|
|
};
|
|
|
|
#include "mp_ext.h"
|
|
|