/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997, 1998, 1999
 *	Sleepycat Software.  All rights reserved.
 */
/*
 * Copyright (c) 1995, 1996
 *	Margo Seltzer.  All rights reserved.
 */
/*
 * Copyright (c) 1995, 1996
 *	The President and Fellows of Harvard University.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Margo Seltzer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)hash.src	10.14 (Sleepycat) 10/1/99
 */

/*
 * This is the source file used to create the logging functions for the
 * hash package.  Each access method (or set of routines wishing to register
 * record types with the transaction system) should have a file like this.
 * Each type of log record and its parameters is defined.  The basic
 * format of a record definition is:
 *
 * BEGIN	record_name
 * ARG|DBT|POINTER	variable_name	variable_type	printf_format
 * ...
 * END
 * ARG	the argument is a simple parameter of the type specified.
 * DBT	the argument is a DBT (db.h) containing a length and pointer.
 * POINTER	the argument is a pointer to the data type specified; the
 *	entire type should be logged.
 *
 * There is a set of shell scripts of the form xxx.sh that generate C
 * code and/or .h files to process these.  (This is probably better done
 * in a single Perl script, but for now, this works.)
 *
 * The DB recovery system requires that the following three fields appear
 * in every record; it will assign them to the per-record-type structures
 * as well as make them the first parameters to the appropriate logging
 * call.
 * rectype:	record type, identifies the structure and log/read call
 * txnid:	transaction id, a DBT in this implementation
 * prev:	the last LSN for this transaction
 */
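/*
 * For illustration only: a sketch of the kind of C interface the xxx.sh
 * scripts generate for a record definition, using the insdel record below
 * as the example.  The exact struct layout, handle argument and function
 * signature are assumptions based on the description above, not a copy of
 * the generated output.
 *
 *	typedef struct _ham_insdel_args {
 *		u_int32_t type;		rectype, identifies the record
 *		DB_TXN	 *txnid;	transaction id
 *		DB_LSN	  prev_lsn;	last LSN for this transaction
 *		u_int32_t opcode;
 *		int32_t	  fileid;
 *		db_pgno_t pgno;
 *		u_int32_t ndx;
 *		DB_LSN	  pagelsn;
 *		DBT	  key;
 *		DBT	  data;
 *	} __ham_insdel_args;
 *
 *	int __ham_insdel_log(DB_ENV *, DB_TXN *txnid, DB_LSN *ret_lsnp,
 *	    u_int32_t flags, u_int32_t opcode, int32_t fileid,
 *	    db_pgno_t pgno, u_int32_t ndx, DB_LSN *pagelsn,
 *	    const DBT *key, const DBT *data);
 *
 * Note that rectype, txnid and prev lead both the structure and the
 * logging call, as the recovery system requires.
 */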
/*
 * Use the argument of PREFIX as the prefix for all record types,
 * routines, id numbers, etc.
 */
PREFIX	ham

INCLUDE	#include "db_config.h"
INCLUDE
INCLUDE	#ifndef NO_SYSTEM_INCLUDES
INCLUDE	#include <sys/types.h>
INCLUDE
INCLUDE	#include
INCLUDE	#include
INCLUDE	#endif
INCLUDE
INCLUDE	#include "db_int.h"
INCLUDE	#include "db_page.h"
INCLUDE	#include "db_dispatch.h"
INCLUDE	#include "db_am.h"
INCLUDE	#include "hash.h"
INCLUDE	#include "txn.h"
INCLUDE

/*
 * HASH-insdel: used for hash to insert/delete a pair of entries onto a
 * master page.  The pair might be regular key/data pairs or they might be
 * the structures that refer to off page items, duplicates or offpage
 * duplicates.
 * opcode - PUTPAIR/DELPAIR + big masks
 * fileid - identifies the file referenced
 * pgno - page within file
 * ndx - index on the page of the item being added (item index)
 * pagelsn - lsn on the page before the update
 * key - the key being inserted
 * data - the data being inserted
 */
BEGIN insdel
ARG	opcode		u_int32_t	lu
ARG	fileid		int32_t		lu
ARG	pgno		db_pgno_t	lu
ARG	ndx		u_int32_t	lu
POINTER	pagelsn		DB_LSN *	lu
DBT	key		DBT		s
DBT	data		DBT		s
END

/*
 * Used to add and remove overflow pages.
 * prev_pgno is the previous page that is going to get modified to
 *	point to this one.  If this is the first page in a chain,
 *	then prev_pgno should be PGNO_INVALID.
 * new_pgno is the page being allocated.
 * next_pgno is the page that follows this one.  On allocation,
 *	this should be PGNO_INVALID.  For deletes, it may exist.
 * pagelsn is the old lsn on the page.
 */
BEGIN newpage
ARG	opcode		u_int32_t	lu
ARG	fileid		int32_t		lu
ARG	prev_pgno	db_pgno_t	lu
POINTER	prevlsn		DB_LSN *	lu
ARG	new_pgno	db_pgno_t	lu
POINTER	pagelsn		DB_LSN *	lu
ARG	next_pgno	db_pgno_t	lu
POINTER	nextlsn		DB_LSN *	lu
END

/*
 * THIS IS A DEPRECATED LOG MESSAGE.  IT IS BEING SUPERSEDED
 * BY metagroup, WHICH ALLOCATES A GROUP OF NEW PAGES.
 *
 * Splitting requires two types of log messages.  The first
 * logs the meta-data of the split.  The second logs the
 * data on the original page.  To redo the split, we have
 * to visit the new page (pages) and add the items back
 * on the page if they are not yet there.
 * For the meta-data split:
 *	bucket: max_bucket in table before split
 *	ovflpoint: overflow point before split.
 *	spares: spares[ovflpoint] before split.
 */
BEGIN splitmeta
ARG	fileid		int32_t		lu
ARG	bucket		u_int32_t	lu
ARG	ovflpoint	u_int32_t	lu
ARG	spares		u_int32_t	lu
POINTER	metalsn		DB_LSN *	lu
END

BEGIN splitdata
ARG	fileid		int32_t		lu
ARG	opcode		u_int32_t	lu
ARG	pgno		db_pgno_t	lu
DBT	pageimage	DBT		s
POINTER	pagelsn		DB_LSN *	lu
END

/*
 * HASH-replace: used for hash to handle partial puts that only
 * affect a single master page.
 * fileid - identifies the file referenced
 * pgno - page within file
 * ndx - index on the page of the item being modified (item index)
 * pagelsn - lsn on the page before the update
 * off - offset in the old item where the new item is going.
 * olditem - DBT that describes the part of the item being replaced.
 * newitem - DBT of the new item.
 * makedup - this was a replacement that made an item a duplicate.
 */
BEGIN replace
ARG	fileid		int32_t		lu
ARG	pgno		db_pgno_t	lu
ARG	ndx		u_int32_t	lu
POINTER	pagelsn		DB_LSN *	lu
ARG	off		int32_t		ld
DBT	olditem		DBT		s
DBT	newitem		DBT		s
ARG	makedup		u_int32_t	lu
END
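/*
 * For illustration only: a minimal sketch of the splice the replace record
 * above describes, assuming the item's bytes are addressable in memory and
 * the page has room for the new length.  This is not the actual recovery
 * code, which also has to adjust the page's entry offsets and LSN.
 *
 *	void
 *	splice(u_int8_t *item, u_int32_t len, u_int32_t off,
 *	    const DBT *from, const DBT *to)
 *	{
 *		u_int32_t tail;
 *
 *		tail = len - (off + from->size);
 *		memmove(item + off + to->size,
 *		    item + off + from->size, tail);
 *		memcpy(item + off, to->data, to->size);
 *	}
 *
 * Redo splices newitem over olditem at offset off, i.e.
 * splice(item, len, off, &olditem, &newitem); undo swaps the two DBTs to
 * put olditem back.
 */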
/*
 * DEPRECATED IN 3.0
 * Hash now uses the btree allocation and deletion page routines.
 *
 * HASH-newpgno: is used to record getting/deleting a new page number.
 * This doesn't require much data modification, just modifying the
 * meta-data.
 * pgno is the page being allocated/freed.
 * free_pgno is the next_pgno on the free list.
 * old_type was the type of a page being deallocated.
 * old_pgno was the next page number before the deallocation.
 */
BEGIN newpgno
ARG	opcode		u_int32_t	lu
ARG	fileid		int32_t		lu
ARG	pgno		db_pgno_t	lu
ARG	free_pgno	db_pgno_t	lu
ARG	old_type	u_int32_t	lu
ARG	old_pgno	db_pgno_t	lu
ARG	new_type	u_int32_t	lu
POINTER	pagelsn		DB_LSN *	lu
POINTER	metalsn		DB_LSN *	lu
END

/*
 * DEPRECATED in 3.0
 * Since we now pre-allocate the contiguous chunk of pages for a doubling,
 * there is no big benefit to pre-allocating a few extra pages.  It used
 * to be that the file was only physically as large as the current bucket,
 * so if you were on a doubling of 16K, but were only on the first bucket
 * of that 16K, the file was much shorter than it would be at the end of
 * the doubling, so we didn't want to force overflow pages at the end of
 * the 16K pages.  Since we now must allocate the 16K pages (because of
 * sub databases), it's not a big deal to tack extra pages on at the end.
 *
 * ovfl: initialize a set of overflow pages.
 */
BEGIN ovfl
ARG	fileid		int32_t		lu
ARG	start_pgno	db_pgno_t	lu
ARG	npages		u_int32_t	lu
ARG	free_pgno	db_pgno_t	lu
ARG	ovflpoint	u_int32_t	lu
POINTER	metalsn		DB_LSN *	lu
END

/*
 * Used when we empty the first page in a bucket and there are pages after
 * it.  The page after it gets copied into the bucket page (since bucket
 * pages have to be in fixed locations).
 * pgno: the bucket page
 * pagelsn: the old LSN on the bucket page
 * next_pgno: the page number of the next page
 * nnext_pgno: page after next_pgno (may need to change its prev)
 * nnextlsn: the LSN of nnext_pgno.
 */
BEGIN copypage
ARG	fileid		int32_t		lu
ARG	pgno		db_pgno_t	lu
POINTER	pagelsn		DB_LSN *	lu
ARG	next_pgno	db_pgno_t	lu
POINTER	nextlsn		DB_LSN *	lu
ARG	nnext_pgno	db_pgno_t	lu
POINTER	nnextlsn	DB_LSN *	lu
DBT	page		DBT		s
END

/*
 * This replaces the old splitmeta operation.  It behaves largely the same
 * way, but it has enough information so that we can record a group
 * allocation, which we do now because of sub databases.  The number of
 * pages allocated is always bucket + 1; pgno is the page number of the
 * first newly allocated bucket.
 * bucket:	Old maximum bucket number.
 * pgno:	Page allocated to bucket + 1 (first newly allocated page).
 * metalsn:	Lsn of the meta-data page.
 * pagelsn:	Lsn of the maximum page allocated.
 */
BEGIN metagroup
ARG	fileid		int32_t		lu
ARG	bucket		u_int32_t	lu
ARG	pgno		db_pgno_t	lu
POINTER	metalsn		DB_LSN *	lu
POINTER	pagelsn		DB_LSN *	lu
END

/*
 * groupalloc
 *
 * This is used in conjunction with MPOOL_NEW_GROUP when we are creating
 * a new database, to make sure that we recreate or reclaim free pages
 * when we allocate a chunk of contiguous ones during database creation.
 *
 * pgno: meta-data page number
 * metalsn: meta-data lsn
 * start_pgno: starting page number
 * num: number of allocated pages
 */
BEGIN groupalloc
ARG	fileid		int32_t		lu
ARG	pgno		db_pgno_t	lu
POINTER	metalsn		DB_LSN *	lu
POINTER	mmetalsn	DB_LSN *	lu
ARG	start_pgno	db_pgno_t	lu
ARG	num		u_int32_t	lu
END
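/*
 * For reference: the group-allocation arithmetic implied by the metagroup
 * comment above, written out as an assumed example rather than code taken
 * from the tree.  If the old maximum bucket number is bucket, the split
 * doubles the table:
 *
 *	u_int32_t first_new_bucket = bucket + 1;	first new bucket
 *	u_int32_t npages	   = bucket + 1;	pages in the group
 *	u_int32_t new_max_bucket   = 2 * bucket + 1;	max bucket after split
 *
 * For example, with bucket == 3 (buckets 0..3 in use), the split allocates
 * 4 pages for buckets 4..7.
 */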