/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996, 1997, 1998, 1999
 *	Sleepycat Software.  All rights reserved.
 */

#include "db_config.h"

#ifndef lint
static const char sccsid[] = "@(#)env_region.c 11.7 (Sleepycat) 11/12/99";
#endif /* not lint */

#ifndef NO_SYSTEM_INCLUDES
/*
 * NOTE(review): the header names on the system #include lines had been
 * lost; they are restored here from what this file uses (ssize_t,
 * isdigit, EINVAL/EEXIST/EIO, memset/strlen/strcmp) -- confirm against
 * the project's other source files.
 */
#include <sys/types.h>

#include <ctype.h>
#include <errno.h>
#include <string.h>
#ifndef _MSC_VER /* _WIN32 */
#include <unistd.h>
#endif
#endif /* !NO_SYSTEM_INCLUDES */

#include "db_int.h"

static int CDB___db_des_destroy __P((DB_ENV *, REGION *));
static int CDB___db_des_get __P((DB_ENV *, REGINFO *, REGINFO *, REGION **));
static int CDB___db_e_remfile __P((DB_ENV *));
static int CDB___db_faultmem __P((void *, size_t, int));

/*
 * CDB___db_e_attach
 *	Join/create the environment.
 *
 * On success, dbenv->reginfo references a newly allocated REGINFO for the
 * environment region and 0 is returned.  On failure, everything acquired
 * here is released and a non-zero error value is returned.  Temporary
 * failures (a region that is still being created) are retried a few times
 * before giving up with EAGAIN.
 *
 * PUBLIC: int CDB___db_e_attach __P((DB_ENV *));
 */
int
CDB___db_e_attach(dbenv)
	DB_ENV *dbenv;
{
	REGENV *renv;
	REGENV_REF ref;
	REGINFO *infop;
	REGION *rp, tregion;
	size_t size;
	ssize_t nrw;
	u_int32_t mbytes, bytes;
	int retry_cnt, ret, segid;
	char buf[sizeof(DB_REGION_FMT) + 20];

#if !defined(HAVE_MUTEX_THREADS)
	/*
	 * !!!
	 * If we don't have spinlocks, we need a file descriptor for fcntl(2)
	 * locking.  We use the file handle from the REGENV file for this
	 * purpose.
	 *
	 * Since we may be using shared memory regions, e.g., shmget(2), and
	 * not a mapped-in regular file, the backing file may be only a few
	 * bytes in length.  So, this depends on the ability to call fcntl to
	 * lock file offsets much larger than the actual physical file.  I
	 * think that's safe -- besides, very few systems actually need this
	 * kind of support, SunOS is the only one still in wide use of which
	 * I'm aware.
	 *
	 * The error case is if an application lacks spinlocks and wants to be
	 * threaded.  That doesn't work because fcntl may lock the underlying
	 * process, including all its threads.
	 */
	if (F_ISSET(dbenv, DB_ENV_THREAD)) {
		CDB___db_err(dbenv,
	"architecture lacks fast mutexes: applications cannot be threaded");
		return (EINVAL);
	}
#endif

	/* Initialization */
	retry_cnt = 0;

	/* Repeated initialization. */
loop:	renv = NULL;

	/* Set up the DB_ENV's REG_INFO structure. */
	if ((ret = CDB___os_calloc(1, sizeof(REGINFO), &infop)) != 0)
		return (ret);
	infop->id = REG_ID_ENV;
	infop->mode = dbenv->db_mode;
	if (F_ISSET(dbenv, DB_ENV_CREATE))
		F_SET(infop, REGION_CREATE_OK);

	/*
	 * We have to single-thread the creation of the REGENV region.  Once
	 * it exists, we can do locking using locks in the region, but until
	 * then we have to be the only player in the game.
	 *
	 * If this is a private environment, we are only called once and there
	 * are no possible race conditions.
	 *
	 * If this is a public environment, we use the filesystem to ensure
	 * the creation of the environment file is single-threaded.
	 */
	if (F_ISSET(dbenv, DB_ENV_PRIVATE))
		goto creation;

	/* Build the region name. */
	(void)snprintf(buf, sizeof(buf), "%s", DB_REGION_ENV);
	if ((ret = CDB___db_appname(dbenv,
	    DB_APP_NONE, NULL, buf, 0, NULL, &infop->name)) != 0)
		goto err;

	/*
	 * Try to create the file, if we have the authority.  We have to ensure
	 * that multiple threads/processes attempting to simultaneously create
	 * the file are properly ordered.  Open using the O_CREAT and O_EXCL
	 * flags so that multiple attempts to create the region will return
	 * failure in all but one.  POSIX 1003.1 requires that EEXIST be the
	 * errno return value -- I sure hope they're right.
	 */
	if (F_ISSET(dbenv, DB_ENV_CREATE)) {
		if ((ret = CDB___os_open(infop->name,
		    DB_OSO_CREATE | DB_OSO_EXCL,
		    dbenv->db_mode, dbenv->lockfhp)) == 0)
			goto creation;
		if (ret != EEXIST) {
			CDB___db_err(dbenv,
			    "%s: %s", infop->name, CDB_db_strerror(ret));
			goto err;
		}
	}

	/*
	 * If we couldn't create the file, try and open it.  (If that fails,
	 * we're done.)
	 */
	if ((ret = CDB___os_open(
	    infop->name, 0, dbenv->db_mode, dbenv->lockfhp)) != 0)
		goto err;

	/*
	 * !!!
	 * The region may be in system memory not backed by the filesystem
	 * (more specifically, not backed by this file), and we're joining
	 * it.  In that case, the process that created it will have written
	 * out a REGENV_REF structure as its only contents.  We read that
	 * structure before we do anything further, e.g., we can't just map
	 * that file in and then figure out what's going on.
	 *
	 * All of this noise is because some systems don't have a coherent VM
	 * and buffer cache, and what's worse, when you mix operations on the
	 * VM and buffer cache, half the time you hang the system.
	 *
	 * If the file is the size of an REGENV_REF structure, then we know
	 * the real region is in some other memory.  (The only way you get a
	 * file that size is to deliberately write it, as it's smaller than
	 * any possible disk sector created by writing a file or mapping the
	 * file into memory.)  In which case, retrieve the structure from the
	 * file and use it to acquire the referenced memory.
	 *
	 * If the structure is larger than a REGENV_REF structure, then this
	 * file is backing the shared memory region, and we just map it into
	 * memory.
	 *
	 * And yes, this makes me want to take somebody and kill them.  (I
	 * digress -- but you have no freakin' idea.  This is unbelievably
	 * stupid and gross, and I've probably spent six months of my life,
	 * now, trying to make different versions of it work.)
	 */
	if ((ret = CDB___os_ioinfo(
	    infop->name, dbenv->lockfhp, &mbytes, &bytes, NULL)) != 0) {
		CDB___db_err(dbenv,
		    "%s: %s", infop->name, CDB_db_strerror(ret));
		goto err;
	}

	/*
	 * !!!
	 * A size_t is OK -- regions get mapped into memory, and so can't
	 * be larger than a size_t.
	 */
	size = mbytes * MEGABYTE + bytes;

	/*
	 * If the size is 0 or less than the size of a REGENV_REF structure,
	 * the region (or, possibly, the REGENV_REF structure) has not been
	 * fully written.  Wait awhile and try again.
	 *
	 * Otherwise, if the size is the size of a REGENV_REF structure,
	 * read it into memory and use it as a reference to the real region.
	 */
	segid = INVALID_REGION_SEGID;
	if (size <= sizeof(ref)) {
		/* ret is 0 here, so the retry label treats this as temporary. */
		if (size != sizeof(ref))
			goto retry;

		if ((ret = CDB___os_read(dbenv->lockfhp,
		    &ref, sizeof(ref), &nrw)) != 0 ||
		    nrw < (ssize_t)sizeof(ref)) {
			if (ret == 0)
				ret = EIO;
			CDB___db_err(dbenv,
		    "%s: unable to read system-memory information from: %s",
			    infop->name, CDB_db_strerror(ret));
			goto err;
		}
		size = ref.size;
		segid = ref.segid;

		F_SET(dbenv, DB_ENV_SYSTEM_MEM);
	}

	/*
	 * If not doing thread locking, we need to save the file handle for
	 * fcntl(2) locking.  Otherwise, discard the handle, we no longer
	 * need it, and the less contact between the buffer cache and the VM,
	 * the better.
	 */
#ifdef HAVE_MUTEX_THREADS
	CDB___os_closehandle(dbenv->lockfhp);
#endif

	/* Call the region join routine to acquire the region. */
	memset(&tregion, 0, sizeof(tregion));
	tregion.size = size;
	tregion.segid = segid;
	if ((ret = CDB___os_r_attach(dbenv, infop, &tregion)) != 0)
		goto err;

	/*
	 * The environment's REGENV structure has to live at offset 0 instead
	 * of the usual shalloc information.  Set the primary reference and
	 * correct the "addr" value to reference the shalloc region.  Note,
	 * this means that all of our offsets (R_ADDR/R_OFFSET) get shifted
	 * as well, but that should be fine.
	 */
	infop->primary = R_ADDR(infop, 0);
	infop->addr = (u_int8_t *)infop->addr + sizeof(REGENV);

	/*
	 * Check if the environment has had a catastrophic failure.
	 *
	 * Check the magic number to ensure the region is initialized.  If the
	 * magic number isn't set, the lock may not have been initialized, and
	 * an attempt to use it could lead to random behavior.
	 *
	 * The panic and magic values aren't protected by any lock, so we never
	 * use them in any check that's more complex than set/not-set.
	 *
	 * !!!
	 * I'd rather play permissions games using the underlying file, but I
	 * can't because Windows/NT filesystems won't open files mode 0.
	 */
	renv = infop->primary;
	if (renv->panic) {
		ret = CDB___db_panic_msg(dbenv);
		goto err;
	}
	if (renv->magic != DB_REGION_MAGIC)
		goto retry;

	/* Lock the environment. */
	MUTEX_LOCK(&renv->mutex, dbenv->lockfhp);

	/*
	 * Finally!  We own the environment now.  Repeat the panic check, it's
	 * possible that it was set while we waited for the lock.
	 */
	if (renv->panic) {
		ret = CDB___db_panic_msg(dbenv);
		goto err_unlock;
	}

	/*
	 * Get a reference to the underlying REGION information for this
	 * environment.
	 */
	if ((ret = CDB___db_des_get(dbenv, infop, infop, &rp)) != 0)
		goto err_unlock;
	if (rp == NULL) {
		CDB___db_err(dbenv,
		    "%s: unable to find environment REGION", infop->name);
		ret = EINVAL;
		goto err_unlock;
	}
	infop->rp = rp;

	/*
	 * There's still a possibility for inconsistent data.  When we acquired
	 * the size of the region and attached to it, it might have still been
	 * growing as part of its creation.  We can detect this by checking the
	 * size we originally found against the region's current size.  (The
	 * region's current size has to be final, the creator finished growing
	 * it before releasing the environment for us to lock.)
	 *
	 * The err_unlock label lives inside this block: callers that jump to
	 * it with a non-zero ret get a hard failure, while the size mismatch
	 * falls in with ret == 0 and is therefore retried.
	 */
	if (rp->size != size) {
err_unlock:	MUTEX_UNLOCK(&renv->mutex);
		goto retry;
	}

	/* Increment the reference count. */
	++renv->refcnt;

	/* Discard our lock. */
	MUTEX_UNLOCK(&renv->mutex);

	/*
	 * Fault the pages into memory.  Note, do this AFTER releasing the
	 * lock, because we're only reading the pages, not writing them.
	 */
	(void)CDB___db_faultmem(infop->primary, rp->size, 0);

	/* Everything looks good, we're done. */
	dbenv->reginfo = infop;
	return (0);

creation:
	/* Create the environment region. */
	F_SET(infop, REGION_CREATE);

	/*
	 * Allocate room for 50 REGION structures plus overhead (we're going
	 * to use this space for last-ditch allocation requests), although we
	 * should never need anything close to that.
	 */
	memset(&tregion, 0, sizeof(tregion));
	tregion.size = 50 * sizeof(REGION) + 50 * sizeof(MUTEX) + 2048;
	tregion.segid = INVALID_REGION_SEGID;
	if ((ret = CDB___os_r_attach(dbenv, infop, &tregion)) != 0)
		goto err;

	/*
	 * Fault the pages into memory.  Note, do this BEFORE we initialize
	 * anything, because we're writing the pages, not just reading them.
	 */
	(void)CDB___db_faultmem(infop->addr, tregion.size, 1);

	/*
	 * The first object in the region is the REGENV structure.  This is
	 * different from the other regions, and, from everything else in
	 * this region, where all objects are allocated from the pool, i.e.,
	 * there aren't any fixed locations.  The remaining space is made
	 * available for later allocation.
	 *
	 * The allocation space must be size_t aligned, because that's what
	 * the initialization routine is going to store there.  To make sure
	 * that happens, the REGENV structure was padded with a final size_t.
	 * No other region needs to worry about it because all of them treat
	 * the entire region as allocation space.
	 *
	 * Set the primary reference and correct the "addr" value to reference
	 * the shalloc region.  Note, this requires that we "uncorrect" it at
	 * region detach, and that all of our offsets (R_ADDR/R_OFFSET) will be
	 * shifted as well, but that should be fine.
	 */
	infop->primary = R_ADDR(infop, 0);
	infop->addr = (u_int8_t *)infop->addr + sizeof(REGENV);
	CDB___db_shalloc_init(infop->addr, tregion.size - sizeof(REGENV));

	/*
	 * Initialize the rest of the REGENV structure, except for the magic
	 * number which validates the file/environment.
	 */
	renv = infop->primary;
	renv->panic = 0;
	CDB_db_version(&renv->majver, &renv->minver, &renv->patch);
	SH_LIST_INIT(&renv->regionq);
	renv->refcnt = 1;

	/*
	 * Lock the environment.
	 *
	 * Check the lock call return.  This is the first lock we initialize
	 * and acquire, and we have to know if it fails.  (It CAN fail, e.g.,
	 * SunOS, when using fcntl(2) for locking and using an in-memory
	 * filesystem as the database home.  But you knew that, I'm sure -- it
	 * probably wasn't even worth mentioning.)
	 */
	if ((ret =
	    __db_mutex_init(dbenv, &renv->mutex, DB_FCNTL_OFF_GEN, 0)) != 0) {
		CDB___db_err(dbenv,
		    "%s: unable to initialize environment lock: %s",
		    infop->name, CDB_db_strerror(ret));
		goto err;
	}

	if (!F_ISSET(&renv->mutex, MUTEX_IGNORE) &&
	    (ret = __db_mutex_lock(&renv->mutex, dbenv->lockfhp)) != 0) {
		CDB___db_err(dbenv,
		    "%s: unable to acquire environment lock: %s",
		    infop->name, CDB_db_strerror(ret));
		goto err;
	}

	/*
	 * Get the underlying REGION structure for this environment.  Note,
	 * we created the underlying OS region before we acquired the REGION
	 * structure, which is backwards from the normal procedure.  Update
	 * the REGION structure.
	 */
	if ((ret = CDB___db_des_get(dbenv, infop, infop, &rp)) != 0)
		goto err;
	infop->rp = rp;
	rp->size = tregion.size;
	rp->segid = tregion.segid;

	/*
	 * !!!
	 * If we create an environment where regions are public and in system
	 * memory, we have to inform processes joining the environment how to
	 * attach to the shared memory segment.  So, we write the shared memory
	 * identifier into the file, to be read by those other processes.
	 *
	 * XXX
	 * This is really OS-layer information, but I can't see any easy way
	 * to move it down there without passing down information that it has
	 * no right to know, e.g., that this is the one-and-only REGENV region
	 * and not some other random region.
	 */
	if (tregion.segid != INVALID_REGION_SEGID) {
		ref.size = tregion.size;
		ref.segid = tregion.segid;
		if ((ret = CDB___os_write(dbenv->lockfhp,
		    &ref, sizeof(ref), &nrw)) != 0 ||
		    nrw != (ssize_t)sizeof(ref)) {
			/*
			 * A short write reports success with too few bytes.
			 * Turn it into a hard error: the cleanup code below
			 * treats ret == 0 as "temporary, retry", which would
			 * silently loop and log a meaningless error string.
			 */
			if (ret == 0)
				ret = EIO;
			CDB___db_err(dbenv,
			    "%s: unable to write out public environment ID: %s",
			    infop->name, CDB_db_strerror(ret));
			goto err;
		}
	}

	/*
	 * If not doing thread locking, we need to save the file handle for
	 * fcntl(2) locking.  Otherwise, discard the handle, we no longer
	 * need it, and the less contact between the buffer cache and the VM,
	 * the better.
	 */
#if defined(HAVE_MUTEX_THREADS)
	if (F_ISSET(dbenv->lockfhp, DB_FH_VALID))
		CDB___os_closehandle(dbenv->lockfhp);
#endif

	/* Validate the file. */
	renv->magic = DB_REGION_MAGIC;

	/* Discard our lock. */
	MUTEX_UNLOCK(&renv->mutex);

	/* Everything looks good, we're done. */
	dbenv->reginfo = infop;
	return (0);

err:
retry:
	/* Close any open file handle. */
	if (F_ISSET(dbenv->lockfhp, DB_FH_VALID))
		(void)CDB___os_closehandle(dbenv->lockfhp);

	/*
	 * If we joined or created the region, detach from it.  If we created
	 * it, destroy it.  Note, there's a path in the above code where we're
	 * using a temporary REGION structure because we haven't yet allocated
	 * the real one.  In that case the region address (addr) will be filled
	 * in, but the REGION pointer (rp) won't.  Fix it.
	 */
	if (infop->addr != NULL) {
		if (infop->rp == NULL)
			infop->rp = &tregion;

		/* Reset the addr value that we "corrected" above. */
		infop->addr = infop->primary;
		(void)CDB___os_r_detach(dbenv,
		    infop, F_ISSET(infop, REGION_CREATE));
	}

	/* Free the allocated name and/or REGINFO structure. */
	if (infop->name != NULL)
		CDB___os_freestr(infop->name);
	CDB___os_free(infop, sizeof(REGINFO));

	/* If we had a temporary error, wait awhile and try again. */
	if (ret == 0) {
		if (++retry_cnt > 3) {
			CDB___db_err(dbenv, "unable to join the environment");
			ret = EAGAIN;
		} else {
			CDB___os_sleep(retry_cnt * 3, 0);
			goto loop;
		}
	}

	return (ret);
}

/*
 * CDB___db_e_detach --
 *	Detach from the environment.
 *
 * Drops this handle's reference on the environment, closes the locking
 * file handle, detaches from (and, if "destroy" is non-zero, destroys)
 * the underlying region, and frees dbenv->reginfo.  Always returns 0.
 *
 * PUBLIC: int CDB___db_e_detach __P((DB_ENV *, int));
 */
int
CDB___db_e_detach(dbenv, destroy)
	DB_ENV *dbenv;
	int destroy;
{
	REGENV *renv;
	REGINFO *infop;

	infop = dbenv->reginfo;
	renv = infop->primary;

	/* Lock the environment. */
	MUTEX_LOCK(&renv->mutex, dbenv->lockfhp);

	/* Decrement the reference count. */
	if (renv->refcnt == 0) {
		/* Cast so the argument matches the %lu conversion. */
		CDB___db_err(dbenv,
		    "region %lu (environment): reference count went negative",
		    (unsigned long)infop->rp->id);
	} else
		--renv->refcnt;

	/* Release the lock. */
	MUTEX_UNLOCK(&renv->mutex);

	/* Close the locking file handle. */
	if (F_ISSET(dbenv->lockfhp, DB_FH_VALID))
		(void)CDB___os_closehandle(dbenv->lockfhp);

	/* Reset the addr value that we "corrected" above. */
	infop->addr = infop->primary;

	/*
	 * Release the region, and kill our reference.
	 *
	 * We set the DBENV->reginfo field to NULL here and discard its memory.
	 * DBENV->remove calls CDB___dbenv_remove to do the region remove, and
	 * CDB___dbenv_remove attached and then detaches from the region.  We
	 * don't want to return to DBENV->remove with a non-NULL DBENV->reginfo
	 * field because it will attempt to detach again as part of its cleanup.
	 */
	(void)CDB___os_r_detach(dbenv, infop, destroy);

	/* The name is a string; free it the way the rest of this file does. */
	if (infop->name != NULL)
		CDB___os_freestr(infop->name);
	CDB___os_free(dbenv->reginfo, sizeof(REGINFO));
	dbenv->reginfo = NULL;

	return (0);
}

/*
 * CDB___db_e_remove --
 *	Discard an environment if it's not in use.
 *
 * Returns EBUSY if the environment is in use and "force" is not set,
 * otherwise 0.  A failure to join the environment is treated as "nothing
 * to remove" and also returns 0.
 *
 * PUBLIC: int CDB___db_e_remove __P((DB_ENV *, int));
 */
int
CDB___db_e_remove(dbenv, force)
	DB_ENV *dbenv;
	int force;
{
	REGENV *renv;
	REGINFO *infop, reginfo;
	REGION *rp, *next_rp;
	int ret, saved_value;

	/*
	 * This routine has to walk a nasty line between not looking into
	 * the environment (which may be corrupted after an app or system
	 * crash), and removing everything that needs removing.  What we
	 * do is:
	 *	1. Connect to the environment (so it better be OK).
	 *	2. If the environment is in use (reference count is non-zero),
	 *	   return EBUSY.
	 *	3. Overwrite the magic number so that any threads of control
	 *	   attempting to connect will backoff and retry.
	 *	4. Walk the list of regions.  Connect to each region and then
	 *	   disconnect with the destroy flag set.  This shouldn't cause
	 *	   any problems, even if the region is corrupted, because we
	 *	   should never be looking inside the region.
	 *	5. Walk the list of files in the directory, unlinking any
	 *	   files that match a region name.  Unlink the environment
	 *	   file last.
	 *
	 * If the force flag is set, we do not acquire any locks during this
	 * process.
	 */
	saved_value = DB_GLOBAL(db_mutexlocks);
	if (force)
		DB_GLOBAL(db_mutexlocks) = 0;

	/* Join the environment. */
	if ((ret = CDB___db_e_attach(dbenv)) != 0) {
		/*
		 * If we can't join it, we assume that's because it doesn't
		 * exist.  It would be better to know why we failed, but it
		 * probably isn't important.
		 */
		ret = 0;
		if (force)
			goto remfiles;
		goto err;
	}

	infop = dbenv->reginfo;
	renv = infop->primary;

	/* Lock the environment. */
	MUTEX_LOCK(&renv->mutex, dbenv->lockfhp);

	/*
	 * A reference count of 1 is our own attach above, i.e., nobody else
	 * is using the environment.  If it's in use, we're done.
	 */
	if (renv->refcnt == 1 || force) {
		/*
		 * Set the panic flag and overwrite the magic number.
		 *
		 * !!!
		 * From this point on, there's no going back, we pretty
		 * much ignore errors, and just whack on whatever we can.
		 */
		renv->panic = 1;
		renv->magic = 0;

		/*
		 * Unlock the environment.  We should no longer need the lock
		 * because we've poisoned the pool, but we can't continue to
		 * hold it either, because other routines may want it.
		 */
		MUTEX_UNLOCK(&renv->mutex);

		/*
		 * Attach to each sub-region and destroy it.
		 *
		 * !!!
		 * The REGION_CREATE_OK flag is set for Windows/95 -- regions
		 * are zero'd out when the last reference to the region goes
		 * away, in which case the underlying OS region code requires
		 * callers be prepared to create the region in order to join it.
		 */
restart:	for (rp = SH_LIST_FIRST(&renv->regionq, __db_region);
		    rp != NULL; rp = next_rp) {
			/*
			 * Pick up the successor first: a failed attach may
			 * discard rp's REGION structure.
			 */
			next_rp = SH_LIST_NEXT(rp, q, __db_region);

			if (rp->id == REG_ID_ENV)
				continue;

			/* Start clean so nothing stale carries over. */
			memset(&reginfo, 0, sizeof(reginfo));
			reginfo.id = rp->id;
			reginfo.flags = REGION_CREATE_OK;

			/*
			 * If the attach fails, move on to the next region so
			 * we eventually reach the end of the list; restarting
			 * here would spin forever on the same entry.
			 */
			if (CDB___db_r_attach(dbenv, &reginfo, 0) != 0)
				continue;

			R_UNLOCK(dbenv, &reginfo);
			(void)CDB___db_r_detach(dbenv, &reginfo, 1);

			/* The list was relinked by the destroy, start over. */
			goto restart;
		}

		/* Destroy the environment's region. */
		(void)CDB___db_e_detach(dbenv, 1);

		/* Discard the physical files. */
remfiles:	(void)CDB___db_e_remfile(dbenv);
	} else {
		/* Unlock the environment. */
		MUTEX_UNLOCK(&renv->mutex);

		/* Discard the environment. */
		(void)CDB___db_e_detach(dbenv, 0);

		ret = EBUSY;
	}

err:	if (force)
		DB_GLOBAL(db_mutexlocks) = saved_value;

	return (ret);
}

/*
 * CDB___db_e_remfile --
 *	Discard any region files in the filesystem.
 *
 * Returns non-zero only if the environment path cannot be built or the
 * directory cannot be listed; individual unlink failures are ignored.
 */
static int
CDB___db_e_remfile(dbenv)
	DB_ENV *dbenv;
{
	static char *old_region_names[] = {
		"__db_lock.share",
		"__db_log.share",
		"__db_mpool.share",
		"__db_txn.share",
		NULL,
	};
	int cnt, fcnt, lastrm, ret;
	u_int8_t saved_byte;
	const char *dir;
	char *p, **names, *path, buf[sizeof(DB_REGION_FMT) + 20];

	/* Get the full path of a file in the environment. */
	(void)snprintf(buf, sizeof(buf), "%s", DB_REGION_ENV);
	if ((ret = CDB___db_appname(dbenv,
	    DB_APP_NONE, NULL, buf, 0, NULL, &path)) != 0)
		return (ret);

	/* Get the parent directory for the environment. */
	if ((p = CDB___db_rpath(path)) == NULL) {
		p = path;
		saved_byte = *p;

		dir = PATH_DOT;
	} else {
		saved_byte = *p;
		*p = '\0';

		dir = path;
	}

	/* Get the list of file names. */
	ret = CDB___os_dirlist(dir, &names, &fcnt);

	/*
	 * Report any failure now: "dir" may point into "path", which is
	 * about to be restored and freed.
	 */
	if (ret != 0)
		CDB___db_err(dbenv, "%s: %s", dir, CDB_db_strerror(ret));

	/* Restore the path, and free it. */
	*p = saved_byte;
	CDB___os_freestr(path);

	if (ret != 0)
		return (ret);

	/*
	 * Search for valid region names, and remove them.  We remove the
	 * environment region last, because it's the key to this whole mess.
	 */
	for (lastrm = -1, cnt = fcnt; --cnt >= 0;) {
		if (strlen(names[cnt]) != DB_REGION_NAME_LENGTH ||
		    memcmp(names[cnt], DB_REGION_FMT, DB_REGION_NAME_NUM) != 0)
			continue;
		if (strcmp(names[cnt], DB_REGION_ENV) == 0) {
			lastrm = cnt;
			continue;
		}

		/*
		 * The rest of the name must be all digits.  The cast keeps
		 * the isdigit argument in range when plain char is signed.
		 */
		for (p = names[cnt] + DB_REGION_NAME_NUM;
		    *p != '\0' && isdigit((unsigned char)*p); ++p)
			;
		if (*p != '\0')
			continue;

		if (CDB___db_appname(dbenv,
		    DB_APP_NONE, NULL, names[cnt], 0, NULL, &path) == 0) {
			(void)CDB___os_unlink(path);
			CDB___os_freestr(path);
		}
	}

	if (lastrm != -1)
		if (CDB___db_appname(dbenv,
		    DB_APP_NONE, NULL, names[lastrm], 0, NULL, &path) == 0) {
			(void)CDB___os_unlink(path);
			CDB___os_freestr(path);
		}
	CDB___os_dirfree(names, fcnt);

	/*
	 * !!!
	 * Backward compatibility -- remove region files from releases
	 * before 2.8.XX.
	 */
	for (names = (char **)old_region_names; *names != NULL; ++names)
		if (CDB___db_appname(dbenv,
		    DB_APP_NONE, NULL, *names, 0, NULL, &path) == 0) {
			(void)CDB___os_unlink(path);
			CDB___os_freestr(path);
		}

	return (0);
}

/*
 * CDB___db_e_stat
 *	Statistics for the environment.
 *
 * Copies the REGENV structure into *arg_renv and up to *arg_regions_cnt
 * REGION structures into arg_regions, then updates *arg_regions_cnt.
 * Always returns 0.
 *
 * PUBLIC: int CDB___db_e_stat __P((DB_ENV *, REGENV *, REGION *, int *));
 */
int
CDB___db_e_stat(dbenv, arg_renv, arg_regions, arg_regions_cnt)
	DB_ENV *dbenv;
	REGENV *arg_renv;
	REGION *arg_regions;
	int *arg_regions_cnt;
{
	REGENV *renv;
	REGINFO *infop;
	REGION *rp;
	int n;

	infop = dbenv->reginfo;
	renv = infop->primary;
	rp = infop->rp;

	/* Lock the environment. */
	MUTEX_LOCK(&rp->mutex, dbenv->lockfhp);

	*arg_renv = *renv;

	for (n = 0, rp = SH_LIST_FIRST(&renv->regionq, __db_region);
	    n < *arg_regions_cnt && rp != NULL;
	    ++n, rp = SH_LIST_NEXT(rp, q, __db_region))
		arg_regions[n] = *rp;

	/* Release the lock (rp was reused as the list cursor above). */
	rp = infop->rp;
	MUTEX_UNLOCK(&rp->mutex);

	/*
	 * NOTE(review): this reports one fewer than the number of entries
	 * copied whenever any were copied -- confirm that callers expect it.
	 */
	*arg_regions_cnt = n == 0 ? n : n - 1;

	return (0);
}

/*
 * CDB___db_r_attach
 *	Join/create a region.
 *
 * On success, returns 0 with infop filled in; for any region other than
 * the environment itself the region's mutex is held and the environment
 * mutex has been released.  On failure, returns a non-zero error value
 * with the environment mutex released.
 *
 * PUBLIC: int CDB___db_r_attach __P((DB_ENV *, REGINFO *, size_t));
 */
int
CDB___db_r_attach(dbenv, infop, size)
	DB_ENV *dbenv;
	REGINFO *infop;
	size_t size;
{
	REGENV *renv;
	REGION *rp;
	int ret;
	char buf[sizeof(DB_REGION_FMT) + 20];

	renv = ((REGINFO *)dbenv->reginfo)->primary;
	F_CLR(infop, REGION_CREATE);

	/* Lock the environment. */
	MUTEX_LOCK(&renv->mutex, dbenv->lockfhp);

	/* Find or create a REGION structure for this region. */
	if ((ret = CDB___db_des_get(dbenv, dbenv->reginfo, infop, &rp)) != 0) {
		MUTEX_UNLOCK(&renv->mutex);
		return (ret);
	}
	infop->rp = rp;
	infop->id = rp->id;

	/* If we're creating the region, set the desired size. */
	if (F_ISSET(infop, REGION_CREATE))
		rp->size = size;

	/* Join/create the underlying region. */
	(void)snprintf(buf, sizeof(buf), DB_REGION_FMT, infop->id);
	if ((ret = CDB___db_appname(dbenv,
	    DB_APP_NONE, NULL, buf, 0, NULL, &infop->name)) != 0)
		goto err;
	if ((ret = CDB___os_r_attach(dbenv, infop, rp)) != 0)
		goto err;

	/*
	 * Fault the pages into memory.  Note, do this BEFORE we initialize
	 * anything because we're writing pages in created regions, not just
	 * reading them.
	 */
	(void)CDB___db_faultmem(infop->addr,
	    rp->size, F_ISSET(infop, REGION_CREATE));

	/*
	 * !!!
	 * The underlying layer may have just decided that we are going
	 * to create the region.  There are various system issues that
	 * can result in a useless region that requires re-initialization.
	 *
	 * If we created the region, initialize it for allocation.
	 */
	if (F_ISSET(infop, REGION_CREATE)) {
		((REGION *)(infop->addr))->magic = DB_REGION_MAGIC;

		(void)CDB___db_shalloc_init(infop->addr, rp->size);
	}

	/*
	 * If the underlying REGION isn't the environment, acquire a lock
	 * for it and release our lock on the environment.
	 */
	if (infop->id != REG_ID_ENV) {
		MUTEX_LOCK(&rp->mutex, dbenv->lockfhp);
		MUTEX_UNLOCK(&renv->mutex);
	}

	return (0);

	/*
	 * Discard the underlying region.
	 *
	 * NOTE(review): infop->name is not released on this path; whether
	 * callers free it after a failed attach is not visible here.
	 */
err:	if (infop->addr != NULL)
		(void)CDB___os_r_detach(dbenv,
		    infop, F_ISSET(infop, REGION_CREATE));
	infop->rp = NULL;
	infop->id = REG_ID_INVALID;

	/* Discard the REGION structure if we created it. */
	if (F_ISSET(infop, REGION_CREATE))
		(void)CDB___db_des_destroy(dbenv, rp);

	/* Release the environment lock. */
	MUTEX_UNLOCK(&renv->mutex);

	return (ret);
}

/*
 * CDB___db_r_detach --
 *	Detach from a region.
 *
 * Detaches from the underlying OS region and, if "destroy" is non-zero,
 * also discards the region's REGION structure.  Frees infop->name.
 * Returns the first error encountered, or 0.
 *
 * PUBLIC: int CDB___db_r_detach __P((DB_ENV *, REGINFO *, int));
 */
int
CDB___db_r_detach(dbenv, infop, destroy)
	DB_ENV *dbenv;
	REGINFO *infop;
	int destroy;
{
	REGENV *renv;
	REGION *rp;
	int ret, t_ret;

	renv = ((REGINFO *)dbenv->reginfo)->primary;
	rp = infop->rp;

	/* Lock the environment. */
	MUTEX_LOCK(&renv->mutex, dbenv->lockfhp);

	/* Acquire the lock for the REGION. */
	MUTEX_LOCK(&rp->mutex, dbenv->lockfhp);

	/* Detach from the underlying OS region. */
	ret = CDB___os_r_detach(dbenv, infop, destroy);

	/* Release the REGION lock. */
	MUTEX_UNLOCK(&rp->mutex);

	/* If we destroyed the region, discard the REGION structure. */
	if (destroy &&
	    ((t_ret = CDB___db_des_destroy(dbenv, rp)) != 0) && ret == 0)
		ret = t_ret;

	/* Release the environment lock. */
	MUTEX_UNLOCK(&renv->mutex);

	/* Destroy the structure; don't leave a dangling name behind. */
	if (infop->name != NULL) {
		CDB___os_freestr(infop->name);
		infop->name = NULL;
	}

	return (ret);
}

/*
 * CDB___db_des_get --
 *	Return a reference to the shared information for a REGION,
 *	optionally creating a new entry.
 *
 * On success, *rpp references the REGION and 0 is returned; the
 * REGION_CREATE flag is set in infop if the caller is expected to
 * initialize the region.  Returns ENOENT if no usable REGION exists and
 * REGION_CREATE_OK is not set in infop.
 */
static int
CDB___db_des_get(dbenv, env_infop, infop, rpp)
	DB_ENV *dbenv;
	REGINFO *env_infop, *infop;
	REGION **rpp;
{
	REGENV *renv;
	REGION *rp;
	int maxid, ret;

	/*
	 * !!!
	 * Called with the environment already locked.
	 */
	*rpp = NULL;
	renv = env_infop->primary;

	/* Look for a matching id, tracking the largest id seen on the way. */
	maxid = REG_ID_ASSIGN;
	for (rp = SH_LIST_FIRST(&renv->regionq, __db_region);
	    rp != NULL; rp = SH_LIST_NEXT(rp, q, __db_region)) {
		if (rp->id == infop->id)
			break;
		if (rp->id > maxid)
			maxid = rp->id;
	}

	/*
	 * If we didn't find a region, or we found one needing initialization,
	 * and we can't create the region, fail.
	 */
	if (!F_ISSET(infop, REGION_CREATE_OK) &&
	    (rp == NULL || F_ISSET(rp, REG_DEAD)))
		return (ENOENT);

	/*
	 * If we didn't find a region, create and initialize a REGION structure
	 * for the caller.  If id was set, use that value, otherwise we use the
	 * next available ID.
	 */
	if (rp == NULL) {
		if ((ret = CDB___db_shalloc(env_infop->addr,
		    sizeof(REGION), MUTEX_ALIGN, &rp)) != 0)
			return (ret);

		/* Initialize the region. */
		memset(rp, 0, sizeof(*rp));
		if ((ret = __db_mutex_init(dbenv, &rp->mutex,
		    R_OFFSET(env_infop, &rp->mutex) + DB_FCNTL_OFF_GEN,
		    0)) != 0) {
			CDB___db_shalloc_free(env_infop->addr, rp);
			return (ret);
		}
		rp->segid = INVALID_REGION_SEGID;
		rp->id = infop->id == REG_ID_INVALID ? maxid + 1 : infop->id;

		SH_LIST_INSERT_HEAD(&renv->regionq, rp, q, __db_region);
		F_SET(infop, REGION_CREATE);
	} else {
		/*
		 * There is one race -- a caller created a region, was trying
		 * to initialize it for general use, and failed somehow.  We
		 * leave the region around and tell each new caller that they
		 * are creating it, because that's easier than dealing with
		 * the races involved in removing it.
		 */
		if (F_ISSET(rp, REG_DEAD)) {
			rp->primary = INVALID_ROFF;

			F_CLR(rp, REG_DEAD);
			F_SET(infop, REGION_CREATE);
		}
	}

	*rpp = rp;
	return (0);
}

/*
 * CDB___db_des_destroy --
 *	Destroy a reference to a REGION.
 *
 * Unlinks rp from the region list and returns its memory to the
 * environment's allocation pool.  Always returns 0.
 */
static int
CDB___db_des_destroy(dbenv, rp)
	DB_ENV *dbenv;
	REGION *rp;
{
	REGINFO *infop;

	/*
	 * !!!
	 * Called with the environment already locked.
	 */
	infop = dbenv->reginfo;

	SH_LIST_REMOVE(rp, q, __db_region);
	CDB___db_shalloc_free(infop->addr, rp);

	return (0);
}

/*
 * CDB___db_faultmem --
 *	Fault the region into memory.
 *
 * Does nothing unless the db_region_init global is set.  When "created"
 * is non-zero one byte per page is written, otherwise one byte per page
 * is read.  The return value is the OR of the bytes read (0 when writing
 * or disabled); callers in this file ignore it.
 */
static int
CDB___db_faultmem(addr, size, created)
	void *addr;
	size_t size;
	int created;
{
	int ret;
	u_int8_t *p, *t;

	/*
	 * It's sometimes significantly faster to page-fault in all of the
	 * region's pages before we run the application, as we see nasty
	 * side-effects when we page-fault while holding various locks, i.e.,
	 * the lock takes a long time to acquire because of the underlying
	 * page fault, and the other threads convoy behind the lock holder.
	 *
	 * If we created the region, we write a non-zero value so that the
	 * system can't cheat.  If we're just joining the region, we can
	 * only read the value and try to confuse the compiler sufficiently
	 * that it doesn't figure out that we're never really using it.
	 */
	ret = 0;
	if (DB_GLOBAL(db_region_init)) {
		if (created)
			for (p = addr, t = (u_int8_t *)addr + size;
			    p < t; p += OS_VMPAGESIZE)
				p[0] = 0xdb;
		else
			for (p = addr, t = (u_int8_t *)addr + size;
			    p < t; p += OS_VMPAGESIZE)
				ret |= p[0];
	}

	return (ret);
}