Resync with mdb.master

pull/8502/head
Howard Chu 2 years ago
parent b6a029f222
commit e22af530ba
No known key found for this signature in database
GPG Key ID: FD2A70B44AB11BA7

@ -1,4 +1,4 @@
Copyright 2011-2019 Howard Chu, Symas Corp.
Copyright 2011-2021 Howard Chu, Symas Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without

@ -1,5 +1,5 @@
/*
* Copyright 2015-2018 Howard Chu, Symas Corp.
* Copyright 2015-2021 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

@ -136,7 +136,7 @@
*
* @author Howard Chu, Symas Corporation.
*
* @copyright Copyright 2011-2019 Howard Chu, Symas Corp. All rights reserved.
* @copyright Copyright 2011-2021 Howard Chu, Symas Corp. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted only as authorized by the OpenLDAP
@ -610,7 +610,7 @@ int mdb_env_create(MDB_env **env);
* <li>#MDB_NOTLS
* Don't use Thread-Local Storage. Tie reader locktable slots to
* #MDB_txn objects instead of to threads. I.e. #mdb_txn_reset() keeps
* the slot reseved for the #MDB_txn object. A thread may use parallel
* the slot reserved for the #MDB_txn object. A thread may use parallel
* read-only transactions. A read-only transaction may span threads if
* the user synchronizes its use. Applications that multiplex many
* user threads over individual OS threads need this option. Such an
@ -968,7 +968,7 @@ void *mdb_env_get_userctx(MDB_env *env);
typedef void MDB_assert_func(MDB_env *env, const char *msg);
/** Set or reset the assert() callback of the environment.
* Disabled if liblmdb is buillt with NDEBUG.
* Disabled if liblmdb is built with NDEBUG.
* @note This hack should become obsolete as lmdb's error handling matures.
* @param[in] env An environment handle returned by #mdb_env_create().
* @param[in] func An #MDB_assert_func function, or 0.

@ -5,7 +5,7 @@
* BerkeleyDB API, but much simplified.
*/
/*
* Copyright 2011-2019 Howard Chu, Symas Corp.
* Copyright 2011-2021 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -96,6 +96,7 @@ static NtCloseFunc *NtClose;
# define SSIZE_MAX INT_MAX
# endif
#endif
#define MDB_OFF_T int64_t
#else
#include <sys/types.h>
#include <sys/stat.h>
@ -108,6 +109,7 @@ static NtCloseFunc *NtClose;
#include <sys/file.h>
#endif
#include <fcntl.h>
#define MDB_OFF_T off_t
#endif
#if defined(__mips) && defined(__linux)
@ -159,7 +161,10 @@ typedef SSIZE_T ssize_t;
#include <resolv.h> /* defines BYTE_ORDER on HPUX and Solaris */
#endif
#if defined(__APPLE__) || defined (BSD) || defined(__FreeBSD_kernel__)
#if defined(__FreeBSD__) && defined(__FreeBSD_version) && __FreeBSD_version >= 1100110
# define MDB_USE_POSIX_MUTEX 1
# define MDB_USE_ROBUST 1
#elif defined(__APPLE__) || defined (BSD) || defined(__FreeBSD_kernel__)
# if !(defined(MDB_USE_POSIX_MUTEX) || defined(MDB_USE_POSIX_SEM))
# define MDB_USE_SYSV_SEM 1
# endif
@ -585,7 +590,7 @@ static txnid_t mdb_debug_start;
* The string is printed literally, with no format processing.
*/
#define DPUTS(arg) DPRINTF(("%s", arg))
/** Debuging output value of a cursor DBI: Negative in a sub-cursor. */
/** Debugging output value of a cursor DBI: Negative in a sub-cursor. */
#define DDBI(mc) \
(((mc)->mc_flags & C_SUB) ? -(int)(mc)->mc_dbi : (int)(mc)->mc_dbi)
/** @} */
@ -1462,9 +1467,12 @@ struct MDB_env {
HANDLE me_fd; /**< The main data file */
HANDLE me_lfd; /**< The lock file */
HANDLE me_mfd; /**< For writing and syncing the meta pages */
#if defined(MDB_VL32) && defined(_WIN32)
#ifdef _WIN32
#ifdef MDB_VL32
HANDLE me_fmh; /**< File Mapping handle */
#endif
#endif /* MDB_VL32 */
HANDLE me_ovfd; /**< Overlapped/async with write-through file handle */
#endif /* _WIN32 */
/** Failed to update the meta page. Probably an I/O error. */
#define MDB_FATAL_ERROR 0x80000000U
/** using a raw block device */
@ -1492,7 +1500,7 @@ struct MDB_env {
MDB_txn *me_txn; /**< current write transaction */
MDB_txn *me_txn0; /**< prealloc'd write transaction */
mdb_size_t me_mapsize; /**< size of the data memory map */
off_t me_size; /**< current file size */
MDB_OFF_T me_size; /**< current file size */
pgno_t me_maxpg; /**< me_mapsize / me_psize */
MDB_dbx *me_dbxs; /**< array of static DB info */
uint16_t *me_dbflags; /**< array of flags from MDB_db.md_flags */
@ -1517,6 +1525,8 @@ struct MDB_env {
int me_live_reader; /**< have liveness lock in reader table */
#ifdef _WIN32
int me_pidquery; /**< Used in OpenProcess */
OVERLAPPED *ov; /**< Used for overlapping I/O requests */
int ovs; /**< Count of OVERLAPPEDs */
#endif
#ifdef MDB_USE_POSIX_MUTEX /* Posix mutexes reside in shared mem */
# define me_rmutex me_txns->mti_rmutex /**< Shared reader lock */
@ -1599,7 +1609,7 @@ static int mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata,
static int mdb_env_read_header(MDB_env *env, int prev, MDB_meta *meta);
static MDB_meta *mdb_env_pick_meta(const MDB_env *env);
static int mdb_env_write_meta(MDB_txn *txn);
#ifdef MDB_USE_POSIX_MUTEX /* Drop unused excl arg */
#if defined(MDB_USE_POSIX_MUTEX) && !defined(MDB_ROBUST_SUPPORTED) /* Drop unused excl arg */
# define mdb_env_close0(env, excl) mdb_env_close1(env)
#endif
static void mdb_env_close0(MDB_env *env, int excl);
@ -2390,12 +2400,16 @@ mdb_page_dirty(MDB_txn *txn, MDB_page *mp)
{
MDB_ID2 mid;
int rc, (*insert)(MDB_ID2L, MDB_ID2 *);
if (txn->mt_flags & MDB_TXN_WRITEMAP) {
#ifdef _WIN32 /* With Windows we always write dirty pages with WriteFile,
* so we always want them ordered */
insert = mdb_mid2l_insert;
#else /* but otherwise with writemaps, we just use msync, we
* don't need the ordering and just append */
if (txn->mt_flags & MDB_TXN_WRITEMAP)
insert = mdb_mid2l_append;
} else {
else
insert = mdb_mid2l_insert;
}
#endif
mid.mid = mp->mp_pgno;
mid.mptr = mp;
rc = insert(txn->mt_u.dirty_list, &mid);
@ -2808,7 +2822,11 @@ mdb_env_sync0(MDB_env *env, int force, pgno_t numpgs)
int rc = 0;
if (env->me_flags & MDB_RDONLY)
return EACCES;
if (force || !F_ISSET(env->me_flags, MDB_NOSYNC)) {
if (force
#ifndef _WIN32 /* Sync is normally achieved in Windows by doing WRITE_THROUGH writes */
|| !(env->me_flags & MDB_NOSYNC)
#endif
) {
if (env->me_flags & MDB_WRITEMAP) {
int flags = ((env->me_flags & MDB_MAPASYNC) && !force)
? MS_ASYNC : MS_SYNC;
@ -3354,9 +3372,9 @@ mdb_txn_end(MDB_txn *txn, unsigned mode)
txn->mt_parent->mt_flags &= ~MDB_TXN_HAS_CHILD;
env->me_pgstate = ((MDB_ntxn *)txn)->mnt_pgstate;
mdb_midl_free(txn->mt_free_pgs);
mdb_midl_free(txn->mt_spill_pgs);
free(txn->mt_u.dirty_list);
}
mdb_midl_free(txn->mt_spill_pgs);
mdb_midl_free(pghead);
}
@ -3653,21 +3671,30 @@ mdb_page_flush(MDB_txn *txn, int keep)
unsigned psize = env->me_psize, j;
int i, pagecount = dl[0].mid, rc;
size_t size = 0;
off_t pos = 0;
MDB_OFF_T pos = 0;
pgno_t pgno = 0;
MDB_page *dp = NULL;
#ifdef _WIN32
OVERLAPPED ov;
OVERLAPPED *ov = env->ov;
MDB_page *wdp;
int async_i = 0;
HANDLE fd = (env->me_flags & MDB_NOSYNC) ? env->me_fd : env->me_ovfd;
#else
struct iovec iov[MDB_COMMIT_PAGES];
HANDLE fd = env->me_fd;
#endif
ssize_t wsize = 0, wres;
off_t wpos = 0, next_pos = 1; /* impossible pos, so pos != next_pos */
MDB_OFF_T wpos = 0, next_pos = 1; /* impossible pos, so pos != next_pos */
int n = 0;
#endif
j = i = keep;
if (env->me_flags & MDB_WRITEMAP) {
if (env->me_flags & MDB_WRITEMAP
#ifdef _WIN32
/* On Windows, we still do writes to the file (with write-through enabled in sync mode),
* as this is faster than FlushViewOfFile/FlushFileBuffers */
&& (env->me_flags & MDB_NOSYNC)
#endif
) {
/* Clear dirty flags */
while (++i <= pagecount) {
dp = dl[i].mptr;
@ -3682,6 +3709,27 @@ mdb_page_flush(MDB_txn *txn, int keep)
goto done;
}
#ifdef _WIN32
if (pagecount - keep >= env->ovs) {
/* Ran out of room in the ov array: allocate a larger one, copy the existing entries (to keep their event handles), and free the previous array */
int ovs = (pagecount - keep) * 1.5; /* provide extra padding to reduce number of re-allocations */
int new_size = ovs * sizeof(OVERLAPPED);
ov = malloc(new_size);
if (ov == NULL)
return ENOMEM;
int previous_size = env->ovs * sizeof(OVERLAPPED);
memcpy(ov, env->ov, previous_size); /* Copy previous OVERLAPPED data to retain event handles */
/* And clear rest of memory */
memset(&ov[env->ovs], 0, new_size - previous_size);
if (env->ovs > 0) {
free(env->ov); /* release previous allocation */
}
env->ov = ov;
env->ovs = ovs;
}
#endif
/* Write the pages */
for (;;) {
if (++i <= pagecount) {
@ -3699,46 +3747,65 @@ mdb_page_flush(MDB_txn *txn, int keep)
size = psize;
if (IS_OVERFLOW(dp)) size *= dp->mp_pages;
}
/* Write up to MDB_COMMIT_PAGES dirty pages at a time. */
if (pos!=next_pos || n==MDB_COMMIT_PAGES || wsize+size>MAX_WRITE
#ifdef _WIN32
else break;
/* Windows actually supports scatter/gather I/O, but only on
/* If writemap is enabled, consecutive page positions imply
* contiguous (mapped) memory.
* Otherwise force write pages one at a time.
* Windows actually supports scatter/gather I/O, but only on
* unbuffered file handles. Since we're relying on the OS page
* cache for all our data, that's self-defeating. So we just
* write pages one at a time. We use the ov structure to set
* the write offset, to at least save the overhead of a Seek
* system call.
*/
DPRINTF(("committing page %"Yu, pgno));
memset(&ov, 0, sizeof(ov));
ov.Offset = pos & 0xffffffff;
ov.OffsetHigh = pos >> 16 >> 16;
if (!WriteFile(env->me_fd, dp, size, NULL, &ov)) {
|| !(env->me_flags & MDB_WRITEMAP)
#endif
) {
if (n) {
retry_write:
/* Write previous page(s) */
DPRINTF(("committing page %"Z"u", pgno));
#ifdef _WIN32
OVERLAPPED *this_ov = &ov[async_i];
/* Clear status, and keep hEvent, we reuse that */
this_ov->Internal = 0;
this_ov->Offset = wpos & 0xffffffff;
this_ov->OffsetHigh = wpos >> 16 >> 16;
if (!F_ISSET(env->me_flags, MDB_NOSYNC) && !this_ov->hEvent) {
HANDLE event = CreateEvent(NULL, FALSE, FALSE, NULL);
if (!event) {
rc = ErrCode();
DPRINTF(("CreateEvent: %s", strerror(rc)));
return rc;
}
this_ov->hEvent = event;
}
if (!WriteFile(fd, wdp, wsize, NULL, this_ov)) {
rc = ErrCode();
if (rc != ERROR_IO_PENDING) {
DPRINTF(("WriteFile: %d", rc));
return rc;
}
}
async_i++;
#else
/* Write up to MDB_COMMIT_PAGES dirty pages at a time. */
if (pos!=next_pos || n==MDB_COMMIT_PAGES || wsize+size>MAX_WRITE) {
if (n) {
retry_write:
/* Write previous page(s) */
#ifdef MDB_USE_PWRITEV
wres = pwritev(env->me_fd, iov, n, wpos);
wres = pwritev(fd, iov, n, wpos);
#else
if (n == 1) {
wres = pwrite(env->me_fd, iov[0].iov_base, wsize, wpos);
wres = pwrite(fd, iov[0].iov_base, wsize, wpos);
} else {
retry_seek:
if (lseek(env->me_fd, wpos, SEEK_SET) == -1) {
if (lseek(fd, wpos, SEEK_SET) == -1) {
rc = ErrCode();
if (rc == EINTR)
goto retry_seek;
DPRINTF(("lseek: %s", strerror(rc)));
return rc;
}
wres = writev(env->me_fd, iov, n);
wres = writev(fd, iov, n);
}
#endif
if (wres != wsize) {
@ -3753,27 +3820,54 @@ retry_seek:
}
return rc;
}
#endif /* _WIN32 */
n = 0;
}
if (i > pagecount)
break;
wpos = pos;
wsize = 0;
#ifdef _WIN32
wdp = dp;
}
#else
}
DPRINTF(("committing page %"Yu, pgno));
next_pos = pos + size;
iov[n].iov_len = size;
iov[n].iov_base = (char *)dp;
#endif /* _WIN32 */
DPRINTF(("committing page %"Yu, pgno));
next_pos = pos + size;
wsize += size;
n++;
#endif /* _WIN32 */
}
#ifdef MDB_VL32
if (pgno > txn->mt_last_pgno)
txn->mt_last_pgno = pgno;
#endif
/* MIPS has cache coherency issues, this is a no-op everywhere else
#ifdef _WIN32
if (!F_ISSET(env->me_flags, MDB_NOSYNC)) {
/* Now wait for all the asynchronous/overlapped sync/write-through writes to complete.
* We start with the last one so that all the others should already be complete and
* we reduce thread suspend/resuming (in practice, typically about 99.5% of writes are
* done after the last write is done) */
rc = 0;
while (--async_i >= 0) {
if (ov[async_i].hEvent) {
if (!GetOverlappedResult(fd, &ov[async_i], &wres, TRUE)) {
rc = ErrCode(); /* Continue on so that all the event signals are reset */
}
}
}
if (rc) { /* any error on GetOverlappedResult, exit now */
return rc;
}
}
#endif /* _WIN32 */
if (!(env->me_flags & MDB_WRITEMAP)) {
/* Don't free pages when using writemap (can only get here in NOSYNC mode in Windows)
* MIPS has cache coherency issues, this is a no-op everywhere else
* Note: for any size >= on-chip cache size, entire on-chip cache is
* flushed.
*/
@ -3789,6 +3883,7 @@ retry_seek:
}
mdb_dpage_free(env, dp);
}
}
done:
i--;
@ -4174,7 +4269,6 @@ mdb_env_init_meta(MDB_env *env, MDB_meta *meta)
if (len == -1 && ErrCode() == EINTR) continue; \
rc = (len >= 0); break; } while(1)
#endif
DPUTS("writing new meta page");
psize = env->me_psize;
@ -4225,7 +4319,7 @@ mdb_env_write_meta(MDB_txn *txn)
MDB_meta meta, metab, *mp;
unsigned flags;
mdb_size_t mapsize;
off_t off;
MDB_OFF_T off;
int rc, len, toggle;
char *ptr;
HANDLE mfd;
@ -4247,6 +4341,7 @@ mdb_env_write_meta(MDB_txn *txn)
if (mapsize < env->me_mapsize)
mapsize = env->me_mapsize;
#ifndef _WIN32 /* We don't want to ever use MSYNC/FlushViewOfFile in Windows */
if (flags & MDB_WRITEMAP) {
mp->mm_mapsize = mapsize;
mp->mm_dbs[FREE_DBI] = txn->mt_dbs[FREE_DBI];
@ -4262,11 +4357,10 @@ mdb_env_write_meta(MDB_txn *txn)
unsigned meta_size = env->me_psize;
rc = (env->me_flags & MDB_MAPASYNC) ? MS_ASYNC : MS_SYNC;
ptr = (char *)mp - PAGEHDRSZ;
#ifndef _WIN32 /* POSIX msync() requires ptr = start of OS page */
/* POSIX msync() requires ptr = start of OS page */
r2 = (ptr - env->me_map) & (env->me_os_psize - 1);
ptr -= r2;
meta_size += r2;
#endif
if (MDB_MSYNC(ptr, meta_size, rc)) {
rc = ErrCode();
goto fail;
@ -4274,6 +4368,7 @@ mdb_env_write_meta(MDB_txn *txn)
}
goto done;
}
#endif
metab.mm_txnid = mp->mm_txnid;
metab.mm_last_pg = mp->mm_last_pg;
@ -4424,7 +4519,19 @@ mdb_env_map(MDB_env *env, void *addr)
alloctype = MEM_RESERVE;
}
/** Some users are afraid of seeing their disk space getting used
* all at once, so the default is now to do incremental file growth.
* But that has a large performance impact, so give the option of
* allocating the file up front.
*/
#ifdef MDB_FIXEDSIZE
LARGE_INTEGER fsize;
fsize.LowPart = msize & 0xffffffff;
fsize.HighPart = msize >> 16 >> 16;
rc = NtCreateSection(&mh, access, NULL, &fsize, secprot, SEC_RESERVE, env->me_fd);
#else
rc = NtCreateSection(&mh, access, NULL, NULL, secprot, SEC_RESERVE, env->me_fd);
#endif
if (rc)
return mdb_nt2win32(rc);
map = addr;
@ -4441,22 +4548,27 @@ mdb_env_map(MDB_env *env, void *addr)
return mdb_nt2win32(rc);
env->me_map = map;
#else
int mmap_flags = MAP_SHARED;
int prot = PROT_READ;
#ifdef MAP_NOSYNC /* Used on FreeBSD */
if (flags & MDB_NOSYNC)
mmap_flags |= MAP_NOSYNC;
#endif
#ifdef MDB_VL32
(void) flags;
env->me_map = mmap(addr, NUM_METAS * env->me_psize, PROT_READ, MAP_SHARED,
env->me_map = mmap(addr, NUM_METAS * env->me_psize, prot, mmap_flags,
env->me_fd, 0);
if (env->me_map == MAP_FAILED) {
env->me_map = NULL;
return ErrCode();
}
#else
int prot = PROT_READ;
if (flags & MDB_WRITEMAP) {
prot |= PROT_WRITE;
if (!(flags & MDB_RAWPART) && ftruncate(env->me_fd, env->me_mapsize) < 0)
return ErrCode();
}
env->me_map = mmap(addr, env->me_mapsize, prot, MAP_SHARED,
env->me_map = mmap(addr, env->me_mapsize, prot, mmap_flags,
env->me_fd, 0);
if (env->me_map == MAP_FAILED) {
env->me_map = NULL;
@ -4649,7 +4761,7 @@ mdb_fname_init(const char *path, unsigned envflags, MDB_name *fname)
/** File type, access mode etc. for #mdb_fopen() */
enum mdb_fopen_type {
#ifdef _WIN32
MDB_O_RDONLY, MDB_O_RDWR, MDB_O_META, MDB_O_COPY, MDB_O_LOCKS
MDB_O_RDONLY, MDB_O_RDWR, MDB_O_OVERLAPPED, MDB_O_META, MDB_O_COPY, MDB_O_LOCKS
#else
/* A comment in mdb_fopen() explains some O_* flag choices. */
MDB_O_RDONLY= O_RDONLY, /**< for RDONLY me_fd */
@ -4710,6 +4822,11 @@ mdb_fopen(const MDB_env *env, MDB_name *fname,
disp = OPEN_ALWAYS;
attrs = FILE_ATTRIBUTE_NORMAL;
switch (which) {
case MDB_O_OVERLAPPED: /* for asynchronous (overlapped) writes in write-through mode */
acc = GENERIC_WRITE;
disp = OPEN_EXISTING;
attrs = FILE_FLAG_OVERLAPPED|FILE_FLAG_WRITE_THROUGH;
break;
case MDB_O_RDONLY: /* read-only datafile */
acc = GENERIC_READ;
disp = OPEN_EXISTING;
@ -4786,7 +4903,7 @@ mdb_env_open2(MDB_env *env, int prev)
env->me_pidquery = PROCESS_QUERY_INFORMATION;
/* Grab functions we need from NTDLL */
if (!NtCreateSection) {
HMODULE h = GetModuleHandle("NTDLL.DLL");
HMODULE h = GetModuleHandleW(L"NTDLL.DLL");
if (!h)
return MDB_PROBLEM;
NtClose = (NtCloseFunc *)GetProcAddress(h, "NtClose");
@ -4799,6 +4916,7 @@ mdb_env_open2(MDB_env *env, int prev)
if (!NtCreateSection)
return MDB_PROBLEM;
}
env->ovs = 0;
#endif /* _WIN32 */
#ifdef BROKEN_FDATASYNC
@ -5214,7 +5332,7 @@ mdb_env_setup_locks(MDB_env *env, MDB_name *fname, int mode, int *excl)
union semun semu;
#endif
int rc;
off_t size, rsize;
MDB_OFF_T size, rsize;
rc = mdb_fopen(env, fname, MDB_O_LOCKS, mode, &env->me_lfd);
if (rc) {
@ -5556,6 +5674,11 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
mode, &env->me_fd);
if (rc)
goto leave;
#ifdef _WIN32
rc = mdb_fopen(env, &fname, MDB_O_OVERLAPPED, mode, &env->me_ovfd);
if (rc)
goto leave;
#endif
if ((flags & (MDB_RDONLY|MDB_NOLOCK)) == MDB_RDONLY) {
rc = mdb_env_setup_locks(env, &fname, mode, &excl);
@ -5564,10 +5687,10 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
}
if ((rc = mdb_env_open2(env, flags & MDB_PREVSNAPSHOT)) == MDB_SUCCESS) {
if (!(flags & (MDB_RDONLY|MDB_WRITEMAP))) {
/* Synchronous fd for meta writes. Needed even with
* MDB_NOSYNC/MDB_NOMETASYNC, in case these get reset.
*/
if (!(flags & (MDB_RDONLY|MDB_WRITEMAP))) {
rc = mdb_fopen(env, &fname, MDB_O_META, mode, &env->me_mfd);
if (rc)
goto leave;
@ -5674,6 +5797,16 @@ mdb_env_close0(MDB_env *env, int excl)
}
if (env->me_mfd != INVALID_HANDLE_VALUE)
(void) close(env->me_mfd);
#ifdef _WIN32
if (env->ovs > 0) {
for (i = 0; i < env->ovs; i++) {
CloseHandle(env->ov[i].hEvent);
}
free(env->ov);
}
if (env->me_ovfd != INVALID_HANDLE_VALUE)
(void) close(env->me_ovfd);
#endif
if (env->me_fd != INVALID_HANDLE_VALUE)
(void) close(env->me_fd);
if (env->me_txns) {
@ -5721,6 +5854,17 @@ mdb_env_close0(MDB_env *env, int excl)
if (excl > 0)
semctl(env->me_rmutex->semid, 0, IPC_RMID);
}
#elif defined(MDB_ROBUST_SUPPORTED)
/* If we have the filelock: If we are the
* only remaining user, clean up robust
* mutexes.
*/
if (excl == 0)
mdb_env_excl_lock(env, &excl);
if (excl > 0) {
pthread_mutex_destroy(env->me_txns->mti_rmutex);
pthread_mutex_destroy(env->me_txns->mti_wmutex);
}
#endif
munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo));
}
@ -6860,16 +7004,12 @@ skip:
if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
mdb_xcursor_init1(mc, leaf);
}
if (data) {
if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS)
return rc;
if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL);
if (rc != MDB_SUCCESS)
return rc;
}
} else if (data) {
if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS)
return rc;
}
MDB_GET_KEY(leaf, key);
@ -6893,7 +7033,8 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
mp = mc->mc_pg[mc->mc_top];
if (mc->mc_db->md_flags & MDB_DUPSORT) {
if ((mc->mc_db->md_flags & MDB_DUPSORT) &&
mc->mc_ki[mc->mc_top] < NUMKEYS(mp)) {
leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
if (op == MDB_PREV || op == MDB_PREV_DUP) {
@ -6935,27 +7076,25 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
DPRINTF(("==> cursor points to page %"Yu" with %u keys, key index %u",
mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top]));
if (!IS_LEAF(mp))
return MDB_CORRUPTED;
if (IS_LEAF2(mp)) {
key->mv_size = mc->mc_db->md_pad;
key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size);
return MDB_SUCCESS;
}
mdb_cassert(mc, IS_LEAF(mp));
leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
mdb_xcursor_init1(mc, leaf);
}
if (data) {
if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS)
return rc;
if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL);
if (rc != MDB_SUCCESS)
return rc;
}
} else if (data) {
if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS)
return rc;
}
MDB_GET_KEY(leaf, key);
@ -7113,9 +7252,6 @@ set1:
if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
mdb_xcursor_init1(mc, leaf);
}
if (data) {
if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
if (op == MDB_SET || op == MDB_SET_KEY || op == MDB_SET_RANGE) {
rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL);
} else {
@ -7130,7 +7266,8 @@ set1:
if (rc != MDB_SUCCESS)
return rc;
}
} else if (op == MDB_GET_BOTH || op == MDB_GET_BOTH_RANGE) {
} else if (data) {
if (op == MDB_GET_BOTH || op == MDB_GET_BOTH_RANGE) {
MDB_val olddata;
MDB_cmp_func *dcmp;
if ((rc = mdb_node_read(mc, leaf, &olddata)) != MDB_SUCCESS)
@ -7188,22 +7325,23 @@ mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data)
mc->mc_ki[mc->mc_top] = 0;
if (IS_LEAF2(mc->mc_pg[mc->mc_top])) {
if ( key ) {
key->mv_size = mc->mc_db->md_pad;
key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], 0, key->mv_size);
}
return MDB_SUCCESS;
}
if (data) {
if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
mdb_xcursor_init1(mc, leaf);
rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL);
if (rc)
return rc;
} else {
} else if (data) {
if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS)
return rc;
}
}
MDB_GET_KEY(leaf, key);
return MDB_SUCCESS;
}
@ -7232,22 +7370,22 @@ mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data)
leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
if (IS_LEAF2(mc->mc_pg[mc->mc_top])) {
if (key) {
key->mv_size = mc->mc_db->md_pad;
key->mv_data = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], key->mv_size);
}
return MDB_SUCCESS;
}
if (data) {
if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
mdb_xcursor_init1(mc, leaf);
rc = mdb_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL);
if (rc)
return rc;
} else {
} else if (data) {
if ((rc = mdb_node_read(mc, leaf, data)) != MDB_SUCCESS)
return rc;
}
}
MDB_GET_KEY(leaf, key);
return MDB_SUCCESS;
@ -7408,6 +7546,7 @@ fetchm:
rc = MDB_NOTFOUND;
break;
}
mc->mc_flags &= ~C_EOF;
{
MDB_node *leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) {
@ -8025,6 +8164,8 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags)
return rc;
mp = mc->mc_pg[mc->mc_top];
if (!IS_LEAF(mp))
return MDB_CORRUPTED;
if (IS_LEAF2(mp))
goto del_key;
leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
@ -9403,14 +9544,17 @@ mdb_cursor_del0(MDB_cursor *mc)
}
}
rc = mdb_rebalance(mc);
if (rc)
goto fail;
if (rc == MDB_SUCCESS) {
/* DB is totally empty now, just bail out.
* Other cursors adjustments were already done
* by mdb_rebalance and aren't needed here.
*/
if (!mc->mc_snum)
if (!mc->mc_snum) {
mc->mc_flags |= C_EOF;
return rc;
}
mp = mc->mc_pg[mc->mc_top];
nkeys = NUMKEYS(mp);
@ -9423,8 +9567,8 @@ mdb_cursor_del0(MDB_cursor *mc)
if (m3->mc_snum < mc->mc_snum)
continue;
if (m3->mc_pg[mc->mc_top] == mp) {
/* if m3 points past last node in page, find next sibling */
if (m3->mc_ki[mc->mc_top] >= mc->mc_ki[mc->mc_top]) {
/* if m3 points past last node in page, find next sibling */
if (m3->mc_ki[mc->mc_top] >= nkeys) {
rc = mdb_cursor_sibling(m3, 1);
if (rc == MDB_NOTFOUND) {
@ -9432,8 +9576,10 @@ mdb_cursor_del0(MDB_cursor *mc)
rc = MDB_SUCCESS;
continue;
}
if (rc)
goto fail;
}
if (mc->mc_db->md_flags & MDB_DUPSORT) {
if (m3->mc_xcursor && !(m3->mc_flags & C_EOF)) {
MDB_node *node = NODEPTR(m3->mc_pg[m3->mc_top], m3->mc_ki[m3->mc_top]);
/* If this node has dupdata, it may need to be reinited
* because its data has moved.
@ -9447,16 +9593,19 @@ mdb_cursor_del0(MDB_cursor *mc)
m3->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(node);
} else {
mdb_xcursor_init1(m3, node);
m3->mc_xcursor->mx_cursor.mc_flags |= C_DEL;
rc = mdb_cursor_first(&m3->mc_xcursor->mx_cursor, NULL, NULL);
if (rc)
goto fail;
}
}
m3->mc_xcursor->mx_cursor.mc_flags |= C_DEL;
}
}
}
}
mc->mc_flags |= C_DEL;
}
fail:
if (rc)
mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
return rc;

@ -1,5 +1,5 @@
.TH MDB_COPY 1 "2017/07/31" "LMDB 0.9.70"
.\" Copyright 2012-2019 Howard Chu, Symas Corp. All Rights Reserved.
.\" Copyright 2012-2021 Howard Chu, Symas Corp. All Rights Reserved.
.\" Copying restrictions apply. See COPYRIGHT/LICENSE.
.SH NAME
mdb_copy \- LMDB environment copy tool

@ -1,6 +1,6 @@
/* mdb_copy.c - memory-mapped database backup tool */
/*
* Copyright 2012-2018 Howard Chu, Symas Corp.
* Copyright 2012-2021 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

@ -1,5 +1,5 @@
.TH MDB_DROP 1 "2017/11/19" "LMDB 0.9.70"
.\" Copyright 2014-2018 Howard Chu, Symas Corp. All Rights Reserved.
.\" Copyright 2014-2021 Howard Chu, Symas Corp. All Rights Reserved.
.\" Copying restrictions apply. See COPYRIGHT/LICENSE.
.SH NAME
mdb_drop \- LMDB database delete tool

@ -1,6 +1,6 @@
/* mdb_drop.c - memory-mapped database delete tool */
/*
* Copyright 2016-2018 Howard Chu, Symas Corp.
* Copyright 2016-2021 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

@ -1,5 +1,5 @@
.TH MDB_DUMP 1 "2017/07/31" "LMDB 0.9.70"
.\" Copyright 2014-2017 Howard Chu, Symas Corp. All Rights Reserved.
.\" Copyright 2014-2021 Howard Chu, Symas Corp. All Rights Reserved.
.\" Copying restrictions apply. See COPYRIGHT/LICENSE.
.SH NAME
mdb_dump \- LMDB environment export tool

@ -1,6 +1,6 @@
/* mdb_dump.c - memory-mapped database dump tool */
/*
* Copyright 2011-2018 Howard Chu, Symas Corp.
* Copyright 2011-2021 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -181,7 +181,7 @@ int main(int argc, char *argv[])
* -V: print version and exit
* (default) dump only the main DB
*/
while ((i = getopt(argc, argv, "af:lnps:V")) != EOF) {
while ((i = getopt(argc, argv, "af:lnps:vV")) != EOF) {
switch(i) {
case 'V':
printf("%s\n", MDB_VERSION_STRING);
@ -189,7 +189,7 @@ int main(int argc, char *argv[])
break;
case 'l':
list = 1;
/*FALLTHROUGH*/;
/*FALLTHROUGH*/
case 'a':
if (subname)
usage(prog);

@ -1,5 +1,5 @@
.TH MDB_LOAD 1 "2015/09/30" "LMDB 0.9.17"
.\" Copyright 2014-2018 Howard Chu, Symas Corp. All Rights Reserved.
.\" Copyright 2014-2021 Howard Chu, Symas Corp. All Rights Reserved.
.\" Copying restrictions apply. See COPYRIGHT/LICENSE.
.SH NAME
mdb_load \- LMDB environment import tool

@ -1,6 +1,6 @@
/* mdb_load.c - memory-mapped database load tool */
/*
* Copyright 2011-2018 Howard Chu, Symas Corp.
* Copyright 2011-2021 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

@ -1,5 +1,5 @@
.TH MDB_STAT 1 "2017/07/31" "LMDB 0.9.70"
.\" Copyright 2012-2019 Howard Chu, Symas Corp. All Rights Reserved.
.\" Copyright 2012-2021 Howard Chu, Symas Corp. All Rights Reserved.
.\" Copying restrictions apply. See COPYRIGHT/LICENSE.
.SH NAME
mdb_stat \- LMDB environment status tool

@ -1,6 +1,6 @@
/* mdb_stat.c - memory-mapped database status tool */
/*
* Copyright 2011-2018 Howard Chu, Symas Corp.
* Copyright 2011-2021 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -65,7 +65,7 @@ int main(int argc, char *argv[])
* -V: print version and exit
* (default) print stat of only the main DB
*/
while ((i = getopt(argc, argv, "Vaefnrs:")) != EOF) {
while ((i = getopt(argc, argv, "Vaefnrs:v")) != EOF) {
switch(i) {
case 'V':
printf("%s\n", MDB_VERSION_STRING);

@ -3,8 +3,8 @@
/* $OpenLDAP$ */
/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
*
* Copyright 2000-2019 The OpenLDAP Foundation.
* Portions Copyright 2001-2018 Howard Chu, Symas Corp.
* Copyright 2000-2021 The OpenLDAP Foundation.
* Portions Copyright 2001-2021 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

@ -11,8 +11,8 @@
/* $OpenLDAP$ */
/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
*
* Copyright 2000-2019 The OpenLDAP Foundation.
* Portions Copyright 2001-2019 Howard Chu, Symas Corp.
* Copyright 2000-2021 The OpenLDAP Foundation.
* Portions Copyright 2001-2021 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

@ -1,6 +1,6 @@
/* mtest.c - memory-mapped database tester/toy */
/*
* Copyright 2011-2018 Howard Chu, Symas Corp.
* Copyright 2011-2021 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

@ -1,6 +1,6 @@
/* mtest2.c - memory-mapped database tester/toy */
/*
* Copyright 2011-2018 Howard Chu, Symas Corp.
* Copyright 2011-2021 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

@ -1,6 +1,6 @@
/* mtest3.c - memory-mapped database tester/toy */
/*
* Copyright 2011-2018 Howard Chu, Symas Corp.
* Copyright 2011-2021 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

@ -1,6 +1,6 @@
/* mtest4.c - memory-mapped database tester/toy */
/*
* Copyright 2011-2018 Howard Chu, Symas Corp.
* Copyright 2011-2021 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

@ -1,6 +1,6 @@
/* mtest5.c - memory-mapped database tester/toy */
/*
* Copyright 2011-2018 Howard Chu, Symas Corp.
* Copyright 2011-2021 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

@ -1,6 +1,6 @@
/* mtest6.c - memory-mapped database tester/toy */
/*
* Copyright 2011-2018 Howard Chu, Symas Corp.
* Copyright 2011-2021 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

@ -3,7 +3,7 @@
* Do a line-by-line comparison of this and sample-mdb.txt
*/
/*
* Copyright 2012-2018 Howard Chu, Symas Corp.
* Copyright 2012-2021 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

@ -3,7 +3,7 @@
* Do a line-by-line comparison of this and sample-bdb.txt
*/
/*
* Copyright 2012-2018 Howard Chu, Symas Corp.
* Copyright 2012-2021 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without

Loading…
Cancel
Save