diff options
-rw-r--r-- | TODO | 5 | ||||
-rw-r--r-- | configure.ac | 20 | ||||
-rw-r--r-- | src/cmdline.cpp | 6 | ||||
-rw-r--r-- | src/memory.cpp | 174 | ||||
-rw-r--r-- | src/memory.h | 2 |
5 files changed, 131 insertions, 76 deletions
@@ -250,9 +250,8 @@ Features: Optimizations: * The Windows-specific memory limiting using GetProcessMemoryInfo() - is very slow. Perhaps a similar approach to the generic UNIX - malloc() hooking can be implemented and memory_usage counted - with _msize() from MSVCRT. + is very slow. Perhaps malloc() hooking can be implemented there, + using _msize() to measure the memory required by individual chunks. This must be benchmarked. * Add G_UNLIKELY to all error throws. * Instead of using RTTI to implement the immediate editing command diff --git a/configure.ac b/configure.ac index dc8b1d2..e3fd728 100644 --- a/configure.ac +++ b/configure.ac @@ -163,22 +163,24 @@ AC_CHECK_FUNCS([memset setlocale strchr strrchr fstat], , [ # and UNIXoid systems, so that G_OS_UNIX is sufficient # to test for them. # FIXME: Perhaps it would be more elegant to check whether -# glib defines G_OS_UNIX||G_OS_HAIKU instead... +# glib defines G_OS_UNIX || G_OS_HAIKU instead... case $host in *-*-linux* | *-*-*bsd* | *-*-darwin* | *-*-cygwin* | *-*-haiku*) - AC_SEARCH_LIBS([dlsym], [dl], , [ - AC_MSG_ERROR([Required function dlsym() not found!]) - ]) - AC_CHECK_FUNCS([realpath fchown dup dup2 dlsym], , [ + AC_CHECK_FUNCS([realpath fchown dup dup2], , [ AC_MSG_ERROR([Missing libc function]) ]) ;; esac -# Check for optional libc features. -# Will probably only be found on Linux/glibc or BSD. -AC_CHECK_HEADERS([malloc.h malloc_np.h]) -AC_CHECK_FUNCS([malloc_trim malloc_usable_size]) +# Check for optional glibc features. +# Will probably only be found on Linux/glibc. +AC_CHECK_HEADERS([malloc.h]) +AC_CHECK_FUNCS([malloc_trim mallinfo]) + +# jemalloc-specific functions. +# Will probably only be found on FreeBSD. 
+AC_CHECK_HEADERS([malloc_np.h]) +AC_CHECK_FUNCS([mallctlnametomib mallctlbymib]) # # Config options diff --git a/src/cmdline.cpp b/src/cmdline.cpp index f1bbe83..ffe44d6 100644 --- a/src/cmdline.cpp +++ b/src/cmdline.cpp @@ -264,9 +264,9 @@ Cmdline::keypress(gchar key) * Glibc/Linux-only optimization: Undo stacks can grow very * large - sometimes large enough to make the system * swap and become unresponsive. - * This will often reduce the amount of memory previously - * freed that's still allocated to the program immediately - * when the command-line is terminated: + * This shrinks the program break after lots of memory has + * been freed, reducing the virtual memory size and aiding + * in recovering from swapping issues. */ malloc_trim(0); #endif diff --git a/src/memory.cpp b/src/memory.cpp index e989210..15f8820 100644 --- a/src/memory.cpp +++ b/src/memory.cpp @@ -19,16 +19,14 @@ #include "config.h" #endif -#include <stdlib.h> +#include <stdint.h> + #ifdef HAVE_MALLOC_H #include <malloc.h> #endif #ifdef HAVE_MALLOC_NP_H #include <malloc_np.h> #endif -#ifdef HAVE_DLSYM -#include <dlfcn.h> -#endif #include <glib.h> @@ -48,79 +46,133 @@ namespace SciTECO { MemoryLimit memlimit; -#if defined(HAVE_DLSYM) && defined(HAVE_MALLOC_USABLE_SIZE) /* - * This should work on most UNIXoid systems. + * A discussion of memory measurement techniques on Linux + * and UNIXoid operating systems is in order, since this + * problem turned out to be rather tricky. * - * We "hook" into the malloc-functions and count the - * "usable" size of each memory block (which may be - * more than what has been requested). - * This effectively counts all allocations by malloc(), - * g_malloc() and any C++ new() everywhere, has minimal overhead and - * is much faster than the Linux-specific mallinfo(). + * - UNIX has resource limits, which could be used to enforce + * the memory limit, but in case they are hit, malloc() + * will return NULL, so g_malloc() would abort(). 
+ * Wrapping malloc() to work around that has the same + * problems described below. + * - glibc has malloc hooks, but they are non-portable and + * deprecated. + * - It is possible to effectively wrap malloc() by overriding + * the libc's implementation, which will even work when + * statically linking in libc since malloc() is usually + * declared `weak`. + * - When wrapping malloc(), malloc_usable_size() could be + * used to count the memory consumption. + * This is libc-specific, but available at least in + * glibc and jemalloc (FreeBSD). + * - glibc exports symbols for the original malloc() implementation + * like __libc_malloc() that could be used for wrapping. + * This is undocumented and libc-specific, though. + * - The GNU ld --wrap option allows us to intercept calls, + * but obviously won't work for shared libraries. + * - The portable dlsym() could be used to look up the original + * library symbol, but it may and does call malloc functions, + * e.g. calloc() on glibc. + * In other words, there is no way to portably and reliably + * wrap malloc() and friends when linking dynamically. + * - Another difficulty is that, when free() is overridden, every + * function that can __independently__ allocate memory that + * can be passed to free() must also be overridden. + * Otherwise the measurement is not precise and there can even + * be underruns. Thus we'd have to guard against underruns. + * - malloc() and friends are MT-safe, so any replacement function + * would have to be MT-safe as well to avoid memory corruption. + * E.g. even in single-threaded builds, glib might use + * threads internally. + * - There is also the old-school technique of calculating the size + * of the program break, i.e. the effective size of the DATA segment. + * This works under the assumption that all allocations are + * performed by extending the program break, as is __traditionally__ + * done by malloc() and friends. 
+ * - Unfortunately, modern malloc() implementations sometimes + * mmap() memory, especially for large allocations. + * SciTECO mostly allocates small chunks. + * Unfortunately, some malloc implementations like jemalloc + * only claim memory using mmap(), thus rendering sbrk(0) + * useless. + * - Furthermore, some malloc-implementations like glibc will + * only shrink the program break when told so explicitly + * using malloc_trim(0). + * - The sbrk(0) method thus depends on implementation details + * of the libc. + * - For these reasons, we rather stick to non-portable, + * libc-specific, perhaps slow, but stable techniques to measure + * memory usage. + * Implementations for yet unsupported UNIXoid systems might + * still want to pick up any of the ideas above, if they can be + * proven to work well on those platforms. */ -static gsize memory_usage = 0; +#ifdef HAVE_MALLINFO +/* + * Linux/glibc-specific implementation. + * Unfortunately, this slows things down when called frequently. + */ gsize MemoryLimit::get_usage(void) { - return memory_usage; -} - -extern "C" { - -void * -malloc(size_t size) -{ - typedef void *(*malloc_cb)(size_t); - static malloc_cb libc_malloc = NULL; - void *ret; + struct mallinfo info = mallinfo(); - if (G_UNLIKELY(!libc_malloc)) - libc_malloc = (malloc_cb)dlsym(RTLD_NEXT, "malloc"); - - ret = libc_malloc(size); - if (G_LIKELY(ret)) - memory_usage += malloc_usable_size(ret); - - return ret; + /* + * NOTE: `uordblks` is an int and thus prone + * to wrap-around issues. + * + * Unfortunately, the only other machine readable + * alternative is malloc_info() which prints + * into a FILE * stream [sic!] and is unspeakably + * slow even if writing to an unbuffered fmemopen()ed + * stream. + */ + return info.uordblks; } -void * -realloc(void *ptr, size_t size) +#elif defined(HAVE_MALLCTLNAMETOMIB) && defined(HAVE_MALLCTLBYMIB) +/* + * FreeBSD/jemalloc-specific implementation. + * Unfortunately, this slows things down when called frequently. 
+ */ + +gsize +MemoryLimit::get_usage(void) { - typedef void *(*realloc_cb)(void *, size_t); - static realloc_cb libc_realloc = NULL; + static size_t epoch_mib[1] = {0}; + static size_t stats_allocated_mib[2] = {0}; - if (G_UNLIKELY(!libc_realloc)) - libc_realloc = (realloc_cb)dlsym(RTLD_NEXT, "realloc"); + uint64_t epoch = 1; + size_t stats_allocated; + size_t stats_allocated_len = sizeof(stats_allocated); - if (ptr) - memory_usage -= malloc_usable_size(ptr); - ptr = libc_realloc(ptr, size); - if (G_LIKELY(ptr)) - memory_usage += malloc_usable_size(ptr); + if (G_UNLIKELY(!epoch_mib[0])) { + size_t len; + int rc; - return ptr; -} + len = G_N_ELEMENTS(epoch_mib); + rc = mallctlnametomib("epoch", epoch_mib, &len); + g_assert(rc == 0 && len == G_N_ELEMENTS(epoch_mib)); -void -free(void *ptr) -{ - typedef void (*free_cb)(void *); - static free_cb libc_free = NULL; + len = G_N_ELEMENTS(stats_allocated_mib); + rc = mallctlnametomib("stats.allocated", + stats_allocated_mib, &len); + g_assert(rc == 0 && len == G_N_ELEMENTS(stats_allocated_mib)); + } - if (G_UNLIKELY(!libc_free)) - libc_free = (free_cb)dlsym(RTLD_NEXT, "free"); + /* refresh statistics */ + mallctlbymib(epoch_mib, G_N_ELEMENTS(epoch_mib), + NULL, NULL, &epoch, sizeof(epoch)); + /* query the total number of allocated bytes */ + mallctlbymib(stats_allocated_mib, G_N_ELEMENTS(stats_allocated_mib), + &stats_allocated, &stats_allocated_len, NULL, 0); - if (ptr) - memory_usage -= malloc_usable_size(ptr); - libc_free(ptr); + return stats_allocated; } -} /* extern "C" */ - #elif defined(G_OS_WIN32) /* * Uses the Windows-specific GetProcessMemoryInfo(), @@ -128,8 +180,10 @@ free(void *ptr) * * FIXME: Unfortunately, this is much slower than the portable * fallback implementation. - * We should try and benchmark a similar approach to the - * UNIX implementation above using MSVCRT-specific APIs (_minfo()). 
+ * It may be possible to override malloc() and friends, + * counting the chunks with the MSVCRT-specific _msize(). + * Since we will always run against MSVCRT, the disadvantages + * discussed above for the UNIX-case may not be important. */ gsize @@ -173,7 +227,7 @@ MemoryLimit::get_usage(void) return memory_usage; } -#endif /* (!HAVE_DLSYM || !HAVE_MALLOC_USABLE_SIZE) && !G_OS_WIN32 */ +#endif /* MEMORY_USAGE_FALLBACK */ void MemoryLimit::set_limit(gsize new_limit) diff --git a/src/memory.h b/src/memory.h index 7b1591d..903fd16 100644 --- a/src/memory.h +++ b/src/memory.h @@ -76,7 +76,7 @@ public: MemoryLimit() : limit(MEMORY_LIMIT_DEFAULT) {} - gsize get_usage(void); + static gsize get_usage(void); void set_limit(gsize new_limit = 0); |