aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--TODO5
-rw-r--r--configure.ac20
-rw-r--r--src/cmdline.cpp6
-rw-r--r--src/memory.cpp174
-rw-r--r--src/memory.h2
5 files changed, 131 insertions, 76 deletions
diff --git a/TODO b/TODO
index 9230fed..ff1568a 100644
--- a/TODO
+++ b/TODO
@@ -250,9 +250,8 @@ Features:
Optimizations:
* The Windows-specific memory limiting using GetProcessMemoryInfo()
- is very slow. Perhaps a similar approach to the generic UNIX
- malloc() hooking can be implemented and memory_usage counted
- with _msize() from MSVCRT.
+ is very slow. Perhaps malloc() hooking can be implemented there,
+ using _msize() to measure the memory required by individual chunks.
This must be benchmarked.
* Add G_UNLIKELY to all error throws.
* Instead of using RTTI to implement the immediate editing command
diff --git a/configure.ac b/configure.ac
index dc8b1d2..e3fd728 100644
--- a/configure.ac
+++ b/configure.ac
@@ -163,22 +163,24 @@ AC_CHECK_FUNCS([memset setlocale strchr strrchr fstat], , [
# and UNIXoid systems, so that G_OS_UNIX is sufficient
# to test for them.
# FIXME: Perhaps it would be more elegant to check whether
-# glib defines G_OS_UNIX||G_OS_HAIKU instead...
+# glib defines G_OS_UNIX || G_OS_HAIKU instead...
case $host in
*-*-linux* | *-*-*bsd* | *-*-darwin* | *-*-cygwin* | *-*-haiku*)
- AC_SEARCH_LIBS([dlsym], [dl], , [
- AC_MSG_ERROR([Required function dlsym() not found!])
- ])
- AC_CHECK_FUNCS([realpath fchown dup dup2 dlsym], , [
+ AC_CHECK_FUNCS([realpath fchown dup dup2], , [
AC_MSG_ERROR([Missing libc function])
])
;;
esac
-# Check for optional libc features.
-# Will probably only be found on Linux/glibc or BSD.
-AC_CHECK_HEADERS([malloc.h malloc_np.h])
-AC_CHECK_FUNCS([malloc_trim malloc_usable_size])
+# Check for optional glibc features.
+# Will probably only be found on Linux/glibc.
+AC_CHECK_HEADERS([malloc.h])
+AC_CHECK_FUNCS([malloc_trim mallinfo])
+
+# jemalloc-specific functions.
+# Will probably only be found on FreeBSD.
+AC_CHECK_HEADERS([malloc_np.h])
+AC_CHECK_FUNCS([mallctlnametomib mallctlbymib])
#
# Config options
diff --git a/src/cmdline.cpp b/src/cmdline.cpp
index f1bbe83..ffe44d6 100644
--- a/src/cmdline.cpp
+++ b/src/cmdline.cpp
@@ -264,9 +264,9 @@ Cmdline::keypress(gchar key)
* Glibc/Linux-only optimization: Undo stacks can grow very
* large - sometimes large enough to make the system
* swap and become unresponsive.
- * This will often reduce the amount of memory previously
- * freed that's still allocated to the program immediately
- * when the command-line is terminated:
+ * This shrinks the program break after lots of memory has
+ * been freed, reducing the virtual memory size and aiding
+ * in recovering from swapping issues.
*/
malloc_trim(0);
#endif
diff --git a/src/memory.cpp b/src/memory.cpp
index e989210..15f8820 100644
--- a/src/memory.cpp
+++ b/src/memory.cpp
@@ -19,16 +19,14 @@
#include "config.h"
#endif
-#include <stdlib.h>
+#include <stdint.h>
+
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#ifdef HAVE_MALLOC_NP_H
#include <malloc_np.h>
#endif
-#ifdef HAVE_DLSYM
-#include <dlfcn.h>
-#endif
#include <glib.h>
@@ -48,79 +46,133 @@ namespace SciTECO {
MemoryLimit memlimit;
-#if defined(HAVE_DLSYM) && defined(HAVE_MALLOC_USABLE_SIZE)
/*
- * This should work on most UNIXoid systems.
+ * A discussion of memory measurement techniques on Linux
+ * and UNIXoid operating systems is in order, since this
+ * problem turned out to be rather tricky.
*
- * We "hook" into the malloc-functions and count the
- * "usable" size of each memory block (which may be
- * more than what has been requested).
- * This effectively counts all allocations by malloc(),
- * g_malloc() and any C++ new() everywhere, has minimal overhead and
- * is much faster than the Linux-specific mallinfo().
+ * - UNIX has resource limits, which could be used to enforce
+ * the memory limit, but in case they are hit, malloc()
+ * will return NULL, so g_malloc() would abort().
+ * Wrapping malloc() to work around that has the same
+ * problems described below.
+ * - glibc has malloc hooks, but they are non-portable and
+ * deprecated.
+ * - It is possible to effectively wrap malloc() by overriding
+ * the libc's implementation, which will even work when
+ * statically linking in libc since malloc() is usually
+ * declared `weak`.
+ * - When wrapping malloc(), malloc_usable_size() could be
+ * used to count the memory consumption.
+ * This is libc-specific, but available at least in
+ * glibc and jemalloc (FreeBSD).
+ * - glibc exports symbols for the original malloc() implementation
+ * like __libc_malloc() that could be used for wrapping.
+ * This is undocumented and libc-specific, though.
+ * - The GNU ld --wrap option allows us to intercept calls,
+ * but obviously won't work for shared libraries.
+ * - The portable dlsym() could be used to look up the original
+ * library symbol, but it may and does call malloc functions,
+ * eg. calloc() on glibc.
+ * In other words, there is no way to portably and reliably
+ * wrap malloc() and friends when linking dynamically.
+ * - Another difficulty is that, when free() is overridden, every
+ * function that can __independently__ allocate memory that
+ * can be passed to free() must also be overridden.
+ * Otherwise the measurement is not precise and there can even
+ * be underruns. Thus we'd have to guard against underruns.
+ * - malloc() and friends are MT-safe, so any replacement function
+ * would have to be MT-safe as well to avoid memory corruption.
+ * E.g. even in single-threaded builds, glib might use
+ * threads internally.
+ * - There is also the old-school technique of calculating the size
+ * of the program break, ie. the effective size of the DATA segment.
+ * This works under the assumption that all allocations are
+ * performed by extending the program break, as is __traditionally__
+ * done by malloc() and friends.
+ * - Unfortunately, modern malloc() implementations sometimes
+ * mmap() memory, especially for large allocations.
+ * SciTECO mostly allocates small chunks.
+ * Unfortunately, some malloc implementations like jemalloc
+ * only claim memory using mmap(), thus rendering sbrk(0)
+ * useless.
+ * - Furthermore, some malloc-implementations like glibc will
+ * only shrink the program break when told so explicitly
+ * using malloc_trim(0).
+ * - The sbrk(0) method thus depends on implementation details
+ * of the libc.
+ * - For these reasons, we rather stick to non-portable,
+ * libc-specific, perhaps slow, but stable techniques to measure
+ * memory usage.
+ * Implementations for yet unsupported UNIXoid systems might
+ * still want to pick up any of the ideas above, if they can be
+ * proven to work well on those platforms.
*/
-static gsize memory_usage = 0;
+#ifdef HAVE_MALLINFO
+/*
+ * Linux/glibc-specific implementation.
+ * Unfortunately, this slows things down when called frequently.
+ */
gsize
MemoryLimit::get_usage(void)
{
- return memory_usage;
-}
-
-extern "C" {
-
-void *
-malloc(size_t size)
-{
- typedef void *(*malloc_cb)(size_t);
- static malloc_cb libc_malloc = NULL;
- void *ret;
+ struct mallinfo info = mallinfo();
- if (G_UNLIKELY(!libc_malloc))
- libc_malloc = (malloc_cb)dlsym(RTLD_NEXT, "malloc");
-
- ret = libc_malloc(size);
- if (G_LIKELY(ret))
- memory_usage += malloc_usable_size(ret);
-
- return ret;
+ /*
+ * NOTE: `uordblks` is an int and thus prone
+ * to wrap-around issues.
+ *
+ * Unfortunately, the only other machine readable
+ * alternative is malloc_info() which prints
+ * into a FILE * stream [sic!] and is unspeakably
+ * slow even if writing to an unbuffered fmemopen()ed
+ * stream.
+ */
+ return info.uordblks;
}
-void *
-realloc(void *ptr, size_t size)
+#elif defined(HAVE_MALLCTLNAMETOMIB) && defined(HAVE_MALLCTLBYMIB)
+/*
+ * FreeBSD/jemalloc-specific implementation.
+ * Unfortunately, this slows things down when called frequently.
+ */
+
+gsize
+MemoryLimit::get_usage(void)
{
- typedef void *(*realloc_cb)(void *, size_t);
- static realloc_cb libc_realloc = NULL;
+ static size_t epoch_mib[1] = {0};
+ static size_t stats_allocated_mib[2] = {0};
- if (G_UNLIKELY(!libc_realloc))
- libc_realloc = (realloc_cb)dlsym(RTLD_NEXT, "realloc");
+ uint64_t epoch = 1;
+ size_t stats_allocated;
+ size_t stats_allocated_len = sizeof(stats_allocated);
- if (ptr)
- memory_usage -= malloc_usable_size(ptr);
- ptr = libc_realloc(ptr, size);
- if (G_LIKELY(ptr))
- memory_usage += malloc_usable_size(ptr);
+ if (G_UNLIKELY(!epoch_mib[0])) {
+ size_t len;
+ int rc;
- return ptr;
-}
+ len = G_N_ELEMENTS(epoch_mib);
+ rc = mallctlnametomib("epoch", epoch_mib, &len);
+ g_assert(rc == 0 && len == G_N_ELEMENTS(epoch_mib));
-void
-free(void *ptr)
-{
- typedef void (*free_cb)(void *);
- static free_cb libc_free = NULL;
+ len = G_N_ELEMENTS(stats_allocated_mib);
+ rc = mallctlnametomib("stats.allocated",
+ stats_allocated_mib, &len);
+ g_assert(rc == 0 && len == G_N_ELEMENTS(stats_allocated_mib));
+ }
- if (G_UNLIKELY(!libc_free))
- libc_free = (free_cb)dlsym(RTLD_NEXT, "free");
+ /* refresh statistics */
+ mallctlbymib(epoch_mib, G_N_ELEMENTS(epoch_mib),
+ NULL, NULL, &epoch, sizeof(epoch));
+ /* query the total number of allocated bytes */
+ mallctlbymib(stats_allocated_mib, G_N_ELEMENTS(stats_allocated_mib),
+ &stats_allocated, &stats_allocated_len, NULL, 0);
- if (ptr)
- memory_usage -= malloc_usable_size(ptr);
- libc_free(ptr);
+ return stats_allocated;
}
-} /* extern "C" */
-
#elif defined(G_OS_WIN32)
/*
* Uses the Windows-specific GetProcessMemoryInfo(),
@@ -128,8 +180,10 @@ free(void *ptr)
*
* FIXME: Unfortunately, this is much slower than the portable
* fallback implementation.
- * We should try and benchmark a similar approach to the
- * UNIX implementation above using MSVCRT-specific APIs (_minfo()).
+ * It may be possible to overwrite malloc() and friends,
+ * counting the chunks with the MSVCRT-specific _msize().
+ * Since we will always run against MSVCRT, the disadvantages
+ * discussed above for the UNIX-case may not be important.
*/
gsize
@@ -173,7 +227,7 @@ MemoryLimit::get_usage(void)
return memory_usage;
}
-#endif /* (!HAVE_DLSYM || !HAVE_MALLOC_USABLE_SIZE) && !G_OS_WIN32 */
+#endif /* MEMORY_USAGE_FALLBACK */
void
MemoryLimit::set_limit(gsize new_limit)
diff --git a/src/memory.h b/src/memory.h
index 7b1591d..903fd16 100644
--- a/src/memory.h
+++ b/src/memory.h
@@ -76,7 +76,7 @@ public:
MemoryLimit() : limit(MEMORY_LIMIT_DEFAULT) {}
- gsize get_usage(void);
+ static gsize get_usage(void);
void set_limit(gsize new_limit = 0);