diff options
Diffstat (limited to 'src/memory.c')
-rw-r--r-- | src/memory.c | 672 |
1 files changed, 672 insertions, 0 deletions
diff --git a/src/memory.c b/src/memory.c new file mode 100644 index 0000000..f8942fd --- /dev/null +++ b/src/memory.c @@ -0,0 +1,672 @@ +/* + * Copyright (C) 2012-2021 Robin Haberkorn + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#define USE_DL_PREFIX /* for dlmalloc */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> + +#ifdef HAVE_MALLOC_H +#include <malloc.h> +#endif +#ifdef HAVE_MALLOC_NP_H +#include <malloc_np.h> +#endif + +#ifdef HAVE_WINDOWS_H +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#include <psapi.h> +#endif + +/* + * For task_info() on OS X. + */ +#ifdef HAVE_MACH_MACH_H +#include <mach/mach.h> +#endif +#ifdef HAVE_MACH_MESSAGE_H +#include <mach/message.h> +#endif +#ifdef HAVE_MACH_KERN_RETURN_H +#include <mach/kern_return.h> +#endif +#ifdef HAVE_MACH_TASK_INFO_H +#include <mach/task_info.h> +#endif + +/* + * For sysctl() on FreeBSD. + */ +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif +#ifdef HAVE_SYS_SYSCTL_H +#include <sys/sysctl.h> +#endif + +/* + * For sysconf() on Linux. + */ +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#endif +#ifdef HAVE_SYS_RESOURCE_H +#include <sys/resource.h> +#endif + +#include <glib.h> + +/* + * For open() (currently only on Linux). + */ +#ifdef G_OS_UNIX +#include <sys/stat.h> +#include <fcntl.h> +#endif + +#include "sciteco.h" +#include "error.h" +#include "undo.h" +#include "memory.h" + +/** + * @file + * Memory measurement and limiting. + * + * A discussion of memory measurement techniques on Linux + * and UNIXoid operating systems is in order, since this + * problem turned out to be rather tricky. + * + * @par Size of the program break + * There is also the old-school technique of calculating the size + * of the program break, ie. the effective size of the DATA segment. + * This works under the assumption that all allocations are + * performed by extending the program break, as is __traditionally__ + * done by malloc() and friends. + * + * - Unfortunately, modern malloc() implementations sometimes + * mmap() memory, especially for large allocations. + * SciTECO mostly allocates small chunks. + * Unfortunately, some malloc implementations like jemalloc + * only claim memory using mmap(), thus rendering sbrk(0) + * useless. + * - Furthermore, some malloc-implementations like glibc will + * only shrink the program break when told so explicitly + * using malloc_trim(0). + * - The sbrk(0) method thus depends on implementation details + * of the libc. + * - However, this might be a suitable backend on old UNIX-platforms + * or a as a fallback for teco_memory_get_usage(). + * + * @par Resource limits + * UNIX has resource limits, which could be used to enforce + * the memory limit, but in case they are hit, malloc() + * will return NULL, so g_malloc() would abort(). + * Wrapping malloc() to work around that has the same + * problems described below. + * + * @par Hooking malloc() + * malloc_usable_size() could be used to count the memory + * consumption by updating a counter after every malloc(), + * realloc() and free(). + * malloc_usable_size() is libc-specific, but available at least in + * glibc and jemalloc (FreeBSD). Windows (MSVCRT) has `_msize()`. + * This would require overwriting or hooking all calls to + * malloc() and friends, though. + * For all other platforms, we'd have to rely on writing the + * heap object size into every heap object, thus wasting + * one word per heap object. + * + * - glibc has malloc hooks, but they are non-portable and + * deprecated. + * - It is possible to effectively wrap malloc() by overriding + * the libc's implementation, which will even work when + * statically linking in libc since malloc() is usually + * declared `weak`. + * This however does probably not work on all platforms and + * means you need to know the original function (pointers). + * It should work sufficiently when linking everything statically. + * - glibc exports symbols for the original malloc() implementation + * like `__libc_malloc()` that could be used for wrapping. + * This is undocumented and libc-specific, though. + * - The GNU ld --wrap option allows us to intercept calls, + * but obviously won't work for shared libraries. + * - The portable dlsym() could be used to look up the original + * library symbol, but it may and does call malloc functions, + * eg. calloc() on glibc. + * Some people work around this using bootstrap makeshift allocators + * used only during dlsym(). + * __In other words, there is no way to portably and reliably + * wrap malloc() and friends when linking dynamically.__ + * - Another difficulty is that, when free() is overridden, every + * function that can __independently__ allocate memory that + * can be passed to free() must also be overridden. + * This is impossible to know without making assumptions about the + * malloc implementation used. + * Otherwise the measurement is not precise and there can even + * be underruns. Thus we'd have to guard against underruns. + * - Unfortunately, it is undefined whether the "usable" size of + * a heap object can change unwittingly, ie. not by malloc() or + * realloc() on that same heap object, but for instance after a + * neighbouring heap object is freed. + * If this can happen, free() on that heap object might subtract + * more than was initially added for this heap object, resulting + * in measurement underruns. + * - malloc() and friends are MT-safe, so any replacement function + * would have to be MT-safe as well to avoid memory corruption. + * + * Memory counting using malloc_usable_size() in overwritten/wrapped + * malloc()/realloc()/free() calls has thus been deemed impractical. + * + * Overriding could only work if we store the allocated size + * at the beginning of each heap object and would link in an external + * malloc() implementation, so that the symbol names are known. + * + * Unfortunately, overwriting libc functions is also non-portable, + * so replacing the libc malloc with an external allocator is tricky. + * On Linux (and hopefully other UNIXes), you can simply link + * in the malloc replacement statically which will even let the + * dynamic linker pick the new implementation. + * On Windows however, we would apparently need incredibly hacky code + * to patch the symbol tables + * (see https://github.com/ned14/nedmalloc/blob/master/winpatcher.c). + * Alternatively, everything __including__ MSVCRT needs to be linked + * in statically. This is not supported by MinGW and would have certain + * disadvantages even if it worked. + * + * @par malloc() introspection + * glibc and some other platforms have mallinfo(). + * But at least on glibc it can get unbearably slow on programs + * with a lot of (virtual/resident) memory. + * Besides, mallinfo's API is broken on 64-bit systems, effectively + * limiting the enforcable memory limit to 4GB. + * Other glibc-specific introspection functions like malloc_info() + * can be even slower because of the syscalls required. + * + * - FreeBSD/jemalloc has mallctl("stats.allocated") which even when + * optimized is significantly slower than the current implementation + * but generally acceptable. + * - dlmalloc has malloc_footprint() which is very fast. + * It was therefore considered to simply import dlmalloc as the default + * allocator on (almost) all platforms. + * Despite problems overwriting malloc() globally on some platforms, + * this turned out to be impractical since malloc_footprint() includes + * only the mmapped memory and memory is not always unmapped even when + * calling malloc_trim(), so we couldn't recover after hitting + * the memory limit. + * - rpmalloc has a cheap rpmalloc_global_statistics() but enabling it + * comes with a memory overhead. + * - There seems to be no other malloc() replacement with a constant-time + * function returning the footprint. + * + * @par Instrumenting all of SciTECO's and C++ allocations. + * If we don't want to count each and every allocation in the system, + * we could also use custom allocators/deallocators together with + * malloc_usable_size(). + * For many objects, the size will also be known at free() time, so + * malloc_usable_size() can be avoided. + * + * - To track Scintilla's memory usage, custom C++ allocators/deallocators + * can be defined. + * - Beginning with C++14 (or earlier with -fsized-deallocation), + * it is possible to globally replace sized allocation/deallocation + * functions, which could be used to avoid the malloc_usable_size() + * workaround. Unfortunately, this may not be used for arrays, + * since the compiler may have to call non-sized variants if the + * original allocation size is unknown - and there is no way to detect + * that when the new[] call is made. + * What's worse is that at least G++ STL is broken seriously and + * some versions will call the non-sized delete() even when sized-deallocation + * is available. Again, this cannot be detected at new() time. + * Therefore, I had to remove the sized-deallocation based + * optimization. + * - This approach has the same disadvantages as wrapping malloc() because + * of the unreliability of malloc_usable_size(). + * Furthermore, all allocations by glib (eg. g_strdup()) will be missed. + * + * @par Directly measuring the resident memory size + * It is of course possible to query the program's RSS via OS APIs. + * This has long been avoided because it is naturally platform-dependant and + * some of the APIs have proven to be too slow for frequent polling. + * + * - Windows has GetProcessMemoryInfo() which is quite slow. + * When polled on a separate thread, the slow down is very acceptable. + * - OS X has task_info(). + * __Its performance is still untested!__ + * - FreeBSD has sysctl(). + * __Its performance is still untested!__ + * - Linux has no APIs but /proc/self/statm. + * Reading it is naturally very slow, but at least of constant time. + * When polled on a separate thread, the slow down is very acceptable. + * Also, use of malloc_trim() after hitting the memory limit is crucial + * since the RSS will otherwise not decrease. + * - Haiku has no usable constant-time API. + * + * @par Conclusion + * Every approach sucks and no platform supports everything. + * We therefore now opted for a combined strategy: + * Most platforms will by default try to replace malloc() with dlmalloc. + * The dlmalloc functions are wrapped and the memory usage is counted via + * malloc_usable_size() which in the case of dlmalloc should never change + * for one heap object unless we realloc() it. + * This should be fastest, the most precise and there is a guaranteed + * malloc_trim(). + * Malloc overriding can be disabled at compile time to aid in memory + * debugging. + * On Windows, we never even try to link in dlmalloc. + * If disabled, we try to directly measure memory consumption using + * OS APIs. + * Polling of the RSS takes place in a dedicated thread that is started + * on demand and paused whenever the main thread is idle (eg. waits for + * user input), so we don't waste cycles. + */ + +/** + * Current memory usage. + * Access must be synchronized using atomic operations. + */ +static gint teco_memory_usage = 0; + +/* + * NOTE: This implementation based on malloc_usable_size() might + * also work with other malloc libraries, given that they provide + * a malloc_usable_size() which does not change for a heap object + * (unless it is reallocated of course). + */ +#ifdef REPLACE_MALLOC + +void * __attribute__((used)) +malloc(size_t size) +{ + void *ptr = dlmalloc(size); + if (G_LIKELY(ptr != NULL)) + g_atomic_int_add(&teco_memory_usage, dlmalloc_usable_size(ptr)); + return ptr; +} + +void __attribute__((used)) +free(void *ptr) +{ + if (!ptr) + return; + g_atomic_int_add(&teco_memory_usage, -dlmalloc_usable_size(ptr)); + dlfree(ptr); +} + +void * __attribute__((used)) +calloc(size_t nmemb, size_t size) +{ + void *ptr = dlcalloc(nmemb, size); + if (G_LIKELY(ptr != NULL)) + g_atomic_int_add(&teco_memory_usage, dlmalloc_usable_size(ptr)); + return ptr; +} + +void * __attribute__((used)) +realloc(void *ptr, size_t size) +{ + if (ptr) + g_atomic_int_add(&teco_memory_usage, -dlmalloc_usable_size(ptr)); + ptr = dlrealloc(ptr, size); + if (G_LIKELY(ptr != NULL)) + g_atomic_int_add(&teco_memory_usage, dlmalloc_usable_size(ptr)); + return ptr; +} + +void * __attribute__((used)) +memalign(size_t alignment, size_t size) +{ + void *ptr = dlmemalign(alignment, size); + if (G_LIKELY(ptr != NULL)) + g_atomic_int_add(&teco_memory_usage, dlmalloc_usable_size(ptr)); + return ptr; +} + +void *aligned_alloc(size_t, size_t) __attribute__((used, alias("memalign"))); + +int __attribute__((used)) +posix_memalign(void **memptr, size_t alignment, size_t size) +{ + int ret = dlposix_memalign(memptr, alignment, size); + if (G_LIKELY(!ret)) + g_atomic_int_add(&teco_memory_usage, dlmalloc_usable_size(*memptr)); + return ret; +} + +void * __attribute__((used)) +valloc(size_t size) +{ + void *ptr = dlvalloc(size); + if (G_LIKELY(ptr != NULL)) + g_atomic_int_add(&teco_memory_usage, dlmalloc_usable_size(ptr)); + return ptr; +} + +/* + * The glibc manual claims we have to replace this function + * but we'd need sysconf(_SC_PAGESIZE) to implement it. + */ +void * __attribute__((used)) +pvalloc(size_t size) +{ + g_assert_not_reached(); + return NULL; +} + +size_t __attribute__((used)) +malloc_usable_size(void *ptr) +{ + return dlmalloc_usable_size(ptr); +} + +int __attribute__((used)) +malloc_trim(size_t pad) +{ + return dlmalloc_trim(pad); +} + +/* + * FIXME: Which platforms might need malloc_trim() to + * recover from hitting the memory limit? + * In other words, which platform's teco_memory_get_usage() + * might return a large value even if most memory has already + * been deallocated? + */ +#elif defined(G_OS_WIN32) + +/* + * On Windows, we never link in dlmalloc. + * + * NOTE: At least on Windows 2000, we run twice as fast than + * when polling from a dedicated thread. + */ +static gsize +teco_memory_get_usage(void) +{ + PROCESS_MEMORY_COUNTERS info; + + /* + * This __should__ not fail since the current process has + * PROCESS_ALL_ACCESS, but who knows... + * Since memory limiting cannot be turned off when this + * happens, we can just as well terminate abnormally. + */ + if (G_UNLIKELY(!GetProcessMemoryInfo(GetCurrentProcess(), + &info, sizeof(info)))) { + g_autofree gchar *msg = g_win32_error_message(GetLastError()); + g_error("Cannot get memory usage: %s", msg); + return 0; + } + + return info.WorkingSetSize; +} + +#define NEED_POLL_THREAD + +#elif defined(HAVE_TASK_INFO) + +/* + * Practically only for Mac OS X. + * + * FIXME: Benchmark whether polling in a thread really + * improves performances as much as on Linux. + * Is this even critical or can we link in dlmalloc? + */ +static gsize +teco_memory_get_usage(void) +{ + struct mach_task_basic_info info; + mach_msg_type_number_t info_count = MACH_TASK_BASIC_INFO_COUNT; + + if (G_UNLIKELY(task_info(mach_task_self(), MACH_TASK_BASIC_INFO, + (task_info_t)&info, &info_count) != KERN_SUCCESS)) + return 0; // FIXME + + return info.resident_size; +} + +#define NEED_POLL_THREAD + +#elif defined(G_OS_UNIX) && defined(HAVE_SYSCTL) + +/* + * Practically only for FreeBSD. + * + * FIXME: Is this even critical or can we link in dlmalloc? + */ +static gsize +teco_memory_get_usage(void) +{ + struct kinfo_proc procstk; + size_t len = sizeof(procstk); + int pidinfo[] = {CTL_KERN, KERN_PROC, KERN_PROC_PID, getpid()}; + + sysctl(pidinfo, G_N_ELEMENTS(pidinfo), &procstk, &len, NULL, 0); + + return procstk.ki_rssize; // FIXME: Which unit? +} + +#define NEED_POLL_THREAD + +#elif defined(G_OS_UNIX) && defined(HAVE_SYSCONF) && defined(HAVE_PROCFS) + +#ifndef HAVE_MALLOC_TRIM +#warning malloc_trim() missing - Might not recover from hitting the memory limit! +#endif + +/* + * Mainly for Linux, but there might be other UNIXoids supporting procfs. + * This would be ridiculously slow if polled from the main thread. + * + * Since Linux supports dlmalloc(), this will usually not be required + * unless you disable it explicitly. + * + * NOTE: This is conciously avoiding glib and stdio APIs since we run in + * a very tight loop and should avoid any unnecessary allocations which could + * significantly slow down the main thread. + */ +static gsize +teco_memory_get_usage(void) +{ + static long page_size = 0; + + if (G_UNLIKELY(!page_size)) + page_size = sysconf(_SC_PAGESIZE); + + int fd = open("/proc/self/statm", O_RDONLY); + if (fd < 0) + /* procfs might not be mounted */ + return 0; + + gchar buf[256]; + ssize_t len = read(fd, buf, sizeof(buf)-1); + close(fd); + if (G_UNLIKELY(len < 0)) + return 0; + buf[len] = '\0'; + + gsize memory_usage = 0; + sscanf(buf, "%*u %" G_GSIZE_FORMAT, &memory_usage); + + return memory_usage * page_size; +} + +#define NEED_POLL_THREAD + +#else + +/* + * We've got neither dlmalloc, nor any particular OS backend. + */ +#warning dlmalloc is disabled and there is no memory counting backend - memory limiting will be unavailable! + +#endif + +#ifdef NEED_POLL_THREAD + +static GThread *teco_memory_thread = NULL; + +static enum { + TECO_MEMORY_STATE_ON, + TECO_MEMORY_STATE_OFF, + TECO_MEMORY_STATE_SHUTDOWN +} teco_memory_state = TECO_MEMORY_STATE_ON; + +static GMutex teco_memory_mutex; +static GCond teco_memory_cond; + +/* + * FIXME: What if we activated the thread only whenever the + * usage is queried in the main thread? + * This would automatically "clock" the threaded polling at the same rate + * as the main thread is polling. + * On the downside, the value of teco_memory_usage would be more outdated, + * so a memory overrun would be detected with even more delay. + */ +static gpointer +teco_memory_poll_thread_cb(gpointer data) +{ + g_mutex_lock(&teco_memory_mutex); + + for (;;) { + while (teco_memory_state == TECO_MEMORY_STATE_ON) { + g_mutex_unlock(&teco_memory_mutex); + /* + * NOTE: teco_memory_mutex is not used for teco_memory_usage + * since it is locked most of the time which would extremely slow + * down the main thread. + */ + g_atomic_int_set(&teco_memory_usage, teco_memory_get_usage()); + g_thread_yield(); + g_mutex_lock(&teco_memory_mutex); + } + if (G_UNLIKELY(teco_memory_state == TECO_MEMORY_STATE_SHUTDOWN)) + break; + + g_cond_wait(&teco_memory_cond, &teco_memory_mutex); + /* teco_memory_mutex is locked */ + } + + g_mutex_unlock(&teco_memory_mutex); + return NULL; +} + +void __attribute__((constructor)) +teco_memory_start_limiting(void) +{ + if (!teco_memory_limit) + return; + + /* + * FIXME: Setting a low thread priority would certainly help. + * This would be less important for platforms like Linux where + * we usually don't need a polling thread at all. + */ + if (G_UNLIKELY(!teco_memory_thread)) + teco_memory_thread = g_thread_new(NULL, teco_memory_poll_thread_cb, NULL); + + g_mutex_lock(&teco_memory_mutex); + teco_memory_state = TECO_MEMORY_STATE_ON; + g_cond_signal(&teco_memory_cond); + g_mutex_unlock(&teco_memory_mutex); +} + +void +teco_memory_stop_limiting(void) +{ + g_mutex_lock(&teco_memory_mutex); + teco_memory_state = TECO_MEMORY_STATE_OFF; + g_mutex_unlock(&teco_memory_mutex); +} + +#ifndef NDEBUG +static void __attribute__((destructor)) +teco_memory_cleanup(void) +{ + if (!teco_memory_thread) + return; + + g_mutex_lock(&teco_memory_mutex); + teco_memory_state = TECO_MEMORY_STATE_SHUTDOWN; + g_cond_signal(&teco_memory_cond); + g_mutex_unlock(&teco_memory_mutex); + + g_thread_join(teco_memory_thread); +} +#endif + +#else /* !NEED_POLL_THREAD */ + +void teco_memory_start_limiting(void) {} +void teco_memory_stop_limiting(void) {} + +#endif + +/** + * Memory limit in bytes (500mb by default, assuming SI units). + * 0 means no limiting. + */ +gsize teco_memory_limit = 500*1000*1000; + +gboolean +teco_memory_set_limit(gsize new_limit, GError **error) +{ + gsize memory_usage = g_atomic_int_get(&teco_memory_usage); + + if (G_UNLIKELY(new_limit && memory_usage > new_limit)) { + g_autofree gchar *usage_str = g_format_size(memory_usage); + g_autofree gchar *limit_str = g_format_size(new_limit); + + g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED, + "Cannot set undo memory limit (%s): " + "Current usage too large (%s).", + limit_str, usage_str); + return FALSE; + } + + teco_undo_gsize(teco_memory_limit) = new_limit; + + if (teco_memory_limit) + teco_memory_start_limiting(); + else + teco_memory_stop_limiting(); + + return TRUE; +} + +gboolean +teco_memory_check(GError **error) +{ + gsize memory_usage = g_atomic_int_get(&teco_memory_usage); + + if (G_UNLIKELY(teco_memory_limit && memory_usage > teco_memory_limit)) { + g_autofree gchar *limit_str = g_format_size(memory_usage); + + g_set_error(error, TECO_ERROR, TECO_ERROR_MEMLIMIT, + "Memory limit (%s) exceeded. See <EJ> command.", + limit_str); + return FALSE; + } + + return TRUE; +} |