/*
* Copyright (C) 2012-2025 Robin Haberkorn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#define USE_DL_PREFIX /* for dlmalloc */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#ifdef HAVE_MALLOC_NP_H
#include <malloc_np.h>
#endif
#ifdef HAVE_WINDOWS_H
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <psapi.h>
#endif
/*
* For task_info() on OS X.
*/
#ifdef HAVE_MACH_MACH_H
#include <mach/mach.h>
#endif
#ifdef HAVE_MACH_MESSAGE_H
#include <mach/message.h>
#endif
#ifdef HAVE_MACH_KERN_RETURN_H
#include <mach/kern_return.h>
#endif
#ifdef HAVE_MACH_TASK_INFO_H
#include <mach/task_info.h>
#endif
/*
* For sysctl() on FreeBSD.
*/
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_USER_H
#include <sys/user.h>
#endif
#ifdef HAVE_SYS_SYSCTL_H
#include <sys/sysctl.h>
#endif
/*
* For sysconf() on Linux.
*/
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#ifdef HAVE_SYS_RESOURCE_H
#include <sys/resource.h>
#endif
#include <glib.h>
/*
* For open() (currently only on Linux).
*/
#ifdef G_OS_UNIX
#include <fcntl.h>
#include <unistd.h>
#endif
#include "sciteco.h"
#include "error.h"
#include "undo.h"
#include "memory.h"
/**
* @file
* Memory measurement and limiting.
*
* A discussion of memory measurement techniques on Linux
* and UNIXoid operating systems is in order, since this
* problem turned out to be rather tricky.
*
* @par Size of the program break
* There is also the old-school technique of calculating the size
* of the program break, i.e. the effective size of the DATA segment.
* This works under the assumption that all allocations are
* performed by extending the program break, as is __traditionally__
* done by malloc() and friends.
*
* - Unfortunately, modern malloc() implementations sometimes
* mmap() memory, especially for large allocations.
* SciTECO mostly allocates small chunks, but some malloc
* implementations like jemalloc claim memory exclusively via
* mmap(), thus rendering sbrk(0) useless.
* - Furthermore, some malloc implementations like glibc will
* only shrink the program break when explicitly told to
* using malloc_trim(0).
* - The sbrk(0) method thus depends on implementation details
* of the libc.
* - However, this might be a suitable backend on old UNIX platforms
* or as a fallback for teco_memory_get_usage(), as sketched below.
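*
* A minimal sketch of such a hypothetical fallback backend, assuming
* that every allocation extends the program break (which modern
* allocators do not guarantee):
*
* @code
* #include <unistd.h>
*
* static gsize
* teco_memory_get_usage(void)
* {
* 	// valid only if malloc() never uses mmap()
* 	static void *initial_brk = NULL;
* 	if (!initial_brk)
* 		initial_brk = sbrk(0);
* 	return (char *)sbrk(0) - (char *)initial_brk;
* }
* @endcode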
*
* @par Resource limits
* UNIX has resource limits, which could be used to enforce
* the memory limit, but in case they are hit, malloc()
* will return NULL, so g_malloc() would abort().
* Wrapping malloc() to work around that has the same
* problems described below.
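*
* For illustration only, a sketch of how such a resource limit would be
* set (RLIMIT_AS availability and the limit value are assumptions):
*
* @code
* #include <sys/resource.h>
*
* struct rlimit rl;
* getrlimit(RLIMIT_AS, &rl);
* rl.rlim_cur = 500*1000*1000;	// hypothetical 500 MB limit
* setrlimit(RLIMIT_AS, &rl);
* // malloc() now returns NULL once the limit is hit,
* // which makes g_malloc() abort()
* @endcode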
*
* @par Hooking malloc()
* malloc_usable_size() could be used to count the memory
* consumption by updating a counter after every malloc(),
* realloc() and free().
* malloc_usable_size() is libc-specific, but available at least in
* glibc and jemalloc (FreeBSD). Windows (MSVCRT) has `_msize()`.
* This would require overwriting or hooking all calls to
* malloc() and friends, though.
* For all other platforms, we'd have to rely on writing the
* heap object size into every heap object, thus wasting
* one word per heap object.
*
* - glibc has malloc hooks, but they are non-portable and
* deprecated.
* - It is possible to effectively wrap malloc() by overriding
* the libc's implementation, which will even work when
* statically linking in libc since malloc() is usually
* declared `weak`.
* However, this probably does not work on all platforms and
* means you need to know the original functions (pointers).
* It should work sufficiently when linking everything statically.
* - glibc exports symbols for the original malloc() implementation
* like `__libc_malloc()` that could be used for wrapping.
* This is undocumented and libc-specific, though.
* - The GNU ld --wrap option allows us to intercept calls,
* but obviously won't work for shared libraries.
* - The portable dlsym() could be used to look up the original
* library symbol, but it may itself call malloc functions,
* e.g. calloc() on glibc.
* Some people work around this with makeshift bootstrap allocators
* used only during dlsym() (see the sketch at the end of this section).
* __In other words, there is no way to portably and reliably
* wrap malloc() and friends when linking dynamically.__
* - Another difficulty is that, when free() is overridden, every
* function that can __independently__ allocate memory that
* can be passed to free() must also be overridden.
* This is impossible to know without making assumptions about the
* malloc implementation used.
* Otherwise the measurement is not precise and there can even
* be underruns. Thus we'd have to guard against underruns.
* - Unfortunately, it is undefined whether the "usable" size of
* a heap object can change unwittingly, i.e. not by malloc() or
* realloc() on that same heap object, but for instance after a
* neighbouring heap object is freed.
* If this can happen, free() on that heap object might subtract
* more than was initially added for this heap object, resulting
* in measurement underruns.
* - malloc() and friends are MT-safe, so any replacement function
* would have to be MT-safe as well to avoid memory corruption.
*
* Memory counting using malloc_usable_size() in overwritten/wrapped
* malloc()/realloc()/free() calls has thus been deemed impractical.
*
* Overriding could only work if we stored the allocated size
* at the beginning of each heap object and linked in an external
* malloc() implementation, so that the symbol names are known.
*
* Unfortunately, overwriting libc functions is also non-portable,
* so replacing the libc malloc with an external allocator is tricky.
* On Linux (and hopefully other UNIXes), you can simply link
* in the malloc replacement statically which will even let the
* dynamic linker pick the new implementation.
* On Windows however, we would apparently need incredibly hacky code
* to patch the symbol tables
* (see https://github.com/ned14/nedmalloc/blob/master/winpatcher.c).
* Alternatively, everything __including__ MSVCRT needs to be linked
* in statically. This is not supported by MinGW and would have certain
* disadvantages even if it worked.
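*
* For completeness, a sketch of the dlsym()-based wrapping mentioned
* above (glibc/Linux assumptions; note that the first dlsym() call may
* itself allocate, which is exactly the bootstrapping problem described):
*
* @code
* #define _GNU_SOURCE
* #include <dlfcn.h>
* #include <malloc.h>
*
* void *
* malloc(size_t size)
* {
* 	static void *(*real_malloc)(size_t) = NULL;
* 	if (!real_malloc)
* 		real_malloc = (void *(*)(size_t))dlsym(RTLD_NEXT, "malloc");
* 	void *ptr = real_malloc(size);
* 	if (ptr)
* 		g_atomic_int_add(&teco_memory_usage, malloc_usable_size(ptr));
* 	return ptr;
* }
* @endcode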
*
* @par malloc() introspection
* glibc and some other platforms have mallinfo().
* But at least on glibc it can get unbearably slow on programs
* with a lot of (virtual/resident) memory.
* Besides, mallinfo's API is broken on 64-bit systems, effectively
* limiting the enforceable memory limit to 4GB.
* Other glibc-specific introspection functions like malloc_info()
* can be even slower because of the syscalls required.
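*
* A sketch of what a mallinfo()-based backend would look like
* (hypothetical; the plain "int" fields are what breaks it on 64-bit):
*
* @code
* #include <malloc.h>
*
* static gsize
* teco_memory_get_usage(void)
* {
* 	struct mallinfo mi = mallinfo();
* 	// uordblks (in-use chunks) and hblkhd (mmapped memory)
* 	// are "int", so the result wraps around beyond 4GB
* 	return (unsigned int)mi.uordblks + (unsigned int)mi.hblkhd;
* }
* @endcode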
*
* - FreeBSD/jemalloc has mallctl("stats.allocated"), which even when
* optimized is significantly slower than the current implementation
* but generally acceptable (see the sketch after this list).
* - dlmalloc has malloc_footprint() which is very fast.
* It was therefore considered to simply import dlmalloc as the default
* allocator on (almost) all platforms.
* Aside from the problems of overwriting malloc() globally on some
* platforms, this turned out to be impractical since malloc_footprint()
* includes all of the mmapped memory, whether still in use or not,
* and memory is not always unmapped even when calling malloc_trim(),
* so we couldn't recover after hitting the memory limit.
* - rpmalloc has a cheap rpmalloc_global_statistics() but enabling it
* comes with a memory overhead.
* - There seems to be no other malloc() replacement with a constant-time
* function returning the footprint.
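*
* The jemalloc query mentioned in the list above would roughly look
* like this (sketch only; a real implementation should also write to
* the "epoch" mallctl first to refresh the statistics):
*
* @code
* #include <malloc_np.h>
*
* static gsize
* teco_memory_get_usage(void)
* {
* 	size_t allocated = 0, len = sizeof(allocated);
* 	if (mallctl("stats.allocated", &allocated, &len, NULL, 0))
* 		return 0;
* 	return allocated;
* }
* @endcode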
*
* @par Instrumenting all of SciTECO's and C++ allocations
* If we don't want to count each and every allocation in the system,
* we could also use custom allocators/deallocators together with
* malloc_usable_size().
* For many objects, the size will also be known at free() time, so
* malloc_usable_size() can be avoided.
*
* - To track Scintilla's memory usage, custom C++ allocators/deallocators
* can be defined.
* - Beginning with C++14 (or earlier with -fsized-deallocation),
* it is possible to globally replace sized allocation/deallocation
* functions, which could be used to avoid the malloc_usable_size()
* workaround. Unfortunately, this may not be used for arrays,
* since the compiler may have to call non-sized variants if the
* original allocation size is unknown - and there is no way to detect
* that when the new[] call is made.
* What's worse is that at least the G++ STL is seriously broken and
* some versions will call the non-sized delete() even when sized
* deallocation is available. Again, this cannot be detected at new() time.
* Therefore, I had to remove the sized-deallocation-based
* optimization.
* - This approach has the same disadvantages as wrapping malloc() because
* of the unreliability of malloc_usable_size().
* Furthermore, all allocations by glib (e.g. g_strdup()) will be missed.
*
* @par Directly measuring the resident memory size
* It is of course possible to query the program's RSS via OS APIs.
* This has long been avoided because it is naturally platform-dependent and
* some of the APIs have proven to be too slow for frequent polling.
*
* - Windows has GetProcessMemoryInfo(), which is quite slow.
* When polled on a separate thread, the slowdown is very acceptable.
* - OS X has task_info().
* __Its performance is still untested!__
* - FreeBSD has sysctl().
* __Its performance is still untested!__
* - Linux has no API other than /proc/self/statm.
* Reading it is naturally very slow, but at least takes constant time.
* When polled on a separate thread, the slowdown is very acceptable.
* Also, use of malloc_trim() after hitting the memory limit is crucial
* since the RSS will otherwise not decrease.
* - Haiku has no usable constant-time API.
*
* @par Conclusion
* Every approach sucks and no platform supports everything.
* We have therefore opted for a combined strategy:
* Most platforms will by default try to replace malloc() with dlmalloc.
* The dlmalloc functions are wrapped and the memory usage is counted via
* malloc_usable_size(), which in the case of dlmalloc should never change
* for a given heap object unless we realloc() it.
* This should be the fastest and most precise approach, and dlmalloc
* guarantees a working malloc_trim().
* Malloc overriding can be disabled at compile time to aid in memory
* debugging.
* On Windows and Mac OS, we never even try to link in dlmalloc.
* If malloc replacement is disabled or unavailable, we try to measure
* memory consumption directly using OS APIs.
* Polling of the RSS takes place in a dedicated thread that is started
* on demand and paused whenever the main thread is idle (e.g. waits for
* user input), so we don't waste cycles.
*/
/**
* Current memory usage.
* Access must be synchronized using atomic operations.
*/
static guint teco_memory_usage = 0;
/*
* NOTE: This implementation based on malloc_usable_size() might
* also work with other malloc libraries, given that they provide
* a malloc_usable_size() which does not change for a heap object
* (unless it is reallocated of course).
*/
#ifdef REPLACE_MALLOC
void * __attribute__((used))
malloc(size_t size)
{
void *ptr = dlmalloc(size);
if (G_LIKELY(ptr != NULL))
g_atomic_int_add(&teco_memory_usage, dlmalloc_usable_size(ptr));
return ptr;
}
void __attribute__((used))
free(void *ptr)
{
if (!ptr)
return;
g_atomic_int_add(&teco_memory_usage, -dlmalloc_usable_size(ptr));
dlfree(ptr);
}
void * __attribute__((used))
calloc(size_t nmemb, size_t size)
{
void *ptr = dlcalloc(nmemb, size);
if (G_LIKELY(ptr != NULL))
g_atomic_int_add(&teco_memory_usage, dlmalloc_usable_size(ptr));
return ptr;
}
void * __attribute__((used))
realloc(void *ptr, size_t size)
{
if (ptr)
g_atomic_int_add(&teco_memory_usage, -dlmalloc_usable_size(ptr));
ptr = dlrealloc(ptr, size);
if (G_LIKELY(ptr != NULL))
g_atomic_int_add(&teco_memory_usage, dlmalloc_usable_size(ptr));
return ptr;
}
void * __attribute__((used))
memalign(size_t alignment, size_t size)
{
void *ptr = dlmemalign(alignment, size);
if (G_LIKELY(ptr != NULL))
g_atomic_int_add(&teco_memory_usage, dlmalloc_usable_size(ptr));
return ptr;
}
void * __attribute__((used))
aligned_alloc(size_t alignment, size_t size)
{
return memalign(alignment, size);
}
int __attribute__((used))
posix_memalign(void **memptr, size_t alignment, size_t size)
{
int ret = dlposix_memalign(memptr, alignment, size);
if (G_LIKELY(!ret))
g_atomic_int_add(&teco_memory_usage, dlmalloc_usable_size(*memptr));
return ret;
}
void * __attribute__((used))
valloc(size_t size)
{
void *ptr = dlvalloc(size);
if (G_LIKELY(ptr != NULL))
g_atomic_int_add(&teco_memory_usage, dlmalloc_usable_size(ptr));
return ptr;
}
/*
* The glibc manual claims we have to replace this function
* but we'd need sysconf(_SC_PAGESIZE) to implement it.
*/
void * __attribute__((used))
pvalloc(size_t size)
{
g_assert_not_reached();
return NULL;
}
size_t __attribute__((used))
malloc_usable_size(void *ptr)
{
return dlmalloc_usable_size(ptr);
}
int __attribute__((used))
malloc_trim(size_t pad)
{
return dlmalloc_trim(pad);
}
/*
* FIXME: Which platforms might need malloc_trim() to
* recover from hitting the memory limit?
* In other words, which platform's teco_memory_get_usage()
* might return a large value even if most memory has already
* been deallocated?
*/
#elif defined(G_OS_WIN32)
/*
* On Windows, we never link in dlmalloc.
*
* NOTE: At least on Windows 2000, we run twice as fast
* when polling from a dedicated thread.
*
* NOTE: On Wine, we cannot recover from hitting the memory
* limit, but this is probably because malloc() is actually
* forwarded to the glibc which needs malloc_trim().
* This should not be a problem on real Windows.
*/
static gsize
teco_memory_get_usage(void)
{
PROCESS_MEMORY_COUNTERS info;
/*
* This __should__ not fail since the current process has
* PROCESS_ALL_ACCESS, but who knows...
* Since memory limiting cannot be turned off when this
* happens, we can just as well terminate abnormally.
*/
if (G_UNLIKELY(!GetProcessMemoryInfo(GetCurrentProcess(),
&info, sizeof(info)))) {
g_autofree gchar *msg = g_win32_error_message(GetLastError());
g_error("Cannot get memory usage: %s", msg);
return 0;
}
return info.WorkingSetSize;
}
#define NEED_POLL_THREAD
#elif defined(HAVE_TASK_INFO)
/*
* Practically only for Mac OS X.
*
* NOTE: Running in a dedicated polling thread does indeed
* improve our performance significantly.
*
* FIXME: We cannot simply overwrite weak malloc() functions
* like on Linux since this does not affect shared libraries
* unless $DYLD_FORCE_FLAT_NAMESPACE is set.
* It should be possible to change the default malloc zone, though.
* First experiments have been unsuccessful.
* But see https://github.com/gperftools/gperftools/blob/master/src/libc_override_osx.h
* https://chromium.googlesource.com/chromium/src/base/+/refs/heads/main/allocator/allocator_interception_mac.mm
*/
static gsize
teco_memory_get_usage(void)
{
struct mach_task_basic_info info;
mach_msg_type_number_t info_count = MACH_TASK_BASIC_INFO_COUNT;
if (G_UNLIKELY(task_info(mach_task_self(), MACH_TASK_BASIC_INFO,
(task_info_t)&info, &info_count) != KERN_SUCCESS))
return 0;
return info.resident_size;
}
#define NEED_POLL_THREAD
#elif defined(G_OS_UNIX) && defined(HAVE_SYSCONF) && defined(HAVE_SYSCTL)
/*
* Practically only for FreeBSD.
*
* The malloc replacement via dlmalloc also works on FreeBSD,
* but this implementation has been benchmarked to be up to 4 times faster
* (but only if we poll in a separate thread).
* On the downside, this will of course be less precise.
*/
static gsize
teco_memory_get_usage(void)
{
static long page_size = 0;
if (G_UNLIKELY(!page_size))
page_size = sysconf(_SC_PAGESIZE);
struct kinfo_proc procstk;
size_t len = sizeof(procstk);
int pidinfo[] = {CTL_KERN, KERN_PROC, KERN_PROC_PID, getpid()};
if (G_UNLIKELY(sysctl(pidinfo, G_N_ELEMENTS(pidinfo),
&procstk, &len, NULL, 0) < 0))
return 0;
return procstk.ki_rssize * page_size;
}
/**
* Options passed to jemalloc.
*
* It's crucial to disable opt.retain, so that freeing memory after
* recovering from memory limit hits actually decreases the RSS.
* The reasons for activating the option, mentioned in jemalloc(3),
* shouldn't be relevant on FreeBSD.
*/
const gchar *malloc_conf = "retain:false";
#define NEED_POLL_THREAD
#elif defined(G_OS_UNIX) && defined(HAVE_SYSCONF) && defined(HAVE_PROCFS)
#ifndef HAVE_MALLOC_TRIM
#warning malloc_trim() missing - Might not recover from hitting the memory limit!
#endif
/*
* Mainly for Linux, but there might be other UNIXoids supporting procfs.
* This would be ridiculously slow if polled from the main thread.
*
* Since Linux supports the dlmalloc replacement, this will usually
* not be required unless you disable it explicitly.
*
* NOTE: This consciously avoids GLib and stdio file APIs since we run in
* a very tight loop and should avoid any unnecessary allocations which could
* significantly slow down the main thread.
*/
static gsize
teco_memory_get_usage(void)
{
static long page_size = 0;
if (G_UNLIKELY(!page_size))
page_size = sysconf(_SC_PAGESIZE);
int fd = open("/proc/self/statm", O_RDONLY);
if (fd < 0)
/* procfs might not be mounted */
return 0;
gchar buf[256];
ssize_t len = read(fd, buf, sizeof(buf)-1);
close(fd);
if (G_UNLIKELY(len < 0))
return 0;
buf[len] = '\0';
gsize memory_usage = 0;
sscanf(buf, "%*u %" G_GSIZE_FORMAT, &memory_usage);
return memory_usage * page_size;
}
#define NEED_POLL_THREAD
#else
/*
* We've got neither dlmalloc, nor any particular OS backend.
*/
#warning dlmalloc is disabled and there is no memory counting backend - memory limiting will be unavailable!
#endif
#ifdef NEED_POLL_THREAD
static GThread *teco_memory_thread = NULL;
static enum {
TECO_MEMORY_STATE_ON,
TECO_MEMORY_STATE_OFF,
TECO_MEMORY_STATE_SHUTDOWN
} teco_memory_state = TECO_MEMORY_STATE_ON;
static GMutex teco_memory_mutex;
static GCond teco_memory_cond;
/*
* FIXME: What if we activated the thread only whenever the
* usage is queried in the main thread?
* This would automatically "clock" the threaded polling at the same rate
* as the main thread is polling.
* On the downside, the value of teco_memory_usage would be more outdated,
* so a memory overrun would be detected with even more delay.
*/
static gpointer
teco_memory_poll_thread_cb(gpointer data)
{
g_mutex_lock(&teco_memory_mutex);
for (;;) {
while (teco_memory_state == TECO_MEMORY_STATE_ON) {
g_mutex_unlock(&teco_memory_mutex);
/*
* NOTE: teco_memory_mutex is not used for teco_memory_usage
* since it is locked most of the time which would extremely slow
* down the main thread.
*/
g_atomic_int_set(&teco_memory_usage, teco_memory_get_usage());
g_thread_yield();
g_mutex_lock(&teco_memory_mutex);
}
if (G_UNLIKELY(teco_memory_state == TECO_MEMORY_STATE_SHUTDOWN))
break;
g_cond_wait(&teco_memory_cond, &teco_memory_mutex);
/* teco_memory_mutex is locked */
}
g_mutex_unlock(&teco_memory_mutex);
return NULL;
}
void __attribute__((constructor))
teco_memory_start_limiting(void)
{
if (!teco_memory_limit)
return;
/*
* FIXME: Setting a low thread priority would certainly help.
* This would be less important for platforms like Linux where
* we usually don't need a polling thread at all.
*/
if (G_UNLIKELY(!teco_memory_thread))
teco_memory_thread = g_thread_new(NULL, teco_memory_poll_thread_cb, NULL);
g_mutex_lock(&teco_memory_mutex);
teco_memory_state = TECO_MEMORY_STATE_ON;
g_cond_signal(&teco_memory_cond);
g_mutex_unlock(&teco_memory_mutex);
}
void
teco_memory_stop_limiting(void)
{
g_mutex_lock(&teco_memory_mutex);
teco_memory_state = TECO_MEMORY_STATE_OFF;
g_mutex_unlock(&teco_memory_mutex);
}
static void TECO_DEBUG_CLEANUP
teco_memory_cleanup(void)
{
if (!teco_memory_thread)
return;
g_mutex_lock(&teco_memory_mutex);
teco_memory_state = TECO_MEMORY_STATE_SHUTDOWN;
g_cond_signal(&teco_memory_cond);
g_mutex_unlock(&teco_memory_mutex);
g_thread_join(teco_memory_thread);
}
#else /* !NEED_POLL_THREAD */
void teco_memory_start_limiting(void) {}
void teco_memory_stop_limiting(void) {}
#endif
/**
* Memory limit in bytes (500 MB by default, assuming SI units).
* 0 means no limiting.
*/
gsize teco_memory_limit = 500*1000*1000;
gboolean
teco_memory_set_limit(gsize new_limit, GError **error)
{
gsize memory_usage = (guint)g_atomic_int_get(&teco_memory_usage);
if (G_UNLIKELY(new_limit && memory_usage > new_limit)) {
g_autofree gchar *usage_str = g_format_size(memory_usage);
g_autofree gchar *limit_str = g_format_size(new_limit);
g_set_error(error, TECO_ERROR, TECO_ERROR_FAILED,
"Cannot set undo memory limit (%s): "
"Current usage too large (%s).",
limit_str, usage_str);
return FALSE;
}
teco_undo_gsize(teco_memory_limit) = new_limit;
if (teco_memory_limit)
teco_memory_start_limiting();
else
teco_memory_stop_limiting();
return TRUE;
}
/**
* Check whether the memory limit is exceeded or would be
* exceeded by an allocation.
*
* @param request Size of the requested allocation or 0 if
* you want to check the current memory usage.
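*
* A hypothetical caller checking before a large allocation
* (len and error are assumed to be in scope):
*
* @code
* if (!teco_memory_check(len, error))
* 	return FALSE;
* @endcode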
*/
gboolean
teco_memory_check(gsize request, GError **error)
{
gsize memory_usage = (guint)g_atomic_int_get(&teco_memory_usage);
gsize requested_memory_usage = memory_usage+request;
/*
* Check for overflows.
* NOTE: Glib 2.48 has g_size_checked_add().
*/
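/*
* With GLib >= 2.48, the same check could be written as (sketch only):
*
*	if (!g_size_checked_add(&requested_memory_usage, memory_usage, request))
*		requested_memory_usage = G_MAXSIZE;
*/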
if (G_UNLIKELY(requested_memory_usage < memory_usage))
/* guaranteed to fail if memory limiting is enabled */
requested_memory_usage = G_MAXSIZE;
if (G_UNLIKELY(teco_memory_limit && requested_memory_usage >= teco_memory_limit)) {
g_autofree gchar *limit_str = g_format_size(teco_memory_limit);
g_set_error(error, TECO_ERROR, TECO_ERROR_MEMLIMIT,
"Memory limit (%s) exceeded. See command.",
limit_str);
return FALSE;
}
return TRUE;
}