diff --git a/doc/libpmemobj/pmemobj_ctl_get.3.md b/doc/libpmemobj/pmemobj_ctl_get.3.md index 803e4893cbad0a31f559ff93529554f90980cc7c..336d4e12a183a800f8375146f19b55da4de15b21 100644 --- a/doc/libpmemobj/pmemobj_ctl_get.3.md +++ b/doc/libpmemobj/pmemobj_ctl_get.3.md @@ -7,7 +7,7 @@ header: PMDK date: pmemobj API version 2.3 ... -[comment]: <> (Copyright 2017-2019, Intel Corporation) +[comment]: <> (Copyright 2017-2020, Intel Corporation) [comment]: <> (Redistribution and use in source and binary forms, with or without) [comment]: <> (modification, are permitted provided that the following conditions) @@ -328,21 +328,58 @@ naming in the application (e.g. when writing a library that uses libpmemobj). The required class identifier will be stored in the `class_id` field of the `struct pobj_alloc_class_desc`. -stats.enabled | rw | - | int | int | - | boolean +stats.enabled | rw | - | enum pobj_stats_enabled | enum pobj_stats_enabled | - | +string -Enables or disables runtime collection of statistics. Statistics are not -recalculated after enabling; any operations that occur between disabling and -re-enabling will not be reflected in subsequent values. +Enables or disables runtime collection of statistics. There are two types of +statistics: persistent and transient ones. Persistent statistics survive pool +restarts, whereas transient ones don't. Statistics are not recalculated after +enabling; any operations that occur between disabling and re-enabling will not +be reflected in subsequent values. -Statistics are disabled by default. Enabling them may have non-trivial -performance impact. +Only transient statistics are enabled by default. Enabling persistent statistics +may have non-trivial performance impact. -stats.heap.curr_allocated | r- | - | int | - | - | - +stats.heap.curr_allocated | r- | - | uint64_t | - | - | - Reads the number of bytes currently allocated in the heap. 
If statistics were disabled at any time in the lifetime of the heap, this value may be inaccurate.
+This is a persistent statistic.
+
+stats.heap.run_allocated | r- | - | uint64_t | - | - | -
+
+Reads the number of bytes currently allocated using run-based allocation
+classes, i.e., huge allocations are not accounted for in this statistic.
+This is useful for comparison against stats.heap.run_active to estimate the
+ratio between active and allocated memory.
+
+This is a transient statistic and is rebuilt every time the pool is opened.
+
+stats.heap.run_active | r- | - | uint64_t | - | - | -
+
+Reads the number of bytes currently occupied by all run memory blocks, including
+both allocated and free space, i.e., this is all the space that's not
+occupied by huge allocations.
+
+This value is a sum of all allocated and free run memory. In systems where
+memory is efficiently used, `run_active` should closely track
+`run_allocated`, and the amount of active, but free, memory should be minimal.
+
+A large relative difference between active memory and allocated memory is
+indicative of heap fragmentation. This information can be used to make
+a decision to call **pmemobj_defrag()**(3) if the fragmentation looks to be high.
+
+However, for small heaps `run_active` might be disproportionately higher than
+`run_allocated` because the allocator typically activates a significantly larger
+amount of memory than is required to satisfy a single request in the
+anticipation of future needs. For example, the first allocation of 100 bytes
+in a heap will trigger activation of 256 kilobytes of space.
+
+This is a transient statistic and is rebuilt lazily every time the pool
+is opened.
+
 heap.size.granularity | rw- | - | uint64_t | uint64_t | - | long long
 
 Reads or modifies the granularity with which the heap grows when OOM. 
diff --git a/src/include/libpmemobj/ctl.h b/src/include/libpmemobj/ctl.h index 7d01f74f9ebc96c1bfd30dbf43e27a74f3a10963..e055cc484f9d7e785ae4e1ba04c9a8d831a486d4 100644 --- a/src/include/libpmemobj/ctl.h +++ b/src/include/libpmemobj/ctl.h @@ -1,5 +1,5 @@ /* - * Copyright 2017-2018, Intel Corporation + * Copyright 2017-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -166,6 +166,13 @@ struct pobj_alloc_class_desc { unsigned class_id; }; +enum pobj_stats_enabled { + POBJ_STATS_ENABLED_TRANSIENT, + POBJ_STATS_ENABLED_BOTH, + POBJ_STATS_ENABLED_PERSISTENT, + POBJ_STATS_DISABLED, +}; + #ifndef _WIN32 /* EXPERIMENTAL */ int pmemobj_ctl_get(PMEMobjpool *pop, const char *name, void *arg); diff --git a/src/libpmemobj/heap.c b/src/libpmemobj/heap.c index 3ac93a288ff577614dde44eaeb3179566ec06bec..1291095d24b27d8b78a35157e82e2af9425d4dbf 100644 --- a/src/libpmemobj/heap.c +++ b/src/libpmemobj/heap.c @@ -489,6 +489,9 @@ heap_run_create(struct palloc_heap *heap, struct bucket *b, return -1; } + STATS_INC(heap->stats, transient, heap_run_active, + m->size_idx * CHUNKSIZE); + return 0; } @@ -558,6 +561,9 @@ heap_run_into_free_chunk(struct palloc_heap *heap, m->block_off = 0; m->size_idx = hdr->size_idx; + STATS_SUB(heap->stats, transient, heap_run_active, + m->size_idx * CHUNKSIZE); + /* * The only thing this could race with is heap_memblock_on_free() * because that function is called after processing the operation, @@ -582,7 +588,7 @@ heap_run_into_free_chunk(struct palloc_heap *heap, * Returns 1 if reclaimed chunk, 0 otherwise. 
*/ static int -heap_reclaim_run(struct palloc_heap *heap, struct memory_block *m) +heap_reclaim_run(struct palloc_heap *heap, struct memory_block *m, int startup) { struct chunk_run *run = heap_get_chunk_run(heap, m); struct chunk_header *hdr = heap_get_chunk_hdr(heap, m); @@ -605,6 +611,13 @@ heap_reclaim_run(struct palloc_heap *heap, struct memory_block *m) if (e.free_space == c->run.nallocs) return 1; + if (startup) { + STATS_INC(heap->stats, transient, heap_run_active, + m->size_idx * CHUNKSIZE); + STATS_INC(heap->stats, transient, heap_run_allocated, + c->run.nallocs - e.free_space); + } + if (recycler_put(heap->rt->recyclers[c->id], m, e) < 0) ERR("lost runtime tracking info of %u run due to OOM", c->id); @@ -634,10 +647,9 @@ heap_reclaim_zone_garbage(struct palloc_heap *heap, struct bucket *bucket, switch (hdr->type) { case CHUNK_TYPE_RUN: - if (heap_reclaim_run(heap, &m) != 0) { + if (heap_reclaim_run(heap, &m, 1) != 0) heap_run_into_free_chunk(heap, bucket, &m); - } break; case CHUNK_TYPE_FREE: heap_free_chunk_reuse(heap, bucket, &m); @@ -853,7 +865,7 @@ heap_reuse_from_recycler(struct palloc_heap *heap, void heap_discard_run(struct palloc_heap *heap, struct memory_block *m) { - if (heap_reclaim_run(heap, m)) { + if (heap_reclaim_run(heap, m, 0)) { struct bucket *defb = heap_bucket_acquire(heap, DEFAULT_ALLOC_CLASS_ID, 0); diff --git a/src/libpmemobj/palloc.c b/src/libpmemobj/palloc.c index aee3f0f10dfcdca9e2f376cd207dce01b98470ca..7f15fbeacff52841f7f83596decf286ca52b3cda 100644 --- a/src/libpmemobj/palloc.c +++ b/src/libpmemobj/palloc.c @@ -418,6 +418,10 @@ palloc_heap_action_on_process(struct palloc_heap *heap, if (act->new_state == MEMBLOCK_ALLOCATED) { STATS_INC(heap->stats, persistent, heap_curr_allocated, act->m.m_ops->get_real_size(&act->m)); + if (act->m.type == MEMORY_BLOCK_RUN) { + STATS_INC(heap->stats, transient, heap_run_allocated, + act->m.m_ops->get_real_size(&act->m)); + } } else if (act->new_state == MEMBLOCK_FREE) { if (On_valgrind) { 
void *ptr = act->m.m_ops->get_user_data(&act->m); @@ -442,6 +446,10 @@ palloc_heap_action_on_process(struct palloc_heap *heap, STATS_SUB(heap->stats, persistent, heap_curr_allocated, act->m.m_ops->get_real_size(&act->m)); + if (act->m.type == MEMORY_BLOCK_RUN) { + STATS_SUB(heap->stats, transient, heap_run_allocated, + act->m.m_ops->get_real_size(&act->m)); + } heap_memblock_on_free(heap, &act->m); } } @@ -945,9 +953,8 @@ palloc_defrag(struct palloc_heap *heap, uint64_t **objv, size_t objcnt, if (operation_reserve(ctx, entries_size) != 0) goto err; - palloc_publish(heap, - VEC_ARR(&actv), - VEC_SIZE(&actv), ctx); + palloc_publish(heap, VEC_ARR(&actv), VEC_SIZE(&actv), + ctx); operation_start(ctx); VEC_CLEAR(&actv); diff --git a/src/libpmemobj/stats.c b/src/libpmemobj/stats.c index 2441b38e0e4013afc32dabd0fba7c4573ba9e0e7..cfb34e57cf756963462b5c7861426d61f00410e8 100644 --- a/src/libpmemobj/stats.c +++ b/src/libpmemobj/stats.c @@ -39,8 +39,13 @@ STATS_CTL_HANDLER(persistent, curr_allocated, heap_curr_allocated); +STATS_CTL_HANDLER(transient, run_allocated, heap_run_allocated); +STATS_CTL_HANDLER(transient, run_active, heap_run_active); + static const struct ctl_node CTL_NODE(heap)[] = { STATS_CTL_LEAF(persistent, curr_allocated), + STATS_CTL_LEAF(transient, run_allocated), + STATS_CTL_LEAF(transient, run_active), CTL_NODE_END }; @@ -55,9 +60,43 @@ CTL_READ_HANDLER(enabled)(void *ctx, { PMEMobjpool *pop = ctx; - int *arg_out = arg; + enum pobj_stats_enabled *arg_out = arg; - *arg_out = pop->stats->enabled > 0; + *arg_out = pop->stats->enabled; + + return 0; +} + +/* + * stats_enabled_parser -- parses the stats enabled type + */ +static int +stats_enabled_parser(const void *arg, void *dest, size_t dest_size) +{ + const char *vstr = arg; + enum pobj_stats_enabled *enabled = dest; + ASSERTeq(dest_size, sizeof(enum pobj_stats_enabled)); + + int bool_out; + if (ctl_arg_boolean(arg, &bool_out, sizeof(bool_out)) == 0) { + *enabled = bool_out ? 
+ POBJ_STATS_ENABLED_BOTH : POBJ_STATS_DISABLED; + return 0; + } + + if (strcmp(vstr, "disabled") == 0) { + *enabled = POBJ_STATS_DISABLED; + } else if (strcmp(vstr, "both") == 0) { + *enabled = POBJ_STATS_ENABLED_BOTH; + } else if (strcmp(vstr, "persistent") == 0) { + *enabled = POBJ_STATS_ENABLED_PERSISTENT; + } else if (strcmp(vstr, "transient") == 0) { + *enabled = POBJ_STATS_ENABLED_TRANSIENT; + } else { + ERR("invalid enable type"); + errno = EINVAL; + return -1; + } return 0; } @@ -72,14 +111,19 @@ CTL_WRITE_HANDLER(enabled)(void *ctx, { PMEMobjpool *pop = ctx; - int arg_in = *(int *)arg; - - pop->stats->enabled = arg_in > 0; + pop->stats->enabled = *(enum pobj_stats_enabled *)arg; return 0; } -static const struct ctl_argument CTL_ARG(enabled) = CTL_ARG_BOOLEAN; +static const struct ctl_argument CTL_ARG(enabled) = { + .dest_size = sizeof(enum pobj_stats_enabled), + .parsers = { + CTL_ARG_PARSER(sizeof(enum pobj_stats_enabled), + stats_enabled_parser), + CTL_ARG_PARSER_END + } +}; static const struct ctl_node CTL_NODE(stats)[] = { CTL_CHILD(heap), @@ -100,7 +144,7 @@ stats_new(PMEMobjpool *pop) return NULL; } - s->enabled = 0; + s->enabled = POBJ_STATS_ENABLED_TRANSIENT; s->persistent = &pop->stats_persistent; VALGRIND_ADD_TO_GLOBAL_TX_IGNORE(s->persistent, sizeof(*s->persistent)); s->transient = Zalloc(sizeof(struct stats_transient)); diff --git a/src/libpmemobj/stats.h b/src/libpmemobj/stats.h index 02fe55e1ef3a42360ebdff64c7c85d41f85d971f..e3390be057ea8d5678564290c2da1e7b159e3722 100644 --- a/src/libpmemobj/stats.h +++ b/src/libpmemobj/stats.h @@ -1,5 +1,5 @@ /* - * Copyright 2017-2018, Intel Corporation + * Copyright 2017-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -38,13 +38,15 @@ #define LIBPMEMOBJ_STATS_H 1 #include "ctl.h" +#include "libpmemobj/ctl.h" #ifdef __cplusplus extern "C" { #endif struct stats_transient { - int unused; + 
uint64_t heap_run_allocated; + uint64_t heap_run_active; }; struct stats_persistent { @@ -52,25 +54,59 @@ struct stats_persistent { }; struct stats { - int enabled; + enum pobj_stats_enabled enabled; struct stats_transient *transient; struct stats_persistent *persistent; }; #define STATS_INC(stats, type, name, value) do {\ - if ((stats)->enabled)\ - util_fetch_and_add64((&(stats)->type->name), (value));\ + STATS_INC_##type(stats, name, value);\ +} while (0) + +#define STATS_INC_transient(stats, name, value) do {\ + if ((stats)->enabled == POBJ_STATS_ENABLED_TRANSIENT ||\ + (stats)->enabled == POBJ_STATS_ENABLED_BOTH)\ + util_fetch_and_add64((&(stats)->transient->name), (value));\ +} while (0) + +#define STATS_INC_persistent(stats, name, value) do {\ + if ((stats)->enabled == POBJ_STATS_ENABLED_PERSISTENT ||\ + (stats)->enabled == POBJ_STATS_ENABLED_BOTH)\ + util_fetch_and_add64((&(stats)->persistent->name), (value));\ } while (0) #define STATS_SUB(stats, type, name, value) do {\ - if ((stats)->enabled)\ - util_fetch_and_sub64((&(stats)->type->name), (value));\ + STATS_SUB_##type(stats, name, value);\ +} while (0) + +#define STATS_SUB_transient(stats, name, value) do {\ + if ((stats)->enabled == POBJ_STATS_ENABLED_TRANSIENT ||\ + (stats)->enabled == POBJ_STATS_ENABLED_BOTH)\ + util_fetch_and_sub64((&(stats)->transient->name), (value));\ +} while (0) + +#define STATS_SUB_persistent(stats, name, value) do {\ + if ((stats)->enabled == POBJ_STATS_ENABLED_PERSISTENT ||\ + (stats)->enabled == POBJ_STATS_ENABLED_BOTH)\ + util_fetch_and_sub64((&(stats)->persistent->name), (value));\ } while (0) #define STATS_SET(stats, type, name, value) do {\ - if ((stats)->enabled)\ - util_atomic_store_explicit64((&(stats)->type->name), (value),\ - memory_order_release);\ + STATS_SET_##type(stats, name, value);\ +} while (0) + +#define STATS_SET_transient(stats, name, value) do {\ + if ((stats)->enabled == POBJ_STATS_ENABLED_TRANSIENT ||\ + (stats)->enabled == POBJ_STATS_ENABLED_BOTH)\ + 
util_atomic_store_explicit64((&(stats)->transient->name),\
+ (value), memory_order_release);\
+} while (0)
+
+#define STATS_SET_persistent(stats, name, value) do {\
+ if ((stats)->enabled == POBJ_STATS_ENABLED_PERSISTENT ||\
+ (stats)->enabled == POBJ_STATS_ENABLED_BOTH)\
+ util_atomic_store_explicit64((&(stats)->persistent->name),\
+ (value), memory_order_release);\
 } while (0)
 
 #define STATS_CTL_LEAF(type, name)\
diff --git a/src/test/obj_ctl_stats/obj_ctl_stats.c b/src/test/obj_ctl_stats/obj_ctl_stats.c
index b25e790337b2c5124b0c455a8d2c7d167c9c8a08..7e443a190850adbe97e99df9d0bc410d55dbb95f 100644
--- a/src/test/obj_ctl_stats/obj_ctl_stats.c
+++ b/src/test/obj_ctl_stats/obj_ctl_stats.c
@@ -76,12 +76,21 @@ main(int argc, char *argv[])
 UT_ASSERTeq(ret, 0);
 UT_ASSERTeq(allocated, oid_size);
 
+ size_t run_allocated = 0;
+ ret = pmemobj_ctl_get(pop, "stats.heap.run_allocated", &run_allocated);
+ UT_ASSERTeq(ret, 0);
+ UT_ASSERT(run_allocated /* 2 allocs */ > allocated /* 1 alloc */);
+
 pmemobj_free(&oid);
 
 ret = pmemobj_ctl_get(pop, "stats.heap.curr_allocated", &allocated);
 UT_ASSERTeq(ret, 0);
 UT_ASSERTeq(allocated, 0);
 
+ ret = pmemobj_ctl_get(pop, "stats.heap.run_allocated", &run_allocated);
+ UT_ASSERTeq(ret, 0);
+ UT_ASSERT(run_allocated /* 2 allocs */ > allocated /* 1 alloc */);
+
 TX_BEGIN(pop) {
 oid = pmemobj_tx_alloc(1, 0);
 } TX_ONABORT {
@@ -92,6 +101,27 @@ main(int argc, char *argv[])
 UT_ASSERTeq(ret, 0);
 UT_ASSERTeq(allocated, oid_size);
 
+ enum pobj_stats_enabled enum_enabled;
+ ret = pmemobj_ctl_get(pop, "stats.enabled", &enum_enabled);
+ UT_ASSERTeq(enum_enabled, POBJ_STATS_ENABLED_BOTH);
+ UT_ASSERTeq(ret, 0);
+
+ run_allocated = 0;
+ ret = pmemobj_ctl_get(pop, "stats.heap.run_allocated", &run_allocated);
+ UT_ASSERTeq(ret, 0);
+
+ enum_enabled = POBJ_STATS_ENABLED_PERSISTENT; /* transient disabled */
+ ret = pmemobj_ctl_set(pop, "stats.enabled", &enum_enabled);
+ UT_ASSERTeq(ret, 0);
+
+ ret = pmemobj_alloc(pop, &oid, 1, 0, NULL, NULL);
+ UT_ASSERTeq(ret, 0);
+ 
+ size_t tmp = 0; + ret = pmemobj_ctl_get(pop, "stats.heap.run_allocated", &tmp); + UT_ASSERTeq(ret, 0); + UT_ASSERTeq(tmp, run_allocated); /* shouldn't change */ + pmemobj_close(pop); DONE(NULL); diff --git a/src/test/obj_fragmentation2/obj_fragmentation2.c b/src/test/obj_fragmentation2/obj_fragmentation2.c index 90ae43be883b225031dfb5b58fceb0c94bd8d686..809e08bc193f4fad895888a9c25246d6d799751a 100644 --- a/src/test/obj_fragmentation2/obj_fragmentation2.c +++ b/src/test/obj_fragmentation2/obj_fragmentation2.c @@ -199,6 +199,15 @@ static float workloads_defrag_target[] = { 0.01f, 0.01f, 0.01f, 0.01f, 0.01f, 0.056f, 0.1f, 0.13f, 0.01f }; +/* last workload operates only on huge chunks, so run stats are useless */ +static float workloads_stat_target[] = { + 0.01f, 1.1f, 1.1f, 0.86f, 0.76f, 1.01f, 0.23f, 1.24f, 2100.f +}; + +static float workloads_defrag_stat_target[] = { + 0.01f, 0.01f, 0.01f, 0.02f, 0.02f, 0.04f, 0.08f, 0.12f, 2100.f +}; + int main(int argc, char *argv[]) { @@ -229,6 +238,16 @@ main(int argc, char *argv[]) workloads[w](pop); + /* this is to trigger global recycling */ + pmemobj_defrag(pop, NULL, 0, NULL); + + size_t active = 0; + size_t allocated = 0; + pmemobj_ctl_get(pop, "stats.heap.run_active", &active); + pmemobj_ctl_get(pop, "stats.heap.run_allocated", &allocated); + float stat_frag = ((float)active / allocated) - 1.f; + UT_ASSERT(stat_frag <= workloads_stat_target[w]); + if (defrag) { PMEMoid **objectsf = ZALLOC(sizeof(PMEMoid) * nobjects); for (size_t i = 0; i < nobjects; ++i) @@ -237,6 +256,17 @@ main(int argc, char *argv[]) pmemobj_defrag(pop, objectsf, nobjects, NULL); FREE(objectsf); + + active = 0; + allocated = 0; + + /* this is to trigger global recycling */ + pmemobj_defrag(pop, NULL, 0, NULL); + + pmemobj_ctl_get(pop, "stats.heap.run_active", &active); + pmemobj_ctl_get(pop, "stats.heap.run_allocated", &allocated); + stat_frag = ((float)active / allocated) - 1.f; + UT_ASSERT(stat_frag <= workloads_defrag_stat_target[w]); } PMEMoid 
oid;