diff --git a/doc/libpmemobj/pmemobj_ctl_get.3.md b/doc/libpmemobj/pmemobj_ctl_get.3.md
index 803e4893cbad0a31f559ff93529554f90980cc7c..fa374def8aa5a6f237dda3dc3e21d2ca2092674f 100644
--- a/doc/libpmemobj/pmemobj_ctl_get.3.md
+++ b/doc/libpmemobj/pmemobj_ctl_get.3.md
@@ -7,7 +7,7 @@ header: PMDK
 date: pmemobj API version 2.3
 ...
 
-[comment]: <> (Copyright 2017-2019, Intel Corporation)
+[comment]: <> (Copyright 2017-2020, Intel Corporation)
 
 [comment]: <> (Redistribution and use in source and binary forms, with or without)
 [comment]: <> (modification, are permitted provided that the following conditions)
@@ -337,12 +337,44 @@ re-enabling will not be reflected in subsequent values.
 Statistics are disabled by default. Enabling them may have non-trivial
 performance impact.
 
-stats.heap.curr_allocated | r- | - | int | - | - | -
+stats.heap.curr_allocated | r- | - | uint64_t | - | - | -
 
 Reads the number of bytes currently allocated in the heap. If statistics
 were disabled at any time in the lifetime of the heap, this value may be
 inaccurate.
 
+stats.heap.run_allocated | r- | - | uint64_t | - | - | -
+
+Reads the number of bytes currently allocated using run-based allocation
+classes, i.e., huge allocations are not accounted for in this statistic.
+This is useful for comparison against stats.heap.run_active to estimate the
+ratio between active and allocated memory.
+
+This is a transient statistic and is rebuilt every time the pool is opened.
+
+stats.heap.run_active | r- | - | uint64_t | - | - | -
+
+Reads the number of bytes currently occupied by all run memory blocks,
+including both allocated and free space, i.e., all space that is not
+occupied by huge blocks.
+
+This value is the sum of all allocated and free run memory. In systems where
+memory is used efficiently, `run_active` should closely track
+`run_allocated`, and the amount of active but free memory should be minimal.
+
+A large relative difference between active memory and allocated memory is
+indicative of heap fragmentation. This information can be used to decide
+whether to call **pmemobj_defrag**(3) when fragmentation appears to be high.
+
+However, for small heaps `run_active` might be disproportionately higher than
+`run_allocated` because the allocator typically activates a significantly
+larger amount of memory than is required to satisfy a single request in
+anticipation of future needs. For example, the first allocation of 100 bytes
+in a heap will trigger the activation of 256 kilobytes of space.
+
+This is a transient statistic and is rebuilt lazily every time the pool
+is opened.
+
 heap.size.granularity | rw- | - | uint64_t | uint64_t | - | long long
 
 Reads or modifies the granularity with which the heap grows when OOM.
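For review context only (not part of the patch): a minimal sketch of how a consumer might read the new statistics through **pmemobj_ctl_get**(3) and derive the same active-to-allocated ratio that the obj_fragmentation2 test below checks. The helper name `report_run_fragmentation` and the 0.5 threshold are illustrative assumptions; it also assumes statistics were enabled beforehand via the `stats.enabled` ctl.

```c
#include <inttypes.h>
#include <stdio.h>
#include <libpmemobj.h>

/*
 * Illustrative helper (not part of the patch): reads the transient run
 * statistics and reports a simple fragmentation estimate. Assumes the
 * pool was opened elsewhere and "stats.enabled" was set to 1.
 */
static void
report_run_fragmentation(PMEMobjpool *pop)
{
	uint64_t active = 0;
	uint64_t allocated = 0;

	if (pmemobj_ctl_get(pop, "stats.heap.run_active", &active) != 0 ||
	    pmemobj_ctl_get(pop, "stats.heap.run_allocated", &allocated) != 0)
		return; /* statistics unavailable */

	if (allocated == 0)
		return; /* no run-based allocations, the ratio is meaningless */

	/* 0 means no wasted run space; larger values mean more fragmentation */
	double frag = ((double)active / (double)allocated) - 1.0;

	printf("run_active=%" PRIu64 " run_allocated=%" PRIu64 " frag=%.2f\n",
		active, allocated, frag);

	/* purely illustrative threshold; tune it for the actual workload */
	if (frag > 0.5)
		printf("consider calling pmemobj_defrag()\n");
}
```

A caller would typically enable statistics right after opening the pool, e.g. `int enabled = 1; pmemobj_ctl_set(pop, "stats.enabled", &enabled);`, exactly as the obj_fragmentation2 test changes below do.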
diff --git a/src/libpmemobj/heap.c b/src/libpmemobj/heap.c
index 3ac93a288ff577614dde44eaeb3179566ec06bec..1291095d24b27d8b78a35157e82e2af9425d4dbf 100644
--- a/src/libpmemobj/heap.c
+++ b/src/libpmemobj/heap.c
@@ -489,6 +489,9 @@ heap_run_create(struct palloc_heap *heap, struct bucket *b,
 		return -1;
 	}
 
+	STATS_INC(heap->stats, transient, heap_run_active,
+		m->size_idx * CHUNKSIZE);
+
 	return 0;
 }
 
@@ -558,6 +561,9 @@ heap_run_into_free_chunk(struct palloc_heap *heap,
 	m->block_off = 0;
 	m->size_idx = hdr->size_idx;
 
+	STATS_SUB(heap->stats, transient, heap_run_active,
+		m->size_idx * CHUNKSIZE);
+
 	/*
 	 * The only thing this could race with is heap_memblock_on_free()
 	 * because that function is called after processing the operation,
@@ -582,7 +588,7 @@ heap_run_into_free_chunk(struct palloc_heap *heap,
  * Returns 1 if reclaimed chunk, 0 otherwise.
  */
 static int
-heap_reclaim_run(struct palloc_heap *heap, struct memory_block *m)
+heap_reclaim_run(struct palloc_heap *heap, struct memory_block *m, int startup)
 {
 	struct chunk_run *run = heap_get_chunk_run(heap, m);
 	struct chunk_header *hdr = heap_get_chunk_hdr(heap, m);
@@ -605,6 +611,13 @@ heap_reclaim_run(struct palloc_heap *heap, struct memory_block *m)
 	if (e.free_space == c->run.nallocs)
 		return 1;
 
+	if (startup) {
+		STATS_INC(heap->stats, transient, heap_run_active,
+			m->size_idx * CHUNKSIZE);
+		STATS_INC(heap->stats, transient, heap_run_allocated,
+			c->run.nallocs - e.free_space);
+	}
+
 	if (recycler_put(heap->rt->recyclers[c->id], m, e) < 0)
 		ERR("lost runtime tracking info of %u run due to OOM", c->id);
 
@@ -634,10 +647,9 @@ heap_reclaim_zone_garbage(struct palloc_heap *heap, struct bucket *bucket,
 
 		switch (hdr->type) {
 			case CHUNK_TYPE_RUN:
-				if (heap_reclaim_run(heap, &m) != 0) {
+				if (heap_reclaim_run(heap, &m, 1) != 0)
 					heap_run_into_free_chunk(heap, bucket, &m);
-				}
 				break;
 			case CHUNK_TYPE_FREE:
 				heap_free_chunk_reuse(heap, bucket, &m);
@@ -853,7 +865,7 @@ heap_reuse_from_recycler(struct palloc_heap *heap,
 void
 heap_discard_run(struct palloc_heap *heap, struct memory_block *m)
 {
-	if (heap_reclaim_run(heap, m)) {
+	if (heap_reclaim_run(heap, m, 0)) {
 		struct bucket *defb = heap_bucket_acquire(heap,
 			DEFAULT_ALLOC_CLASS_ID, 0);
 
diff --git a/src/libpmemobj/palloc.c b/src/libpmemobj/palloc.c
index aee3f0f10dfcdca9e2f376cd207dce01b98470ca..7f15fbeacff52841f7f83596decf286ca52b3cda 100644
--- a/src/libpmemobj/palloc.c
+++ b/src/libpmemobj/palloc.c
@@ -418,6 +418,10 @@ palloc_heap_action_on_process(struct palloc_heap *heap,
 	if (act->new_state == MEMBLOCK_ALLOCATED) {
 		STATS_INC(heap->stats, persistent, heap_curr_allocated,
 			act->m.m_ops->get_real_size(&act->m));
+		if (act->m.type == MEMORY_BLOCK_RUN) {
+			STATS_INC(heap->stats, transient, heap_run_allocated,
+				act->m.m_ops->get_real_size(&act->m));
+		}
 	} else if (act->new_state == MEMBLOCK_FREE) {
 		if (On_valgrind) {
 			void *ptr = act->m.m_ops->get_user_data(&act->m);
@@ -442,6 +446,10 @@ palloc_heap_action_on_process(struct palloc_heap *heap,
 
 		STATS_SUB(heap->stats, persistent, heap_curr_allocated,
 			act->m.m_ops->get_real_size(&act->m));
+		if (act->m.type == MEMORY_BLOCK_RUN) {
+			STATS_SUB(heap->stats, transient, heap_run_allocated,
+				act->m.m_ops->get_real_size(&act->m));
+		}
 		heap_memblock_on_free(heap, &act->m);
 	}
 }
@@ -945,9 +953,8 @@ palloc_defrag(struct palloc_heap *heap, uint64_t **objv, size_t objcnt,
 		if (operation_reserve(ctx, entries_size) != 0)
 			goto err;
 
-		palloc_publish(heap,
-			VEC_ARR(&actv),
-			VEC_SIZE(&actv), ctx);
+		palloc_publish(heap, VEC_ARR(&actv), VEC_SIZE(&actv),
+			ctx);
 
 		operation_start(ctx);
 
 		VEC_CLEAR(&actv);
diff --git a/src/libpmemobj/stats.c b/src/libpmemobj/stats.c
index 2441b38e0e4013afc32dabd0fba7c4573ba9e0e7..ede15d06c49166b04edf7fd9a72ccc3d20745871 100644
--- a/src/libpmemobj/stats.c
+++ b/src/libpmemobj/stats.c
@@ -39,8 +39,13 @@
 
 STATS_CTL_HANDLER(persistent, curr_allocated, heap_curr_allocated);
 
+STATS_CTL_HANDLER(transient, run_allocated, heap_run_allocated);
+STATS_CTL_HANDLER(transient, run_active, heap_run_active);
+
 static const struct ctl_node CTL_NODE(heap)[] = {
 	STATS_CTL_LEAF(persistent, curr_allocated),
+	STATS_CTL_LEAF(transient, run_allocated),
+	STATS_CTL_LEAF(transient, run_active),
 
 	CTL_NODE_END
 };
diff --git a/src/libpmemobj/stats.h b/src/libpmemobj/stats.h
index 02fe55e1ef3a42360ebdff64c7c85d41f85d971f..54378bdadc14167295bd154291f6a4d375ca56fd 100644
--- a/src/libpmemobj/stats.h
+++ b/src/libpmemobj/stats.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2017-2018, Intel Corporation
+ * Copyright 2017-2019, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
@@ -44,7 +44,8 @@ extern "C" {
 #endif
 
 struct stats_transient {
-	int unused;
+	uint64_t heap_run_allocated;
+	uint64_t heap_run_active;
 };
 
 struct stats_persistent {
diff --git a/src/test/obj_ctl_stats/obj_ctl_stats.c b/src/test/obj_ctl_stats/obj_ctl_stats.c
index b25e790337b2c5124b0c455a8d2c7d167c9c8a08..f7bbf2327a2810b79592ad1b04f2d945b52e56ca 100644
--- a/src/test/obj_ctl_stats/obj_ctl_stats.c
+++ b/src/test/obj_ctl_stats/obj_ctl_stats.c
@@ -76,12 +76,22 @@ main(int argc, char *argv[])
 	UT_ASSERTeq(ret, 0);
 	UT_ASSERTeq(allocated, oid_size);
 
+	size_t run_allocated = 0;
+	ret = pmemobj_ctl_get(pop, "stats.heap.run_allocated", &run_allocated);
+	UT_ASSERTeq(ret, 0);
+	UT_ASSERTeq(allocated, run_allocated);
+
 	pmemobj_free(&oid);
 
 	ret = pmemobj_ctl_get(pop, "stats.heap.curr_allocated", &allocated);
 	UT_ASSERTeq(ret, 0);
 	UT_ASSERTeq(allocated, 0);
 
+	allocated = 0;
+	ret = pmemobj_ctl_get(pop, "stats.heap.run_allocated", &run_allocated);
+	UT_ASSERTeq(ret, 0);
+	UT_ASSERTeq(allocated, run_allocated);
+
 	TX_BEGIN(pop) {
 		oid = pmemobj_tx_alloc(1, 0);
 	} TX_ONABORT {
diff --git a/src/test/obj_fragmentation2/obj_fragmentation2.c b/src/test/obj_fragmentation2/obj_fragmentation2.c
index 90ae43be883b225031dfb5b58fceb0c94bd8d686..89252345b94c832ee1ffeb98b85000210ec4ce66 100644
--- a/src/test/obj_fragmentation2/obj_fragmentation2.c
+++ b/src/test/obj_fragmentation2/obj_fragmentation2.c
@@ -199,6 +199,15 @@ static float workloads_defrag_target[] = {
 	0.01f, 0.01f, 0.01f, 0.01f, 0.01f, 0.056f, 0.1f, 0.13f, 0.01f
 };
 
+/* last workload operates only on huge chunks, so run stats are useless */
+static float workloads_stat_target[] = {
+	0.01f, 1.1f, 1.1f, 0.86f, 0.76f, 1.01f, 0.23f, 1.24f, 2100.f
+};
+
+static float workloads_defrag_stat_target[] = {
+	0.01f, 0.01f, 0.01f, 0.02f, 0.02f, 0.04f, 0.08f, 0.12f, 2100.f
+};
+
 int
 main(int argc, char *argv[])
 {
@@ -227,8 +236,21 @@ main(int argc, char *argv[])
 	objects = ZALLOC(sizeof(PMEMoid) * MAX_OBJECTS);
 	UT_ASSERTne(objects, NULL);
 
+	int enabled = 1;
+	pmemobj_ctl_set(pop, "stats.enabled", &enabled);
+
 	workloads[w](pop);
 
+	/* this is to trigger global recycling */
+	pmemobj_defrag(pop, NULL, 0, NULL);
+
+	size_t active = 0;
+	size_t allocated = 0;
+	pmemobj_ctl_get(pop, "stats.heap.run_active", &active);
+	pmemobj_ctl_get(pop, "stats.heap.run_allocated", &allocated);
+	float stat_frag = ((float)active / allocated) - 1.f;
+	UT_ASSERT(stat_frag <= workloads_stat_target[w]);
+
 	if (defrag) {
 		PMEMoid **objectsf = ZALLOC(sizeof(PMEMoid) * nobjects);
 		for (size_t i = 0; i < nobjects; ++i)
@@ -237,6 +259,17 @@ main(int argc, char *argv[])
 		pmemobj_defrag(pop, objectsf, nobjects, NULL);
 
 		FREE(objectsf);
+
+		active = 0;
+		allocated = 0;
+
+		/* this is to trigger global recycling */
+		pmemobj_defrag(pop, NULL, 0, NULL);
+
+		pmemobj_ctl_get(pop, "stats.heap.run_active", &active);
+		pmemobj_ctl_get(pop, "stats.heap.run_allocated", &allocated);
+		stat_frag = ((float)active / allocated) - 1.f;
+		UT_ASSERT(stat_frag <= workloads_defrag_stat_target[w]);
 	}
 
 	PMEMoid oid;