diff --git a/src/libpmem2/ppc64/.cstyleignore b/src/libpmem2/ppc64/.cstyleignore
new file mode 100644
index 0000000000000000000000000000000000000000..27bb1279e4c31ed5a7ec81573a1cc76b82623127
--- /dev/null
+++ b/src/libpmem2/ppc64/.cstyleignore
@@ -0,0 +1 @@
+init.c
diff --git a/src/libpmem2/ppc64/init.c b/src/libpmem2/ppc64/init.c
index 44dbd1727ecfdbdad2aa191d40e09fba21a9e521..1327ff9913b24b2e5af562a66951b4df6b2e099f 100644
--- a/src/libpmem2/ppc64/init.c
+++ b/src/libpmem2/ppc64/init.c
@@ -3,22 +3,41 @@
 /* Copyright 2019-2020, Intel Corporation */
 
 #include <errno.h>
+#include <sys/mman.h>
 
 #include "out.h"
 #include "pmem2_arch.h"
 #include "util.h"
 
+/*
+ * Older assembler versions (e.g. Binutils 2.34) do not accept the new
+ * values of the L operand used by the sync and dcbf instructions below.
+ * Work around this by emitting the raw instruction encodings with .long.
+ */
+#define __SYNC(l) ".long (0x7c0004AC | ((" #l ") << 21))"
+#define __DCBF(ra, rb, l) ".long (0x7c0000AC | ((" #l ") << 21)"	\
+	" | ((" #ra ") << 16) | ((" #rb ") << 11))"
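+/* e.g. __SYNC(4) emits ".long (0x7c0004AC | ((4) << 21))", i.e. phwsync */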
+
 static void
 ppc_fence(void)
 {
 	LOG(15, NULL);
 
 	/*
-	 * Force a memory barrier to flush out all cache lines
+	 * Force a memory barrier to flush out all cache lines.
+	 * Use a heavyweight sync in order to guarantee memory ordering even
+	 * with respect to the data cache flushes.
+	 * According to the POWER ISA 3.1, phwsync (i.e. sync with L=4) is
+	 * treated as a hwsync by processors compatible with previous versions
+	 * of the POWER ISA.
 	 */
-	asm volatile(
-		"lwsync"
-		: : : "memory");
+	asm volatile(__SYNC(4) : : : "memory");
+}
+
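+/*
+ * ppc_fence_empty -- no-op fence
+ *
+ * Used when running under Valgrind: flushing is then done with msync(2)
+ * (see ppc_flush_msync below), which already syncs the data to the
+ * backing device, so no extra memory barrier is needed.
+ */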
+static void
+ppc_fence_empty(void)
+{
+	LOG(15, NULL);
 }
 
 static void
@@ -32,22 +51,66 @@ ppc_flush(const void *addr, size_t size)
 	/* round down the address */
 	uptr &= ~(CACHELINE_SIZE - 1);
 	while (uptr < end) {
-		/* issue a dcbst instruction for the cache line */
-		asm volatile(
-			"dcbst 0,%0"
-			: :"r"(uptr) : "memory");
+		/*
+		 * Flush the data cache block.
+		 * According to the POWER ISA 3.1, dcbstps (i.e. dcbf with L=6)
+		 * behaves as dcbf (L=0) on processors compatible with previous
+		 * versions of the POWER ISA.
+		 */
+		asm volatile(__DCBF(0, %0, 6) : :"r"(uptr) : "memory");
 
 		uptr += CACHELINE_SIZE;
 	}
 }
 
+static void
+ppc_flush_msync(const void *addr, size_t len)
+{
+	LOG(15, "addr %p len %zu", addr, len);
+	/* this implementation is a copy of pmem_msync */
+
+	VALGRIND_DO_CHECK_MEM_IS_ADDRESSABLE(addr, len);
+
+	/*
+	 * msync requires addr to be a multiple of pagesize but there are no
+	 * msync requires addr to be a multiple of the page size, but there are
+	 * no requirements for len. Align addr down and change len so that
+	 * [addr, addr + len) still contains the initial range.
+
+	/* increase len by the amount we gain when we round addr down */
+	len += (uintptr_t)addr & (Pagesize - 1);
+
+	/* round addr down to page boundary */
+	uintptr_t uptr = (uintptr_t)addr & ~((uintptr_t)Pagesize - 1);
+
+	/*
+	 * msync accepts only addresses aligned to the page boundary, so we
+	 * may sync more than requested, and part of that range may have been
+	 * marked as undefined/inaccessible. Msyncing such memory is not a
+	 * bug, so as a workaround temporarily disable error reporting.
+	 */
+	VALGRIND_DO_DISABLE_ERROR_REPORTING;
+
+	if (msync((void *)uptr, len, MS_SYNC) < 0)
+		ERR("!msync");
+
+	VALGRIND_DO_ENABLE_ERROR_REPORTING;
+
+	/* full flush */
+	VALGRIND_DO_PERSIST(uptr, len);
+}
+
 void
 pmem2_arch_init(struct pmem2_arch_info *info)
 {
 	LOG(3, "libpmem*: PPC64 support");
 	LOG(3, "PMDK PPC64 support is currently experimental");
 	LOG(3, "Please don't use this library in production environment");
-
-	info->fence = ppc_fence;
-	info->flush = ppc_flush;
+
+	if (On_valgrind) {
+		info->fence = ppc_fence_empty;
+		info->flush = ppc_flush_msync;
+	} else {
+		info->fence = ppc_fence;
+		info->flush = ppc_flush;
+	}
 }
diff --git a/src/libpmemobj/heap.c b/src/libpmemobj/heap.c
index 090e0804e0734797453227516d7d53b9c7bdf0b4..4cbb52c42914eba6537535697fe8de6ad6d83715 100644
--- a/src/libpmemobj/heap.c
+++ b/src/libpmemobj/heap.c
@@ -584,7 +584,7 @@ heap_reclaim_run(struct palloc_heap *heap, struct memory_block *m, int startup)
 		STATS_INC(heap->stats, transient, heap_run_active,
 			m->size_idx * CHUNKSIZE);
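+		/* run_allocated is tracked in bytes, not blocks */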
 		STATS_INC(heap->stats, transient, heap_run_allocated,
-			c->rdsc.nallocs - e.free_space);
+			(c->rdsc.nallocs - e.free_space) * run->hdr.block_size);
 	}
 
 	if (recycler_put(heap->rt->recyclers[c->id], m, e) < 0)
diff --git a/src/test/obj_ctl_stats/obj_ctl_stats.c b/src/test/obj_ctl_stats/obj_ctl_stats.c
index 8c2882c862a0699d41ff192427676937eca9a7dc..6e4b0230e732aac28e1825ecc7d4988e20b19181 100644
--- a/src/test/obj_ctl_stats/obj_ctl_stats.c
+++ b/src/test/obj_ctl_stats/obj_ctl_stats.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: BSD-3-Clause
-/* Copyright 2017-2019, Intel Corporation */
+/* Copyright 2017-2020, Intel Corporation */
 
 /*
  * obj_ctl_stats.c -- tests for the libpmemobj statistics module
@@ -93,6 +93,29 @@ main(int argc, char *argv[])
 	UT_ASSERTeq(ret, 0);
 	UT_ASSERTeq(tmp, run_allocated); /* shouldn't change */
 
+	/* the deallocated object shouldn't be reflected in rebuilt stats */
+	pmemobj_free(&oid);
+
+	pmemobj_close(pop);
+
+	pop = pmemobj_open(path, "ctl");
+	UT_ASSERTne(pop, NULL);
+
+	/* stats are rebuilt lazily, so initially this should be 0 */
+	tmp = 0;
+	ret = pmemobj_ctl_get(pop, "stats.heap.run_allocated", &tmp);
+	UT_ASSERTeq(ret, 0);
+	UT_ASSERTeq(tmp, 0);
+
+	ret = pmemobj_alloc(pop, NULL, 1, 0, NULL, NULL);
+	UT_ASSERTeq(ret, 0);
+
+	/* after first alloc, the previously allocated object will be found */
+	tmp = 0;
+	ret = pmemobj_ctl_get(pop, "stats.heap.run_allocated", &tmp);
+	UT_ASSERTeq(ret, 0);
+	UT_ASSERTeq(tmp, run_allocated + oid_size);
+
 	pmemobj_close(pop);
 
 	DONE(NULL);