diff --git a/doc/libpmem2/pmem2_map_new.3.md b/doc/libpmem2/pmem2_map_new.3.md index 12f1a36158e4a04fe6efda579010fe81a95a3a87..23ddec16c209c5786abe6a78fa6eda56c61a8198 100644 --- a/doc/libpmem2/pmem2_map_new.3.md +++ b/doc/libpmem2/pmem2_map_new.3.md @@ -41,6 +41,9 @@ The **pmem2_map_new**() function creates a new mapping in the virtual address sp of the calling process. This function requires a configuration *config* of the mapping and the data source *source*. +Optionally, the mapping can be created at the offset of the virtual memory reservation +set in the configuration *config*. See **pmem2_config_set_vm_reservation**(3) for details. + For a mapping to succeed, the *config* structure must have the granularity parameter set to the appropriate level. See **pmem2_config_set_required_store_granularity**(3) and **libpmem2**(7) for more details. @@ -95,6 +98,9 @@ the alignment required for specific *\*source*. Please see and the base mapping address (reservation address + reservation offset) is not aligned to the device DAX granularity. Please see **pmem2_config_set_vm_reservation**(3). (Linux only) +* **PMEM2_E_LENGTH_OUT_OF_RANGE** - when mapping to a virtual memory reservation and the region +for the mapping exceeds reservation size. Please see **pmem2_config_set_vm_reservation**(3). + * **PMEM2_E_NOSUPP** - when config-provided protection flags combination is not supported. * **PMEM2_E_NO_ACCESS** - there is a conflict between mapping protection and file opening mode. 
@@ -115,4 +121,5 @@ It can also return all errors from the underlying **pmem2_config_set_required_store_granularity**(3), **pmem2_source_alignment**(3), **pmem2_source_from_fd**(3), **pmem2_source_size**(3), **pmem2_map_delete**(3), +**pmem2_config_set_vm_reservation**(3), **libpmem2**(7) and **<http://pmem.io>** diff --git a/src/libpmem2/map.c b/src/libpmem2/map.c index 88534ca30f976d4fa700d74207ed88353168f403..cce1e74bca191fc6a68e5b7fce710d3011cf9782 100644 --- a/src/libpmem2/map.c +++ b/src/libpmem2/map.c @@ -134,41 +134,44 @@ pmem2_validate_offset(const struct pmem2_config *cfg, size_t *offset, return 0; } -static struct ravl_interval *ri; -static os_rwlock_t lock; - /* * mapping_min - return min boundary for mapping */ static size_t -mapping_min(void *map) +mapping_min(void *addr) { - return (size_t)pmem2_map_get_address(map); + struct pmem2_map *map = (struct pmem2_map *)addr; + return (size_t)map->addr; } /* * mapping_max - return max boundary for mapping */ static size_t -mapping_max(void *map) +mapping_max(void *addr) { - return (size_t)pmem2_map_get_address(map) + - pmem2_map_get_size(map); + struct pmem2_map *map = (struct pmem2_map *)addr; + return (size_t)map->addr + map->content_length; } +static struct pmem2_state { + struct ravl_interval *range_map; + os_rwlock_t range_map_lock; +} State; + /* * pmem2_map_init -- initialize the map module */ void -pmem2_map_init(void) +pmem2_map_init() { - os_rwlock_init(&lock); + util_rwlock_init(&State.range_map_lock); - util_rwlock_wrlock(&lock); - ri = ravl_interval_new(mapping_min, mapping_max); - util_rwlock_unlock(&lock); + util_rwlock_wrlock(&State.range_map_lock); + State.range_map = ravl_interval_new(mapping_min, mapping_max); + util_rwlock_unlock(&State.range_map_lock); - if (!ri) + if (!State.range_map) abort(); } @@ -178,11 +181,9 @@ pmem2_map_init(void) void pmem2_map_fini(void) { - util_rwlock_wrlock(&lock); - ravl_interval_delete(ri); - util_rwlock_unlock(&lock); - - os_rwlock_destroy(&lock); + 
util_rwlock_wrlock(&State.range_map_lock); + ravl_interval_delete(State.range_map); + util_rwlock_unlock(&State.range_map_lock); } /* @@ -191,9 +192,9 @@ pmem2_map_fini(void) int pmem2_register_mapping(struct pmem2_map *map) { - util_rwlock_wrlock(&lock); - int ret = ravl_interval_insert(ri, map); - util_rwlock_unlock(&lock); + util_rwlock_wrlock(&State.range_map_lock); + int ret = ravl_interval_insert(State.range_map, map); + util_rwlock_unlock(&State.range_map_lock); return ret; } @@ -207,15 +208,15 @@ pmem2_unregister_mapping(struct pmem2_map *map) int ret = 0; struct ravl_interval_node *node; - util_rwlock_wrlock(&lock); - node = ravl_interval_find_equal(ri, map); + util_rwlock_wrlock(&State.range_map_lock); + node = ravl_interval_find_equal(State.range_map, map); if (node) { - ret = ravl_interval_remove(ri, node); + ret = ravl_interval_remove(State.range_map, node); } else { ERR("Cannot find mapping %p to delete", map); ret = PMEM2_E_MAPPING_NOT_FOUND; } - util_rwlock_unlock(&lock); + util_rwlock_unlock(&State.range_map_lock); return ret; } @@ -233,9 +234,9 @@ pmem2_map_find(const void *addr, size_t len) struct ravl_interval_node *node; - util_rwlock_rdlock(&lock); - node = ravl_interval_find(ri, &map); - util_rwlock_unlock(&lock); + util_rwlock_rdlock(&State.range_map_lock); + node = ravl_interval_find(State.range_map, &map); + util_rwlock_unlock(&State.range_map_lock); if (!node) return NULL; diff --git a/src/libpmem2/map.h b/src/libpmem2/map.h index aabe3f7331a554618d86c2141dd5fad2a03bdcc4..9cc172905208627b1be87d37ad7a8f147c849db0 100644 --- a/src/libpmem2/map.h +++ b/src/libpmem2/map.h @@ -45,10 +45,6 @@ struct pmem2_map { struct pmem2_vm_reservation *reserv; }; -#ifdef _WIN32 -os_rwlock_t split_merge_lock; -#endif - enum pmem2_granularity get_min_granularity(bool eADR, bool is_pmem, enum pmem2_sharing_type sharing); struct pmem2_map *pmem2_map_find(const void *addr, size_t len); diff --git a/src/libpmem2/map_posix.c b/src/libpmem2/map_posix.c index 
0b09f8b07eb2ac86d384e170216ee7c376bd3334..302cc7a19d3a7031a661cc6900ed91c574923988 100644 --- a/src/libpmem2/map_posix.c +++ b/src/libpmem2/map_posix.c @@ -21,6 +21,7 @@ #include "persist.h" #include "pmem2_utils.h" #include "source.h" +#include "sys_util.h" #include "valgrind_internal.h" #ifndef MAP_SYNC @@ -370,57 +371,54 @@ pmem2_map_new(struct pmem2_map **map_ptr, const struct pmem2_config *cfg, size_t alignment = get_map_alignment(content_length, src_alignment); - /* prepare pmem2_map structure */ - map = (struct pmem2_map *)pmem2_malloc(sizeof(*map), &ret); - if (!map) - return ret; - - void *reserv = NULL; - if (cfg->reserv) { - void *rsv = cfg->reserv; + void *reserv_region = NULL; + void *rsv = cfg->reserv; + if (rsv) { void *rsv_addr = pmem2_vm_reservation_get_address(rsv); size_t rsv_size = pmem2_vm_reservation_get_size(rsv); size_t rsv_offset = cfg->reserv_offset; - /* check if reservation has enough space */ - if (rsv_offset + content_length > rsv_size) { - ret = PMEM2_E_LENGTH_OUT_OF_RANGE; - ERR( - "Reservation %p has not enough space for the intended content", - rsv); - goto err; - } + reserved_length = roundup(content_length, Pagesize); if (rsv_offset % Mmap_align) { ret = PMEM2_E_OFFSET_UNALIGNED; ERR( "virtual memory reservation offset %zu is not a multiple of %llu", rsv_offset, Mmap_align); - goto err; + return ret; } - reserv = (char *)rsv_addr + rsv_offset; - if ((size_t)reserv % alignment) { + if (rsv_offset + reserved_length > rsv_size) { + ret = PMEM2_E_LENGTH_OUT_OF_RANGE; + ERR( + "Reservation %p has not enough space for the intended content", + rsv); + return ret; + } + + reserv_region = (char *)rsv_addr + rsv_offset; + if ((size_t)reserv_region % alignment) { ret = PMEM2_E_ADDRESS_UNALIGNED; ERR( "base mapping address %p (virtual memory reservation address + offset)" \ " is not a multiple of %zu required by device DAX", - reserv, alignment); - goto err; + reserv_region, alignment); + return ret; } - reserved_length = 
roundup(content_length, Pagesize); - - map->addr = reserv; - map->content_length = content_length; - - /* register wanted vm reservation region */ - ret = vm_reservation_map_register(cfg->reserv, map); - if (ret) - goto err; + /* check if the region in the reservation is occupied */ + if (vm_reservation_map_find_acquire(rsv, rsv_offset, + reserved_length)) { + ret = PMEM2_E_MAPPING_EXISTS; + ERR( + "region of the reservation %p at the offset %zu and " + "length %zu is at least partly occupied by other mapping", + rsv, rsv_offset, reserved_length); + goto err_reservation_release; + } } else { /* find a hint for the mapping */ - ret = map_reserve(content_length, alignment, &reserv, + ret = map_reserve(content_length, alignment, &reserv_region, &reserved_length, cfg); if (ret != 0) { if (ret == PMEM2_E_MAPPING_EXISTS) @@ -429,11 +427,11 @@ pmem2_map_new(struct pmem2_map **map_ptr, const struct pmem2_config *cfg, else LOG(1, "cannot find a contiguous region of given size"); - goto err; + return ret; } } - ASSERTne(reserv, NULL); + ASSERTne(reserv_region, NULL); if (cfg->sharing == PMEM2_PRIVATE) { flags |= MAP_PRIVATE; @@ -448,15 +446,15 @@ pmem2_map_new(struct pmem2_map **map_ptr, const struct pmem2_config *cfg, ASSERT(0); } - ret = file_map(reserv, content_length, proto, flags, map_fd, off, + ret = file_map(reserv_region, content_length, proto, flags, map_fd, off, &map_sync, &addr); if (ret) { /* * unmap the reservation mapping only * if it wasn't provided by the config */ - if (!cfg->reserv) - munmap(reserv, reserved_length); + if (!rsv) + munmap(reserv_region, reserved_length); if (ret == -EACCES) ret = PMEM2_E_NO_ACCESS; @@ -464,7 +462,7 @@ pmem2_map_new(struct pmem2_map **map_ptr, const struct pmem2_config *cfg, ret = PMEM2_E_NOSUPP; else if (ret == -EEXIST) ret = PMEM2_E_MAPPING_EXISTS; - goto err_file_map; + goto err_reservation_release; } LOG(3, "mapped at %p", addr); @@ -486,22 +484,33 @@ pmem2_map_new(struct pmem2_map **map_ptr, const struct pmem2_config 
*cfg, cfg->requested_max_granularity); ERR("%s", err); ret = PMEM2_E_GRANULARITY_NOT_SUPPORTED; - goto err_gran_reg; + goto err_undo_mapping; } + /* prepare pmem2_map structure */ + map = (struct pmem2_map *)pmem2_malloc(sizeof(*map), &ret); + if (!map) + goto err_undo_mapping; + map->addr = addr; map->reserved_length = reserved_length; map->content_length = content_length; map->effective_granularity = available_min_granularity; pmem2_set_flush_fns(map); pmem2_set_mem_fns(map); - map->reserv = cfg->reserv; + map->reserv = rsv; map->source = *src; map->source.value.fd = INVALID_FD; /* fd should not be used after map */ ret = pmem2_register_mapping(map); if (ret) { - goto err_gran_reg; + goto err_free_map_struct; + } + + if (rsv) { + ret = vm_reservation_map_register_release(rsv, map); + if (ret) + goto err_unregister_map; } *map_ptr = map; @@ -514,21 +523,22 @@ pmem2_map_new(struct pmem2_map **map_ptr, const struct pmem2_config *cfg, return 0; -err_gran_reg: +err_unregister_map: + pmem2_unregister_mapping(map); +err_free_map_struct: + Free(map); +err_undo_mapping: /* * if the reservation was given by pmem2_config, instead of unmapping, * we will need to mend the reservation */ - if (cfg->reserv) - vm_reservation_mend(cfg->reserv, addr, reserved_length); + if (rsv) + vm_reservation_mend(rsv, addr, reserved_length); else unmap(addr, reserved_length); -err_file_map: - if (cfg->reserv) - vm_reservation_map_unregister(cfg->reserv, map); -err: - free(map); - +err_reservation_release: + if (rsv) + vm_reservation_release(rsv); return ret; } @@ -543,6 +553,9 @@ pmem2_map_delete(struct pmem2_map **map_ptr) int ret = 0; struct pmem2_map *map = *map_ptr; + size_t map_len = map->content_length; + void *map_addr = map->addr; + struct pmem2_vm_reservation *rsv = map->reserv; ret = pmem2_unregister_mapping(map); if (ret) @@ -554,26 +567,40 @@ pmem2_map_delete(struct pmem2_map **map_ptr) * by pmem2. 
*/ if (map->reserved_length) { - VALGRIND_REMOVE_PMEM_MAPPING(map->addr, map->content_length); - - if (map->reserv) { - ret = vm_reservation_map_unregister(map->reserv, map); + VALGRIND_REMOVE_PMEM_MAPPING(map_addr, map_len); + + if (rsv) { + size_t rsv_offset = (size_t)map_addr - + (size_t)rsv->addr; + if (!vm_reservation_map_find_acquire(rsv, rsv_offset, + map_len)) { + ret = PMEM2_E_MAPPING_NOT_FOUND; + goto err_reservation_release; + } + + ret = vm_reservation_mend(rsv, map_addr, map_len); if (ret) - return ret; + goto err_reservation_release; - ret = vm_reservation_mend(map->reserv, map->addr, - map->reserved_length); + ret = vm_reservation_map_unregister_release(rsv, map); if (ret) - return ret; + goto err_register_map; } else { - ret = unmap(map->addr, map->reserved_length); + ret = unmap(map_addr, map_len); if (ret) - return ret; + goto err_register_map; } } Free(map); *map_ptr = NULL; + return 0; + +err_reservation_release: + vm_reservation_release(rsv); +err_register_map: + VALGRIND_REGISTER_PMEM_MAPPING(map_addr, map_len); + pmem2_register_mapping(map); return ret; } diff --git a/src/libpmem2/map_windows.c b/src/libpmem2/map_windows.c index 8f761382eb15b0cec36cf9534b5742375f3306e3..b35926dc033c5d89f569026e4e83ccde4b0070fa 100644 --- a/src/libpmem2/map_windows.c +++ b/src/libpmem2/map_windows.c @@ -109,14 +109,15 @@ struct pmem2_map *vm_reservation_map_find_closest_later( size_t reserv_offset, size_t len); /* - * reservation_mend -- unmaps given mapping and mends reservation area + * vm_reservation_unmap -- unmaps given region of the reservation, + * preserves the placeholder */ static int -reservation_mend(struct pmem2_vm_reservation *rsv, void *addr, size_t length) +vm_reservation_unmap(struct pmem2_vm_reservation *rsv, void *addr, + size_t length) { void *rsv_addr = pmem2_vm_reservation_get_address(rsv); size_t rsv_size = pmem2_vm_reservation_get_size(rsv); - size_t rsv_offset = (size_t)addr - (size_t)rsv->addr; if (addr < rsv_addr || (char *)addr + 
length > (char *)rsv_addr + rsv_size) @@ -130,25 +131,38 @@ reservation_mend(struct pmem2_vm_reservation *rsv, void *addr, size_t length) return pmem2_lasterror_to_err(); } + return 0; +} + +/* + * vm_reservation_merge -- merges given placeholder region with its neighbouring + * placeholders + */ +static int +vm_reservation_merge(struct pmem2_vm_reservation *rsv, void *addr, + size_t length) +{ + size_t rsv_size = pmem2_vm_reservation_get_size(rsv); + size_t rsv_offset = (size_t)addr - (size_t)rsv->addr; + /* - * Before mapping to the reservation, it is neccessary to split - * the unoccupied region into separate placeholders, so that - * the mapping and the cut out placeholder will be of the same - * size. + * After unmapping from the reservation, it is necessary to merge + * the unoccupied neighbours so that the placeholders will be available + * for splitting for the required size of the mapping. */ - void *mend_addr = addr; - size_t mend_size = length; + void *merge_addr = addr; + size_t merge_size = length; struct pmem2_map *map = NULL; if (rsv_offset > 0) { map = vm_reservation_map_find_closest_prior(rsv, rsv_offset, length); if (map) { - mend_addr = (char *)map->addr + map->reserved_length; - mend_size += (char *)addr - (char *)mend_addr; + merge_addr = (char *)map->addr + map->reserved_length; + merge_size += (char *)addr - (char *)merge_addr; } else { - mend_addr = rsv->addr; - mend_size += rsv_offset; + merge_addr = rsv->addr; + merge_size += rsv_offset; } } @@ -156,14 +170,14 @@ reservation_mend(struct pmem2_vm_reservation *rsv, void *addr, size_t length) map = vm_reservation_map_find_closest_later(rsv, rsv_offset, length); if (map) - mend_size += (char *)map->addr - (char *)addr - length; + merge_size += (char *)map->addr - (char *)addr - length; else - mend_size += rsv->size - rsv_offset - length; + merge_size += rsv->size - rsv_offset - length; - if (addr != mend_addr) { - ret = VirtualFree(mend_addr, - mend_size, + if ((addr != merge_addr) || 
(length != merge_size)) { + int ret = VirtualFree(merge_addr, + merge_size, MEM_RELEASE | MEM_COALESCE_PLACEHOLDERS); if (!ret) { ERR("!!VirtualFree"); @@ -176,16 +190,20 @@ reservation_mend(struct pmem2_vm_reservation *rsv, void *addr, size_t length) } /* - * reservation_split - splits the virtual memory reservation into - * separate regions + * vm_reservation_split - splits the virtual memory reservation into + * separate regions */ int -reservation_split(struct pmem2_vm_reservation *rsv, size_t rsv_offset, +vm_reservation_split(struct pmem2_vm_reservation *rsv, size_t rsv_offset, size_t length) { + LOG(3, "rsv %p rsv_offset %zu length %zu", rsv, rsv_offset, length); + void *rsv_addr = pmem2_vm_reservation_get_address(rsv); size_t rsv_size = pmem2_vm_reservation_get_size(rsv); + LOG(3, "rsv_addr %p rsv_size %zu", rsv_addr, rsv_size); + if ((rsv_offset > 0 && !vm_reservation_map_find(rsv, rsv_offset - 1, 1)) || (rsv_offset + length < rsv_size && @@ -331,61 +349,57 @@ pmem2_map_new(struct pmem2_map **map_ptr, const struct pmem2_config *cfg, return pmem2_lasterror_to_err(); } - /* prepare pmem2_map structure */ - struct pmem2_map *map; - map = (struct pmem2_map *)pmem2_malloc(sizeof(*map), &ret); - if (!map) - goto err_close_mapping_handle; - void *base; - if (cfg->reserv) { - void *rsv = cfg->reserv; + void *rsv = cfg->reserv; + if (rsv) { void *rsv_addr = pmem2_vm_reservation_get_address(rsv); size_t rsv_size = pmem2_vm_reservation_get_size(rsv); size_t rsv_offset = cfg->reserv_offset; - /* check if reservation has enough space */ + if (rsv_offset % Mmap_align) { + ret = PMEM2_E_OFFSET_UNALIGNED; + ERR( + "offset from the beggining of virtual memory " + "reservation %zu is not a multiple of %llu", + rsv_offset, Mmap_align); + goto err_close_mapping_handle; + } + if (rsv_offset + length > rsv_size) { ret = PMEM2_E_LENGTH_OUT_OF_RANGE; ERR( - "reservation has not enought space, offset %zu, length %zu, rsv size %zu", - rsv_offset, length, rsv_size); - goto 
err_free_map_struct; + "length of the mapping %zu combined with the " + "offset into the reservation %zu exceeds virtual " + "memory reservation size %zu", + length, effective_offset, rsv_size); + goto err_close_mapping_handle; } - if (rsv_offset % Mmap_align) { - ret = PMEM2_E_OFFSET_UNALIGNED; + if (vm_reservation_map_find_acquire(rsv, rsv_offset, length)) { + ret = PMEM2_E_MAPPING_EXISTS; ERR( - "virtual memory reservation offset %zu is not a multiple of %llu", - rsv_offset, Mmap_align); - goto err_free_map_struct; + "region of the reservation %p at the offset %zu and " + "length %zu is at least partly occupied by other mapping", + rsv, rsv_offset, length); + goto err_reservation_release; } - map->addr = (char *)rsv_addr + rsv_offset; - map->content_length = length; - - /* register wanted vm reservation region */ - ret = vm_reservation_map_register(cfg->reserv, map); - if (ret) - goto err_free_map_struct; - + void *addr = (char *)rsv_addr + rsv_offset; /* * Before mapping to the reservation, it is neccessary to split - * the unoccupied region into separate placeholders, so that - * the mapping and the cut out placeholder will be of the same - * size. + * the unoccupied region into separate placeholders, + * so that the size to be mapped and the cut out placeholder + * size will be the same. 
*/ - util_rwlock_wrlock(&split_merge_lock); - ret = reservation_split(rsv, rsv_offset, length); - util_rwlock_unlock(&split_merge_lock); + ret = vm_reservation_split(rsv, rsv_offset, length); if (ret) - goto err_vm_reserv_unregister; + goto err_reservation_release; /* replace placeholder with a regular mapping */ base = MapViewOfFile3(mh, NULL, - (char *)rsv_addr + rsv_offset, /* addr in reservation */ - 0, + addr, /* addr in reservation */ + effective_offset, length, MEM_REPLACE_PLACEHOLDER, proto, @@ -399,8 +413,10 @@ pmem2_map_new(struct pmem2_map **map_ptr, const struct pmem2_config *cfg, ret = PMEM2_E_MAPPING_EXISTS; else ret = pmem2_lasterror_to_err(); - goto err_vm_reserv_unregister; + goto err_merge_reservation_regions; } + + ASSERTeq(base, addr); } else { /* obtain a pointer to the mapping view */ base = MapViewOfFile(mh, @@ -412,14 +428,14 @@ pmem2_map_new(struct pmem2_map **map_ptr, const struct pmem2_config *cfg, if (base == NULL) { ERR("!!MapViewOfFile"); ret = pmem2_lasterror_to_err(); - goto err_free_map_struct; + goto err_close_mapping_handle; } } if (!CloseHandle(mh)) { ERR("!!CloseHandle"); ret = pmem2_lasterror_to_err(); - goto err_unmap_base; + goto err_undo_mapping; } enum pmem2_granularity available_min_granularity = @@ -428,7 +444,7 @@ pmem2_map_new(struct pmem2_map **map_ptr, const struct pmem2_config *cfg, int direct_access = is_direct_access(src->value.handle); if (direct_access < 0) { ret = direct_access; - goto err_unmap_base; + goto err_undo_mapping; } bool eADR = (pmem2_auto_flush() == 1); @@ -452,9 +468,15 @@ pmem2_map_new(struct pmem2_map **map_ptr, const struct pmem2_config *cfg, cfg->requested_max_granularity); ERR("%s", err); ret = PMEM2_E_GRANULARITY_NOT_SUPPORTED; - goto err_unmap_base; + goto err_undo_mapping; } + /* prepare pmem2_map structure */ + struct pmem2_map *map; + map = (struct pmem2_map *)pmem2_malloc(sizeof(*map), &ret); + if (!map) + goto err_undo_mapping; + map->addr = base; /* * XXX probably in some cases the 
reserved length > the content length. @@ -463,40 +485,42 @@ pmem2_map_new(struct pmem2_map **map_ptr, const struct pmem2_config *cfg, map->reserved_length = length; map->content_length = length; map->effective_granularity = available_min_granularity; - map->reserv = cfg->reserv; + map->reserv = rsv; map->source = *src; pmem2_set_flush_fns(map); pmem2_set_mem_fns(map); ret = pmem2_register_mapping(map); - if (ret) - goto err_unmap_base; + if (ret) { + goto err_free_map_struct; + } + + if (rsv) { + ret = vm_reservation_map_register_release(rsv, map); + if (ret) + goto err_unregister_map; + } /* return a pointer to the pmem2_map structure */ *map_ptr = map; return ret; -err_unmap_base: - /* - * if the reservation was given by pmem2_config, instead of unmapping, - * we will need to map with MAP_FIXED to mend the reservation - */ - if (cfg->reserv) { - reservation_mend(cfg->reserv, base, length); - vm_reservation_map_unregister(cfg->reserv, map); - } else - UnmapViewOfFile(base); - free(map); - - return ret; - -err_vm_reserv_unregister: - vm_reservation_map_unregister(cfg->reserv, map); - +err_unregister_map: + pmem2_unregister_mapping(map); err_free_map_struct: free(map); - +err_undo_mapping: + if (rsv) + vm_reservation_unmap(rsv, base, length); + else + UnmapViewOfFile(base); +err_merge_reservation_regions: + if (rsv) + vm_reservation_merge(rsv, base, length); +err_reservation_release: + if (rsv) + vm_reservation_release(rsv); err_close_mapping_handle: CloseHandle(mh); return ret; @@ -508,31 +532,46 @@ err_close_mapping_handle: int pmem2_map_delete(struct pmem2_map **map_ptr) { - LOG(3, "mapp %p", map_ptr); + LOG(3, "map_ptr %p", map_ptr); PMEM2_ERR_CLR(); struct pmem2_map *map = *map_ptr; + size_t map_len = map->content_length; + void *map_addr = map->addr; + struct pmem2_vm_reservation *rsv = map->reserv; int ret = pmem2_unregister_mapping(map); if (ret) return ret; if (map->reserved_length != 0) { - if (map->reserv) { - util_rwlock_wrlock(&split_merge_lock); - ret 
= reservation_mend(map->reserv, map->addr, - map->reserved_length); - util_rwlock_unlock(&split_merge_lock); + if (rsv) { + size_t rsv_offset = (size_t)map_addr - + (size_t)rsv->addr; + if (!vm_reservation_map_find_acquire(rsv, rsv_offset, + map_len)) { + ret = PMEM2_E_MAPPING_NOT_FOUND; + goto err_reservation_release; + } + + ret = vm_reservation_unmap(rsv, map->addr, + map->reserved_length); + if (ret) + goto err_reservation_release; + + ret = vm_reservation_merge(rsv, map->addr, + map->reserved_length); if (ret) - return ret; + goto err_reservation_release; - ret = vm_reservation_map_unregister(map->reserv, map); + ret = vm_reservation_map_unregister_release(rsv, map); if (ret) - return ret; + goto err_register_map; } else { if (!UnmapViewOfFile(map->addr)) { ERR("!!UnmapViewOfFile"); - return pmem2_lasterror_to_err(); + ret = pmem2_lasterror_to_err(); + goto err_register_map; } } } @@ -541,4 +580,10 @@ pmem2_map_delete(struct pmem2_map **map_ptr) *map_ptr = NULL; return 0; + +err_reservation_release: + vm_reservation_release(rsv); +err_register_map: + pmem2_register_mapping(map); + return ret; } diff --git a/src/libpmem2/vm_reservation.c b/src/libpmem2/vm_reservation.c index a9b51f11739e4242c054640eb0f40d90c5c07144..ae5f6c9f1b0c55f536d0489c47a59da8730b586d 100644 --- a/src/libpmem2/vm_reservation.c +++ b/src/libpmem2/vm_reservation.c @@ -8,7 +8,6 @@ #include "alloc.h" #include "map.h" #include "pmem2_utils.h" -#include "os_thread.h" #include "ravl_interval.h" #include "sys_util.h" #include "vm_reservation.h" @@ -49,19 +48,20 @@ pmem2_vm_reservation_get_size(struct pmem2_vm_reservation *rsv) * mapping_min - return min boundary for mapping */ static size_t -mapping_min(void *map) +mapping_min(void *addr) { - return (size_t)pmem2_map_get_address(map); + struct pmem2_map *map = (struct pmem2_map *)addr; + return (size_t)map->addr; } /* * mapping_max - return max boundary for mapping */ static size_t -mapping_max(void *map) +mapping_max(void *addr) { - return 
(size_t)pmem2_map_get_address(map) + - pmem2_map_get_size(map); + struct pmem2_map *map = (struct pmem2_map *)addr; + return (size_t)map->addr + map->content_length; } /* @@ -70,10 +70,9 @@ mapping_max(void *map) static int vm_reservation_init(struct pmem2_vm_reservation *rsv) { - os_rwlock_init(&rsv->lock); + util_rwlock_init(&rsv->lock); rsv->itree = ravl_interval_new(mapping_min, mapping_max); - if (!rsv->itree) return -1; @@ -87,6 +86,7 @@ static void vm_reservation_fini(struct pmem2_vm_reservation *rsv) { ravl_interval_delete(rsv->itree); + util_rwlock_destroy(&rsv->lock); } /* @@ -99,24 +99,26 @@ pmem2_vm_reservation_new(struct pmem2_vm_reservation **rsv_ptr, PMEM2_ERR_CLR(); *rsv_ptr = NULL; - unsigned long long gran = Mmap_align; - - if (addr && (unsigned long long)addr % gran) { + /* + * base address has to be aligned to the allocation granularity + * on Windows, and to page size otherwise + */ + if (addr && (unsigned long long)addr % Mmap_align) { ERR("address %p is not a multiple of 0x%llx", addr, - gran); + Mmap_align); return PMEM2_E_ADDRESS_UNALIGNED; } - if (size % gran) { + /* the size must always be a multiple of the page size */ + if (size % Pagesize) { ERR("reservation size %zu is not a multiple of %llu", - size, gran); + size, Pagesize); return PMEM2_E_LENGTH_UNALIGNED; } int ret; struct pmem2_vm_reservation *rsv = pmem2_malloc( sizeof(struct pmem2_vm_reservation), &ret); - if (ret) return ret; @@ -158,8 +160,7 @@ pmem2_vm_reservation_delete(struct pmem2_vm_reservation **rsv_ptr) /* check if reservation contains any mapping */ if (vm_reservation_map_find(rsv, 0, rsv->size)) { - ERR("vm reservation %p already contains a mapping", - rsv); + ERR("vm reservation %p isn't empty", rsv); return PMEM2_E_VM_RESERVATION_NOT_EMPTY; } @@ -174,20 +175,20 @@ pmem2_vm_reservation_delete(struct pmem2_vm_reservation **rsv_ptr) } /* - * vm_reservation_map_register -- register mapping in the mappings tree - * of reservation structure + * 
vm_reservation_map_register_release -- register mapping in the mappings tree + * of reservation structure and release previously acquired lock regardless + * of the success or failure of the function. */ int -vm_reservation_map_register(struct pmem2_vm_reservation *rsv, +vm_reservation_map_register_release(struct pmem2_vm_reservation *rsv, struct pmem2_map *map) { - util_rwlock_wrlock(&rsv->lock); int ret = ravl_interval_insert(rsv->itree, map); util_rwlock_unlock(&rsv->lock); if (ret == -EEXIST) { - ERR("Mapping %p in the reservation %p already exists", - map, rsv); + ERR( + "mapping at the given region of the reservation already exist"); return PMEM2_E_MAPPING_EXISTS; } @@ -195,17 +196,17 @@ vm_reservation_map_register(struct pmem2_vm_reservation *rsv, } /* - * vm_reservation_map_unregister -- unregister mapping from the mapping tree - * of reservation structure + * vm_reservation_map_unregister_release -- unregister mapping from the mapping + * tree of reservation structure and release previously acquired lock regardless + * of the success or failure of the function. 
*/ int -vm_reservation_map_unregister(struct pmem2_vm_reservation *rsv, +vm_reservation_map_unregister_release(struct pmem2_vm_reservation *rsv, struct pmem2_map *map) { int ret = 0; struct ravl_interval_node *node; - util_rwlock_wrlock(&rsv->lock); node = ravl_interval_find_equal(rsv->itree, map); if (node) { ret = ravl_interval_remove(rsv->itree, node); @@ -220,12 +221,12 @@ vm_reservation_map_unregister(struct pmem2_vm_reservation *rsv, } /* - * vm_reservation_map_find -- find the earliest mapping overlapping with - * (addr, addr+size) range + * vm_reservation_map_find -- find the earliest mapping overlapping + * with (addr, addr+size) range */ struct pmem2_map * -vm_reservation_map_find(struct pmem2_vm_reservation *rsv, size_t reserv_offset, - size_t len) +vm_reservation_map_find(struct pmem2_vm_reservation *rsv, + size_t reserv_offset, size_t len) { struct pmem2_map map; map.addr = (char *)rsv->addr + reserv_offset; @@ -233,12 +234,43 @@ vm_reservation_map_find(struct pmem2_vm_reservation *rsv, size_t reserv_offset, struct ravl_interval_node *node; - util_rwlock_rdlock(&rsv->lock); node = ravl_interval_find(rsv->itree, &map); - util_rwlock_unlock(&rsv->lock); if (!node) return NULL; return (struct pmem2_map *)ravl_interval_data(node); } + +/* + * vm_reservation_map_find_acquire -- find the earliest mapping overlapping + * with (addr, addr+size) range. This function acquires a lock and keeps it + * until next release operation. 
+ */ +struct pmem2_map * +vm_reservation_map_find_acquire(struct pmem2_vm_reservation *rsv, + size_t reserv_offset, size_t len) +{ + struct pmem2_map map; + map.addr = (char *)rsv->addr + reserv_offset; + map.content_length = len; + + struct ravl_interval_node *node; + + util_rwlock_wrlock(&rsv->lock); + node = ravl_interval_find(rsv->itree, &map); + + if (!node) + return NULL; + + return (struct pmem2_map *)ravl_interval_data(node); +} + +/* + * vm_reservation_release -- releases previously acquired lock + */ +void +vm_reservation_release(struct pmem2_vm_reservation *rsv) +{ + util_rwlock_unlock(&rsv->lock); +} diff --git a/src/libpmem2/vm_reservation.h b/src/libpmem2/vm_reservation.h index 3b476c085077b0e736fa2611a7143f7b9241b42d..bcae6ef23ffce173e81cffb608edb0671c9185ed 100644 --- a/src/libpmem2/vm_reservation.h +++ b/src/libpmem2/vm_reservation.h @@ -16,11 +16,15 @@ struct pmem2_vm_reservation { os_rwlock_t lock; }; -int vm_reservation_map_register(struct pmem2_vm_reservation *rsv, +int vm_reservation_map_register_release(struct pmem2_vm_reservation *rsv, struct pmem2_map *map); -int vm_reservation_map_unregister(struct pmem2_vm_reservation *rsv, +int vm_reservation_map_unregister_release(struct pmem2_vm_reservation *rsv, struct pmem2_map *map); struct pmem2_map *vm_reservation_map_find(struct pmem2_vm_reservation *rsv, size_t reserv_offset, size_t len); +struct pmem2_map *vm_reservation_map_find_acquire( + struct pmem2_vm_reservation *rsv, size_t reserv_offset, + size_t len); +void vm_reservation_release(struct pmem2_vm_reservation *rsv); #endif /* vm_reservation.h */ diff --git a/src/libpmem2/vm_reservation_posix.c b/src/libpmem2/vm_reservation_posix.c index 612f822d9b21f636ce9b34a60e4e5a3aa0cc5d02..ecbe3a96d921f99c24ca0d72522d6296ba558ddc 100644 --- a/src/libpmem2/vm_reservation_posix.c +++ b/src/libpmem2/vm_reservation_posix.c @@ -32,8 +32,6 @@ vm_reservation_reserve_memory(void *addr, size_t size, void **raddr, */ #ifdef MAP_FIXED_NOREPLACE map_flag = 
MAP_FIXED_NOREPLACE; -#else - map_flag = 0; #endif } diff --git a/src/libpmem2/vm_reservation_windows.c b/src/libpmem2/vm_reservation_windows.c index ab63a0d433c70de1771e30e740709ba350766826..e8c3cacecf5358e2dafc93af506c1805b054bfef 100644 --- a/src/libpmem2/vm_reservation_windows.c +++ b/src/libpmem2/vm_reservation_windows.c @@ -84,9 +84,7 @@ vm_reservation_map_find_closest_prior(struct pmem2_vm_reservation *rsv, struct ravl_interval_node *node; - util_rwlock_rdlock(&rsv->lock); node = ravl_interval_find_closest_prior(rsv->itree, &map); - util_rwlock_unlock(&rsv->lock); if (!node) return NULL; @@ -108,9 +106,7 @@ vm_reservation_map_find_closest_later(struct pmem2_vm_reservation *rsv, struct ravl_interval_node *node; - util_rwlock_rdlock(&rsv->lock); node = ravl_interval_find_closest_later(rsv->itree, &map); - util_rwlock_unlock(&rsv->lock); if (!node) return NULL; diff --git a/src/test/pmem2_vm_reservation/TESTS.py b/src/test/pmem2_vm_reservation/TESTS.py index 154256681254108493007a0b2918a07461dfde25..0b07d7e70425387bc1566e721d6403cbb5ab0dd7 100755 --- a/src/test/pmem2_vm_reservation/TESTS.py +++ b/src/test/pmem2_vm_reservation/TESTS.py @@ -311,7 +311,7 @@ class TEST35(PMEM2_VM_RESERVATION_ASYNC): """ test_case = "test_vm_reserv_async_map_unmap_multiple_files" threads = 32 - ops_per_thread = 10000 + ops_per_thread = 1000 class TEST36(PMEM2_VM_RESERVATION_ASYNC_DEVDAX): diff --git a/src/test/pmem2_vm_reservation/pmem2_vm_reservation.c b/src/test/pmem2_vm_reservation/pmem2_vm_reservation.c index 33f43e2772cadd6254da4950e86f991a656c5838..6c433fca224ed860e1dec68157c28e991312f1ba 100644 --- a/src/test/pmem2_vm_reservation/pmem2_vm_reservation.c +++ b/src/test/pmem2_vm_reservation/pmem2_vm_reservation.c @@ -6,6 +6,9 @@ */ #include <stdbool.h> +#ifndef _WIN32 +#include <pthread.h> +#endif #include "config.h" #include "fault_injection.h" @@ -423,6 +426,7 @@ test_vm_reserv_map_file(const struct test_case *tc, ret = pmem2_map_new(&map, &cfg, src); 
UT_PMEM2_EXPECT_RETURN(ret, 0); + UT_ASSERTne(map, NULL); UT_ASSERTeq(pmem2_map_get_address(map), (char *)rsv_addr + rsv_offset); ret = pmem2_map_delete(&map); @@ -1010,43 +1014,54 @@ test_vm_reserv_map_invalid_granularity(const struct test_case *tc, #define MAX_THREADS 32 struct worker_args { - struct pmem2_config cfg; - struct pmem2_source *src; - struct pmem2_map **map; size_t n_ops; - os_mutex_t lock; + struct pmem2_vm_reservation *rsv; + size_t rsv_offset; + struct FHandle *fh; }; static void * -map_worker(void *arg) +map_unmap_worker(void *arg) { struct worker_args *warg = arg; - for (size_t n = 0; n < warg->n_ops; n++) { - if (!(*warg->map)) { - int ret = pmem2_map_new(warg->map, &warg->cfg, - warg->src); - if (ret != PMEM2_E_MAPPING_EXISTS) - UT_ASSERTeq(ret, 0); - } - } + struct pmem2_vm_reservation *rsv = warg->rsv; + struct FHandle *fh = warg->fh; - return NULL; -} + void *rsv_addr; + size_t rsv_offset; + size_t n_ops = warg->n_ops; + struct pmem2_config cfg; + struct pmem2_source *src; + struct pmem2_map *map = NULL; -static void * -unmap_worker(void *arg) -{ - struct worker_args *warg = arg; + rsv_addr = pmem2_vm_reservation_get_address(rsv); + rsv_offset = warg->rsv_offset; + + pmem2_config_init(&cfg); + pmem2_config_set_required_store_granularity(&cfg, + PMEM2_GRANULARITY_PAGE); + pmem2_config_set_vm_reservation(&cfg, rsv, rsv_offset); + PMEM2_SOURCE_FROM_FH(&src, fh); + + int ret; + for (size_t n = 0; n < n_ops; n++) { + if (map == NULL) { + ret = pmem2_map_new(&map, &cfg, src); + UT_ASSERTeq(ret, 0); + UT_ASSERTeq(pmem2_map_get_address(map), + (char *)rsv_addr + rsv_offset); + } - for (size_t n = 0; n < warg->n_ops; n++) { - if (*(warg->map)) { - int ret = pmem2_map_delete(warg->map); - if (ret != PMEM2_E_MAPPING_NOT_FOUND) - UT_ASSERTeq(ret, 0); + if (map != NULL) { + ret = pmem2_map_delete(&map); + UT_ASSERTeq(ret, 0); + UT_ASSERTeq(map, NULL); } } + PMEM2_SOURCE_DELETE(&src); + return NULL; } @@ -1056,8 +1071,19 @@ run_worker(void 
*(worker_func)(void *arg), struct worker_args args[],
 {
 	os_thread_t threads[MAX_THREADS];
 
+#ifdef _WIN32
 	for (size_t n = 0; n < n_threads; n++)
 		THREAD_CREATE(&threads[n], NULL, worker_func, &args[n]);
+#else
+	pthread_attr_t attr;
+	pthread_attr_init(&attr);
+	/* thread stack size is set to 16MB */
+	pthread_attr_setstacksize(&attr, (1 << 24));
+
+	for (size_t n = 0; n < n_threads; n++)
+		THREAD_CREATE(&threads[n], (os_thread_attr_t *)&attr,
+				worker_func, &args[n]);
+#endif
 
 	for (size_t n = 0; n < n_threads; n++)
 		THREAD_JOIN(&threads[n], NULL);
@@ -1073,32 +1099,30 @@ test_vm_reserv_async_map_unmap_multiple_files(const struct test_case *tc,
 	int argc, char *argv[])
 {
 	if (argc < 4)
-		UT_FATAL("usage: test_vm_reserv_async_map_unmap_multiple_files"
+		UT_FATAL("usage: test_vm_reserv_async_map_unmap_multiple_files "
 			"<file> <size> <threads> <ops/thread>");
 
+	size_t n_threads = ATOU(argv[2]);
+	if (n_threads > MAX_THREADS)
+		UT_FATAL("threads %zu > MAX_THREADS %u",
+				n_threads, MAX_THREADS);
+
 	char *file = argv[0];
 	size_t size = ATOUL(argv[1]);
-	size_t n_threads = ATOU(argv[2]);
 	size_t ops_per_thread = ATOU(argv[3]);
 	size_t alignment = get_align_by_filename(file);
 	void *rsv_addr;
 	size_t rsv_size;
 	size_t rsv_offset;
-	struct pmem2_config cfg;
-	struct pmem2_map *map[MAX_THREADS];
 	struct pmem2_vm_reservation *rsv;
-	struct pmem2_source *src;
 	struct FHandle *fh;
 	struct worker_args args[MAX_THREADS];
 
-	for (size_t n = 0; n < n_threads; n++)
-		map[n] = NULL;
-
 	/*
-	 * reservation will fit as many files as there are threads + 1,
+	 * reservation will fit as many files as there are threads,
 	 * it's expanded by the length of alignment, for the device DAX
 	 */
-	rsv_size = (n_threads + 1) * (size / 2) + alignment;
+	rsv_size = n_threads * size + alignment;
 
 	int ret = pmem2_vm_reservation_new(&rsv, NULL, rsv_size);
 	UT_ASSERTeq(ret, 0);
@@ -1107,32 +1131,28 @@ test_vm_reserv_async_map_unmap_multiple_files(const struct test_case *tc,
 	UT_ASSERTne(rsv_addr, NULL);
 	UT_ASSERTeq(pmem2_vm_reservation_get_size(rsv), rsv_size);
 
+	fh = UT_FH_OPEN(FH_FD, file, 
FH_RDWR); + /* in case of DevDax */ size_t offset_align = offset_align_to_devdax(rsv_addr, alignment); - ut_pmem2_prepare_config(&cfg, &src, &fh, FH_FD, file, 0, 0, FH_RDWR); - /* - * the offset increases by the half of file size. + * the offset increases by the size of the file. */ for (size_t n = 0; n < n_threads; n++) { /* calculate offset for each thread */ - rsv_offset = ALIGN_DOWN((n % n_threads) * (size / 2), alignment) - + offset_align; - pmem2_config_set_vm_reservation(&cfg, rsv, rsv_offset); + rsv_offset = ALIGN_DOWN(n * size, alignment) + offset_align; - args[n].cfg = cfg; - args[n].src = src; - args[n].map = &(map[n]); args[n].n_ops = ops_per_thread; + args[n].rsv = rsv; + args[n].rsv_offset = rsv_offset; + args[n].fh = fh; } - run_worker(map_worker, args, n_threads); - run_worker(unmap_worker, args, n_threads); + run_worker(map_unmap_worker, args, n_threads); ret = pmem2_vm_reservation_delete(&rsv); UT_ASSERTeq(ret, 0); - PMEM2_SOURCE_DELETE(&src); UT_FH_CLOSE(fh); return 4;