Date:	Sun, 02 Jan 2011 19:37:42 +0200
From: Vasileios Karakasis <bkk@cslab.ece.ntua.gr>
To: linux-numa@vger.kernel.org
Cc: "'Kornilios Kourtis'" <kkourt@cslab.ece.ntua.gr>
Subject: realloc function

Hi,

I am submitting a patch for a realloc function that might be useful. The
proposed numa_realloc() is merely a wrapper to mremap(), which it calls
with the flag MREMAP_MAYMOVE. The policy of the vm area is copied by the
kernel in case of moving. I am also submitting a test program that
keeps expanding an initial allocation until a limit is reached and
checks the mempolicy of the expanded area in every iteration.

My use case is a dynamic array implementation which uses realloc() to
dynamically expand the array and I want to convert it to a numa-aware
implementation.

PS: I could also submit a brief manpage entry for the new function, if
you agree.

Best regards,
Vasileios Karakasis

Jan 11, 2011

Hi,
I am submitting the final patch. Essentially, it is my original patch,
enhanced with some comments about the rationale (as we discussed it here)
and an entry plus a brief description in the man page.

and acked by Andi Kleen


diff -urN numactl-2.0.6-orig/libnuma.c numactl-2.0.6/libnuma.c
--- numactl-2.0.6-orig/libnuma.c	2011-01-03 15:09:23.000000000 +0200
+++ numactl-2.0.6/libnuma.c	2011-01-10 23:49:58.000000000 +0200
@@ -871,6 +871,23 @@
 	return mem;
 } 
 
+/* Resize an area obtained from numa_alloc*(); returns NULL on failure. */
+void *numa_realloc(void *old_addr, size_t old_size, size_t new_size)
+{
+	/*
+	 * mremap() preserves the memory policy of the pages it is handed, so
+	 * nothing is (re)set here. If the original area carries no policy, new
+	 * pages follow the process' mempolicy: forcing them onto the node of the
+	 * old pages would mean changing their policy, which violates the
+	 * numa_realloc() semantics.
+	 */
+	void *resized = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE);
+
+	if (resized != (void *)-1)
+		return resized;
+	return NULL;
+}
+
 void *numa_alloc_interleaved_subset_v1(size_t size, const nodemask_t *mask)
 {
 	char *mem;
diff -urN numactl-2.0.6-orig/Makefile numactl-2.0.6/Makefile
--- numactl-2.0.6-orig/Makefile	2011-01-03 15:09:23.000000000 +0200
+++ numactl-2.0.6/Makefile	2011-01-03 23:22:57.000000000 +0200
@@ -31,7 +31,7 @@
 	      test/after test/before threadtest test_move_pages \
 	      test/mbind_mig_pages test/migrate_pages \
 	      migratepages migspeed migspeed.o libnuma.a \
-	      test/move_pages
+	      test/move_pages test/realloc_test
 SOURCES := bitops.c libnuma.c distance.c memhog.c numactl.c numademo.c \
 	numamon.c shm.c stream_lib.c stream_main.c syscall.c util.c mt.c \
 	clearcache.c test/*.c
@@ -43,7 +43,7 @@
 all: numactl migratepages migspeed libnuma.so numademo numamon memhog \
      test/tshared stream test/mynode test/pagesize test/ftok test/prefered \
      test/randmap test/nodemap test/distance test/tbitmap test/move_pages \
-     test/mbind_mig_pages test/migrate_pages libnuma.a
+     test/mbind_mig_pages test/migrate_pages test/realloc_test libnuma.a
 
 numactl: numactl.o util.o shm.o bitops.o libnuma.so
 
@@ -123,6 +123,8 @@
 
 test/migrate_pages: test/migrate_pages.c libnuma.so
 
+test/realloc_test: test/realloc_test.c libnuma.so
+
 .PHONY: install all clean html depend
 
 MANPAGES := numa.3 numactl.8 numastat.8 migratepages.8 migspeed.8
diff -urN numactl-2.0.6-orig/numa.3 numactl-2.0.6/numa.3
--- numactl-2.0.6-orig/numa.3	2011-01-03 15:09:23.000000000 +0200
+++ numactl-2.0.6/numa.3	2011-01-10 23:39:02.000000000 +0200
@@ -87,6 +87,8 @@
 .BI "void *numa_alloc_interleaved_subset(size_t " size ",  struct bitmask *" nodemask );
 .BI "void *numa_alloc(size_t " size );
 .br
+.BI "void *numa_realloc(void *" old_addr ", size_t " old_size ", size_t " new_size );
+.br
 .BI "void numa_free(void *" start ", size_t " size );
 .sp
 .BI "int numa_run_on_node(int " node );
@@ -599,6 +601,39 @@
 .BR numa_free ().
 On errors NULL is returned.
 
+.BR numa_realloc ()
+changes the size of the memory area pointed to by
+.I old_addr
+from
+.I old_size
+to
+.IR new_size .
+The memory area pointed to by
+.I old_addr
+must have been allocated with one of the
+.BR numa_alloc*
+functions.
+The
+.I new_size
+will be rounded up to a multiple of the system page size. The contents of the
+memory area will be unchanged up to the minimum of the old and new sizes; newly
+allocated memory will be uninitialized. The memory policy (and node bindings)
+associated with the original memory area will be preserved in the resized
+area. For example, if the initial area was allocated with a call to
+.BR numa_alloc_onnode (),
+then the new pages (if the area is enlarged) will be allocated on the same node.
+However, if no memory policy was set for the original area, then
+.BR numa_realloc ()
+cannot guarantee that the new pages will be allocated on the same node. On
+success, the address of the resized area is returned (which might be different
+from that of the initial area), otherwise NULL is returned and
+.I errno
+is set to indicate the error. The pointer returned by
+.BR numa_realloc ()
+is suitable for passing to
+.BR numa_free ().
+
+
 .BR numa_free ()
 frees
 .I size
diff -urN numactl-2.0.6-orig/numa.h numactl-2.0.6/numa.h
--- numactl-2.0.6-orig/numa.h	2011-01-03 15:09:23.000000000 +0200
+++ numactl-2.0.6/numa.h	2011-01-11 00:06:12.000000000 +0200
@@ -212,6 +212,8 @@
 void *numa_alloc_local(size_t size);
 /* Allocation with current policy */
 void *numa_alloc(size_t size);
+/* Change the size of a memory area preserving the memory policy */
+void *numa_realloc(void *old_addr, size_t old_size, size_t new_size);
 /* Free memory allocated by the functions above */
 void numa_free(void *mem, size_t size);
 
diff -urN numactl-2.0.6-orig/test/realloc_test.c numactl-2.0.6/test/realloc_test.c
--- numactl-2.0.6-orig/test/realloc_test.c	1970-01-01 02:00:00.000000000 +0200
+++ numactl-2.0.6/test/realloc_test.c	2011-01-10 23:55:37.000000000 +0200
@@ -0,0 +1,151 @@
+/*
+ * realloc_test: grow a one-page numa_alloc_onnode() area one page at a time
+ * with numa_realloc() and assert after every step that get_mempolicy() still
+ * reports the mode and nodemask recorded for the initial allocation.
+ *
+ * Usage: realloc_test [node [nr_pages]]
+ */
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include "numa.h"
+#include "numaif.h"
+
+#define DEFAULT_NR_PAGES	1024
+
+/*
+ * Parse str with strtol() (base 0, so 0x/0 prefixes are honoured) and clamp
+ * the result to the int range. Exits with status 1 when str is empty or has
+ * trailing characters; out-of-range input only triggers a warning.
+ */
+static int parse_int(const char *str)
+{
+	char	*endptr;
+	long	ret;
+
+	/* strtol() signals overflow only via errno, so clear stale values first */
+	errno = 0;
+	ret = strtol(str, &endptr, 0);
+	if (endptr == str || *endptr != '\0') {
+		fprintf(stderr, "[error] strtol() failed: parse error: %s\n", endptr);
+		exit(1);
+	}
+
+	if (errno == ERANGE)
+		fprintf(stderr, "[warning] strtol() out of range\n");
+
+	if (ret > INT_MAX || ret < INT_MIN) {
+		fprintf(stderr, "[warning] parse_int() out of range\n");
+		ret = (ret > 0) ? INT_MAX : INT_MIN;
+	}
+
+	return (int) ret;
+}
+
+/*
+ * Allocate one page on the requested node, enlarge it nr_pages times by one
+ * page with numa_realloc() while checking the policy after every step, then
+ * shrink it back to one page and free it. Exits with status 1 on failure.
+ */
+int main(int argc, char **argv)
+{
+	char	*mem;
+	int		page_size;
+	int		node = 0;
+	int		nr_pages = DEFAULT_NR_PAGES;
+
+	/* numa_available() has to succeed before any other libnuma call */
+	if (numa_available() < 0) {
+		fprintf(stderr, "numa is not available\n");
+		exit(1);
+	}
+	page_size = numa_pagesize();
+
+	if (argc > 1)
+		node = parse_int(argv[1]);
+	if (argc > 2)
+		nr_pages = parse_int(argv[2]);
+	if (nr_pages < 0) {
+		fprintf(stderr, "[error] nr_pages must not be negative\n");
+		exit(1);
+	}
+
+	mem = numa_alloc_onnode(page_size, node);
+	if (!mem) {
+		perror("numa_alloc_onnode() failed");
+		exit(1);
+	}
+
+	/*
+	 * Store the policy of the newly allocated area. The masks come from
+	 * libnuma so that they are as wide as the maxnode value handed to
+	 * get_mempolicy(); a lone unsigned long is overrun by the kernel when
+	 * more nodes are possible than it has bits.
+	 */
+	struct bitmask	*nodemask = numa_allocate_nodemask();
+	struct bitmask	*realloc_nodemask = numa_allocate_nodemask();
+	int				mode;
+	if (!nodemask || !realloc_nodemask) {
+		fprintf(stderr, "numa_allocate_nodemask() failed\n");
+		exit(1);
+	}
+	if (get_mempolicy(&mode, nodemask->maskp, nodemask->size + 1, mem,
+					  MPOL_F_NODE | MPOL_F_ADDR) < 0) {
+		perror("get_mempolicy() failed");
+		exit(1);
+	}
+
+	/* Print some info */
+	printf("Page size: %d\n", page_size);
+	printf("Pages realloc'ed: %d\n", nr_pages);
+	printf("Allocate data in node: %d\n", node);
+
+	int i;
+	int nr_inplace = 0;
+	int nr_moved   = 0;
+	for (i = 0; i < nr_pages; i++) {
+		/* Enlarge mem with one more page; size_t math avoids int overflow */
+		size_t	old_size = ((size_t)i + 1) * page_size;
+		char	*new_mem = numa_realloc(mem, old_size, old_size + page_size);
+		if (!new_mem) {
+			perror("numa_realloc() failed");
+			exit(1);
+		}
+
+		if (new_mem == mem)
+			++nr_inplace;
+		else
+			++nr_moved;
+		mem = new_mem;
+
+		/* Check the policy of the realloc'ed area */
+		int		realloc_mode;
+		if (get_mempolicy(&realloc_mode, realloc_nodemask->maskp,
+						  realloc_nodemask->size + 1, mem,
+						  MPOL_F_NODE | MPOL_F_ADDR) < 0) {
+			perror("get_mempolicy() failed");
+			exit(1);
+		}
+
+		assert(numa_bitmask_equal(realloc_nodemask, nodemask) &&
+			   realloc_mode == mode && "policy changed");
+	}
+
+	/* Shrink to the original size */
+	mem = numa_realloc(mem, ((size_t)nr_pages + 1) * page_size, page_size);
+	if (!mem) {
+		perror("numa_realloc() failed");
+		exit(1);
+	}
+
+	numa_free(mem, page_size);
+	numa_bitmask_free(realloc_nodemask);
+	numa_bitmask_free(nodemask);
+	printf("In-place reallocs: %d\n", nr_inplace);
+	printf("Moved reallocs: %d\n", nr_moved);
+	return 0;
+}
diff -urN numactl-2.0.6-orig/versions.ldscript numactl-2.0.6/versions.ldscript
--- numactl-2.0.6-orig/versions.ldscript	2011-01-03 15:09:23.000000000 +0200
+++ numactl-2.0.6/versions.ldscript	2011-01-10 18:36:37.000000000 +0200
@@ -87,6 +87,7 @@
     numa_alloc_interleaved_subset;
     numa_alloc_local;
     numa_alloc_onnode;
+    numa_realloc;
     numa_allocate_cpumask;
     numa_allocate_nodemask;
     numa_available;
