kernel_optimize_test/fs
Jiri Pirko 1f10206cf8 getrusage: fill ru_maxrss value
Make ->ru_maxrss value in struct rusage filled accordingly to rss hiwater
mark.  This struct is filled as a parameter to getrusage syscall.
->ru_maxrss value is set to KBs which is the way it is done in BSD
systems.  /usr/bin/time (gnu time) application converts ->ru_maxrss to KBs
which seems to be incorrect behavior.  Maintainer of this util was
notified by me with the patch which corrects it and cc'ed.

To make this happen we extend struct signal_struct by two fields.  The
first one is ->maxrss which we use to store rss hiwater of the task.  The
second one is ->cmaxrss which we use to store highest rss hiwater of all
task childs.  These values are used in k_getrusage() to actually fill
->ru_maxrss.  k_getrusage() uses current rss hiwater value directly if mm
struct exists.

Note:
exec() clear mm->hiwater_rss, but doesn't clear sig->maxrss.
it is intetionally behavior. *BSD getrusage have exec() inheriting.

test programs
========================================================

getrusage.c
===========
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <unistd.h>
 #include <signal.h>
 #include <sys/mman.h>

 #include "common.h"

 #define err(str) perror(str), exit(1)

int main(int argc, char** argv)
{
	int status;

	printf("allocate 100MB\n");
	consume(100);

	printf("testcase1: fork inherit? \n");
	printf("  expect: initial.self ~= child.self\n");
	show_rusage("initial");
	if (__fork()) {
		wait(&status);
	} else {
		show_rusage("fork child");
		_exit(0);
	}
	printf("\n");

	printf("testcase2: fork inherit? (cont.) \n");
	printf("  expect: initial.children ~= 100MB, but child.children = 0\n");
	show_rusage("initial");
	if (__fork()) {
		wait(&status);
	} else {
		show_rusage("child");
		_exit(0);
	}
	printf("\n");

	printf("testcase3: fork + malloc \n");
	printf("  expect: child.self ~= initial.self + 50MB\n");
	show_rusage("initial");
	if (__fork()) {
		wait(&status);
	} else {
		printf("allocate +50MB\n");
		consume(50);
		show_rusage("fork child");
		_exit(0);
	}
	printf("\n");

	printf("testcase4: grandchild maxrss\n");
	printf("  expect: post_wait.children ~= 300MB\n");
	show_rusage("initial");
	if (__fork()) {
		wait(&status);
		show_rusage("post_wait");
	} else {
		system("./child -n 0 -g 300");
		_exit(0);
	}
	printf("\n");

	printf("testcase5: zombie\n");
	printf("  expect: pre_wait ~= initial, IOW the zombie process is not accounted.\n");
	printf("          post_wait ~= 400MB, IOW wait() collect child's max_rss. \n");
	show_rusage("initial");
	if (__fork()) {
		sleep(1); /* children become zombie */
		show_rusage("pre_wait");
		wait(&status);
		show_rusage("post_wait");
	} else {
		system("./child -n 400");
		_exit(0);
	}
	printf("\n");

	printf("testcase6: SIG_IGN\n");
	printf("  expect: initial ~= after_zombie (child's 500MB alloc should be ignored).\n");
	show_rusage("initial");
	signal(SIGCHLD, SIG_IGN);
	if (__fork()) {
		sleep(1); /* children become zombie */
		show_rusage("after_zombie");
	} else {
		system("./child -n 500");
		_exit(0);
	}
	printf("\n");
	signal(SIGCHLD, SIG_DFL);

	printf("testcase7: exec (without fork) \n");
	printf("  expect: initial ~= exec \n");
	show_rusage("initial");
	execl("./child", "child", "-v", NULL);

	return 0;
}

child.c
=======
 #include <sys/types.h>
 #include <unistd.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/resource.h>

 #include "common.h"

int main(int argc, char** argv)
{
	int status;
	int c;
	long consume_size = 0;
	long grandchild_consume_size = 0;
	int show = 0;

	while ((c = getopt(argc, argv, "n:g:v")) != -1) {
		switch (c) {
		case 'n':
			consume_size = atol(optarg);
			break;
		case 'v':
			show = 1;
			break;
		case 'g':

			grandchild_consume_size = atol(optarg);
			break;
		default:
			break;
		}
	}

	if (show)
		show_rusage("exec");

	if (consume_size) {
		printf("child alloc %ldMB\n", consume_size);
		consume(consume_size);
	}

	if (grandchild_consume_size) {
		if (fork()) {
			wait(&status);
		} else {
			printf("grandchild alloc %ldMB\n", grandchild_consume_size);
			consume(grandchild_consume_size);

			exit(0);
		}
	}

	return 0;
}

common.c
========
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <unistd.h>
 #include <signal.h>
 #include <sys/mman.h>

 #include "common.h"
 #define err(str) perror(str), exit(1)

void show_rusage(char *prefix)
{
    	int err, err2;
    	struct rusage rusage_self;
    	struct rusage rusage_children;

    	printf("%s: ", prefix);
    	err = getrusage(RUSAGE_SELF, &rusage_self);
    	if (!err)
    		printf("self %ld ", rusage_self.ru_maxrss);
    	err2 = getrusage(RUSAGE_CHILDREN, &rusage_children);
    	if (!err2)
    		printf("children %ld ", rusage_children.ru_maxrss);

    	printf("\n");
}

/* Some buggy OS need this worthless CPU waste. */
void make_pagefault(void)
{
	void *addr;
	int size = getpagesize();
	int i;

	for (i=0; i<1000; i++) {
		addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
		if (addr == MAP_FAILED)
			err("make_pagefault");
		memset(addr, 0, size);
		munmap(addr, size);
	}
}

void consume(int mega)
{
    	size_t sz = mega * 1024 * 1024;
    	void *ptr;

    	ptr = malloc(sz);
    	memset(ptr, 0, sz);
	make_pagefault();
}

pid_t __fork(void)
{
	pid_t pid;

	pid = fork();
	make_pagefault();

	return pid;
}

common.h
========
void show_rusage(char *prefix);
void make_pagefault(void);
void consume(int mega);
pid_t __fork(void);

FreeBSD result (expected result)
========================================================
allocate 100MB
testcase1: fork inherit?
  expect: initial.self ~= child.self
initial: self 103492 children 0
fork child: self 103540 children 0

testcase2: fork inherit? (cont.)
  expect: initial.children ~= 100MB, but child.children = 0
initial: self 103540 children 103540
child: self 103564 children 0

testcase3: fork + malloc
  expect: child.self ~= initial.self + 50MB
initial: self 103564 children 103564
allocate +50MB
fork child: self 154860 children 0

testcase4: grandchild maxrss
  expect: post_wait.children ~= 300MB
initial: self 103564 children 154860
grandchild alloc 300MB
post_wait: self 103564 children 308720

testcase5: zombie
  expect: pre_wait ~= initial, IOW the zombie process is not accounted.
          post_wait ~= 400MB, IOW wait() collect child's max_rss.
initial: self 103564 children 308720
child alloc 400MB
pre_wait: self 103564 children 308720
post_wait: self 103564 children 411312

testcase6: SIG_IGN
  expect: initial ~= after_zombie (child's 500MB alloc should be ignored).
initial: self 103564 children 411312
child alloc 500MB
after_zombie: self 103624 children 411312

testcase7: exec (without fork)
  expect: initial ~= exec
initial: self 103624 children 411312
exec: self 103624 children 411312

Linux result (actual test result)
========================================================
allocate 100MB
testcase1: fork inherit?
  expect: initial.self ~= child.self
initial: self 102848 children 0
fork child: self 102572 children 0

testcase2: fork inherit? (cont.)
  expect: initial.children ~= 100MB, but child.children = 0
initial: self 102876 children 102644
child: self 102572 children 0

testcase3: fork + malloc
  expect: child.self ~= initial.self + 50MB
initial: self 102876 children 102644
allocate +50MB
fork child: self 153804 children 0

testcase4: grandchild maxrss
  expect: post_wait.children ~= 300MB
initial: self 102876 children 153864
grandchild alloc 300MB
post_wait: self 102876 children 307536

testcase5: zombie
  expect: pre_wait ~= initial, IOW the zombie process is not accounted.
          post_wait ~= 400MB, IOW wait() collect child's max_rss.
initial: self 102876 children 307536
child alloc 400MB
pre_wait: self 102876 children 307536
post_wait: self 102876 children 410076

testcase6: SIG_IGN
  expect: initial ~= after_zombie (child's 500MB alloc should be ignored).
initial: self 102876 children 410076
child alloc 500MB
after_zombie: self 102880 children 410076

testcase7: exec (without fork)
  expect: initial ~= exec
initial: self 102880 children 410076
exec: self 102880 children 410076

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-09-23 07:39:30 -07:00
..
9p
adfs
affs
afs seq_file: constify seq_operations 2009-09-23 07:39:29 -07:00
autofs trivial: remove unnecessary semicolons 2009-09-21 15:14:58 +02:00
autofs4
befs const: mark remaining super_operations const 2009-09-22 07:17:24 -07:00
bfs
btrfs Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial 2009-09-22 07:51:45 -07:00
cachefiles
cifs Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial 2009-09-22 07:51:45 -07:00
coda
configfs
cramfs
debugfs
devpts Move magic numbers into magic.h 2009-09-23 07:39:28 -07:00
dlm seq_file: constify seq_operations 2009-09-23 07:39:29 -07:00
ecryptfs const: mark remaining address_space_operations const 2009-09-22 07:17:24 -07:00
efs
exofs
exportfs
ext2 const: make block_device_operations const 2009-09-22 07:17:25 -07:00
ext3 const: make struct super_block::s_qcop const 2009-09-22 07:17:24 -07:00
ext4 Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial 2009-09-22 07:51:45 -07:00
fat fat: Opencode sync_page_range_nolock() 2009-09-14 17:08:17 +02:00
freevxfs
fscache
fuse Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse 2009-09-18 09:23:03 -07:00
gfs2 trivial: fix typo "to to" in multiple files 2009-09-21 15:14:55 +02:00
hfs
hfsplus
hostfs
hpfs
hppfs
hugetlbfs Move magic numbers into magic.h 2009-09-23 07:39:28 -07:00
isofs
jbd jbd: Annotate transaction start also for journal_restart() 2009-09-16 17:44:10 +02:00
jbd2 seq_file: constify seq_operations 2009-09-23 07:39:29 -07:00
jffs2 const: mark remaining export_operations const 2009-09-22 07:17:24 -07:00
jfs
lockd Merge branch 'for-2.6.32' of git://linux-nfs.org/~bfields/linux 2009-09-22 07:54:33 -07:00
minix
ncpfs
nfs seq_file: constify seq_operations 2009-09-23 07:39:29 -07:00
nfs_common
nfsd seq_file: constify seq_operations 2009-09-23 07:39:29 -07:00
nilfs2 const: mark remaining inode_operations as const 2009-09-22 07:17:24 -07:00
nls
notify
ntfs ntfs: remove ntfs_file_write 2009-09-23 07:39:29 -07:00
ocfs2 seq_file: constify seq_operations 2009-09-23 07:39:29 -07:00
omfs const: mark remaining inode_operations as const 2009-09-22 07:17:24 -07:00
openpromfs
partitions const: make block_device_operations const 2009-09-22 07:17:25 -07:00
proc seq_file: constify seq_operations 2009-09-23 07:39:29 -07:00
qnx4 qnx4: remove write support 2009-09-23 07:39:30 -07:00
quota const: make struct super_block::s_qcop const 2009-09-22 07:17:24 -07:00
ramfs
reiserfs const: make struct super_block::s_qcop const 2009-09-22 07:17:24 -07:00
romfs const: mark remaining inode_operations as const 2009-09-22 07:17:24 -07:00
smbfs smbfs: read buffer overflow 2009-09-23 07:39:27 -07:00
squashfs const: mark remaining super_operations const 2009-09-22 07:17:24 -07:00
sysfs
sysv
ubifs const: mark remaining address_space_operations const 2009-09-22 07:17:24 -07:00
udf udf: Fix possible corruption when close races with write 2009-09-14 19:13:01 +02:00
ufs
xfs Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial 2009-09-22 07:51:45 -07:00
aio.c aio.c: move EXPORT* macros to line after function 2009-09-23 07:39:29 -07:00
anon_inodes.c anonfd: split interface into file creation and install 2009-09-23 07:39:29 -07:00
attr.c
bad_inode.c
binfmt_aout.c
binfmt_elf_fdpic.c mm: add get_dump_page 2009-09-22 07:17:40 -07:00
binfmt_elf.c mm: add get_dump_page 2009-09-22 07:17:40 -07:00
binfmt_em86.c
binfmt_flat.c
binfmt_misc.c
binfmt_script.c
binfmt_som.c
bio-integrity.c
bio.c
block_dev.c const: make block_device_operations const 2009-09-22 07:17:25 -07:00
buffer.c fs/buffer.c: clean up EXPORT* macros 2009-09-23 07:39:29 -07:00
char_dev.c
compat_binfmt_elf.c
compat_ioctl.c
compat.c fix compat_sys_utimensat() 2009-09-23 07:39:30 -07:00
dcache.c
dcookies.c
direct-io.c
drop_caches.c
eventfd.c anonfd: split interface into file creation and install 2009-09-23 07:39:29 -07:00
eventpoll.c
exec.c getrusage: fill ru_maxrss value 2009-09-23 07:39:30 -07:00
fcntl.c
fifo.c
file_table.c
file.c
filesystems.c
fs_struct.c
fs-writeback.c writeback: fix possible bdi writeback refcounting problem 2009-09-16 15:18:53 +02:00
generic_acl.c
inode.c fs: turn iprune_mutex into rwsem 2009-09-23 07:39:29 -07:00
internal.h
ioctl.c
ioprio.c
Kconfig tmpfs: depend on shmem 2009-09-22 07:17:41 -07:00
Kconfig.binfmt
libfs.c
locks.c const: make lock_manager_operations const 2009-09-22 07:17:25 -07:00
Makefile
mbcache.c
mpage.c
namei.c
namespace.c
nfsctl.c
no-block.c
open.c
pipe.c
pnode.c
pnode.h
posix_acl.c
read_write.c
read_write.h
readdir.c
select.c poll/select: avoid arithmetic overflow in __estimate_accuracy() 2009-09-23 07:39:27 -07:00
seq_file.c
signalfd.c
splice.c Merge branch 'for-2.6.32' of git://git.kernel.dk/linux-2.6-block 2009-09-14 17:55:15 -07:00
stack.c
stat.c
super.c const: mark remaining super_operations const 2009-09-22 07:17:24 -07:00
sync.c fs/buffer.c: clean up EXPORT* macros 2009-09-23 07:39:29 -07:00
timerfd.c
utimes.c
xattr_acl.c
xattr.c