malloc申请内存背后

参考学习于:https://sploitfun.wordpress.com/2015/02/11/syscalls-used-by-malloc/

其实相当于翻译吧,自己也是要实践一下的

我们看看下面这幅图,malloc是通过调用brk或者mmap系统调用去获得内存的

sGVsoF3VjOo5z1XDVPhU9dw.png

brk

brk从内核获得内存是通过增加program break location实现的(里面的数据不是填充0的)

最开始start_brk和heap段的末端(brk)指向同一个位置

当ASLR关闭的时候,start_brk和brk是指向data或bss段的末尾

那么当ASLR开启的时候,start_brk和brk与data或bss段的末尾之间有一个随机的brk offset(应该说的就是随机偏移,难道还跟brk有关?)

blob.png

上面这张图就可以看到start_brk是heap段的开始,brk (program break brk )是heap段的结束

还是编程看看吧

/* sbrk and brk example */
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
/*
 * Demonstrates moving the program break with sbrk()/brk().
 *
 * Prints the current break, grows it by 4096 bytes, then restores the
 * original value. The program waits on getchar() after each step so the
 * process's /proc/<pid>/maps can be inspected from another terminal.
 *
 * Returns 0 on success, 1 if sbrk() or brk() reports an error.
 */
int main(void)
{
        void *curr_brk, *tmp_brk = NULL;

        /* pid_t is not guaranteed to be int; cast to match %d. */
        printf("Welcome to sbrk example:%d\n", (int)getpid());

        /* sbrk(0) gives current program break location */
        tmp_brk = curr_brk = sbrk(0);
        if (curr_brk == (void *)-1) {
                perror("sbrk");
                return 1;
        }
        printf("Program Break Location1:%p\n", curr_brk);
        getchar();

        /*
         * brk(addr) increments/decrements program break location.
         * Cast to char * first: arithmetic on void * is a GNU extension,
         * not standard C.
         */
        if (brk((char *)curr_brk + 4096) == -1) {
                perror("brk");
                return 1;
        }
        curr_brk = sbrk(0);
        printf("Program break Location2:%p\n", curr_brk);
        getchar();

        /* Move the break back to where it started. */
        if (brk(tmp_brk) == -1) {
                perror("brk");
                return 1;
        }
        curr_brk = sbrk(0);
        printf("Program Break Location3:%p\n", curr_brk);
        getchar();
        return 0;
}

就是用sbrk获取Program Break Location,brk去增加或者减少Program Break Location

编译

gcc -o brk ./brk.c

分析输出结果:

Before increasing program break:

没有heap段,下面的应该是由于系统默认开了ASLR,所以Program Break Location1没在程序的data或bss段最后

[email protected]:~/learn/learnMalloc# ./brk 
Welcome to sbrk example:4110
Program Break Location1:0x9f22000
[email protected]:~# cat /proc/4110/maps 
08048000-08049000 r-xp 00000000 08:01 1708324    /root/learn/learnMalloc/brk
08049000-0804a000 rw-p 00000000 08:01 1708324    /root/learn/learnMalloc/brk
b757e000-b757f000 rw-p 00000000 00:00 0 
......

我们关闭ASLR

[email protected]:~/learn/learnMalloc# sudo -s
[email protected]:~/learn/learnMalloc# echo 0 >/proc/sys/kernel/randomize_va_space
[email protected]:~/learn/learnMalloc# exit
exit

再看看,就像样了

[email protected]:~/learn/learnMalloc# ./brk 
Welcome to sbrk example:4564
Program Break Location1:0x804a000
[email protected]:~# cat /proc/4564/maps 
08048000-08049000 r-xp 00000000 08:01 1708324    /root/learn/learnMalloc/brk
08049000-0804a000 rw-p 00000000 08:01 1708324    /root/learn/learnMalloc/brk
b7e12000-b7e13000 rw-p 00000000 00:00 0
......

start_brk = brk = end_data = 0x804a000.

After increasing program break location: 

在下面我可以看到一个heap段,program break location也确实增大了4096,即0x1000

此时

start_brk = end_data = 0x804a000

brk = 0x804b000.

[email protected]:~/learn/learnMalloc# ./brk 
Welcome to sbrk example:4564
Program Break Location1:0x804a000
Program break Location2:0x804b000
[email protected]:~# cat /proc/4564/maps 
08048000-08049000 r-xp 00000000 08:01 1708324    /root/learn/learnMalloc/brk
08049000-0804a000 rw-p 00000000 08:01 1708324    /root/learn/learnMalloc/brk
0804a000-0804b000 rw-p 00000000 00:00 0          [heap]
b7e12000-b7e13000 rw-p 00000000 00:00 0 
......

0804a000-0804b000这个虚拟地址段中

rw-p             是权限标志(Read, Write, NoeXecute, Private)

00000000     是文件偏移,还没被文件映射的话这里是0

00:00            是Major/Minor device number,没被文件映射也是0

0                   是索引结点,没被文件映射也是0

[heap]           是heap段的意思

再调整到起始的位置,这时heap段没有了

[email protected]:~/learn/learnMalloc# ./brk 
Welcome to sbrk example:4564
Program Break Location1:0x804a000
Program break Location2:0x804b000
Program Break Location3:0x804a000
[email protected]:~# cat /proc/4564/maps 
08048000-08049000 r-xp 00000000 08:01 1708324    /root/learn/learnMalloc/brk
08049000-0804a000 rw-p 00000000 08:01 1708324    /root/learn/learnMalloc/brk
b7e12000-b7e13000 rw-p 00000000 00:00 0 
......

mmap

malloc使用mmap创建一个私有匿名的mapping段

首要的目的是申请新的内存(这是用0填充的)

这新的内存会只给申请的进程使用

还是看看代码

/* Private anonymous mapping example using mmap syscall */
#include <stdio.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
/*
 * Report that the operation named by msg failed, then terminate the
 * process with exit status -1. The message is written to stdout.
 */
static inline void errExit(const char *msg)
{
        fprintf(stdout, "%s failed. Exiting the process\n", msg);
        exit(-1);
}
int main()
{
        int ret = -1;
        printf("Welcome to private anonymous mapping example::PID:%d\n", getpid());
        printf("Before mmap\n");
        getchar();
        char* addr = NULL;
        addr = mmap(NULL, (size_t)132*1024, PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (addr == MAP_FAILED)
                errExit("mmap");
        printf("After mmap\n");
        getchar();
        /* Unmap mapped region. */
        ret = munmap(addr, (size_t)132*1024);
        if(ret == -1)
                errExit("munmap");
        printf("After munmap\n");
        getchar();
        return 0;
}

在代码中,我们申请了132KB的内存,属性是可以读写,MAP属性是private和anonymous的

之后我们又Unmap掉

Before mmap

可以看到只有共享库libc.so和ld-linux.so有内存映射的段(memory mapping segment)

[email protected]:~# cat /proc/4830/maps 
08048000-08049000 r-xp 00000000 08:01 1708326    /root/learn/learnMalloc/mmap
08049000-0804a000 rw-p 00000000 08:01 1708326    /root/learn/learnMalloc/mmap
b7e12000-b7e13000 rw-p 00000000 00:00 0 
b7e13000-b7fb7000 r-xp 00000000 08:01 1311582    /lib/i386-linux-gnu/i686/cmov/libc-2.19.so
b7fb7000-b7fb9000 r--p 001a4000 08:01 1311582    /lib/i386-linux-gnu/i686/cmov/libc-2.19.so
b7fb9000-b7fba000 rw-p 001a6000 08:01 1311582    /lib/i386-linux-gnu/i686/cmov/libc-2.19.so
b7fba000-b7fbd000 rw-p 00000000 00:00 0 
b7fd7000-b7fdb000 rw-p 00000000 00:00 0 
b7fdb000-b7fdd000 r--p 00000000 00:00 0          [vvar]
b7fdd000-b7fdf000 r-xp 00000000 00:00 0          [vdso]
b7fdf000-b7ffe000 r-xp 00000000 08:01 1311723    /lib/i386-linux-gnu/ld-2.19.so
b7ffe000-b7fff000 r--p 0001f000 08:01 1311723    /lib/i386-linux-gnu/ld-2.19.so
b7fff000-b8000000 rw-p 00020000 08:01 1311723    /lib/i386-linux-gnu/ld-2.19.so
bffdf000-c0000000 rw-p 00000000 00:00 0          [stack]

After mmap

[email protected]:~# cat /proc/4830/maps 
08048000-08049000 r-xp 00000000 08:01 1708326    /root/learn/learnMalloc/mmap
08049000-0804a000 rw-p 00000000 08:01 1708326    /root/learn/learnMalloc/mmap
b7df1000-b7e13000 rw-p 00000000 00:00 0 
b7e13000-b7fb7000 r-xp 00000000 08:01 1311582    /lib/i386-linux-gnu/i686/cmov/libc-2.19.so
b7fb7000-b7fb9000 r--p 001a4000 08:01 1311582    /lib/i386-linux-gnu/i686/cmov/libc-2.19.so
b7fb9000-b7fba000 rw-p 001a6000 08:01 1311582    /lib/i386-linux-gnu/i686/cmov/libc-2.19.so
b7fba000-b7fbd000 rw-p 00000000 00:00 0 
b7fd7000-b7fdb000 rw-p 00000000 00:00 0 
b7fdb000-b7fdd000 r--p 00000000 00:00 0          [vvar]
b7fdd000-b7fdf000 r-xp 00000000 00:00 0          [vdso]
b7fdf000-b7ffe000 r-xp 00000000 08:01 1311723    /lib/i386-linux-gnu/ld-2.19.so
b7ffe000-b7fff000 r--p 0001f000 08:01 1311723    /lib/i386-linux-gnu/ld-2.19.so
b7fff000-b8000000 rw-p 00020000 08:01 1311723    /lib/i386-linux-gnu/ld-2.19.so
bffdf000-c0000000 rw-p 00000000 00:00 0          [stack]

这样看有点难看,我们看一下文本对比工具吧

blob.png

我们算一下

blob.png

我们申请的132KB的映射段合并到原来有的地方了

那么这一行表示啥呢

b7df1000-b7e13000 rw-p 00000000 00:00 0

直接复制了,上面翻译过了,其实不翻译还更好一点

b7df1000-b7e13000 is Virtual address range for this segment

rw-p is Flags (Read, Write, NoeXecute, Private)

00000000 is File offset – Since its not mapped from any file, its zero here

00:00 is Major/Minor device number – Since its not mapped from any file, its zero here

0 is Inode number – Since its not mapped from any file, its zero here

After munmap

我们看到那个映射区域的地址又恢复了,我们申请的已经返还给系统了

[email protected]:~# cat /proc/4830/maps 
08048000-08049000 r-xp 00000000 08:01 1708326    /root/learn/learnMalloc/mmap
08049000-0804a000 rw-p 00000000 08:01 1708326    /root/learn/learnMalloc/mmap
b7e12000-b7e13000 rw-p 00000000 00:00 0 
b7e13000-b7fb7000 r-xp 00000000 08:01 1311582    /lib/i386-linux-gnu/i686/cmov/libc-2.19.so
b7fb7000-b7fb9000 r--p 001a4000 08:01 1311582    /lib/i386-linux-gnu/i686/cmov/libc-2.19.so
b7fb9000-b7fba000 rw-p 001a6000 08:01 1311582    /lib/i386-linux-gnu/i686/cmov/libc-2.19.so
b7fba000-b7fbd000 rw-p 00000000 00:00 0 
b7fd7000-b7fdb000 rw-p 00000000 00:00 0 
b7fdb000-b7fdd000 r--p 00000000 00:00 0          [vvar]
b7fdd000-b7fdf000 r-xp 00000000 00:00 0          [vdso]
b7fdf000-b7ffe000 r-xp 00000000 08:01 1311723    /lib/i386-linux-gnu/ld-2.19.so
b7ffe000-b7fff000 r--p 0001f000 08:01 1311723    /lib/i386-linux-gnu/ld-2.19.so
b7fff000-b8000000 rw-p 00020000 08:01 1311723    /lib/i386-linux-gnu/ld-2.19.so
bffdf000-c0000000 rw-p 00000000 00:00 0          [stack]

那么什么时候使用brk,什么时候使用mmap呢

可以看看堆管理

http://www.giantbranch.cn/?p=705

https://sploitfun.wordpress.com/2015/02/10/understanding-glibc-malloc/comment-page-1/

如果一次性申请超过128KB(比如 malloc(132*1024)),而且arena中没有足够的空间能够满足用户的需求时,系统使用的是mmap系统调用来申请内存,而不管是从main arena还是thread arena申请的

thread arena都是通过mmap申请的,把图复制过来

打赏作者
喜欢本博客,打赏让博客永久运行,多少你说了算

您的支持将鼓励我们继续创作!

[微信] 扫描二维码打赏

[支付宝] 扫描二维码打赏

发表评论

电子邮件地址不会被公开。 必填项已用*标注