fishhook解析

一些基础概念:

编译连接的过程

主要有四个阶段:

  1. 预处理阶段:

    1.1 删除展开对应宏定义

    1.2 处理条件编译指令

    1.3包含的文件插入

    1.4 删除注释

  2. 编译

    生成.s文件

  3. 汇编

    生成.o目标文件

  4. 链接

    把多个目标文件链接成可执行文件

iOS是如何调用外部函数的

内部函数(主工程里的,或者一些静态库里的c函数)
  • 如果自定义函数只在该文件内使用,会在编译阶段确定地址,然后生成的是mach-o文件的__text段,

    __text 段属于 __TEXT,是只读类型。

  • 如果A文件引用了B文件的函数,在汇编阶段编译器暂时用占位替代着,把真正地址计算工作留给链接器。连接器是如何知道需要那些地址需要重新链接呢? 在目标文件有一个重定位表,专门保存这些与重定位相关的信息。

所以非动态库里的函数地址的确定是在静态链接期间完成的,由于__TEXT只读类型,所以不能被更改。要是想hook自定义的一些函数可以通过inlinehook实现,这里先不说。

调用动态库的c函数

注意:外部静态变量也被地址在动态链接时也会写入__got段。

主可执行文件里面用的一些函数,比如NSLog(属于foundation库),在完成静态链接之后是不知道NSLog的地址的。那么,怎么才能让主可执行文件调用到外部的函数呢?

苹果为了解决这个问题,使用了PIC技术:PIC(position independent code) 位置无关代码

为了实现这个功能苹果通过__got__la_symbol_ptr 两个Data段来存储外部调用地址。其中__got为非懒加,__la_symbol_ptr为懒加载。__got会在动态链接过程中写入外部地址。__la_symbol_ptr为函数第一次调用的时候写入。

具体的寻址过程是什么样的呢?

如图:

fishhook_1.png

fishhook

概述:fishhook正式利用了pic的机制,实现了对动态库的c函数和一些静态变量的hook。

fishhook核心代码解读

我在核心代码做了详细的注释说明。

rebind_symbols
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
  int retval = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
  if (retval < 0) {
    return retval;
  }
  // If this was the first call, register callback for image additions (which is also invoked for
  // existing images, otherwise, just run on existing images
  if (!_rebindings_head->next) {
    /**
     注册回调函数_rebind_symbols_for_image,
     
     有新的 image 被 load时 调用_rebind_symbols_for_image
     已存在的 image 都会调用一遍_rebind_symbols_for_image
    */ 

    _dyld_register_func_for_add_image(_rebind_symbols_for_image);
  } else {
    uint32_t c = _dyld_image_count();
    for (uint32_t i = 0; i < c; i++) {
      _rebind_symbols_for_image(_dyld_get_image_header(i), _dyld_get_image_vmaddr_slide(i));
    }
  }
  return retval;
}
回调函数 _rebind_symbols_for_image
static void _rebind_symbols_for_image(const struct mach_header *header,
                                      intptr_t slide) {
    rebind_symbols_for_image(_rebindings_head, header, slide);
}


static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
                                     const struct mach_header *header,
                                     intptr_t slide) {
  Dl_info info;
  if (dladdr(header, &info) == 0) {
    return;
  }

  segment_command_t *cur_seg_cmd;
    //linkedit 加载命令
  segment_command_t *linkedit_segment = NULL;
    // 符号表加载命令
  struct symtab_command* symtab_cmd = NULL;
    //间接符号表加载指令
  struct dysymtab_command* dysymtab_cmd = NULL;

  uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
      if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
        linkedit_segment = cur_seg_cmd;
      }
    } else if (cur_seg_cmd->cmd == LC_SYMTAB) {
      symtab_cmd = (struct symtab_command*)cur_seg_cmd;
    } else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
      dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
    }
  }

  if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment ||
      !dysymtab_cmd->nindirectsyms) {
    return;
  }

    // Find base symbol/string table addresses
    // slide 文件被加载到虚拟内存的偏移量,是一种保护机制。
    // 这里为什么要用 vmaddr - fileoff;vmaddr表示虚拟内存的偏移,理论上vmaddr >= fileoff;
    // 在程序运行时,mach-O 文件会被加载到虚拟内存中。但是 segment 会按照一定的方式进行内存对齐,所以存在vmaddr >= fileoff的可能,
    //注意⚠️_LINKEDIT 也属于 segment, command 指向 __LINKEDIT 这个段。只是在machOView软件上没有体现。
    
    // linkedit_base 就是由于页对齐造成的空白大小
    // 由于 symtab_cmd、dysymtab_cmd加载命令分别用 symoff、stroff找到地址,故需要计算出linkedit_base。
    
  uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
  nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
  char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);

  // Get indirect symbol table (array of uint32_t indices into symbol table)
  uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);

  cur = (uintptr_t)header + sizeof(mach_header_t);
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
      if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
          strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
        continue;
      }
      for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
        section_t *sect =
          (section_t *)(cur + sizeof(segment_command_t)) + j;
          // section是懒加载表类型加载命令(__got)
        if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
          //section是非懒加表类型加载命令(__la_symbol_ptr)
        if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
      }
    }
  }
}
绑定 perform_rebinding_with_section
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
  /**
  用"指针数组" 代指 __got 和 __la_symbol_ptr  DATA数据
    __got 和 __la_symbol_ptr  DATA 段可以看作是一个指针数组,其中懒加载表存的是外部函数指针,非懒加载表存的是__stub_help 或者 外部函数指针。
    通过section(__got or __la_symbol_ptr )加载命令可以知道,指针数组起始位置的函数指针对应的符号在间   接符号表的起始位置。
    有一点需要注意⚠️ 间接符号表里面包含了非懒加载和懒加载符号在符号表里的索引。
    通过间接符号表的索引就能最终知道,"指针数组"里的指针到底对应的是哪一个符号。
  */
  uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1;
  void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr);

  for (uint i = 0; i < section->size / sizeof(void *); i++) {
    uint32_t symtab_index = indirect_symbol_indices[i];
    if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL ||
        symtab_index == (INDIRECT_SYMBOL_LOCAL   | INDIRECT_SYMBOL_ABS)) {
      continue;
    }
    uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx;
    char *symbol_name = strtab + strtab_offset;
    bool symbol_name_longer_than_1 = symbol_name[0] && symbol_name[1];
    struct rebindings_entry *cur = rebindings;
    while (cur) {
      for (uint j = 0; j < cur->rebindings_nel; j++) {
        if (symbol_name_longer_than_1 && strcmp(&symbol_name[1], cur->rebindings[j].name) == 0) {
          kern_return_t err;

          if (cur->rebindings[j].replaced != NULL && indirect_symbol_bindings[i] != cur->rebindings[j].replacement)
            // ⚠️这里保存原始外部函数指针
            *(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];

          /**
           * 1. Moved the vm protection modifying codes to here to reduce the
           *    changing scope.
           * 2. Adding VM_PROT_WRITE mode unconditionally because vm_region
           *    API on some iOS/Mac reports mismatch vm protection attributes.
           * -- Lianfu Hao Jun 16th, 2021
           **/
          err = vm_protect (mach_task_self (), (uintptr_t)indirect_symbol_bindings, section->size, 0, VM_PROT_READ | VM_PROT_WRITE | VM_PROT_COPY);
          if (err == KERN_SUCCESS) {
            /**
             * Once we failed to change the vm protection, we
             * MUST NOT continue the following write actions!
             * iOS 15 has corrected the const segments prot.
             * -- Lionfore Hao Jun 11th, 2021
             **/
            // ⚠️在这里进行函数指针的替换
            indirect_symbol_bindings[i] = cur->rebindings[j].replacement;
          }
          goto symbol_loop;
        }
      }
      cur = cur->next;
    }
  symbol_loop:;
  }
}

引用一个官方的图:

这张图直观的展现了如何找到’‘指针数组’‘里的指针所对应的符号的。

fishhook.png

其他的一些注意点

在iOS13之前要hook懒加载系统函数,需要先调用一遍,让外部函数地址写入懒加载表。

iOS13之后,不用调用,可能dyld3闭包提前做了处理。这个需要再验证。

iOS15有一个bug修复。因为有些数据段的读写权限发生了变化,这里通过vm_protect 函数改变其读写权限。