fishhook解析
一些基础概念:
编译连接的过程
主要有四个阶段:
-
预处理阶段:
1.1 删除展开对应宏定义
1.2 处理条件编译指令
1.3包含的文件插入
1.4 删除注释
-
编译
生成.s文件
-
汇编
生成.o目标文件
-
链接
把多个目标文件链接成可执行文件
iOS是如何调用外部函数的
内部函数(主工程里的,或者一些静态库里的c函数)
-
如果自定义函数只在该文件内使用,会在编译阶段确定地址,然后生成的是mach-o文件的__text段,
__text
段属于__TEXT
,是只读类型。 -
如果A文件引用了B文件的函数,在汇编阶段编译器暂时用占位替代着,把真正地址计算工作留给链接器。连接器是如何知道需要那些地址需要重新链接呢? 在目标文件有一个重定位表,专门保存这些与重定位相关的信息。
所以非动态库里的函数地址的确定是在静态链接期间完成的,由于__TEXT
只读类型,所以不能被更改。要是想hook自定义的一些函数可以通过inlinehook实现,这里先不说。
调用动态库的c函数
注意:外部静态变量也被地址在动态链接时也会写入__got
段。
主可执行文件里面用的一些函数,比如NSLog(属于foundation库),在完成静态链接之后是不知道NSLog的地址的。那么,怎么才能让主可执行文件调用到外部的函数呢?
苹果为了解决这个问题,使用了PIC技术:PIC(position independent code) 位置无关代码
为了实现这个功能苹果通过__got
和__la_symbol_ptr
两个Data段来存储外部调用地址。其中__got
为非懒加,__la_symbol_ptr
为懒加载。__got
会在动态链接过程中写入外部地址。__la_symbol_ptr
为函数第一次调用的时候写入。
具体的寻址过程是什么样的呢?
如图:
fishhook
概述:fishhook正式利用了pic的机制,实现了对动态库的c函数和一些静态变量的hook。
fishhook核心代码解读
我在核心代码做了详细的注释说明。
rebind_symbols
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
int retval = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
if (retval < 0) {
return retval;
}
// If this was the first call, register callback for image additions (which is also invoked for
// existing images, otherwise, just run on existing images
if (!_rebindings_head->next) {
/**
注册回调函数_rebind_symbols_for_image,
有新的 image 被 load时 调用_rebind_symbols_for_image
已存在的 image 都会调用一遍_rebind_symbols_for_image
*/
_dyld_register_func_for_add_image(_rebind_symbols_for_image);
} else {
uint32_t c = _dyld_image_count();
for (uint32_t i = 0; i < c; i++) {
_rebind_symbols_for_image(_dyld_get_image_header(i), _dyld_get_image_vmaddr_slide(i));
}
}
return retval;
}
回调函数 _rebind_symbols_for_image
static void _rebind_symbols_for_image(const struct mach_header *header,
intptr_t slide) {
rebind_symbols_for_image(_rebindings_head, header, slide);
}
static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
const struct mach_header *header,
intptr_t slide) {
Dl_info info;
if (dladdr(header, &info) == 0) {
return;
}
segment_command_t *cur_seg_cmd;
//linkedit 加载命令
segment_command_t *linkedit_segment = NULL;
// 符号表加载命令
struct symtab_command* symtab_cmd = NULL;
//间接符号表加载指令
struct dysymtab_command* dysymtab_cmd = NULL;
uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
linkedit_segment = cur_seg_cmd;
}
} else if (cur_seg_cmd->cmd == LC_SYMTAB) {
symtab_cmd = (struct symtab_command*)cur_seg_cmd;
} else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
}
}
if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment ||
!dysymtab_cmd->nindirectsyms) {
return;
}
// Find base symbol/string table addresses
// slide 文件被加载到虚拟内存的偏移量,是一种保护机制。
// 这里为什么要用 vmaddr - fileoff;vmaddr表示虚拟内存的偏移,理论上vmaddr >= fileoff;
// 在程序运行时,mach-O 文件会被加载到虚拟内存中。但是 segment 会按照一定的方式进行内存对齐,所以存在vmaddr >= fileoff的可能,
//注意⚠️_LINKEDIT 也属于 segment, command 指向 __LINKEDIT 这个段。只是在machOView软件上没有体现。
// linkedit_base 就是由于页对齐造成的空白大小
// 由于 symtab_cmd、dysymtab_cmd加载命令分别用 symoff、stroff找到地址,故需要计算出linkedit_base。
uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);
// Get indirect symbol table (array of uint32_t indices into symbol table)
uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);
cur = (uintptr_t)header + sizeof(mach_header_t);
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
continue;
}
for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
section_t *sect =
(section_t *)(cur + sizeof(segment_command_t)) + j;
// section是懒加载表类型加载命令(__got)
if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
//section是非懒加表类型加载命令(__la_symbol_ptr)
if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
}
}
}
}
绑定 perform_rebinding_with_section
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
section_t *section,
intptr_t slide,
nlist_t *symtab,
char *strtab,
uint32_t *indirect_symtab) {
/**
用"指针数组" 代指 __got 和 __la_symbol_ptr DATA数据
__got 和 __la_symbol_ptr DATA 段可以看作是一个指针数组,其中懒加载表存的是外部函数指针,非懒加载表存的是__stub_help 或者 外部函数指针。
通过section(__got or __la_symbol_ptr )加载命令可以知道,指针数组起始位置的函数指针对应的符号在间 接符号表的起始位置。
有一点需要注意⚠️ 间接符号表里面包含了非懒加载和懒加载符号在符号表里的索引。
通过间接符号表的索引就能最终知道,"指针数组"里的指针到底对应的是哪一个符号。
*/
uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1;
void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr);
for (uint i = 0; i < section->size / sizeof(void *); i++) {
uint32_t symtab_index = indirect_symbol_indices[i];
if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL ||
symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) {
continue;
}
uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx;
char *symbol_name = strtab + strtab_offset;
bool symbol_name_longer_than_1 = symbol_name[0] && symbol_name[1];
struct rebindings_entry *cur = rebindings;
while (cur) {
for (uint j = 0; j < cur->rebindings_nel; j++) {
if (symbol_name_longer_than_1 && strcmp(&symbol_name[1], cur->rebindings[j].name) == 0) {
kern_return_t err;
if (cur->rebindings[j].replaced != NULL && indirect_symbol_bindings[i] != cur->rebindings[j].replacement)
// ⚠️这里保存原始外部函数指针
*(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];
/**
* 1. Moved the vm protection modifying codes to here to reduce the
* changing scope.
* 2. Adding VM_PROT_WRITE mode unconditionally because vm_region
* API on some iOS/Mac reports mismatch vm protection attributes.
* -- Lianfu Hao Jun 16th, 2021
**/
err = vm_protect (mach_task_self (), (uintptr_t)indirect_symbol_bindings, section->size, 0, VM_PROT_READ | VM_PROT_WRITE | VM_PROT_COPY);
if (err == KERN_SUCCESS) {
/**
* Once we failed to change the vm protection, we
* MUST NOT continue the following write actions!
* iOS 15 has corrected the const segments prot.
* -- Lionfore Hao Jun 11th, 2021
**/
// ⚠️在这里进行函数指针的替换
indirect_symbol_bindings[i] = cur->rebindings[j].replacement;
}
goto symbol_loop;
}
}
cur = cur->next;
}
symbol_loop:;
}
}
引用一个官方的图:
这张图直观的展现了如何找到’‘指针数组’‘里的指针所对应的符号的。
其他的一些注意点
在iOS13之前要hook懒加载系统函数,需要先调用一遍,让外部函数地址写入懒加载表。
iOS13之后,不用调用,可能dyld3闭包提前做了处理。这个需要再验证。
iOS15有一个bug修复。因为有些数据段的读写权限发生了变化,这里通过vm_protect 函数改变其读写权限。