pthread 监听

北邙山之光

2022-07-23 约 1930 字预计阅读 4 分钟

/posts/pthread-%E7%9B%91%E5%90%AC/pthread.jpeg

简介

所谓的 native thread，其实就是指我们使用 c/c++ 做开发时，使用的 POSIX 标准的 pthread

pthread 函数在 libc 中，而 Android 中使用的是 bionic libc(不是 GNU libc)

pthread 常见方法

pthread_create
pthread_join
pthread_detach
pthread_exit
pthread_getattr_np
pthread_attr_init
pthread_attr_getXXX

比较常用的是 create/join/detach 三个方法，后续的几个方法均是 pthread set/get 一些额外属性所需要的

三个方法的函数原型如下：

// Creates a new thread that runs __start_routine; the last (unnamed) parameter
// is the argument handed to __start_routine.
int pthread_create(pthread_t* __pthread_ptr, pthread_attr_t const* __attr, void* (*__start_routine)(void*), void*);

// Marks the given thread as detached.
int pthread_detach(pthread_t __pthread);

// Waits for the given thread to exit; if __return_value_ptr is non-null, the
// thread's return value is stored through it.
int pthread_join(pthread_t __pthread, void** __return_value_ptr);

详细介绍两个方法 pthread_create 和 pthread_exit

pthread_create
就是创建一个 pthread 线程，其中需要注意的是 void* (*__start_routine)(void*)。顾名思义，这是一个函数指针，它指向的函数接受任意类型的参数(void*)，并返回任意类型的返回值(void*)。start_routine 也是我们要在子线程执行的具体函数
pthread_exit
线程退出。其实 start_routine 结束时，也会自动调用。源码如下：

// bionic/libc/bionic/pthread_create.cpp
// Entry function executed by a newly created thread.
// `arg` is the thread's pthread_internal_t. After the startup handshake and
// per-thread setup, it runs the user-supplied start_routine and passes the
// returned value to pthread_exit.
static int __pthread_start(void* arg) {
  pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(arg);

  __hwasan_thread_enter();

  // Wait for our creating thread to release us. This lets it have time to
  // notify gdb about this thread before we start doing anything.
  // This also provides the memory barrier needed to ensure that all memory
  // accesses previously made by the creating thread are visible to us.
  thread->startup_handshake_lock.lock();

  __set_stack_and_tls_vma_name(false);
  __init_additional_stacks(thread);
  // Install the signal mask recorded for this thread in start_mask.
  __rt_sigprocmask(SIG_SETMASK, &thread->start_mask, nullptr, sizeof(thread->start_mask));
#ifdef __aarch64__
  // Chrome's sandbox prevents this prctl, so only reset IA if the target SDK level is high enough.
  // Furthermore, processes loaded from vendor partitions may have their own sandboxes that would
  // reject the prctl. Because no devices launched with PAC enabled before S, we can avoid issues on
  // upgrading devices by checking for PAC support before issuing the prctl.
  static const bool pac_supported = getauxval(AT_HWCAP) & HWCAP_PACA;
  if (pac_supported && android_get_application_target_sdk_version() >= __ANDROID_API_S__) {
    prctl(PR_PAC_RESET_KEYS, PR_PAC_APIAKEY, 0, 0, 0);
  }
#endif

  // Run the user's thread function with the argument given to pthread_create.
  void* result = thread->start_routine(thread->start_routine_arg);

  // Once start_routine has finished, pthread_exit is called automatically with its result.
  pthread_exit(result);

  // NOTE(review): presumably never reached, since pthread_exit ends by exiting the thread.
  return 0;
}

pthread 泄露的原因

以下分析基于 bionic libc(Android 使用的是这个 libc)

pthread_create 部分源码如下：

// bionic/libc/bionic/pthread_create.cpp

// Creates a new thread (excerpt: only the attribute handling and the memory
// allocation step are shown; the rest of the function is omitted).
// Returns the non-zero result of __allocate_thread if allocation fails.
int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr,
                   void* (*start_routine)(void*), void* arg) {
  ErrnoRestorer errno_restorer;

  pthread_attr_t thread_attr;
  ScopedTrace trace("pthread_create");
  // Work on a local copy of the attributes: defaults when the caller passed
  // none, otherwise a copy of the caller's attributes.
  if (attr == nullptr) {
    pthread_attr_init(&thread_attr);
  } else {
    thread_attr = *attr;
    attr = nullptr; // Prevent misuse below.
  }

  bionic_tcb* tcb = nullptr;
  void* child_stack = nullptr;
  // Allocate the memory for the new thread; tcb and child_stack are filled in
  // through the out-parameters.
  int result = __allocate_thread(&thread_attr, &tcb, &child_stack);
  if (result != 0) {
    return result;
  }

  pthread_internal_t* thread = tcb->thread();
  // (A lot of code is omitted here.)
}

其中 __allocate_thread 会进行内存分配，目前看 bionic 使用的是 mmap

假如，我们仅仅只是使用了 pthread_create 创建了线程，之后就不管了，那么 mmap 映射的这块内存区间是永远不会释放的，同时，pthread_internal 内部会维护一个全局的 thread 链表，链表节点也不会被删除，这也就是 pthread 会产生内存泄露的根本原因。

而在 pthread_join 中进行了 munmap 释放了这块内存

// Waits for thread `t` to exit, optionally hands back its return value, and
// then removes and frees the thread's bookkeeping.
// Returns:
//   EDEADLK - `t` is the calling thread itself
//   ESRCH   - no such thread was found
//   EINVAL  - the thread is already detached or already joined
//   0       - success
int pthread_join(pthread_t t, void** return_value) {
  ScopedTrace trace("pthread_join");
  if (t == pthread_self()) {
    return EDEADLK;
  }

  pthread_internal_t* thread = __pthread_internal_find(t, "pthread_join");
  if (thread == nullptr) {
    return ESRCH;
  }

  // Atomically move join_state from NOT_JOINED / EXITED_NOT_JOINED to JOINED.
  // On a failed exchange old_state is refreshed with the current state, so the
  // loop ends either on success or once the state is DETACHED / JOINED.
  ThreadJoinState old_state = THREAD_NOT_JOINED;
  while ((old_state == THREAD_NOT_JOINED || old_state == THREAD_EXITED_NOT_JOINED) &&
         !atomic_compare_exchange_weak(&thread->join_state, &old_state, THREAD_JOINED)) {
  }

  if (old_state == THREAD_DETACHED || old_state == THREAD_JOINED) {
    return EINVAL;
  }

  pid_t tid = thread->tid;
  volatile int* tid_ptr = &thread->tid;

  // We set thread->join_state to THREAD_JOINED with atomic operation,
  // so no one is going to remove this thread except us.

  // Wait for the thread to actually exit, if it hasn't already.
  while (*tid_ptr != 0) {
    __futex_wait(tid_ptr, tid, nullptr);
  }

  if (return_value) {
    *return_value = thread->return_value;
  }
  // Internally calls munmap and also removes the thread from the global thread_internal_list.
  __pthread_internal_remove_and_free(thread);
  return 0;
}

再来看看 pthread_detach，你会发现在一些特殊的状态时刻，detach 和 join 等效——直接调用 pthread_join 方法。
而大多数时候，调用 detach 时，thread 都处于 THREAD_NOT_JOINED 状态，下面代码中的 CAS 操作只会把 state 改为 THREAD_DETACHED 并直接返回 0，不会走入调用 pthread_join 的 else if 分支

// Marks thread `t` as detached.
// Returns:
//   ESRCH  - no such thread was found
//   0      - the state was THREAD_NOT_JOINED and is now THREAD_DETACHED
//   result of pthread_join - the thread had already exited without being joined
//   EINVAL - any other state
int pthread_detach(pthread_t t) {
  pthread_internal_t* thread = __pthread_internal_find(t, "pthread_detach");
  if (thread == nullptr) {
    return ESRCH;
  }

  // Atomically move join_state from NOT_JOINED to DETACHED. On a failed
  // exchange old_state is refreshed with the current state, which ends the
  // loop if the thread is no longer NOT_JOINED.
  ThreadJoinState old_state = THREAD_NOT_JOINED;
  while (old_state == THREAD_NOT_JOINED &&
         !atomic_compare_exchange_weak(&thread->join_state, &old_state, THREAD_DETACHED)) {
  }

  if (old_state == THREAD_NOT_JOINED) {
    return 0;
  } else if (old_state == THREAD_EXITED_NOT_JOINED) {
    // Use pthread_join to clean it up.
    // Surprisingly, detach ends up calling pthread_join here.
    return pthread_join(t, nullptr);
  }
  return EINVAL;
}

那这么看来，pthread_detach 代码中并没有释放内存的操作，岂不是依然还是存在泄露的问题？当然不是，但是这里的逻辑比较神奇。先来看看 pthread_exit

// Terminates the calling thread, recording `return_value` for a later joiner.
// Runs thread_local destructors, cleanup handlers and TLS destructors, releases
// per-thread mappings, and then exits. For a detached thread it also removes
// the thread from the global list and tears down the thread's own mapping;
// otherwise that is left for the pthread_join caller.
void pthread_exit(void* return_value) {
  // Call dtors for thread_local objects first.
  __cxa_thread_finalize();

  pthread_internal_t* thread = __get_thread();
  thread->return_value = return_value;

  // Call the cleanup handlers.
  while (thread->cleanup_stack) {
    __pthread_cleanup_t* c = thread->cleanup_stack;
    thread->cleanup_stack = c->__cleanup_prev;
    c->__cleanup_routine(c->__cleanup_arg);
  }

  // Call the TLS destructors. It is important to do that before removing this
  // thread from the global list. This will ensure that if someone else deletes
  // a TLS key, the corresponding value will be set to NULL in this thread's TLS
  // space (see pthread_key_delete).
  pthread_key_clean_all();

  if (thread->alternate_signal_stack != nullptr) {
    // Tell the kernel to stop using the alternate signal stack.
    stack_t ss;
    memset(&ss, 0, sizeof(ss));
    ss.ss_flags = SS_DISABLE;
    sigaltstack(&ss, nullptr);

    // Free it.
    munmap(thread->alternate_signal_stack, SIGNAL_STACK_SIZE);
    thread->alternate_signal_stack = nullptr;
  }

  // Atomically move join_state from NOT_JOINED to EXITED_NOT_JOINED. If the
  // state is something else (e.g. DETACHED), the failed exchange stores it in
  // old_state and the loop ends; old_state is inspected further below.
  ThreadJoinState old_state = THREAD_NOT_JOINED;
  while (old_state == THREAD_NOT_JOINED &&
         !atomic_compare_exchange_weak(&thread->join_state, &old_state, THREAD_EXITED_NOT_JOINED)) {
  }

  // android_run_on_all_threads() needs to see signals blocked atomically with setting the
  // terminating flag, so take the creation lock while doing these operations.
  {
    ScopedReadLock locker(&g_thread_creation_lock);
    atomic_store(&thread->terminating, true);

    // We don't want to take a signal after unmapping the stack, the shadow call stack, or dynamic
    // TLS memory.
    sigset64_t set;
    sigfillset64(&set);
    __rt_sigprocmask(SIG_BLOCK, &set, nullptr, sizeof(sigset64_t));
  }

#ifdef __aarch64__
  // Free the shadow call stack and guard pages.
  munmap(thread->shadow_call_stack_guard_region, SCS_GUARD_REGION_SIZE);
#endif

  __free_dynamic_tls(__get_bionic_tcb());

  // The detached state gets its own dedicated handling here.
  if (old_state == THREAD_DETACHED) {
    // The thread is detached, no one will use pthread_internal_t after pthread_exit.
    // So we can free mapped space, which includes pthread_internal_t and thread stack.
    // First make sure that the kernel does not try to clear the tid field
    // because we'll have freed the memory before the thread actually exits.
    __set_tid_address(nullptr);

    // pthread_internal_t is freed below with stack, not here.
    __pthread_internal_remove(thread);

    if (thread->mmap_size != 0) {
      // We need to free mapped space for detached threads when they exit.
      // That's not something we can do in C.
      __notify_thread_exit_callbacks();
      __hwasan_thread_exit();
      _exit_with_stack_teardown(thread->mmap_base, thread->mmap_size);
    }
  }

  // No need to free mapped space. Either there was no space mapped, or it is left for
  // the pthread_join caller to clean up.
  __notify_thread_exit_callbacks();
  __hwasan_thread_exit();

  __exit(0);
}

你会发现 pthread_exit 其实本身也会清理一些内存空间，但是有一些独有的关于 detached 的状态逻辑。

可以看到会调用 __pthread_internal_remove 方法删除掉节点，同时调用了一个神奇的函数 _exit_with_stack_teardown

而这个方法是汇编代码。为什么呢？因为它要 munmap 的正是当前线程自己的栈：栈被释放之后，就不能再执行任何依赖栈的 C 代码（连函数返回都不行），所以只能在汇编里连续发起两个 system call——先 munmap，再 exit

#include <private/bionic_asm.h>

// void _exit_with_stack_teardown(void* stackBase, size_t stackSize)
// Issues the munmap system call followed by the exit system call, back to back,
// without returning to the caller in between.
// NOTE(review): stackBase/stackSize are presumably still in r0/r1 from the call
// and are consumed as-is by munmap - confirm against the ARM calling convention.
ENTRY_PRIVATE(_exit_with_stack_teardown)
  // Select the munmap system call and trap into the kernel.
  ldr r7, =__NR_munmap
  swi #0
  // If munmap failed, we ignore the failure and exit anyway.

  // Exit with status 0.
  mov r0, #0
  ldr r7, =__NR_exit
  swi #0
  // The exit syscall does not return.
END(_exit_with_stack_teardown)

这里调用了 munmap，释放了内存空间！

所以，正确的使用 pthread 的方式是：

创建 detached 的 pthread
pthread_create 后，调用 pthread_detach/pthread_join

pthread 泄露检测原理

目前参照 matrix 和 koom 的实现方式。

这种方式主要是通过 hook pthread_create/exit/join/detach 方法。

由之前的分析我们已经知道，pthread_create 以后，pthread_join 或者 pthread_detach 没有被调用，那么其实就是存在泄露。

所以我们的目标就是：检测 pthread_join/detach 和 pthread_create 是否是成对出现的

所以，我们只需要在 pthread_create 时将其记录到一个 map 中，当调用 pthread_detach/join 时删掉 map 中的记录，最终留在 map 中的则是泄露的 thread。

为了更进一步的监听，matrix 和 koom 也都监听了 exit 方法。如果 exit 执行以后，超过一定的时间阈值才去执行 detach/join 同样认为可能存在泄露。

具体细节，可以参考开源的 matrix 和 koom 的实现

目录

pthread 监听

简介

pthread 常见方法

pthread 泄露的原因

pthread 泄露检测原理