由于OOM,avm一直被内核kill -9杀掉。最终会出现avm重启报错
bos_em_service: Fatal glibc error: pthread_mutex_lock.c:450 (__pthread_mutex_lock_full): assertion failed: e != ESRCH || !robust。这个锁是共享内存上的一个robust互斥锁。
而且该BUG报了好几例。正常的来讲如果用户态没有出现内存踩踏,不将用户态的lock值踩踏或者将线程的robust list踩坏,是不会出现这种错误的。
大概率是oom_reaper的原因,待深入分析
如上图,看assert的地方可知,当pthread_mutex_lock尝试通过CAS直接上锁失败后,会通过系统调用futex_lock_pi陷入内核态;futex_lock_pi返回了ESRCH错误码,但是根据robust互斥锁的设计初衷,它是不可能返回ESRCH错误码的。
如上图,跟踪futex_lock_pi,只有这一个地方返回ESRCH。根据它的注释,只有线程的robust链表损坏,或者锁的lock值被踩踏损坏,才会出现这个错误。
接下来深度分析robust相关futex源码。
用户态上锁核心代码:
1)THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
(void *) (((uintptr_t) &mutex->__data.__list.__next)
| 1));
2)
oldval = atomic_compare_and_exchange_val_acq (&mutex->__data.__lock,
newval, 0);
3)
if (oldval != 0)
{
/* The mutex is locked. The kernel will now take care of
everything. */
int private = (robust
? PTHREAD_ROBUST_MUTEX_PSHARED (mutex)
: PTHREAD_MUTEX_PSHARED (mutex));
int e = __futex_lock_pi64 (&mutex->__data.__lock, 0 /* unused */,
NULL, private);
if (e == ESRCH || e == EDEADLK)
{
assert (e != EDEADLK
|| (kind != PTHREAD_MUTEX_ERRORCHECK_NP
&& kind != PTHREAD_MUTEX_RECURSIVE_NP));
/* ESRCH can happen only for non-robust PI mutexes where
the owner of the lock died. */
assert (e != ESRCH || !robust);
/* Delay the thread indefinitely. */
while (1)
__futex_abstimed_wait64 (&(unsigned int){0}, 0,
0 /* ignored */, NULL, private);
}
oldval = mutex->__data.__lock;
assert (robust || (oldval & FUTEX_OWNER_DIED) == 0);
}
4)
__asm ("" ::: "memory");
ENQUEUE_MUTEX (mutex);
/* We need to clear op_pending after we enqueue the mutex. */
__asm ("" ::: "memory");
THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
大概解释下上述代码意图。robust互斥锁上锁前,先将锁加入线程的list_op_pending链表。然后尝试通过CAS在用户态上锁,如果上锁失败则进入步骤3.
如果上锁成功,则将锁加入线程的list链表,并从list_op_pending移除。list_op_pending的设计意图是防止上锁成功后,还未加入list链表,被kill,内核在do_exit的时候就无法释放锁。
接下来着重分析用户态上锁失败后调用的futex_lock_pi系统调用,以及内核在do_exit释放进程的时候是如何处理robust互斥锁的。
可以猜到,do_exit的时候,内核会遍历线程的list_op_pending和list链表,然后做处理。
futex_cleanup_begin先加锁:mutex_lock(&tsk->futex_exit_mutex);然后设置tsk->futex_state = FUTEX_STATE_EXITING;
futex_cleanup_end是 mutex_unlock(&tsk->futex_exit_mutex);
重点在cleanup。它调用exit_robust_list遍历list_op_pending和list链表,对其中的锁执行handle_futex_death函数。
/*
 * handle_futex_death - fix up one futex word on behalf of a dying task.
 *
 * @uaddr:      user-space address of the 32-bit futex word
 * @curr:       task whose TID (task_pid_vnr) is compared with the owner field
 * @pi:         when true, no futex_wake() is issued from this function
 * @pending_op: enables the "no owner recorded" wake-up path below;
 *              presumably set for the list_op_pending entry -- confirm
 *              against the caller
 *
 * Return: 0 when the word was handled or does not name @curr as owner,
 * -1 when @uaddr is misaligned or cannot be read / faulted in, or the
 * unexpected error code returned by futex_cmpxchg_value_locked().
 */
static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr,
			      bool pi, bool pending_op)
{
	u32 uval, nval, mval;
	pid_t owner;
	int err;

	/* Futex address must be 32bit aligned */
	if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0)
		return -1;

retry:
	if (get_user(uval, uaddr))
		return -1;

	owner = uval & FUTEX_TID_MASK;

	/*
	 * Pending operation on a non-PI futex whose TID field is empty:
	 * issue a futex_wake() on the word and leave its value untouched.
	 */
	if (pending_op && !pi && !owner) {
		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
		return 0;
	}

	/* Nothing to do unless the word names @curr as the owner. */
	if (owner != task_pid_vnr(curr))
		return 0;

	/*
	 * Rewrite the user-space lock word: keep only the FUTEX_WAITERS bit
	 * of the old value and set FUTEX_OWNER_DIED. The TID field ends up
	 * cleared because it is not carried over from uval.
	 */
	mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;

	err = futex_cmpxchg_value_locked(&nval, uaddr, uval, mval);
	if (err) {
		switch (err) {
		case -EFAULT:
			/* Try to make the page writable, then start over. */
			if (fault_in_user_writeable(uaddr))
				return -1;
			goto retry;

		case -EAGAIN:
			cond_resched();
			goto retry;

		default:
			WARN_ON_ONCE(1);
			return err;
		}
	}

	/* The cmpxchg saw a different value than the one read: start over. */
	if (nval != uval)
		goto retry;

	/* Non-PI futex with the waiters bit set: issue a futex_wake(). */
	if (!pi && (uval & FUTEX_WAITERS))
		futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);

	return 0;
}
接下来看系统调用
/*
 * futex_lock_pi_atomic - try to take a PI futex by updating the user-space
 * lock word, or attach to the existing owner / PI state.
 *
 * @uaddr:       user-space address of the futex word
 * @hb, @key:    passed to futex_top_waiter() to look up an existing waiter
 * @ps:          out-parameter handed on to attach_to_pi_state() /
 *               attach_to_pi_owner()
 * @task:        task whose vpid is written into the lock word on success
 * @exiting:     out-parameter handed on to attach_to_pi_owner()
 * @set_waiters: not referenced anywhere in this excerpt
 *
 * Return: 1 when the lock word was updated to name @task as owner;
 * -EFAULT when the word cannot be read; -EDEADLK when the TID field
 * already equals @task's vpid; otherwise whatever lock_pi_update_atomic(),
 * attach_to_pi_state() or attach_to_pi_owner() returns.
 *
 * NOTE(review): should_fail_futex() looks like a fault-injection hook that
 * forces the -EFAULT / -EDEADLK paths -- confirm against its definition.
 */
int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
union futex_key *key,
struct futex_pi_state **ps,
struct task_struct *task,
struct task_struct **exiting,
int set_waiters)
{
u32 uval, newval, vpid = task_pid_vnr(task);
struct futex_q *top_waiter;
int ret;
/* Read the current value of the user-space lock word. */
if (futex_get_value_locked(&uval, uaddr))
return -EFAULT;
if (unlikely(should_fail_futex(true)))
return -EFAULT;
/* The lock word already names this task as owner. */
if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
return -EDEADLK;
if ((unlikely(should_fail_futex(true))))
return -EDEADLK;
top_waiter = futex_top_waiter(hb, key);
if (top_waiter)// priority-inversion handling; left for the interested reader to analyse
return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);
if (!(uval & FUTEX_TID_MASK)) {// TID field is zero, e.g. after handle_futex_death() rewrote the lock word
/* Keep only FUTEX_OWNER_DIED from the old value and install our vpid. */
newval = uval & FUTEX_OWNER_DIED;
newval |= vpid;
ret = lock_pi_update_atomic(uaddr, uval, newval);
if (ret)
return ret;
return 1;// lock acquired
}
/* The word has an owner TID: set FUTEX_WAITERS and attach to that owner. */
newval = uval | FUTEX_WAITERS;
ret = lock_pi_update_atomic(uaddr, uval, newval);// update the lock word to newval
if (ret)
return ret;
return attach_to_pi_owner(uaddr, newval, key, ps, exiting);// can end up calling handle_exit_race()
}
static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
struct futex_pi_state **ps,
struct task_struct **exiting)
{
pid_t pid = uval & FUTEX_TID_MASK;
struct task_struct *p;
if (!pid)
return -EAGAIN;
p = find_get_task_by_vpid(pid);
if (!p)
return handle_exit_race(uaddr, uval, NULL);
if (unlikely(p->flags & PF_KTHREAD)) {
put_task_struct(p);
return -EPERM;
}
raw_spin_lock_irq(&p->pi_lock);
if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
int ret = handle_exit_race(uaddr, uval, p);
raw_spin_unlock_irq(&p->pi_lock);
if (ret == -EBUSY)
*exiting = p;
else
put_task_struct(p);
return ret;
}
留给读者的问题:do_exit和pthread_mutex_lock并发修改lock值,它怎么做的同步
&spm=1001.2101.3001.5002&articleId=157646537&d=1&t=3&u=26aa89f7a84e41e5af9886e1ea3bf7d3)
1132

被折叠的 条评论
为什么被折叠?



