init进程会读取rc文件,然后孵化很多其他系统服务进程,为防止子进程死亡后称为僵尸进程,init需要监测子进程是否死亡,如果死亡,则清除子进程资源,并重新拉起进程。
system/core/init/init.cpp
InstallSignalFdHandler(&epoll);
init进程第二阶段启动的时候,创建了epoll,并将子进程相关处理注册到epoll中
static void InstallSignalFdHandler(Epoll* epoll) {
// Applying SA_NOCLDSTOP to a defaulted SIGCHLD handler prevents the signalfd from receiving
// SIGCHLD when a child process stops or continues (b/77867680#comment9).
const struct sigaction act { .sa_handler = SIG_DFL, .sa_flags = SA_NOCLDSTOP };
sigaction(SIGCHLD, &act, nullptr);
sigset_t mask;
sigemptyset(&mask);
sigaddset(&mask, SIGCHLD);
if (!IsRebootCapable()) {
// If init does not have the CAP_SYS_BOOT capability, it is running in a container.
// In that case, receiving SIGTERM will cause the system to shut down.
sigaddset(&mask, SIGTERM);
}
if (sigprocmask(SIG_BLOCK, &mask, nullptr) == -1) {
PLOG(FATAL) << "failed to block signals";
}
// Register a handler to unblock signals in the child processes.
const int result = pthread_atfork(nullptr, nullptr, &UnblockSignals);
if (result != 0) {
LOG(FATAL) << "Failed to register a fork handler: " << strerror(result);
}
signal_fd = signalfd(-1, &mask, SFD_CLOEXEC | SFD_NONBLOCK);
if (signal_fd == -1) {
PLOG(FATAL) << "failed to create signalfd";
}
constexpr int flags = EPOLLIN | EPOLLPRI;
auto handler = std::bind(HandleSignalFd, false);
if (auto result = epoll->RegisterHandler(signal_fd, handler, flags); !result.ok()) {
LOG(FATAL) << result.error();
}
}
使用epoll注册监听信号量fd,子进程死亡会发送信号给init进程,这里就会调用epoll中注册的回调函数HandleSignalFd
static void HandleSignalFd(bool one_off) {
signalfd_siginfo siginfo;
auto started = std::chrono::steady_clock::now();
do {
ssize_t bytes_read = TEMP_FAILURE_RETRY(read(signal_fd, &siginfo, sizeof(siginfo)));
if (bytes_read < 0 && errno == EAGAIN) {
auto now = std::chrono::steady_clock::now();
std::chrono::duration<double> waited = now - started;
if (waited >= kDiagnosticTimeout) {
LOG(ERROR) << "epoll() woke us up, but we waited with no SIGCHLD!";
started = now;
}
std::this_thread::sleep_for(100ms);
continue;
}
if (bytes_read != sizeof(siginfo)) {
PLOG(ERROR) << "Failed to read siginfo from signal_fd";
return;
}
break;
} while (!one_off);
switch (siginfo.ssi_signo) {
case SIGCHLD:
ReapAnyOutstandingChildren();
break;
case SIGTERM:
HandleSigtermSignal(siginfo);
break;
default:
PLOG(ERROR) << "signal_fd: received unexpected signal " << siginfo.ssi_signo;
break;
}
}
对于SIGCHLD信号,调用ReapAnyOutstandingChildren
system/core/init/sigchld_handler.cpp
void ReapAnyOutstandingChildren() {
while (ReapOneProcess() != 0) {
}
}
static pid_t ReapOneProcess() {
siginfo_t siginfo = {};
// This returns a zombie pid or informs us that there are no zombies left to be reaped.
// It does NOT reap the pid; that is done below.
if (TEMP_FAILURE_RETRY(waitid(P_ALL, 0, &siginfo, WEXITED | WNOHANG | WNOWAIT)) != 0) {
PLOG(ERROR) << "waitid failed";
return 0;
}
auto pid = siginfo.si_pid;
if (pid == 0) return 0;
// At this point we know we have a zombie pid, so we use this scopeguard to reap the pid
// whenever the function returns from this point forward.
// We do NOT want to reap the zombie earlier as in Service::Reap(), we kill(-pid, ...) and we
// want the pid to remain valid throughout that (and potentially future) usages.
auto reaper = make_scope_guard([pid] { TEMP_FAILURE_RETRY(waitpid(pid, nullptr, WNOHANG)); });
std::string name;
std::string wait_string;
Service* service = nullptr;
if (SubcontextChildReap(pid)) {
name = "Subcontext";
} else {
//查找死亡进程pid
service = ServiceList::GetInstance().FindService(pid, &Service::pid);
if (service) {
name = StringPrintf("Service '%s' (pid %d)", service->name().c_str(), pid);
if (service->flags() & SVC_EXEC) {
auto exec_duration = boot_clock::now() - service->time_started();
auto exec_duration_ms =
std::chrono::duration_cast<std::chrono::milliseconds>(exec_duration).count();
wait_string = StringPrintf(" waiting took %f seconds", exec_duration_ms / 1000.0f);
} else if (service->flags() & SVC_ONESHOT) {
auto exec_duration = boot_clock::now() - service->time_started();
auto exec_duration_ms =
std::chrono::duration_cast<std::chrono::milliseconds>(exec_duration)
.count();
wait_string = StringPrintf(" oneshot service took %f seconds in background",
exec_duration_ms / 1000.0f);
}
} else {
name = StringPrintf("Untracked pid %d", pid);
}
}
if (siginfo.si_code == CLD_EXITED) {
LOG(INFO) << name << " exited with status " << siginfo.si_status << wait_string;
} else {
LOG(INFO) << name << " received signal " << siginfo.si_status << wait_string;
}
if (!service) {
LOG(INFO) << name << " did not have an associated service entry and will not be reaped";
return pid;
}
//杀死进程,清除资源,并设置重启标志
service->Reap(siginfo);
if (service->flags() & SVC_TEMPORARY) {
ServiceList::GetInstance().RemoveService(*service);
}
return pid;
}
调用Reap函数清理子进程
system/core/init/service.cpp
void Service::Reap(const siginfo_t& siginfo) {
//杀死进程
if (!(flags_ & SVC_ONESHOT) || (flags_ & SVC_RESTART)) {
KillProcessGroup(SIGKILL, false);
} else {
// Legacy behavior from ~2007 until Android R: this else branch did not exist and we did not
// kill the process group in this case.
if (SelinuxGetVendorAndroidVersion() >= __ANDROID_API_R__) {
// The new behavior in Android R is to kill these process groups in all cases. The
// 'true' parameter instructions KillProcessGroup() to report a warning message where it
// detects a difference in behavior has occurred.
KillProcessGroup(SIGKILL, true);
}
}
// Remove any socket resources we may have created.
//清除socket节点
for (const auto& socket : sockets_) {
if (socket.persist) {
continue;
}
auto path = ANDROID_SOCKET_DIR "/" + socket.name;
unlink(path.c_str());
}
for (const auto& f : reap_callbacks_) {
f(siginfo);
}
if ((siginfo.si_code != CLD_EXITED || siginfo.si_status != 0) && on_failure_reboot_target_) {
LOG(ERROR) << "Service with 'reboot_on_failure' option failed, shutting down system.";
trigger_shutdown(*on_failure_reboot_target_);
}
if (flags_ & SVC_EXEC) UnSetExec();
if (name_ == "zygote" || name_ == "zygote64") {
//如果是zygote,则清除所有服务进程
removeAllEmptyProcessGroups();
}
if (flags_ & SVC_TEMPORARY) return;
pid_ = 0;
flags_ &= (~SVC_RUNNING);
start_order_ = 0;
// Oneshot processes go into the disabled state on exit,
// except when manually restarted.
if ((flags_ & SVC_ONESHOT) && !(flags_ & SVC_RESTART) && !(flags_ & SVC_RESET)) {
flags_ |= SVC_DISABLED;
}
// Disabled and reset processes do not get restarted automatically.
if (flags_ & (SVC_DISABLED | SVC_RESET)) {
NotifyStateChange("stopped");
return;
}
#if INIT_FULL_SOURCES
static bool is_apex_updatable = android::sysprop::ApexProperties::updatable().value_or(false);
#else
static bool is_apex_updatable = false;
#endif
const bool is_process_updatable = !use_bootstrap_ns_ && is_apex_updatable;
// If we crash > 4 times in 'fatal_crash_window_' minutes or before boot_completed,
// reboot into bootloader or set crashing property
boot_clock::time_point now = boot_clock::now();
if (((flags_ & SVC_CRITICAL) || is_process_updatable) && !(flags_ & SVC_RESTART)) {
bool boot_completed = GetBoolProperty("sys.boot_completed", false);
if (now < time_crashed_ + fatal_crash_window_ || !boot_completed) {
if (++crash_count_ > 4) {
auto exit_reason = boot_completed ?
"in " + std::to_string(fatal_crash_window_.count()) + " minutes" :
"before boot completed";
if (flags_ & SVC_CRITICAL) {
if (!GetBoolProperty("init.svc_debug.no_fatal." + name_, false)) {
// Aborts into `fatal_reboot_target_'.
SetFatalRebootTarget(fatal_reboot_target_);
LOG(FATAL) << "critical process '" << name_ << "' exited 4 times "
<< exit_reason;
}
} else {
LOG(ERROR) << "process with updatable components '" << name_
<< "' exited 4 times " << exit_reason;
// Notifies update_verifier and apexd
SetProperty("sys.init.updatable_crashing_process_name", name_);
SetProperty("sys.init.updatable_crashing", "1");
}
}
} else {
time_crashed_ = now;
crash_count_ = 1;
}
}
flags_ &= (~SVC_RESTART);
//设置重启标志位
flags_ |= SVC_RESTARTING;
// Execute all onrestart commands for this service.
//启动所有rc中标志位onrestart的服务
onrestart_.ExecuteAllCommands();
NotifyStateChange("restarting");
return;
}
这里清除了进程的资源,并设置该进程标志为SVC_RESTARTING,并启动rc中定义的onrestart相关服务
while (true) {
...
if (!IsShuttingDown()) {
auto next_process_action_time = HandleProcessActions();
// If there's a process that needs restarting, wake up in time for that.
if (next_process_action_time) {
epoll_timeout = std::chrono::ceil<std::chrono::milliseconds>(
*next_process_action_time - boot_clock::now());
if (*epoll_timeout < 0ms) epoll_timeout = 0ms;
}
}
...
}
init进程的死循环中HandleProcessActions会处理服务重启
static std::optional<boot_clock::time_point> HandleProcessActions() {
std::optional<boot_clock::time_point> next_process_action_time;
//遍历所有的service
for (const auto& s : ServiceList::GetInstance()) {
if ((s->flags() & SVC_RUNNING) && s->timeout_period()) {
auto timeout_time = s->time_started() + *s->timeout_period();
if (boot_clock::now() > timeout_time) {
s->Timeout();
} else {
if (!next_process_action_time || timeout_time < *next_process_action_time) {
next_process_action_time = timeout_time;
}
}
}
//如果没有标志为SVC_RESTARTING,则跳过
if (!(s->flags() & SVC_RESTARTING)) continue;
auto restart_time = s->time_started() + s->restart_period();
if (boot_clock::now() > restart_time) {
//调用Start重启服务
if (auto result = s->Start(); !result.ok()) {
LOG(ERROR) << "Could not restart process '" << s->name() << "': " << result.error();
}
} else {
if (!next_process_action_time || restart_time < *next_process_action_time) {
next_process_action_time = restart_time;
}
}
}
return next_process_action_time;
}
调用Start函数执行
Result<void> Service::Start() {
...
pid_t pid = -1;
if (namespaces_.flags) {
pid = clone(nullptr, nullptr, namespaces_.flags | SIGCHLD, nullptr);
} else {
//新建一个进程
pid = fork();
}
if (pid == 0) {
//子进程中执行,运行子进程服务
umask(077);
RunService(override_mount_namespace, descriptors, std::move(pipefd));
_exit(127);
}
if (pid < 0) {
pid_ = 0;
return ErrnoError() << "Failed to fork";
}
...
}
fork子进程,并调用RunService执行service
void Service::RunService(const std::optional<MountNamespace>& override_mount_namespace,
const std::vector<Descriptor>& descriptors,
std::unique_ptr<std::array<int, 2>, decltype(&ClosePipe)> pipefd) {
...
if (!ExpandArgsAndExecv(args_, sigstop_)) {
PLOG(ERROR) << "cannot execv('" << args_[0]
<< "'). See the 'Debugging init' section of init's README.md for tips";
}
}
调用ExpandArgsAndExecv执行
static bool ExpandArgsAndExecv(const std::vector<std::string>& args, bool sigstop) {
std::vector<std::string> expanded_args;
std::vector<char*> c_strings;
expanded_args.resize(args.size());
c_strings.push_back(const_cast<char*>(args[0].data()));
for (std::size_t i = 1; i < args.size(); ++i) {
auto expanded_arg = ExpandProps(args[i]);
if (!expanded_arg.ok()) {
LOG(FATAL) << args[0] << ": cannot expand arguments': " << expanded_arg.error();
}
expanded_args[i] = *expanded_arg;
c_strings.push_back(expanded_args[i].data());
}
c_strings.push_back(nullptr);
if (sigstop) {
kill(getpid(), SIGSTOP);
}
return execv(c_strings[0], c_strings.data()) == 0;
}
调用execv执行可执行文件
——init进程对子进程的监控(基于Android13)&spm=1001.2101.3001.5002&articleId=130485247&d=1&t=3&u=bc8834aa28974afca965329269a4e1e0)
389

被折叠的 条评论
为什么被折叠?



