起因
在幽兰代码本上运行Waydroid时可能会遇到无法启动的情况,通过查看dmesg信息可以知道,这是由于lmkd服务无法启动导致安卓的init进程启动失败。
[ 5007.044079] libprocessgroup: Successfully killed process cgroup uid 1069 pid 512 in 0ms
[ 5007.044325] init: critical process 'lmkd' exited 4 times before boot completed
[ 5007.050376] init: InitFatalReboot: signal 6
[ 5007.079301] init: #00 pc 00000000000aee54 /system/bin/init (android::init::InitFatalReboot(int)+208)
[ 5007.079471] init: #01 pc 000000000004d61c /system/bin/init (android::init::InitAborter(char const*)+48)
[ 5007.079567] init: #02 pc 0000000000013978 /system/lib64/libbase.so (android::base::SetAborter(std::__1::function<void (char const*)>&&)::$_3::__invoke(char const*)+76)
[ 5007.079610] init: #03 pc 0000000000012fa4 /system/lib64/libbase.so (android::base::LogMessage::~LogMessage()+320)
[ 5007.079702] init: #04 pc 0000000000065294 /system/bin/init (android::init::Service::Reap(siginfo const&)+1336)
[ 5007.079739] init: #05 pc 00000000000b418c /system/bin/init (android::init::ReapOneProcess()+496)
[ 5007.079775] init: #06 pc 00000000000b3f8c /system/bin/init (android::init::ReapAnyOutstandingChildren()+8)
[ 5007.079812] init: #07 pc 0000000000084320 /system/bin/init (android::init::SecondStageMain(int, char**)+6964)
[ 5007.079848] init: #08 pc 000000000002f184 /system/bin/init (main+304)
[ 5007.079883] init: #09 pc 000000000008506c /system/lib64/bootstrap/libc.so (__libc_init+108)
[ 5007.079969] init: Reboot ending, jumping to kernel
[ 5007.099096] init: InitFatalReboot: signal 6
[ 5007.104781] init: InitFatalReboot: signal 11
[ 5007.127388] init: #00 pc 00000000000aee54 /system/bin/init (android::init::InitFatalReboot(int)+208)
[ 5007.127450] init: #01 pc 00000000000af234 /system/bin/init (android::init::InstallRebootSignalHandlers()::$_22::__invoke(int)+32)
[ 5007.127487] init: #02 pc 00000000000007c0 [vdso:0000007fb83a6000]
[ 5007.127524] init: #03 pc 00000000000cec24 /system/bin/init (android::properties::PropertyInfoArea::GetPropertyInfoIndexes(char const*, unsigned int*, unsigned int*) const+28)
[ 5007.127563] init: #04 pc 00000000000cefc4 /system/bin/init (android::properties::PropertyInfoArea::GetPropertyInfo(char const*, char const**, char const**) const+56)
[ 5007.127600] init: #05 pc 00000000000cefc4 /system/bin/init (android::properties::PropertyInfoArea::GetPropertyInfo(char const*, char const**, char const**) const+56)
[ 5007.127643] init: #06 pc 000000000009b73c /system/bin/init (android::init::CheckPermissions(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, ucred const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >*)+240)
[ 5007.127689] init: #07 pc 000000000009b89c /system/bin/init (android::init::HandlePropertySet(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > const&, ucred const&, android::init::SocketConnection*, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >*)+68)
[ 5007.127726] init: #08 pc 00000000000a0ddc /system/bin/init (android::init::handle_property_set_fd()+748)
[ 5007.127762] init: #09 pc 000000000009fee8 /system/bin/init (android::init::PropertyServiceThread()+704)
[ 5007.127794] init: #10 pc 000000000006e0a0 /system/bin/init
[ 5007.127830] init: #11 pc 00000000000ec8ec /system/lib64/bootstrap/libc.so (__pthread_start(void*)+64)
[ 5007.127921] init: #12 pc 000000000008ba84 /system/lib64/bootstrap/libc.so (__start_thread+64)
[ 5007.127953] init: Reboot ending, jumping to kernel
[ 377.026793] binder: release 3788:3788 transaction 21474 out, still active
[ 377.026804] binder: undelivered TRANSACTION_COMPLETE
[ 377.026866] binder: release 3768:3768 transaction 21208 out, still active
[ 377.026871] binder: undelivered TRANSACTION_COMPLETE
[ 377.027203] binder: release 3065:3129 transaction 21897 in, still active
[ 377.027300] binder: send failed reply for transaction 21897 to 3279:3310
[ 377.027420] binder: release 3065:3065 transaction 21898 out, still active
[ 377.027427] binder: undelivered TRANSACTION_COMPLETE
[ 377.027621] binder: release 2985:3087 transaction 21898 in, still active
[ 377.027673] binder: send failed reply for transaction 21898, target dead
lmkd简介
lmkd的全称是低内存杀手守护进程(low memory killer daemon),由于安卓系统运行多个进程时可能会遇到内存耗尽的问题,所以会通过内存的cgroup进行内存分配,并通过lmkd对内存进行监视,在内存不足时杀死进程,降低内存压力。
lmkd通常使用vmPressure或PSI监视内存压力信息。由于vmPressure通常包含大量误报,lmkd需要额外确认内存是否真的紧缺,从而消耗额外的资源,所以一般会使用PSI进行更准确的内存压力监视,减小资源的开销。
内核支持
lmkd作为用户态程序无法代替操作系统进行硬件资源的管理,所以lmkd监视内存需要内核的帮助。
对于Linux内核而言,需要按下面的方式配置内核选项:关闭内核态的低内存杀手驱动(CONFIG_ANDROID_LOW_MEMORY_KILLER),并开启内存cgroup和PSI的支持。
CONFIG_ANDROID_LOW_MEMORY_KILLER=n
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
CONFIG_PSI=y
PSI的开启
此时lmkd服务启动失败,考虑到lmkd对内核的依赖,所以首先排查内核当中相关配置选项是否开启。
通过观察虚文件config.gz可以确定,lmkd需要的内核选项都已经处于开启状态。
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
CONFIG_MEMCG_KMEM=y
CONFIG_SLUB_MEMCG_SYSFS_ON=y
# CONFIG_TEST_MEMCAT_P is not set
CONFIG_PSI=y
CONFIG_PSI_DEFAULT_DISABLED=y
由于dmesg中的报错信息存在binder通信失败的情况,考虑到驱动为了和用户态程序通信,一般会在proc目录下创建虚文件,所以此时排查psi驱动有没有建立虚文件,为lmkd服务提供与内核通信的接口。
在查看proc目录后,可以确认psi并没有建立与用户态程序进行通信的接口:/proc/pressure目录不存在,lmkd无法打开其中的虚文件来监视内存压力,自然也会启动失败。
ls -lh /proc/pressure/
ls: cannot access '/proc/pressure/': No such file or directory
在上面的内核选项中可以看到psi功能默认是禁用的,查询内核代码可以知道,只有当CONFIG_PSI_DEFAULT_DISABLED关闭时,psi_enable才会默认处于开启状态,否则需要通过内核命令行参数psi=显式赋值。
/*
 * Boot-time PSI switch. With CONFIG_PSI_DEFAULT_DISABLED defined the flag
 * has no initializer (a static bool, so it starts out false); otherwise it
 * is initialized to true.
 */
#ifdef CONFIG_PSI_DEFAULT_DISABLED
static bool psi_enable;
#else
static bool psi_enable = true;
#endif
/*
 * Handler for the "psi=" kernel command-line parameter.
 * Parses @str as a boolean into psi_enable via kstrtobool().
 * Returns 1 when kstrtobool() returns 0 (presumably "parsed OK" -- confirm
 * against kstrtobool()'s documentation), and 0 otherwise.
 */
static int __init setup_psi(char *str)
{
return kstrtobool(str, &psi_enable) == 0;
}
/* Register setup_psi as the handler for options starting with "psi=". */
__setup("psi=", setup_psi);
当psi_enable
处于关闭状态时,会直接影响到psi驱动的初始化行为,并导致其不会创建虚文件。
/*
 * Init-time PSI setup.
 * When psi_enable is false, the psi_disabled static branch is enabled and
 * nothing else is initialized. Otherwise the function optionally disables
 * psi_cgroups_enabled, computes psi_period from PSI_FREQ and initializes
 * the psi_system group.
 */
void __init psi_init(void)
{
/* PSI not requested: flip the "disabled" static branch and bail out. */
if (!psi_enable) {
static_branch_enable(&psi_disabled);
return;
}
/* Turn off the cgroup-related static branch when cgroup_psi_enabled() is false. */
if (!cgroup_psi_enabled())
static_branch_disable(&psi_cgroups_enabled);
/* PSI_FREQ is expressed in jiffies; psi_period holds it in nanoseconds. */
psi_period = jiffies_to_nsecs(PSI_FREQ);
group_init(&psi_system);
}
/*
 * Create the procfs entries for PSI: a "pressure" directory containing
 * "io", "memory" and "cpu", each bound to its own proc_ops table.
 * Nothing is created when psi_enable is false, which is why
 * /proc/pressure/ is missing in that case.
 * Always returns 0.
 */
static int __init psi_proc_init(void)
{
if (psi_enable) {
/* NULL parent: presumably the procfs root, i.e. /proc/pressure -- confirm. */
proc_mkdir("pressure", NULL);
proc_create("pressure/io", 0, NULL, &psi_io_proc_ops);
proc_create("pressure/memory", 0, NULL, &psi_memory_proc_ops);
proc_create("pressure/cpu", 0, NULL, &psi_cpu_proc_ops);
}
return 0;
}
从内核代码中可以知道,当CONFIG_PSI_DEFAULT_DISABLED开启时,psi_enable的数值由通过__setup宏注册的处理函数根据内核命令行决定。
psi驱动通过__setup宏在.init.setup段中注册检索值psi=和处理函数setup_psi。
/*
 * __setup_param(str, unique_id, fn, early)
 *
 * Emits two objects per registration:
 *   - __setup_str_<unique_id>: a const char array holding @str
 *     (marked __initconst, 1-byte aligned);
 *   - __setup_<unique_id>: a struct obs_kernel_param initialized with
 *     { that string, @fn, @early }, placed in the ".init.setup" section,
 *     aligned to sizeof(long) and marked __used.
 */
#define __setup_param(str, unique_id, fn, early) \
static const char __setup_str_##unique_id[] __initconst \
__aligned(1) = str; \
static struct obs_kernel_param __setup_##unique_id \
__used __section(".init.setup") \
__attribute__((aligned((sizeof(long))))) \
= { __setup_str_##unique_id, fn, early }
当内核开始初始化时,内核启动函数会通过parse_args解析内核命令行,并把unknown_bootoption作为回调传入,用来处理无法与已注册内核参数匹配的启动选项。
/*
 * Parse static_command_line against the parameter table that spans
 * __start___param .. __stop___param. unknown_bootoption is passed as the
 * final argument -- presumably the callback for options the table does not
 * match; confirm against parse_args()'s prototype.
 */
after_dashes = parse_args("Booting kernel",
static_command_line, __start___param,
__stop___param - __start___param,
-1, -1, NULL, &unknown_bootoption);
unknown_bootoption
函数内会先通过obsolete_checksetup
检查启动固件传递过来的内核命令行。
/*
 * Handle one boot option given as @param with an optional @val.
 *
 * Order of checks:
 *   1. obsolete_checksetup(): if it reports the option as handled, stop.
 *   2. A '.' within the first strlen(param) characters: treated as an
 *      unused module parameter and dropped.
 *   3. panic_later already set: ignore the option.
 *   4. Otherwise store @param in envp_init[] (when @val is non-NULL) or in
 *      argv_init[] (when @val is NULL).
 *
 * @unused and @arg are not referenced. Always returns 0.
 */
static int __init unknown_bootoption(char *param, char *val,
const char *unused, void *arg)
{
/* Length is taken before repair_env_string() touches the buffer. */
size_t len = strlen(param);
/* NOTE(review): presumably rejoins param and val into "param=val" in place -- confirm. */
repair_env_string(param, val);
/* Handle obsolete-style parameters */
if (obsolete_checksetup(param))
return 0;
/* Unused module parameter. */
if (strnchr(param, len, '.'))
return 0;
/* An earlier option already overflowed one of the tables; ignore the rest. */
if (panic_later)
return 0;
if (val) {
/* Environment option */
unsigned int i;
/* Find the first free slot, or an existing entry to overwrite. */
for (i = 0; envp_init[i]; i++) {
/* Table full: record "env" and the offending param for later. */
if (i == MAX_INIT_ENVS) {
panic_later = "env";
panic_param = param;
}
/* First len+1 characters equal: reuse this slot for the new value. */
if (!strncmp(param, envp_init[i], len+1))
break;
}
envp_init[i] = param;
} else {
/* Command line option */
unsigned int i;
/* Walk to the first NULL slot of argv_init[]. */
for (i = 0; argv_init[i]; i++) {
/* Table full: record "init" and the offending param for later. */
if (i == MAX_INIT_ARGS) {
panic_later = "init";
panic_param = param;
}
}
argv_init[i] = param;
}
return 0;
}
obsolete_checksetup
函数会检查.init.setup
中注册的信息,并根据对应的处理函数进行处理。
/*
 * Match @line against every obs_kernel_param entry between __setup_start
 * and __setup_end (the entries emitted by __setup_param into ".init.setup").
 *
 * For each entry whose str matches the start of @line (per parameqn()):
 *   - early entry: only note that an early param matched exactly, then keep
 *     scanning;
 *   - entry without a setup_func: warn that it is obsolete, return true;
 *   - otherwise call setup_func with the text following the matched prefix
 *     and return true if it returns non-zero.
 *
 * Returns true when the option was consumed; otherwise returns whether an
 * exactly-matching early param was seen.
 */
static bool __init obsolete_checksetup(char *line)
{
const struct obs_kernel_param *p;
bool had_early_param = false;
p = __setup_start;
do {
/* Compare only the registered prefix, e.g. "psi=". */
int n = strlen(p->str);
if (parameqn(line, p->str, n)) {
if (p->early) {
/* Already done in parse_early_param?
* (Needs exact match on param part).
* Keep iterating, as we can have early
* params and __setups of same names 8( */
if (line[n] == '\0' || line[n] == '=')
had_early_param = true;
} else if (!p->setup_func) {
pr_warn("Parameter %s is obsolete, ignored\n",
p->str);
return true;
/* Handler gets the text after the prefix; for "psi=1" that is "1". */
} else if (p->setup_func(line + n))
return true;
}
p++;
} while (p < __setup_end);
return had_early_param;
}
所以当启动固件uboot传递的内核命令行包含psi=1
时,psi_enable
就会处于开启状态。
此时修改uboot传递的内核命令行添加psi=1
后,再次启动系统,就可以发现psi的虚文件被正确注册,且Waydroid可以启动运行。
ls -lh /proc/pressure/
total 0
-r--r--r-- 1 root root 0 Apr 15 14:17 cpu
-r--r--r-- 1 root root 0 Apr 15 14:17 io
-r--r--r-- 1 root root 0 Apr 15 14:17 memory
最后编辑:admin 更新时间:2024-12-20 17:01