genode/repos/libports/src/lib/libc/kernel.cc

443 lines
11 KiB
C++

/*
* \brief Libc kernel for main and pthreads user contexts
* \author Christian Helmuth
* \author Emery Hemingway
* \author Norman Feske
* \date 2016-01-22
*/
/*
* Copyright (C) 2016-2020 Genode Labs GmbH
*
* This file is part of the Genode OS framework, which is distributed
* under the terms of the GNU Affero General Public License version 3.
*/
/* libc-internal includes */
#include <internal/kernel.h>
/*
* Definition of the static pointer to the kernel instance. It is assigned
* at the end of the 'Kernel' constructor in this file.
*/
Libc::Kernel * Libc::Kernel::_kernel_ptr;
/**
* Blockade for main context
*/
inline void Libc::Main_blockade::block()
{
	Check check { _woken_up };

	/*
	 * Suspend at least once and keep suspending until the blockade got
	 * woken up or the timeout expired. Each suspend call yields the
	 * remaining timeout, which is fed into the next round.
	 */
	for (;;) {
		_timeout_ms = Kernel::kernel().suspend(check, _timeout_ms);

		/* the timeout counts as expired only if one was requested */
		_expired = _timeout_valid && !_timeout_ms;

		if (woken_up() || expired())
			break;
	}
}
inline void Libc::Main_blockade::wakeup()
{
_woken_up = true;
Kernel::kernel().resume_main();
}
/**
* Main context execution was suspended (on fork)
*
* This function is executed in the context of the initial thread.
*/
static void suspended_callback()
{
Libc::Kernel::kernel().entrypoint_suspended();
}
/**
* Resume main context execution (after fork)
*
* This function is executed in the context of the initial thread.
*/
static void resumed_callback()
{
Libc::Kernel::kernel().entrypoint_resumed();
}
/**
 * Return the stack size to use for the user context
 *
 * A regular (non-cloned) component uses 'Component::stack_size()'. A cloned
 * component takes the size from the 'size' attribute of the '<stack>' sub
 * node of the libc configuration.
 *
 * If the '<stack>' node is absent or lacks a 'size' attribute, the
 * component's default stack size is used. Falling back to the default
 * instead of zero avoids requesting a zero-sized stack for an incomplete
 * configuration.
 */
size_t Libc::Kernel::_user_stack_size()
{
	size_t const default_size = Component::stack_size();

	if (!_cloned)
		return default_size;

	size_t size = default_size;

	_libc_env.libc_config().with_sub_node("stack", [&] (Xml_node stack) {

		/* keep the default if the 'size' attribute is missing */
		size = stack.attribute_value("size",
		                             static_cast<unsigned long>(default_size)); });

	return size;
}
/**
* Schedule the suspension of the entrypoint from within the user context
*
* \param original_suspended_callback  callback that is remembered in
*                                     '_original_suspended_callback'
*
* If '_state' is not 'USER', an error message is printed and the function
* returns without scheduling anything.
*/
void Libc::Kernel::schedule_suspend(void(*original_suspended_callback) ())
{
if (_state != USER) {
error(__PRETTY_FUNCTION__, " called from non-user context");
return;
}
/*
* We hook into suspend-resume callback chain to destruct and
* reconstruct parts of the kernel from the context of the initial
* thread, i.e., without holding any object locks.
*/
_original_suspended_callback = original_suspended_callback;
_env.ep().schedule_suspend(suspended_callback, resumed_callback);
/*
* Save the user context. '_setjmp' returns zero on the direct call, in
* which case the context is marked as valid, the suspend is flagged as
* scheduled, and control is handed to the kernel. A non-zero return
* value means that the saved context got resumed, so it is marked as
* invalid again.
*/
if (!_setjmp(_user_context)) {
_valid_user_context = true;
_suspend_scheduled = true;
_switch_to_kernel();
} else {
_valid_user_context = false;
}
}
/**
* Reset the malloc heap
*
* The function constructs '_malloc_ram', destroys all registered cloned
* heap ranges, constructs a new 'Heap' object at the location of the
* existing '*_malloc_heap' object, and finally hands this heap to
* 'reinit_malloc'.
*/
void Libc::Kernel::reset_malloc_heap()
{
_malloc_ram.construct(_heap, _env.ram());
/* release the heap ranges, which were allocated from '_heap' */
_cloned_heap_ranges.for_each([&] (Registered<Cloned_malloc_heap_range> &r) {
destroy(_heap, &r); });
/*
* Construct the new heap at the address of the current heap object,
* using the just constructed '_malloc_ram' as backing store.
*/
Heap &raw_malloc_heap = *_malloc_heap;
construct_at<Heap>(&raw_malloc_heap, *_malloc_ram, _env.rm());
reinit_malloc(raw_malloc_heap);
}
/**
* Set up the initial file descriptors according to the libc configuration
*
* If '_vfs.root_dir_has_dirents()' holds, the function applies the 'cwd',
* 'stdin', 'stdout', and 'stderr' attributes as well as all '<fd>' sub nodes
* of the libc config node and preserves the IDs 0, 1, and 2. Independent of
* that condition, it constructs watch handlers for the 'info' pseudo file of
* stdout and the 'interrupts' pseudo file of stdin if these files exist.
*/
void Libc::Kernel::_init_file_descriptors()
{
/*
* Open the file named by attribute 'attr' of 'node' as libc file
* descriptor 'libc_fd' using the open 'flags'
*
* The function returns without any message if the attribute is absent,
* if 'stat' fails for the path, or if the VFS cannot open the file.
*/
auto init_fd = [&] (Xml_node const &node, char const *attr,
int libc_fd, unsigned flags)
{
if (!node.has_attribute(attr))
return;
typedef String<Vfs::MAX_PATH_LEN> Path;
Path const path = node.attribute_value(attr, Path());
/* skip paths that cannot be stat'ed */
struct stat out_stat { };
if (stat(path.string(), &out_stat) != 0)
return;
File_descriptor *fd = _vfs.open(path.string(), flags, libc_fd);
if (!fd)
return;
/* the VFS handed out a different ID than requested, give it back */
if (fd->libc_fd != libc_fd) {
error("could not allocate fd ",libc_fd," for ",path,", "
"got fd ",fd->libc_fd);
_vfs.close(fd);
return;
}
/* take over the optional 'cloexec' attribute, defaulting to false */
fd->cloexec = node.attribute_value("cloexec", false);
/*
* We need to manually register the path. Normally this is done
* by '_open'. But we call the local 'open' function directly
* because we want to explicitly specify the libc fd ID.
*/
if (fd->fd_path)
warning("may leak former FD path memory");
{
/*
* NOTE(review): presumably 'path.length()' accounts for the
* terminating zero, so the copy fits the allocation - confirm
* against the 'String' class.
*/
char *dst = (char *)_heap.alloc(path.length());
copy_cstring(dst, path.string(), path.length());
fd->fd_path = dst;
}
/* apply the optional initial seek offset if it is non-zero */
::off_t const seek = node.attribute_value("seek", 0ULL);
if (seek)
_vfs.lseek(fd, seek, SEEK_SET);
};
if (_vfs.root_dir_has_dirents()) {
Xml_node const node = _libc_env.libc_config();
typedef String<Vfs::MAX_PATH_LEN> Path;
if (node.has_attribute("cwd"))
chdir(node.attribute_value("cwd", Path()).string());
init_fd(node, "stdin", 0, O_RDONLY);
init_fd(node, "stdout", 1, O_WRONLY);
init_fd(node, "stderr", 2, O_WRONLY);
node.for_each_sub_node("fd", [&] (Xml_node fd) {
unsigned const id = fd.attribute_value("id", 0U);
bool const rd = fd.attribute_value("readable", false);
bool const wr = fd.attribute_value("writeable", false);
/* neither readable nor writeable results in the flags value 0 */
unsigned const flags = rd ? (wr ? O_RDWR : O_RDONLY)
: (wr ? O_WRONLY : 0);
/*
* 'init_fd' returns early for a missing 'path' attribute, so the
* call below has no effect after the warning was printed.
*/
if (!fd.has_attribute("path"))
warning("Invalid <fd> node, 'path' attribute is missing");
init_fd(fd, "path", id, flags);
});
/* prevent use of IDs of stdin, stdout, and stderr for other files */
for (unsigned fd = 0; fd <= 2; fd++)
file_descriptor_allocator()->preserve(fd);
}
/**
* Call 'fn' with root directory and path to ioctl pseudo file as arguments
*
* If no matching ioctl pseudo file exists, 'fn' is not called.
*/
auto with_ioctl_path = [&] (File_descriptor const *fd, char const *file, auto fn)
{
/* nothing to look up without a descriptor or without its path */
if (!fd || !fd->fd_path)
return;
Absolute_path const ioctl_dir = Vfs_plugin::ioctl_dir(*fd);
Absolute_path path = ioctl_dir;
path.append_element(file);
_vfs.with_root_dir([&] (Directory &root_dir) {
if (root_dir.file_exists(path.string()))
fn(root_dir, path.string()); });
};
/*
* Watch stdout's 'info' pseudo file to detect terminal-resize events
*/
File_descriptor const * const stdout_fd =
file_descriptor_allocator()->find_by_libc_fd(STDOUT_FILENO);
with_ioctl_path(stdout_fd, "info", [&] (Directory &root_dir, char const *path) {
_terminal_resize_handler.construct(root_dir, path, *this,
&Kernel::_handle_terminal_resize); });
/*
* Watch stdin's 'interrupts' pseudo file to detect control-c events
*/
File_descriptor const * const stdin_fd =
file_descriptor_allocator()->find_by_libc_fd(STDIN_FILENO);
with_ioctl_path(stdin_fd, "interrupts", [&] (Directory &root_dir, char const *path) {
_user_interrupt_handler.construct(root_dir, path,
*this, &Kernel::_handle_user_interrupt); });
}
/**
 * Handler for changes of stdout's 'info' pseudo file
 */
void Libc::Kernel::_handle_terminal_resize()
{
	/* charge SIGWINCH before resuming the main context */
	_signal.charge(SIGWINCH);

	_resume_main();
}
/**
 * Handler for changes of stdin's 'interrupts' pseudo file
 */
void Libc::Kernel::_handle_user_interrupt()
{
	/* charge SIGINT before resuming the main context */
	_signal.charge(SIGINT);

	_resume_main();
}
/**
* Import the application state from the parent via a clone connection
*
* The libc config node describes the memory to obtain: '<heap>' nodes denote
* heap ranges, the '<stack>' node denotes the application stack, and '<rw>'
* nodes denote read-writeable segments. Each of these nodes carries the
* range as 'at' and 'size' attributes. In addition, the user context and
* the application-heap object are fetched from the parent.
*/
void Libc::Kernel::_clone_state_from_parent()
{
/* memory range as described by the 'at' and 'size' node attributes */
struct Range { void *at; size_t size; };
/* missing attributes result in a zero address or a zero size */
auto range_attr = [&] (Xml_node node)
{
return Range {
.at = (void *)node.attribute_value("at", 0UL),
.size = node.attribute_value("size", 0UL)
};
};
/*
* Allocate local memory for the backing store of the application heap,
* mirrored from the parent.
*
* This step must precede the creation of the 'Clone_connection' because
* the shared-memory buffer of the clone session may otherwise potentially
* interfere with such a heap region.
*/
_libc_env.libc_config().for_each_sub_node("heap", [&] (Xml_node node) {
Range const range = range_attr(node);
new (_heap)
Registered<Cloned_malloc_heap_range>(_cloned_heap_ranges,
_env.ram(), _env.rm(),
range.at, range.size); });
_clone_connection.construct(_env);
/* fetch heap content */
_cloned_heap_ranges.for_each([&] (Cloned_malloc_heap_range &heap_range) {
heap_range.import_content(*_clone_connection); });
/* fetch user context of the parent's application */
_clone_connection->memory_content(&_user_context, sizeof(_user_context));
_valid_user_context = true;
_libc_env.libc_config().for_each_sub_node([&] (Xml_node node) {
/* copy the given range from the parent to the same local address */
auto copy_from_parent = [&] (Range range)
{
_clone_connection->memory_content(range.at, range.size);
};
/* clone application stack */
if (node.type() == "stack")
copy_from_parent(range_attr(node));
/* clone RW segment of a shared library or the binary */
if (node.type() == "rw") {
typedef String<64> Name;
Name const name = node.attribute_value("name", Name());
/*
* The blacklisted segments are initialized via the
* regular startup of the child.
*
* NOTE(review): the last condition presumably matches every name
* that starts with "vfs" (length-limited compare) - confirm
* against the used 'strcmp' variant.
*/
bool const blacklisted = (name == "ld.lib.so")
|| (name == "libc.lib.so")
|| (name == "libm.lib.so")
|| (name == "posix.lib.so")
|| (strcmp(name.string(), "vfs", 3) == 0);
if (!blacklisted)
copy_from_parent(range_attr(node));
}
});
/* import application-heap state from parent */
_clone_connection->object_content(_malloc_heap);
init_malloc_cloned(*_clone_connection);
}
/*
* Function pointer defined outside of this file. It is called by
* 'handle_io_progress' below whenever it is set.
*/
extern void (*libc_select_notify)();
/**
 * React to I/O progress
 *
 * Nothing happens unless '_io_ready' is set. Otherwise, the flag is reset,
 * the select notifier is called if one is installed, and all contexts are
 * resumed.
 */
void Libc::Kernel::handle_io_progress()
{
	/*
	 * TODO: make VFS I/O completion checks during
	 * kernel time to avoid flapping between stacks
	 */
	if (!_io_ready)
		return;

	_io_ready = false;

	/* some contexts may have been deblocked from select() */
	if (libc_select_notify != nullptr)
		libc_select_notify();

	/*
	 * resume all as any VFS context may have
	 * been deblocked from blocking I/O
	 */
	Kernel::resume_all();
}
/**
 * Execute 'app_code' in the application context
 *
 * \param app_code  code to execute
 */
void Libc::execute_in_application_context(Application_code &app_code)
{
	Kernel &kernel = Kernel::kernel();

	/*
	 * The libc execution model builds on the main entrypoint, which handles
	 * all relevant signals (e.g., timing and VFS). Additional component
	 * entrypoints or pthreads should never call with_libc() but we catch this
	 * here and just execute the application code directly.
	 */
	if (!kernel.main_context()) {
		app_code.execute();
		return;
	}

	/* tracks whether a call of this function is already in flight */
	static bool nested = false;

	/* outermost call: let the kernel run the code */
	if (!nested) {
		nested = true;
		kernel.run(app_code);
		nested = false;
		return;
	}

	/*
	 * Nested call: use the kernel's nested execution while the main context
	 * is suspended, otherwise execute the code directly.
	 */
	if (kernel.main_suspended())
		kernel.nested_execution(app_code);
	else
		app_code.execute();
}
/**
 * Close all file descriptors that are still open
 *
 * Registered via 'atexit' by the 'Kernel' constructor.
 */
static void close_file_descriptors_on_exit()
{
	/* yields -1 once no open file descriptor is left */
	auto next_open_fd = [] {
		return Libc::file_descriptor_allocator()->any_open_fd(); };

	for (int fd = next_open_fd(); fd != -1; fd = next_open_fd())
		close(fd);
}
/**
* Constructor
*
* \param env   Genode environment, kept in '_env'
* \param heap  allocator, kept in '_heap'
*
* The constructor initializes the libc subsystems, sets up the malloc heap
* (either cloned from the parent or freshly constructed), initializes the
* file descriptors, and finally publishes the instance via '_kernel_ptr'.
*/
Libc::Kernel::Kernel(Genode::Env &env, Genode::Allocator &heap)
:
_env(env), _heap(heap)
{
/* close all remaining file descriptors when the program exits */
atexit(close_file_descriptors_on_exit);
init_semaphore_support(_timer_accessor);
init_pthread_support(*this, *this, _timer_accessor);
_env.ep().register_io_progress_handler(*this);
/*
* A cloned component obtains its malloc heap from the parent whereas a
* regular component constructs a new one.
*/
if (_cloned) {
_clone_state_from_parent();
} else {
_malloc_heap.construct(*_malloc_ram, _env.rm());
init_malloc(*_malloc_heap);
}
init_fork(_env, _libc_env, _heap, *_malloc_heap, _pid, *this, *this, _signal,
*this, _binary_name);
init_execve(_env, _heap, _user_stack, *this, _binary_name,
*file_descriptor_allocator());
init_plugin(*this);
init_sleep(*this);
init_vfs_plugin(*this);
init_time(*this, _rtc_path, *this);
init_select(*this, *this, *this, _signal);
init_socket_fs(*this);
init_passwd(_passwd_config());
init_signal(_signal);
_init_file_descriptors();
/* make the instance available via the static kernel pointer */
_kernel_ptr = this;
/*
* Acknowledge the completion of 'fork' to the parent
*
* This must be done after '_init_file_descriptors' to ensure that pipe FDs
* of the parent are opened at the child before the parent continues.
* Otherwise, the parent would potentially proceed with closing the pipe
* FDs before the child has a chance to open them. In this situation, the
* pipe reference counter may reach an intermediate value of zero,
* triggering the destruction of the pipe.
*/
if (_cloned)
_clone_connection.destruct();
}