init: health monitoring of child components

Fixes #3039
This commit is contained in:
Norman Feske 2018-11-14 16:19:30 +01:00 committed by Christian Helmuth
parent d56a7beadc
commit 19d7a488de
24 changed files with 384 additions and 11 deletions

View File

@ -38,7 +38,8 @@ SRC_CC += stack_area.cc \
signal_transmitter_proxy.cc \
signal_receiver.cc \
thread_start.cc \
trace_session_component.cc
trace_session_component.cc \
heartbeat.cc
INC_DIR += $(REP_DIR)/src/core/include \
$(GEN_CORE_DIR)/include \
@ -77,3 +78,4 @@ vpath dump_alloc.cc $(GEN_CORE_DIR)
vpath stack_area.cc $(GEN_CORE_DIR)
vpath pager_ep.cc $(GEN_CORE_DIR)
vpath platform_rom_modules.cc $(GEN_CORE_DIR)
vpath heartbeat.cc $(GEN_CORE_DIR)

View File

@ -38,7 +38,8 @@ SRC_CC += stack_area.cc \
signal_transmitter_proxy.cc \
signal_receiver.cc \
thread_start.cc \
trace_session_component.cc
trace_session_component.cc \
heartbeat.cc
INC_DIR += $(REP_DIR)/src/core/include \
$(GEN_CORE_DIR)/include \
@ -70,5 +71,6 @@ vpath signal_receiver.cc $(GEN_CORE_DIR)
vpath core_rpc_cap_alloc.cc $(GEN_CORE_DIR)
vpath core_region_map.cc $(GEN_CORE_DIR)
vpath platform_rom_modules.cc $(GEN_CORE_DIR)
vpath heartbeat.cc $(GEN_CORE_DIR)
vpath %.cc $(REP_DIR)/src/core
vpath %.cc $(REP_DIR)/src/lib/base

View File

@ -64,6 +64,7 @@ SRC_CC += kernel/object.cc
SRC_CC += init_main_thread.cc
SRC_CC += capability.cc
SRC_CC += stack_area_addr.cc
SRC_CC += heartbeat.cc
# provide Genode version information
include $(BASE_DIR)/src/core/version.inc

View File

@ -33,6 +33,7 @@ SRC_CC = main.cc \
core_log_out.cc \
default_log.cc \
env_reinit.cc \
heartbeat.cc \
thread.cc thread_myself.cc
INC_DIR += $(REP_DIR)/src/core/include \
@ -65,6 +66,7 @@ vpath signal_receiver.cc $(GEN_CORE_DIR)
vpath trace_session_component.cc $(GEN_CORE_DIR)
vpath core_rpc_cap_alloc.cc $(GEN_CORE_DIR)
vpath default_log.cc $(GEN_CORE_DIR)
vpath heartbeat.cc $(GEN_CORE_DIR)
vpath thread.cc $(BASE_DIR)/src/lib/base
vpath thread_myself.cc $(BASE_DIR)/src/lib/base
vpath trace.cc $(BASE_DIR)/src/lib/base

View File

@ -40,7 +40,8 @@ SRC_CC += stack_area.cc \
bios_data_area.cc \
trace_session_component.cc \
signal_transmitter_noinit.cc \
signal_receiver.cc
signal_receiver.cc \
heartbeat.cc
INC_DIR = $(REP_DIR)/src/core/include \
$(REP_DIR)/src/include \
@ -73,4 +74,5 @@ vpath dump_alloc.cc $(GEN_CORE_DIR)
vpath platform_rom_modules.cc $(GEN_CORE_DIR)
vpath platform_services.cc $(GEN_CORE_DIR)/spec/x86
vpath stack_area.cc $(GEN_CORE_DIR)
vpath heartbeat.cc $(GEN_CORE_DIR)
vpath %.cc $(REP_DIR)/src/core

View File

@ -41,7 +41,8 @@ SRC_CC += stack_area.cc \
signal_transmitter_proxy.cc \
signal_receiver.cc \
thread_start.cc \
trace_session_component.cc
trace_session_component.cc \
heartbeat.cc
INC_DIR += $(REP_DIR)/src/core/include $(GEN_CORE_DIR)/include \
$(REP_DIR)/src/include $(GEN_SRC_DIR)/include
@ -75,3 +76,4 @@ vpath default_log.cc $(GEN_CORE_DIR)
vpath stack_area.cc $(GEN_CORE_DIR)
vpath pager_ep.cc $(GEN_CORE_DIR)
vpath platform_rom_modules.cc $(GEN_CORE_DIR)
vpath heartbeat.cc $(GEN_CORE_DIR)

View File

@ -39,7 +39,8 @@ SRC_CC = stack_area.cc \
signal_transmitter_proxy.cc \
signal_receiver.cc \
thread_start.cc \
trace_session_component.cc
trace_session_component.cc \
heartbeat.cc
INC_DIR += $(REP_DIR)/src/core/include $(GEN_CORE_DIR)/include \
$(REP_DIR)/src/include $(GEN_SRC_DIR)/include
@ -73,3 +74,4 @@ vpath core_region_map.cc $(GEN_CORE_DIR)
vpath stack_area.cc $(GEN_CORE_DIR)
vpath pager_ep.cc $(GEN_CORE_DIR)
vpath platform_rom_modules.cc $(GEN_CORE_DIR)
vpath heartbeat.cc $(GEN_CORE_DIR)

View File

@ -21,7 +21,8 @@ GEN_SRC_CC += \
rom_session_component.cc \
signal_receiver.cc \
signal_transmitter_proxy.cc \
trace_session_component.cc
trace_session_component.cc \
heartbeat.cc
REP_SRC_CC += \
capability_space.cc \

View File

@ -312,11 +312,15 @@ class Genode::Child : protected Rpc_object<Parent>,
Signal_context_capability _resource_avail_sigh { };
Signal_context_capability _yield_sigh { };
Signal_context_capability _session_sigh { };
Signal_context_capability _heartbeat_sigh { };
/* arguments fetched by the child in response to a yield signal */
Lock _yield_request_lock { };
Resource_args _yield_request_args { };
/* number of unanswered heartbeat signals */
unsigned _outstanding_heartbeats = 0;
/* sessions opened by the child */
Id_space<Client> _id_space { };
@ -799,6 +803,17 @@ class Genode::Child : protected Rpc_object<Parent>,
*/
void notify_resource_avail() const;
/**
* Notify the child to give a sign of life
*/
void heartbeat();
/**
* Return number of missing heartbeats since the last response from
* the child
*/
unsigned skipped_heartbeats() const;
/**********************
** Parent interface **
@ -820,6 +835,8 @@ class Genode::Child : protected Rpc_object<Parent>,
void yield_sigh(Signal_context_capability) override;
Resource_args yield_request() override;
void yield_response() override;
void heartbeat_sigh(Signal_context_capability) override;
void heartbeat_response() override;
};
#endif /* _INCLUDE__BASE__CHILD_H_ */

View File

@ -69,6 +69,11 @@ struct Genode::Parent_client : Rpc_client<Parent>
Resource_args yield_request() override { return call<Rpc_yield_request>(); }
void yield_response() override { call<Rpc_yield_response>(); }
/* hand the heartbeat signal handler to the parent via the 'Rpc_heartbeat_sigh' RPC */
void heartbeat_sigh(Signal_context_capability sigh) override {
call<Rpc_heartbeat_sigh>(sigh); }
/* answer a heartbeat signal by issuing the 'Rpc_heartbeat_response' RPC */
void heartbeat_response() override { call<Rpc_heartbeat_response>(); }
};
#endif /* _INCLUDE__PARENT__CLIENT_H_ */

View File

@ -280,6 +280,25 @@ class Genode::Parent
*/
virtual void yield_response() = 0;
/*
* Health monitoring
*/
/**
* Register heartbeat handler
*
* The parent may issue heartbeat signals to the child at any time
* and expects a call of the 'heartbeat_response' RPC function as
* response. When observing the RPC call, the parent infers that the
* child is still able to respond to external events.
*/
virtual void heartbeat_sigh(Signal_context_capability sigh) = 0;
/**
* Deliver response to a heartbeat signal
*/
virtual void heartbeat_response() = 0;
/*********************
** RPC declaration **
@ -315,13 +334,16 @@ class Genode::Parent
GENODE_RPC(Rpc_yield_sigh, void, yield_sigh, Signal_context_capability);
GENODE_RPC(Rpc_yield_request, Resource_args, yield_request);
GENODE_RPC(Rpc_yield_response, void, yield_response);
GENODE_RPC(Rpc_heartbeat_sigh, void, heartbeat_sigh, Signal_context_capability);
GENODE_RPC(Rpc_heartbeat_response, void, heartbeat_response);
GENODE_RPC_INTERFACE(Rpc_exit, Rpc_announce, Rpc_session_sigh,
Rpc_session, Rpc_session_cap, Rpc_upgrade,
Rpc_close, Rpc_session_response, Rpc_main_thread,
Rpc_deliver_session_cap, Rpc_resource_avail_sigh,
Rpc_resource_request, Rpc_yield_sigh,
Rpc_yield_request, Rpc_yield_response);
Rpc_yield_request, Rpc_yield_response,
Rpc_heartbeat_sigh, Rpc_heartbeat_response);
};

View File

@ -1,6 +1,6 @@
SRC_CC += log_console.cc default_log.cc
SRC_CC += env_deprecated.cc stack_area.cc env_reinit.cc main_thread_cap.cc
SRC_CC += rpc_cap_alloc.cc
SRC_CC += rpc_cap_alloc.cc heartbeat.cc
vpath %.cc $(REP_DIR)/src/lib/base
vpath %.cc $(BASE_DIR)/src/lib/base

View File

@ -236,6 +236,8 @@ _ZN6Genode4SlabD2Ev T
_ZN6Genode5AlarmD0Ev T
_ZN6Genode5AlarmD1Ev T
_ZN6Genode5AlarmD2Ev T
_ZN6Genode5Child9heartbeatEv T
_ZNK6Genode5Child18skipped_heartbeatsEv T
_ZN6Genode5Child10yield_sighENS_10CapabilityINS_14Signal_contextEEE T
_ZN6Genode5Child11session_capENS_8Id_spaceINS_6Parent6ClientEE2IdE T
_ZN6Genode5Child12session_sighENS_10CapabilityINS_14Signal_contextEEE T
@ -557,7 +559,7 @@ _ZTVN6Genode4HeapE D 72
_ZTVN6Genode4SlabE D 72
_ZTVN6Genode5AlarmE D 40
_ZTVN6Genode5Child14Initial_threadE D 48
_ZTVN6Genode5ChildE D 408
_ZTVN6Genode5ChildE D 440
_ZTVN6Genode6OutputE D 48
_ZTVN6Genode6ThreadE D 48
_ZTVN6Genode7ConsoleE D 48

View File

@ -0,0 +1,23 @@
/*
* \brief Omit heartbeat monitoring because core has no parent
* \author Norman Feske
* \date 2018-11-15
*/
/*
* Copyright (C) 2018 Genode Labs GmbH
*
* This file is part of the Genode OS framework, which is distributed
* under the terms of the GNU Affero General Public License version 3.
*/
/* Genode includes */
#include <base/env.h>
/* base-internal includes */
#include <base/internal/globals.h>
/* intentionally empty: core has no parent, so heartbeat monitoring is omitted */
void Genode::init_heartbeat_monitoring(Env &) { }
/* intentionally empty: core never sets up heartbeat monitoring, nothing to tear down */
void Genode::deinit_heartbeat_monitoring() { }

View File

@ -37,6 +37,8 @@ namespace Genode {
void init_root_proxy(Env &);
void init_log();
void init_parent_resource_requests(Env &);
void init_heartbeat_monitoring(Env &);
void deinit_heartbeat_monitoring();
void exec_static_constructors();
void destroy_signal_thread();

View File

@ -721,6 +721,42 @@ Parent::Resource_args Child::yield_request()
void Child::yield_response() { _policy.yield_response(); }
void Child::heartbeat()
{
	/*
	 * A heartbeat request is issued only once the component has registered
	 * a handler. As long as no valid handler is known, no request is counted
	 * as outstanding and no signal is sent.
	 */
	if (_heartbeat_sigh.valid()) {

		/* account for the request before notifying the child */
		++_outstanding_heartbeats;

		Signal_transmitter(_heartbeat_sigh).submit();
	}
}
unsigned Child::skipped_heartbeats() const
{
	/*
	 * A single outstanding heartbeat is tolerated because the child needs
	 * some time to respond to the heartbeat signal. Only once a second (or
	 * later) signal was triggered without any response in between, the
	 * earlier ones are regarded as skipped.
	 */
	if (_outstanding_heartbeats <= 1)
		return 0;

	return _outstanding_heartbeats - 1;
}
/*
 * Remember the signal context that 'heartbeat' uses for issuing heartbeat
 * requests. While the stored capability is invalid, 'heartbeat' returns
 * without issuing a request.
 */
void Child::heartbeat_sigh(Signal_context_capability sigh)
{
_heartbeat_sigh = sigh;
}
void Child::heartbeat_response()
{
	/* any response clears all heartbeats issued so far */
	_outstanding_heartbeats = 0;
}
namespace {
/**

View File

@ -146,6 +146,7 @@ void Entrypoint::_process_incoming_signals()
_suspend_dispatcher.destruct();
_sig_rec.destruct();
dissolve(_signal_proxy);
deinit_heartbeat_monitoring();
_signal_proxy_cap = Capability<Signal_proxy>();
_rpc_ep.destruct();
destroy_signal_thread();
@ -156,6 +157,7 @@ void Entrypoint::_process_incoming_signals()
init_signal_thread(_env);
_rpc_ep.construct(&_env.pd(), Component::stack_size(), initial_ep_name());
init_heartbeat_monitoring(_env);
_signal_proxy_cap = manage(_signal_proxy);
_sig_rec.construct();
@ -289,6 +291,8 @@ namespace {
*/
init_parent_resource_requests(env);
init_heartbeat_monitoring(env);
Component::construct(env);
}
};

View File

@ -0,0 +1,71 @@
/*
* \brief Heartbeat monitoring support
* \author Norman Feske
* \date 2018-11-15
*/
/*
* Copyright (C) 2018 Genode Labs GmbH
*
* This file is part of the Genode OS framework, which is distributed
* under the terms of the GNU Affero General Public License version 3.
*/
/* Genode includes */
#include <base/env.h>
#include <base/log.h>
/* base-internal includes */
#include <base/internal/globals.h>
#include <base/internal/unmanaged_singleton.h>
using namespace Genode;
namespace {
/*
 * Respond to heartbeat requests from the parent
 *
 * The handler registers its signal handler at the parent on construction
 * and replaces it by an invalid capability on destruction.
 */
struct Heartbeat_handler
{
Env &_env;
/* answer each heartbeat signal by calling the parent's 'heartbeat_response' */
void _handle() { _env.parent().heartbeat_response(); }
/* must be declared after '_env' because its initializer uses '_env.ep()' */
Io_signal_handler<Heartbeat_handler> _handler {
_env.ep(), *this, &Heartbeat_handler::_handle };
Heartbeat_handler(Env &env) : _env(env)
{
_env.parent().heartbeat_sigh(_handler);
}
~Heartbeat_handler()
{
_env.parent().heartbeat_sigh(Signal_context_capability());
}
};
}
/* heartbeat handler of the component, nullptr while monitoring is not initialized */
static Constructible<Heartbeat_handler> *_heartbeat_handler_ptr = nullptr;
void Genode::init_heartbeat_monitoring(Env &env)
{
	/* repeated calls are no-ops as long as the handler is in place */
	if (_heartbeat_handler_ptr != nullptr)
		return;

	using Handler_slot = Constructible<Heartbeat_handler>;

	/* publish the slot first, then construct the handler within it */
	_heartbeat_handler_ptr = unmanaged_singleton<Handler_slot>();
	_heartbeat_handler_ptr->construct(env);
}
void Genode::deinit_heartbeat_monitoring()
{
	/* only act if 'init_heartbeat_monitoring' installed a handler before */
	if (_heartbeat_handler_ptr != nullptr) {

		_heartbeat_handler_ptr->destruct();

		/* allow a later 'init_heartbeat_monitoring' to start over */
		_heartbeat_handler_ptr = nullptr;
	}
}

View File

@ -1400,6 +1400,69 @@
<sleep ms="150"/>
<message string="heartbeat monitoring"/>
<init_config version="flaky component is responsive">
<report/>
<heartbeat rate_ms="100"/>
<parent-provides>
<service name="ROM"/>
<service name="CPU"/>
<service name="PD"/>
<service name="LOG"/>
</parent-provides>
<default caps="100"/>
<start name="flaky">
<binary name="dummy"/>
<heartbeat/>
<resource name="RAM" quantum="2M"/>
<config>
<log string="started"/>
<!-- respond to heartbeats -->
</config>
<route> <any-service> <parent/> </any-service> </route>
</start>
</init_config>
<expect_log string="[init -> flaky] started"/>
<sleep ms="1000"/>
<expect_init_state>
<node name="child">
<attribute name="name" value="flaky"/>
<!-- 'skipped_heartbeats' attribute must not be present -->
<attribute name="skipped_heartbeats" value=""/>
</node>
</expect_init_state>
<init_config version="flaky component freezes">
<report/>
<heartbeat rate_ms="100"/>
<parent-provides>
<service name="ROM"/>
<service name="CPU"/>
<service name="PD"/>
<service name="LOG"/>
</parent-provides>
<default caps="100"/>
<start name="flaky">
<binary name="dummy"/>
<heartbeat/>
<resource name="RAM" quantum="2M"/>
<config version="freeze">
<log string="getting stuck..."/>
<sleep_forever/> <!-- heartbeat stops -->
</config>
<route> <any-service> <parent/> </any-service> </route>
</start>
</init_config>
<expect_log string="[init -> flaky] getting stuck..."/>
<sleep ms="500"/>
<expect_init_state>
<node name="child">
<attribute name="name" value="flaky"/>
<attribute name="skipped_heartbeats" higher="0"/>
</node>
</expect_init_state>
<message string="test destruction of async env sessions"/>
<init_config version="async env session">

View File

@ -332,6 +332,9 @@ struct Dummy::Main
_timer->msleep(node.attribute_value("ms", 100UL));
}
if (node.type() == "sleep_forever")
sleep_forever();
if (node.type() == "log")
log(node.attribute_value("string", String<50>()));

View File

@ -135,6 +135,8 @@ Init::Child::apply_config(Xml_node start_node)
*/
_binary_name = _binary_from_xml(start_node, _unique_name);
_heartbeat_enabled = start_node.has_sub_node("heartbeat");
/* import new start node */
_start_node.construct(_alloc, start_node);
}
@ -349,6 +351,9 @@ void Init::Child::report_state(Xml_generator &xml, Report_detail const &detail)
if (_exited)
xml.attribute("exited", _exit_value);
if (_heartbeat_enabled && _child.skipped_heartbeats())
xml.attribute("skipped_heartbeats", _child.skipped_heartbeats());
if (detail.child_ram() && _child.ram_session_cap().valid()) {
xml.node("ram", [&] () {
@ -730,6 +735,7 @@ Init::Child::Child(Env &env,
_ram_limit_accessor(ram_limit_accessor),
_cap_limit_accessor(cap_limit_accessor),
_name_registry(name_registry),
_heartbeat_enabled(start_node.has_sub_node("heartbeat")),
_resources(_resources_from_start_node(start_node, prio_levels, affinity_space,
default_caps_accessor.default_caps(), cap_limit)),
_resources_clamped_to_limit((_clamp_resources(ram_limit, cap_limit), true)),

View File

@ -132,6 +132,9 @@ class Init::Child : Child_policy, Routed_service::Wakeup
/* updated on configuration update */
Binary_name _binary_name { _binary_from_xml(_start_node->xml(), _unique_name) };
/* initialized in constructor, updated by 'apply_config' */
bool _heartbeat_enabled;
/**
* Resources assigned to the child
*/
@ -527,6 +530,17 @@ class Init::Child : Child_policy, Routed_service::Wakeup
void apply_upgrade();
void apply_downgrade();
/**
 * Request a heartbeat from the child if monitoring is enabled for it
 */
void heartbeat()
{
	if (!_heartbeat_enabled)
		return;

	_child.heartbeat();
}
/**
 * Return number of heartbeats skipped by the child, 0 if monitoring is
 * not enabled for it
 */
unsigned skipped_heartbeats() const
{
	if (!_heartbeat_enabled)
		return 0;

	return _child.skipped_heartbeats();
}
void report_state(Xml_generator &xml, Report_detail const &detail) const;

View File

@ -0,0 +1,89 @@
/*
* \brief Heartbeat monitoring
* \author Norman Feske
* \date 2018-11-15
*/
/*
* Copyright (C) 2018 Genode Labs GmbH
*
* This file is part of the Genode OS framework, which is distributed
* under the terms of the GNU Affero General Public License version 3.
*/
#ifndef _SRC__INIT__HEARTBEAT_H_
#define _SRC__INIT__HEARTBEAT_H_
/* local includes */
#include <state_reporter.h>
#include <child_registry.h>
#include <util/noncopyable.h>
namespace Init { class Heartbeat; }
class Init::Heartbeat : Genode::Noncopyable
{
	private:

		Env &_env;

		Child_registry &_children;

		Report_update_trigger &_report_update_trigger;

		/* constructed only while the config contains a <heartbeat> node */
		Constructible<Timer::Connection> _timer { };

		/* heartbeat period last programmed, reset to 0 when disabled */
		unsigned _rate_ms = 0;

		Signal_handler<Heartbeat> _timer_handler;

		/**
		 * Timer-signal handler
		 *
		 * Checks each child for skipped heartbeats, issues the next
		 * heartbeat request to it, and triggers a report update if at
		 * least one child skipped a heartbeat.
		 */
		void _handle_timer()
		{
			bool report_needed = false;

			_children.for_each_child([&] (Child &child) {

				/* evaluate the state before the next request is counted */
				if (child.skipped_heartbeats() != 0)
					report_needed = true;

				child.heartbeat();
			});

			if (!report_needed)
				return;

			_report_update_trigger.trigger_report_update();
		}

	public:

		Heartbeat(Env &env, Child_registry &children,
		          Report_update_trigger &report_update_trigger)
		:
			_env(env), _children(children),
			_report_update_trigger(report_update_trigger),
			_timer_handler(_env.ep(), *this, &Heartbeat::_handle_timer)
		{ }

		/**
		 * Apply the <heartbeat> node of the given config
		 *
		 * The timer exists only while the node is present. The periodic
		 * timeout is programmed whenever the 'rate_ms' attribute (default
		 * 1000) differs from the rate currently in effect.
		 */
		void apply_config(Xml_node config)
		{
			bool const enabled = config.has_sub_node("heartbeat");

			_timer.conditional(enabled, _env);

			if (enabled) {

				unsigned const configured_ms =
					config.sub_node("heartbeat").attribute_value("rate_ms", 1000UL);

				/* keep the running timer untouched if the rate is unchanged */
				if (configured_ms == _rate_ms)
					return;

				_rate_ms = configured_ms;
				_timer->sigh(_timer_handler);
				_timer->trigger_periodic(_rate_ms*1000);

			} else {

				/* forget the rate so that re-enabling programs the timer anew */
				_rate_ms = 0;
			}
		}
};
#endif /* _SRC__INIT__HEARTBEAT_H_ */

View File

@ -16,11 +16,10 @@
#include <base/attached_rom_dataspace.h>
/* local includes */
#include <child_registry.h>
#include <child.h>
#include <alias.h>
#include <state_reporter.h>
#include <server.h>
#include <heartbeat.h>
namespace Init { struct Main; }
@ -141,6 +140,8 @@ struct Init::Main : State_reporter::Producer,
State_reporter _state_reporter { _env, *this };
Heartbeat _heartbeat { _env, _children, _state_reporter };
Signal_handler<Main> _resource_avail_handler {
_env.ep(), *this, &Main::_handle_resource_avail };
@ -304,6 +305,7 @@ void Init::Main::_handle_config()
_verbose.construct(_config_xml);
_state_reporter.apply_config(_config_xml);
_heartbeat.apply_config(_config_xml);
/* determine default route for resolving service requests */
try {