genode/base-nova/src/core/platform.cc
Norman Feske 5fe29e8e4a Express affinities via Cartesian coordinates
This patch introduces new types for expressing CPU affinities. Instead
of dealing with physical CPU numbers, affinities are expressed as
rectangles in a grid of virtual CPU nodes. This clears the way to
conveniently assign sets of adjacent CPUs to subsystems, each of them
managing their respective viewport of the coordinate space.

By using 2D Cartesian coordinates, the locality of CPU nodes can be
modeled for different topologies such as SMP (simple Nx1 grid), grids of
NUMA nodes, or ring topologies.
2013-08-13 17:08:24 +02:00

658 lines
20 KiB
C++

/*
* \brief Platform interface implementation
* \author Norman Feske
* \author Sebastian Sumpf
* \author Alexander Boettcher
* \date 2009-10-02
*/
/*
* Copyright (C) 2009-2013 Genode Labs GmbH
*
* This file is part of the Genode OS framework, which is distributed
* under the terms of the GNU General Public License version 2.
*/
/* Genode includes */
#include <base/printf.h>
#include <base/sleep.h>
#include <base/thread.h>
#include <base/cap_sel_alloc.h>
/* core includes */
#include <core_parent.h>
#include <platform.h>
#include <nova_util.h>
#include <util.h>
/* NOVA includes */
#include <nova/syscalls.h>
using namespace Genode;
using namespace Nova;
enum { verbose_boot_info = true };
Native_utcb *main_thread_utcb();
/**
* Initial value of esp register, saved by the crt0 startup code.
* This value contains the address of the hypervisor information page.
*/
extern addr_t __initial_sp;
/**
* Pointer to the UTCB of the main thread
*/
Utcb *__main_thread_utcb;
/**
* Virtual address range consumed by core's program image
*/
extern unsigned _prog_img_beg, _prog_img_end;
/**
* Capability selector of root PD
*/
addr_t __core_pd_sel;
/**
* Returns true if the phys address intersects with some other reserved area.
*/
static bool intersects(addr_t const phys, const Hip::Mem_desc * mem_desc_const,
size_t const num_mem_desc)
{
/* check whether phys intersects with some reserved area */
Hip::Mem_desc const * mem_desc = mem_desc_const;
for (unsigned i = 0; i < num_mem_desc; i++, mem_desc++) {
if (mem_desc->type == Hip::Mem_desc::AVAILABLE_MEMORY) continue;
if (mem_desc->addr > ~0UL) continue;
addr_t base = trunc_page(mem_desc->addr);
size_t size;
/* truncate size if base+size larger then natural 32/64 bit boundary */
if (mem_desc->addr >= ~0UL - mem_desc->size + 1)
size = round_page(~0UL - mem_desc->addr + 1);
else
size = round_page(mem_desc->addr + mem_desc->size) - base;
if (base <= phys && phys < base + size)
return true;
}
/* check whether phys is part of available memory */
mem_desc = mem_desc_const;
for (unsigned i = 0; i < num_mem_desc; i++, mem_desc++) {
if (mem_desc->type != Hip::Mem_desc::AVAILABLE_MEMORY) continue;
if (mem_desc->addr > ~0UL) continue;
addr_t base = trunc_page(mem_desc->addr);
size_t size;
/* truncate size if base+size larger then natural 32/64 bit boundary */
if (mem_desc->addr >= ~0UL - mem_desc->size + 1)
size = round_page(~0UL - mem_desc->addr + 1);
else
size = round_page(mem_desc->addr + mem_desc->size) - base;
if (base <= phys && phys < base + size)
return false;
}
return true;
}
/**
 * Map physical pages into core's virtual address space
 *
 * \param phys_page   physical page frame number of the first page
 * \param pages       number of pages to map read/write/execute
 * \param extra_page  if true, map one additional page (read-only) behind
 *                    the main range, e.g., for a string crossing the last
 *                    page boundary
 *
 * \return core-local virtual address of the mapping, or 0 on failure
 *
 * The virtual range is reserved as one contiguous area but then allocated
 * as two separate regions so that the extra page can be freed independently
 * later (see '_unmap_page').
 */
addr_t Platform::_map_page(addr_t const phys_page, addr_t const pages,
                           bool const extra_page)
{
	addr_t const phys_addr  = phys_page << get_page_size_log2();
	addr_t const size       = pages << get_page_size_log2();
	addr_t const size_extra = size + (extra_page ? get_page_size() : 0);

	/* try to reserve contiguous virtual area */
	void * core_local_ptr = 0;
	if (!region_alloc()->alloc(size_extra, &core_local_ptr))
		return 0;

	/* free the virtual area, but re-allocate it later in two pieces */
	region_alloc()->free(core_local_ptr, size_extra);

	addr_t const core_local_addr = reinterpret_cast<addr_t>(core_local_ptr);

	/* allocate two separate regions - first part */
	if (region_alloc()->alloc_addr(size, core_local_addr).is_error())
		return 0;

	/* allocate two separate regions - second part */
	if (extra_page) {
		/* second region can be freed separately now, if unneeded */
		if (region_alloc()->alloc_addr(get_page_size(),
		                               core_local_addr + size).is_error()) {
			/* don't leak the already allocated first region */
			region_alloc()->free(reinterpret_cast<void *>(core_local_addr), size);
			return 0;
		}
	}

	/* map first part */
	int res = map_local(__main_thread_utcb, phys_addr, core_local_addr, pages,
	                    Nova::Rights(true, true, true), true);

	/* map second part - if requested */
	if (!res && extra_page)
		res = map_local(__main_thread_utcb, phys_addr + size,
		                core_local_addr + size, 1,
		                Nova::Rights(true, false, false), true);

	if (res) {
		/* roll back: remove partial mapping and release virtual regions */
		unmap_local(__main_thread_utcb, core_local_addr,
		            pages + (extra_page ? 1 : 0));
		region_alloc()->free(reinterpret_cast<void *>(core_local_addr), size);
		if (extra_page)
			region_alloc()->free(reinterpret_cast<void *>(core_local_addr + size),
			                     get_page_size());
		return 0;
	}

	return core_local_addr;
}
/**
 * Revert a mapping established via '_map_page'
 *
 * \param phys   physical base address handed back to the RAM allocator,
 *               or 0 if the physical backing must stay reserved
 * \param virt   core-local virtual base address of the mapping
 * \param pages  number of pages to unmap
 */
void Platform::_unmap_page(addr_t const phys, addr_t const virt,
                           addr_t const pages)
{
	addr_t const num_bytes = pages << get_page_size_log2();

	/* remove the mapping from core's address space */
	unmap_local(__main_thread_utcb, virt, pages);

	/* return the virtual range to the region allocator */
	region_alloc()->free(reinterpret_cast<void *>(virt), num_bytes);

	/* return the physical range to the RAM allocator, if requested */
	if (phys)
		ram_alloc()->add_range(phys, num_bytes);
}
/*****************************
** Core page-fault handler **
*****************************/
enum { CORE_PAGER_UTCB_ADDR = 0xbff02000 };
/**
 * IDC handler for the page-fault portal
 *
 * Invoked whenever a core-local thread raises an unresolved page fault.
 * Core cannot recover from faults in its own address space, so the fault
 * is reported together with a best-effort backtrace, and the handler
 * blocks forever afterwards.
 */
static void page_fault_handler()
{
	/* UTCB of the core-pager EC, holds the kernel's fault message */
	Utcb *utcb = (Utcb *)CORE_PAGER_UTCB_ADDR;

	/* NOTE(review): qual[1] = fault address, qual[0] = error code -
	   presumably per the NOVA page-fault message layout; confirm */
	addr_t pf_addr = utcb->qual[1];
	addr_t pf_ip   = utcb->ip;
	addr_t pf_sp   = utcb->sp;
	addr_t pf_type = utcb->qual[0];

	print_page_fault("\nPAGE-FAULT IN CORE", pf_addr, pf_ip,
	                 (Genode::Rm_session::Fault_type)pf_type, ~0UL);

	/* dump stack trace */
	struct Core_img
	{
		addr_t  _beg;  /* start of core's program image */
		addr_t  _end;  /* end of the range considered valid code addresses */
		addr_t *_ip;   /* current scan position on the faulting stack */

		Core_img(addr_t sp)
		{
			extern addr_t _dtors_end;
			_beg = (addr_t)&_prog_img_beg;
			_end = (addr_t)&_dtors_end;

			/* scan upwards from the fault SP until a plausible return address */
			_ip = (addr_t *)sp;
			for (;!ip_valid(); _ip++) {}
		}

		addr_t *ip() { return _ip; }

		/* follow the saved frame pointer one call frame up the stack */
		void next_ip() { _ip = ((addr_t *)*(_ip - 1)) + 1;}

		/* a stack slot counts as code address if it points into core's image */
		bool ip_valid() { return (*_ip >= _beg) && (*_ip < _end); }
	};

	int count = 1;
	printf(" #%d %08lx %08lx\n", count++, pf_sp, pf_ip);

	Core_img dump(pf_sp);
	while (dump.ip_valid()) {
		printf(" #%d %p %08lx\n", count++, dump.ip(), *dump.ip());
		dump.next_ip();
	}

	sleep_forever();
}
/**
 * Return initial stack pointer for the core-pager EC
 *
 * The stack is a function-local static buffer; the returned address points
 * to its last machine-word-sized slot.
 */
static addr_t core_pager_stack_top()
{
	enum { STACK_SIZE = 4*1024 };
	static char stack[STACK_SIZE];
	return reinterpret_cast<addr_t>(stack) + STACK_SIZE - sizeof(addr_t);
}
/**
* Startup handler for core threads
*/
static void startup_handler()
{
Utcb *utcb = (Utcb *)CORE_PAGER_UTCB_ADDR;
/* initial IP is on stack */
utcb->ip = *reinterpret_cast<addr_t *>(utcb->sp);
utcb->mtd = Mtd::EIP | Mtd::ESP;
utcb->set_msg_word(0);
reply((void*)core_pager_stack_top());
}
static void init_core_page_fault_handler()
{
/* create echo EC */
enum {
GLOBAL = false,
EXC_BASE = 0
};
addr_t ec_sel = cap_selector_allocator()->alloc();
uint8_t ret = create_ec(ec_sel, __core_pd_sel, boot_cpu(),
CORE_PAGER_UTCB_ADDR, core_pager_stack_top(),
EXC_BASE, GLOBAL);
if (ret)
PDBG("create_ec returned %u", ret);
/* set up page-fault portal */
create_pt(PT_SEL_PAGE_FAULT, __core_pd_sel, ec_sel,
Mtd(Mtd::QUAL | Mtd::ESP | Mtd::EIP),
(addr_t)page_fault_handler);
revoke(Obj_crd(PT_SEL_PAGE_FAULT, 0, Obj_crd::RIGHT_PT_CTRL));
/* startup portal for global core threads */
create_pt(PT_SEL_STARTUP, __core_pd_sel, ec_sel,
Mtd(Mtd::EIP | Mtd::ESP),
(addr_t)startup_handler);
revoke(Obj_crd(PT_SEL_STARTUP, 0, Obj_crd::RIGHT_PT_CTRL));
}
/**************
** Platform **
**************/
/**
 * Platform constructor - core's boot-time initialization
 *
 * Evaluates the hypervisor information page (HIP) passed by NOVA via the
 * initial stack pointer, sets up core's allocators (virtual, physical,
 * I/O memory, I/O ports, IRQs), installs core's page-fault handler, and
 * populates the ROM file system with the multi-boot modules.
 */
Platform::Platform() :
	_io_mem_alloc(core_mem_alloc()), _io_port_alloc(core_mem_alloc()),
	_irq_alloc(core_mem_alloc()),
	_vm_base(0x1000), _vm_size(0), _cpus(Affinity::Space(1,1))
{
	Hip *hip = (Hip *)__initial_sp;

	/* check for right API version */
	if (hip->api_version != 6)
		nova_die();

	/*
	 * Determine number of available CPUs
	 *
	 * XXX As of now, we assume a one-dimensional affinity space, ignoring
	 *     the y component of the affinity location. When adding support
	 *     for two-dimensional affinity spaces, look out and adjust the use of
	 *     'Platform_thread::_location' in 'platform_thread.cc'. Also look
	 *     at the 'Thread_base::start' function in core/thread_start.cc.
	 */
	_cpus = Affinity::Space(hip->cpus(), 1);

	/* register UTCB of main thread (page right below the HIP) */
	__main_thread_utcb = (Utcb *)(__initial_sp - get_page_size());

	/* set core pd selector */
	__core_pd_sel = hip->sel_exc;

	/* create lock used by capability allocator */
	Nova::create_sm(Nova::SM_SEL_EC, __core_pd_sel, 0);

	/* locally map the whole I/O port range */
	enum { ORDER_64K = 16 };
	map_local_one_to_one(__main_thread_utcb, Io_crd(0, ORDER_64K));

	/* map BDA region, console reads IO ports at BDA_VIRT_ADDR + 0x400 */
	enum { BDA_PHY = 0x0U, BDA_VIRT = 0x1U, BDA_VIRT_ADDR = 0x1000U };
	map_local_phys_to_virt(__main_thread_utcb,
	                       Mem_crd(BDA_PHY,  0, Rights(true, false, false)),
	                       Mem_crd(BDA_VIRT, 0, Rights(true, false, false)));

	/*
	 * Now that we can access the I/O ports for comport 0, printf works...
	 */

	/* sanity checks - the initial portal selectors must fit the reserved range */
	if (hip->sel_exc + 3 > NUM_INITIAL_PT_RESERVED) {
		printf("configuration error\n");
		nova_die();
	}

	/* configure virtual address spaces */
#ifdef __x86_64__
	_vm_size = 0x800000000000UL - _vm_base;
#else
	_vm_size = 0xc0000000UL - _vm_base;
#endif

	/* set up page fault handler for core - for debugging */
	init_core_page_fault_handler();

	if (verbose_boot_info) {
		printf("Hypervisor %s VMX\n", hip->has_feature_vmx() ? "features" : "does not feature");
		printf("Hypervisor %s SVM\n", hip->has_feature_svm() ? "features" : "does not feature");
		printf("Hypervisor reports %ux%u CPU%c - boot CPU is %lu\n",
		       _cpus.width(), _cpus.height(), _cpus.total() > 1 ? 's' : ' ', boot_cpu());
	}

	/* initialize core allocators */
	size_t const num_mem_desc = (hip->hip_length - hip->mem_desc_offset)
	                            / hip->mem_desc_size;

	if (verbose_boot_info)
		printf("Hypervisor info page contains %zd memory descriptors:\n", num_mem_desc);

	addr_t mem_desc_base = ((addr_t)hip + hip->mem_desc_offset);

	/*
	 * Define core's virtual address space
	 *
	 * NOTE(review): 'virt_end' is initialized from '_vm_size' (a size, not
	 * an end address), so the registered range is '_vm_size - _vm_base'
	 * bytes, one '_vm_base' short of the limit - confirm this is intended.
	 */
	addr_t virt_beg = _vm_base;
	addr_t virt_end = _vm_size;
	_core_mem_alloc.virt_alloc()->add_range(virt_beg, virt_end - virt_beg);

	/* exclude core image from core's virtual address allocator */
	addr_t core_virt_beg = trunc_page((addr_t)&_prog_img_beg);
	addr_t core_virt_end = round_page((addr_t)&_prog_img_end);
	size_t core_size     = core_virt_end - core_virt_beg;
	region_alloc()->remove_range(core_virt_beg, core_size);

	/* preserve Bios Data Area (BDA) in core's virtual address space */
	region_alloc()->remove_range(BDA_VIRT_ADDR, 0x1000);

	/* preserve context area in core's virtual address space */
	region_alloc()->remove_range(Native_config::context_area_virtual_base(),
	                             Native_config::context_area_virtual_size());

	/* exclude utcb of core pager thread + empty guard pages before and after */
	region_alloc()->remove_range(CORE_PAGER_UTCB_ADDR - get_page_size(),
	                             get_page_size() * 3);

	/* exclude utcb of echo thread + empty guard pages before and after */
	region_alloc()->remove_range(Echo::ECHO_UTCB_ADDR - get_page_size(),
	                             get_page_size() * 3);

	/* exclude utcb of main thread and hip + empty guard pages before and after */
	region_alloc()->remove_range((addr_t)__main_thread_utcb - get_page_size(),
	                             get_page_size() * 4);

	/* sanity checks - the preserved regions must not overlap the context area */
	addr_t check [] = {
		reinterpret_cast<addr_t>(__main_thread_utcb), CORE_PAGER_UTCB_ADDR,
		Echo::ECHO_UTCB_ADDR, BDA_VIRT_ADDR
	};

	for (unsigned i = 0; i < sizeof(check) / sizeof(check[0]); i++) {
		if (Native_config::context_area_virtual_base() <= check[i] &&
			check[i] < Native_config::context_area_virtual_base() +
			Native_config::context_area_virtual_size())
		{
			PERR("overlapping area - [%lx, %lx) vs %lx",
			     Native_config::context_area_virtual_base(),
			     Native_config::context_area_virtual_base() +
			     Native_config::context_area_virtual_size(), check[i]);
			nova_die();
		}
	}

	/* initialize core's physical-memory and I/O memory allocator */
	_io_mem_alloc.add_range(0, ~0xfffUL);
	Hip::Mem_desc *mem_desc = (Hip::Mem_desc *)mem_desc_base;
	for (unsigned i = 0; i < num_mem_desc; i++, mem_desc++) {
		if (mem_desc->type != Hip::Mem_desc::AVAILABLE_MEMORY) continue;

		if (verbose_boot_info)
			printf("detected physical memory: 0x%16llx - size: 0x%llx\n",
			       mem_desc->addr, mem_desc->size);

		/* skip regions above 4G on 32 bit, no op on 64 bit */
		if (mem_desc->addr > ~0UL) continue;

		/* available memory is rounded inward (base up, end down) */
		addr_t base = round_page(mem_desc->addr);
		size_t size;
		/* truncate size if base+size larger than natural 32/64 bit boundary */
		if (mem_desc->addr >= ~0UL - mem_desc->size + 1)
			size = trunc_page(~0UL - mem_desc->addr + 1);
		else
			size = trunc_page(mem_desc->addr + mem_desc->size) - base;

		if (verbose_boot_info)
			printf("use physical memory: 0x%16lx - size: 0x%zx\n", base, size);

		_io_mem_alloc.remove_range(base, size);
		ram_alloc()->add_range(base, size);
	}

	/* exclude all non-available memory from physical allocator */
	mem_desc = (Hip::Mem_desc *)mem_desc_base;
	for (unsigned i = 0; i < num_mem_desc; i++, mem_desc++) {
		if (mem_desc->type == Hip::Mem_desc::AVAILABLE_MEMORY) continue;

		/* skip regions above 4G on 32 bit, no op on 64 bit */
		if (mem_desc->addr > ~0UL) continue;

		/* reserved memory is rounded outward to whole pages */
		addr_t base = trunc_page(mem_desc->addr);
		size_t size;
		/* truncate size if base+size larger than natural 32/64 bit boundary */
		if (mem_desc->addr >= ~0UL - mem_desc->size + 1)
			size = round_page(~0UL - mem_desc->addr + 1);
		else
			size = round_page(mem_desc->addr + mem_desc->size) - base;

		_io_mem_alloc.add_range(base, size);
		ram_alloc()->remove_range(base, size);
	}

	/* needed as I/O memory by the VESA driver */
	_io_mem_alloc.add_range(0, 0x1000);
	ram_alloc()->remove_range(0, 0x1000);

	/* exclude pages holding multi-boot command lines from core allocators */
	mem_desc = (Hip::Mem_desc *)mem_desc_base;
	addr_t prev_cmd_line_page = ~0, curr_cmd_line_page = 0;
	for (unsigned i = 0; i < num_mem_desc; i++, mem_desc++) {
		if (mem_desc->type != Hip::Mem_desc::MULTIBOOT_MODULE) continue;
		if (!mem_desc->aux) continue;

		/* 'aux' points at the command line - reserve its page plus the next */
		curr_cmd_line_page = mem_desc->aux >> get_page_size_log2();
		if (curr_cmd_line_page == prev_cmd_line_page) continue;

		ram_alloc()->remove_range(curr_cmd_line_page << get_page_size_log2(),
		                          get_page_size() * 2);
		prev_cmd_line_page = curr_cmd_line_page;
	}

	/*
	 * From now on, it is safe to use the core allocators...
	 */

	/*
	 * Always allocate an extra page behind the command-line pointer. If it
	 * turns out that this page is unused, because the command line was short
	 * enough, the mapping is revoked and the virtual and physical regions
	 * are put back to the allocator.
	 */
	mem_desc = (Hip::Mem_desc *)mem_desc_base;
	prev_cmd_line_page = ~0UL, curr_cmd_line_page = 0;
	addr_t mapped_cmd_line = 0;

	/* core-local address and length of the most recently mapped command line */
	addr_t aux = ~0UL;
	size_t aux_len = 0;

	/* build ROM file system */
	for (unsigned i = 0; i < num_mem_desc; i++, mem_desc++) {
		if (mem_desc->type != Hip::Mem_desc::MULTIBOOT_MODULE) continue;
		if (!mem_desc->addr || !mem_desc->size || !mem_desc->aux) continue;

		/* convenience */
		addr_t const rom_mem_start = trunc_page(mem_desc->addr);
		addr_t const rom_mem_end   = round_page(mem_desc->addr + mem_desc->size);
		addr_t const rom_mem_size  = rom_mem_end - rom_mem_start;
		bool   const aux_in_rom_area = (rom_mem_start <= mem_desc->aux) &&
		                               (mem_desc->aux < rom_mem_end);

		/* map ROM + extra page for the case aux crosses page boundary */
		addr_t core_local_addr = _map_page(rom_mem_start >> get_page_size_log2(),
		                                   rom_mem_size >> get_page_size_log2(),
		                                   aux_in_rom_area);
		if (!core_local_addr) {
			PERR("could not map multi boot module");
			nova_die();
		}

		/* adjust core_local_addr of module if it was not page aligned */
		core_local_addr += mem_desc->addr - rom_mem_start;

		if (verbose_boot_info)
			printf("map multi-boot module: physical 0x%8lx -> [0x%8lx-0x%8lx)"
			       " - ", (addr_t)mem_desc->addr, (addr_t)core_local_addr,
			       (addr_t)(core_local_addr + mem_desc->size));

		char * name;
		if (aux_in_rom_area) {
			/* the command line resides within the mapped module image */
			aux = core_local_addr + (mem_desc->aux - mem_desc->addr);
			aux_len = strlen(reinterpret_cast<char const *>(aux)) + 1;

			/* if last page is unused, free it up */
			if (aux + aux_len <= round_page(core_local_addr) + rom_mem_size) {
				bool overlap = intersects(rom_mem_end,
				                          (Hip::Mem_desc *)mem_desc_base,
				                          num_mem_desc);
				_unmap_page(overlap ? 0 : rom_mem_end,
				            round_page(core_local_addr) + rom_mem_size, 1);
			}

			/* all behind rom module will be cleared, copy the command line */
			char *name_tmp = commandline_to_basename(reinterpret_cast<char *>(aux));
			unsigned name_tmp_size = aux_len - (name_tmp - reinterpret_cast<char *>(aux));
			name = new (core_mem_alloc()) char [name_tmp_size];
			memcpy(name, name_tmp, name_tmp_size);
		} else {
			/* the command line resides on separate pages that must be mapped */
			curr_cmd_line_page = mem_desc->aux >> get_page_size_log2();
			if (curr_cmd_line_page != prev_cmd_line_page) {
				int err = 1;
				if (curr_cmd_line_page == prev_cmd_line_page + 1) {
					/* try to allocate subsequent virtual region */
					addr_t const virt = mapped_cmd_line + get_page_size() * 2;
					addr_t const phys = round_page(mem_desc->aux);

					if (region_alloc()->alloc_addr(get_page_size(), virt).is_ok()) {
						/* we got the virtual region */
						err = map_local(__main_thread_utcb, phys, virt, 1,
						                Nova::Rights(true, false, false), true);
						if (!err) {
							/* we got the mapping */
							mapped_cmd_line += get_page_size();
							prev_cmd_line_page += 1;
						}
					}
				}

				/* allocate new pages if it was not successful beforehand */
				if (err) {
					/* check whether we can free up unused page */
					if (aux + aux_len <= mapped_cmd_line + get_page_size()) {
						addr_t phys = (curr_cmd_line_page + 1) << get_page_size_log2();
						phys = intersects(phys, (Hip::Mem_desc *)mem_desc_base,
						                  num_mem_desc) ? 0 : phys;
						_unmap_page(phys, mapped_cmd_line + get_page_size(), 1);
					}

					mapped_cmd_line = _map_page(curr_cmd_line_page, 1, true);
					prev_cmd_line_page = curr_cmd_line_page;

					if (!mapped_cmd_line) {
						PERR("could not map command line");
						nova_die();
					}
				}
			}
			aux = mapped_cmd_line + (mem_desc->aux - trunc_page(mem_desc->aux));
			aux_len = strlen(reinterpret_cast<char const *>(aux)) + 1;
			name = commandline_to_basename(reinterpret_cast<char *>(aux));
		}

		/* set zero out range */
		addr_t const zero_out = core_local_addr + mem_desc->size;

		/* zero out behind rom module */
		memset(reinterpret_cast<void *>(zero_out), 0, round_page(zero_out) -
		       zero_out);

		printf("%s\n", name);

		/* revoke write permission on rom module */
		unmap_local(__main_thread_utcb, trunc_page(core_local_addr),
		            rom_mem_size >> get_page_size_log2(), true,
		            Nova::Rights(false, true, false));

		/* create rom module */
		Rom_module *rom_module = new (core_mem_alloc())
			Rom_module(core_local_addr, mem_desc->size, name);
		_rom_fs.insert(rom_module);
	}

	/* export hypervisor info page as ROM module */
	_rom_fs.insert(new (core_mem_alloc())
	               Rom_module((addr_t)hip, get_page_size(), "hypervisor_info_page"));

	/* I/O port allocator (only meaningful for x86) */
	_io_port_alloc.add_range(0, 0x10000);

	/* IRQ allocator */
	_irq_alloc.add_range(0, hip->sel_gsi - 1);

	/* NOTE(review): derives the number of CPU descriptors in the HIP -
	   presumably the base selector for GSI semaphores; confirm against
	   the NOVA HIP layout */
	_gsi_base_sel = (hip->mem_desc_offset - hip->cpu_desc_offset) / hip->cpu_desc_size;

	/* remap main utcb to default utcb address */
	if (map_local(__main_thread_utcb, (addr_t)__main_thread_utcb,
	              (addr_t)main_thread_utcb(), 1, Rights(true, true, false))) {
		PERR("could not remap utcb of main thread");
		nova_die();
	}

	if (verbose_boot_info) {
		printf(":virt_alloc: ");   _core_mem_alloc.virt_alloc()->raw()->dump_addr_tree();
		printf(":phys_alloc: ");   _core_mem_alloc.phys_alloc()->raw()->dump_addr_tree();
		printf(":io_mem_alloc: "); _io_mem_alloc.raw()->dump_addr_tree();
	}
}
/****************************************
** Support for core memory management **
****************************************/
/**
 * Map physical memory into core's virtual address space
 *
 * \param virt_addr  core-local destination address
 * \param phys_addr  physical source address
 * \param size_log2  log2 of the region size in bytes
 *
 * \return true on success
 */
bool Core_mem_allocator::Mapped_mem_allocator::_map_local(addr_t virt_addr,
                                                          addr_t phys_addr,
                                                          unsigned size_log2)
{
	/*
	 * Propagate the mapping result instead of unconditionally reporting
	 * success - map_local returns 0 on success, an error code otherwise.
	 */
	int const res = map_local((Utcb *)Thread_base::myself()->utcb(), phys_addr,
	                          virt_addr, 1 << (size_log2 - get_page_size_log2()),
	                          Rights(true, true, true), true);
	return res == 0;
}
/********************************
** Generic platform interface **
********************************/
/* core never exits - block the caller forever */
void Platform::wait_for_exit() { sleep_forever(); }
/* core has no parent to report an exit value to - intentionally a no-op */
void Core_parent::exit(int exit_value) { }